xref: /src/contrib/ncurses/ncurses/tinfo/make_hash.c (revision 68ad2b0d7af2a3571c4abac9afa712f9b09b721c)
1 /****************************************************************************
2  * Copyright 2018-2024,2025 Thomas E. Dickey                                *
3  * Copyright 2009-2013,2017 Free Software Foundation, Inc.                  *
4  *                                                                          *
5  * Permission is hereby granted, free of charge, to any person obtaining a  *
6  * copy of this software and associated documentation files (the            *
7  * "Software"), to deal in the Software without restriction, including      *
8  * without limitation the rights to use, copy, modify, merge, publish,      *
9  * distribute, distribute with modifications, sublicense, and/or sell       *
10  * copies of the Software, and to permit persons to whom the Software is    *
11  * furnished to do so, subject to the following conditions:                 *
12  *                                                                          *
13  * The above copyright notice and this permission notice shall be included  *
14  * in all copies or substantial portions of the Software.                   *
15  *                                                                          *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS  *
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF               *
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.   *
19  * IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,   *
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR    *
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR    *
22  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.                               *
23  *                                                                          *
24  * Except as contained in this notice, the name(s) of the above copyright   *
25  * holders shall not be used in advertising or otherwise to promote the     *
26  * sale, use or other dealings in this Software without prior written       *
27  * authorization.                                                           *
28  ****************************************************************************/
29 
30 /****************************************************************************
31  *  Author: Zeyd M. Ben-Halim <zmbenhal@netcom.com> 1992,1995               *
32  *     and: Eric S. Raymond <esr@snark.thyrsus.com>                         *
33  *     and: Thomas E. Dickey                        1996-on                 *
34  ****************************************************************************/
35 
36 /*
37  *	make_hash.c --- build-time program for constructing comp_captab.c
38  */
39 
40 #include <build.priv.h>
41 
42 #include <tic.h>
43 #include <hashsize.h>
44 
45 #include <ctype.h>
46 
47 MODULE_ID("$Id: make_hash.c,v 1.37 2025/10/18 15:39:43 tom Exp $")
48 
49 /*
50  *	_nc_make_hash_table()
51  *
52  *	Takes the entries in table[] and hashes them into hash_table[]
53  *	by name.  There are CAPTABSIZE entries in the predefined table[]
54  *	and HASHTABSIZE slots in hash_table[].
55  *
56  */
57 
58 #undef MODULE_ID
59 #define MODULE_ID(id)		/*nothing */
60 #include <tinfo/doalloc.c>
61 
62 #define L_PAREN "("
63 #define R_PAREN ")"
64 #define L_BRACE "{"
65 #define R_BRACE "}"
66 
67 static const char *typenames[] =
68 {"BOOLEAN", "NUMBER", "STRING"};
69 
70 static void
failed(const char * s)71 failed(const char *s)
72 {
73     perror(s);
74     exit(EXIT_FAILURE);
75 }
76 
77 static char *
strmalloc(char * s)78 strmalloc(char *s)
79 {
80     size_t need = strlen(s) + 1;
81     char *result = malloc(need);
82     if (result == NULL)
83 	failed("strmalloc");
84     _nc_STRCPY(result, s, need);
85     return result;
86 }
87 
88 /*
89  *	int hash_function(string)
90  *
91  *	Computes the hashing function on the given string.
92  *
93  *	The current hash function is the sum of each consecutive pair
94  *	of characters, taken as two-byte integers, mod HASHTABSIZE.
95  *
96  */
97 
98 static int
hash_function(const char * string)99 hash_function(const char *string)
100 {
101     long sum = 0;
102 
103     while (*string) {
104 	sum += (long) (UChar(*string) + (UChar(*(string + 1)) << 8));
105 	string++;
106     }
107 
108     return (int) (sum % HASHTABSIZE);
109 }
110 
111 #define UNUSED -1
112 
113 static void
_nc_make_hash_table(struct user_table_entry * table,HashValue * hash_table,unsigned tablesize)114 _nc_make_hash_table(struct user_table_entry *table,
115 		    HashValue * hash_table,
116 		    unsigned tablesize)
117 {
118     unsigned i;
119     int hashvalue;
120     int collisions = 0;
121 
122     for (i = 0; i < HASHTABSIZE; i++) {
123 	hash_table[i] = UNUSED;
124     }
125     for (i = 0; i < tablesize; i++) {
126 	hashvalue = hash_function(table[i].ute_name);
127 
128 	if (hash_table[hashvalue] >= 0)
129 	    collisions++;
130 
131 	if (hash_table[hashvalue] != UNUSED) {
132 	    table[i].ute_link = hash_table[hashvalue];
133 	}
134 	hash_table[hashvalue] = (HashValue) i;
135     }
136 
137     printf("/* %d collisions out of %d entries */\n", collisions, tablesize);
138 }
139 
140 /*
141  * This filter reads from standard input a list of tab-delimited columns,
142  * (e.g., from Caps.filtered) computes the hash-value of a specified column and
143  * writes the hashed tables to standard output.
144  *
145  * By compiling the hash table at build time, we're able to make the entire
146  * set of terminfo and termcap tables readonly (and also provide some runtime
147  * performance enhancement).
148  */
149 
150 #define MAX_COLUMNS BUFSIZ	/* this _has_ to be worst-case */
151 
152 static int
count_columns(char ** list)153 count_columns(char **list)
154 {
155     int result = 0;
156     if (list != NULL) {
157 	while (*list++) {
158 	    ++result;
159 	}
160     }
161     return result;
162 }
163 
164 static char **
parse_columns(char * buffer)165 parse_columns(char *buffer)
166 {
167     static char **list;
168 
169     int col = 0;
170 
171     if (buffer == NULL) {
172 	free(list);
173 	list = NULL;
174 	return NULL;
175     }
176 
177     if (*buffer != '#') {
178 	if (list == NULL) {
179 	    list = typeCalloc(char *, (MAX_COLUMNS + 1));
180 	    if (list == NULL)
181 		return (NULL);
182 	}
183 	while (*buffer != '\0') {
184 	    char *s;
185 	    for (s = buffer; (*s != '\0') && !isspace(UChar(*s)); s++)
186 		/*EMPTY */ ;
187 	    if (s != buffer) {
188 		char mark = *s;
189 		*s = '\0';
190 		if ((s - buffer) > 1
191 		    && (*buffer == '"')
192 		    && (s[-1] == '"')) {	/* strip the quotes */
193 		    assert(s > buffer + 1);
194 		    s[-1] = '\0';
195 		    buffer++;
196 		}
197 		list[col] = buffer;
198 		col++;
199 		if (mark == '\0')
200 		    break;
201 		while (*++s && isspace(UChar(*s)))
202 		    /*EMPTY */ ;
203 		buffer = s;
204 	    } else
205 		break;
206 	}
207     }
208     return col ? list : NULL;
209 }
210 
211 #define SetType(n,t) \
212 	if (is_user) \
213 	    name_table[n].ute_type |= (int)(1 << (t)); \
214 	else \
215 	    name_table[n].ute_type = (t)
216 
217 #define GetType(n) \
218 	(is_user \
219 	 ? get_type(name_table[n].ute_type) \
220 	 : typenames[name_table[n].ute_type])
221 
222 static char *
get_type(int type_mask)223 get_type(int type_mask)
224 {
225     static char result[80];
226     unsigned n;
227     _nc_STRCPY(result, L_PAREN, sizeof(result));
228     for (n = 0; n < 3; ++n) {
229 	if ((1 << n) & type_mask) {
230 	    size_t want = 5 + strlen(typenames[n]);
231 	    if (want > sizeof(result)) {
232 		fprintf(stderr, "Buffer is not large enough for %s + %s\n",
233 			result, typenames[n]);
234 		exit(EXIT_FAILURE);
235 	    }
236 	    if (result[1])
237 		_nc_STRCAT(result, "|", sizeof(result));
238 	    _nc_STRCAT(result, "1<<", sizeof(result));
239 	    _nc_STRCAT(result, typenames[n], sizeof(result));
240 	}
241     }
242     _nc_STRCAT(result, R_PAREN, sizeof(result));
243     return result;
244 }
245 
246 int
main(int argc,char ** argv)247 main(int argc, char **argv)
248 {
249     unsigned tablesize = CAPTABSIZE;
250     struct user_table_entry *name_table;
251     HashValue *hash_table;
252     const char *root_name;
253     int column = 0;
254     int bigstring = 0;
255     unsigned n;
256     unsigned nn;
257     unsigned tableused = 0;
258     bool is_user;
259     const char *table_name;
260     char buffer[BUFSIZ];
261 
262     short BoolCount = 0;
263     short NumCount = 0;
264     short StrCount = 0;
265 
266     if (argc == 2 && !strcmp(argv[1], "-?"))
267 	return EXIT_SUCCESS;
268 
269     name_table = typeCalloc(struct user_table_entry, tablesize);
270     hash_table = typeCalloc(HashValue, HASHTABSIZE);
271 
272     /* The first argument is the column-number (starting with 0).
273      * The second is the root name of the tables to generate.
274      */
275     if (argc <= 3
276 	|| (column = atoi(argv[1])) <= 0
277 	|| (column >= MAX_COLUMNS)
278 	|| *(root_name = argv[2]) == 0
279 	|| (bigstring = atoi(argv[3])) < 0
280 	|| name_table == NULL
281 	|| hash_table == NULL) {
282 	fprintf(stderr, "usage: make_hash column root_name bigstring\n");
283 	exit(EXIT_FAILURE);
284     }
285     is_user = (*root_name == 'u');
286     table_name = (is_user ? "user" : "name");
287 
288     /*
289      * Read the table into our arrays.
290      */
291     for (n = 0; (n < tablesize) && fgets(buffer, BUFSIZ, stdin);) {
292 	char **list;
293 	char *nlp = strchr(buffer, '\n');
294 	if (nlp)
295 	    *nlp = '\0';
296 	else
297 	    buffer[sizeof(buffer) - 2] = '\0';
298 	list = parse_columns(buffer);
299 	if (list == NULL)	/* blank or comment */
300 	    continue;
301 	if (is_user) {
302 	    if (strcmp(list[0], "userdef"))
303 		continue;
304 	} else if (!strcmp(list[0], "userdef")) {
305 	    continue;
306 	}
307 	if (column < 0 || column > count_columns(list)) {
308 	    fprintf(stderr, "expected %d columns, have %d:\n%s\n",
309 		    column,
310 		    count_columns(list),
311 		    buffer);
312 	    exit(EXIT_FAILURE);
313 	}
314 	nn = tableused;
315 	if (is_user) {
316 	    unsigned j;
317 	    for (j = 0; j < tableused; ++j) {
318 		if (!strcmp(list[column], name_table[j].ute_name)) {
319 		    nn = j;
320 		    break;
321 		}
322 	    }
323 	}
324 	if (nn == tableused) {
325 	    name_table[nn].ute_link = -1;	/* end-of-hash */
326 	    name_table[nn].ute_name = strmalloc(list[column]);
327 	    ++tableused;
328 	}
329 
330 	if (!strcmp(list[2], "bool")) {
331 	    SetType(nn, BOOLEAN);
332 	    name_table[nn].ute_index = BoolCount++;
333 	} else if (!strcmp(list[2], "num")) {
334 	    SetType(nn, NUMBER);
335 	    name_table[nn].ute_index = NumCount++;
336 	} else if (!strcmp(list[2], "str")) {
337 	    SetType(nn, STRING);
338 	    name_table[nn].ute_index = StrCount++;
339 	    if (is_user) {
340 		if (*list[3] != '-') {
341 		    unsigned j;
342 		    name_table[nn].ute_argc = (unsigned) strlen(list[3]);
343 		    for (j = 0; j < name_table[nn].ute_argc; ++j) {
344 			if (list[3][j] == 's') {
345 			    name_table[nn].ute_args |= (1U << j);
346 			}
347 		    }
348 		}
349 	    }
350 	} else {
351 	    fprintf(stderr, "Unknown type: %s\n", list[2]);
352 	    exit(EXIT_FAILURE);
353 	}
354 	n++;
355     }
356     if (tablesize > tableused)
357 	tablesize = tableused;
358     _nc_make_hash_table(name_table, hash_table, tablesize);
359 
360     /*
361      * Write the compiled tables to standard output
362      */
363     if (bigstring) {
364 	int len = 0;
365 	int nxt;
366 
367 	printf("static const char %s_names_text[] = \\\n", root_name);
368 	for (n = 0; n < tablesize; n++) {
369 	    nxt = (int) strlen(name_table[n].ute_name) + 5;
370 	    if (nxt + len > 72) {
371 		printf("\\\n");
372 		len = 0;
373 	    }
374 	    printf("\"%s\\0\" ", name_table[n].ute_name);
375 	    len += nxt;
376 	}
377 	printf(";\n\n");
378 
379 	len = 0;
380 	printf("static %s_table_data const %s_names_data[] =\n",
381 	       table_name,
382 	       root_name);
383 	printf("%s\n", L_BRACE);
384 	for (n = 0; n < tablesize; n++) {
385 	    printf("\t%s %15d,\t%10s,", L_BRACE, len, GetType(n));
386 	    if (is_user)
387 		printf("\t%d,%d,",
388 		       name_table[n].ute_argc,
389 		       name_table[n].ute_args);
390 	    printf("\t%3d, %3d %s%s\n",
391 		   name_table[n].ute_index,
392 		   name_table[n].ute_link,
393 		   R_BRACE,
394 		   n < tablesize - 1 ? "," : "");
395 	    len += (int) strlen(name_table[n].ute_name) + 1;
396 	}
397 	printf("%s;\n\n", R_BRACE);
398 	printf("static struct %s_table_entry *_nc_%s_table = NULL;\n\n",
399 	       table_name,
400 	       root_name);
401     } else {
402 
403 	printf("static struct %s_table_entry const _nc_%s_table[] =\n",
404 	       table_name,
405 	       root_name);
406 	printf("%s\n", L_BRACE);
407 	for (n = 0; n < tablesize; n++) {
408 	    _nc_SPRINTF(buffer, _nc_SLIMIT(sizeof(buffer)) "\"%s\"",
409 			name_table[n].ute_name);
410 	    printf("\t%s %15s,\t%10s,", L_BRACE, buffer, GetType(n));
411 	    if (is_user)
412 		printf("\t%d,%d,",
413 		       name_table[n].ute_argc,
414 		       name_table[n].ute_args);
415 	    printf("\t%3d, %3d %s%s\n",
416 		   name_table[n].ute_index,
417 		   name_table[n].ute_link,
418 		   R_BRACE,
419 		   n < tablesize - 1 ? "," : "");
420 	}
421 	printf("%s;\n\n", R_BRACE);
422     }
423 
424     printf("static const HashValue _nc_%s_hash_table[%d] =\n",
425 	   root_name,
426 	   HASHTABSIZE + 1);
427     printf("%s\n", L_BRACE);
428     for (n = 0; n < HASHTABSIZE; n++) {
429 	printf("\t%3d,\n", hash_table[n]);
430     }
431     printf("\t0\t/* base-of-table */\n");
432     printf("%s;\n\n", R_BRACE);
433 
434     if (!is_user) {
435 	printf("#if (BOOLCOUNT!=%d)||(NUMCOUNT!=%d)||(STRCOUNT!=%d)\n",
436 	       BoolCount, NumCount, StrCount);
437 	printf("#error\t--> term.h and comp_captab.c disagree about the <--\n");
438 	printf("#error\t--> numbers of booleans, numbers and/or strings <--\n");
439 	printf("#endif\n\n");
440     }
441 
442     free(hash_table);
443     for (n = 0; (n < tablesize); ++n) {
444 	free((void *) name_table[n].ute_name);
445     }
446     free(name_table);
447     parse_columns(NULL);
448 
449     return EXIT_SUCCESS;
450 }
451