xref: /openbsd/usr.bin/lex/tables.c (revision d9a51c35)
1 /* $OpenBSD: tables.c,v 1.5 2022/12/26 19:16:01 jmc Exp $ */
2 
3 /*  tables.c - tables serialization code
4  *
5  *  Copyright (c) 1990 The Regents of the University of California.
6  *  All rights reserved.
7  *
8  *  This code is derived from software contributed to Berkeley by
9  *  Vern Paxson.
10  *
11  *  The United States Government has rights in this work pursuant
12  *  to contract no. DE-AC03-76SF00098 between the United States
13  *  Department of Energy and the University of California.
14  *
15  *  This file is part of flex.
16  *
17  *  Redistribution and use in source and binary forms, with or without
18  *  modification, are permitted provided that the following conditions
19  *  are met:
20  *
21  *  1. Redistributions of source code must retain the above copyright
22  *     notice, this list of conditions and the following disclaimer.
23  *  2. Redistributions in binary form must reproduce the above copyright
24  *     notice, this list of conditions and the following disclaimer in the
25  *     documentation and/or other materials provided with the distribution.
26  *
27  *  Neither the name of the University nor the names of its contributors
28  *  may be used to endorse or promote products derived from this software
29  *  without specific prior written permission.
30  *
31  *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
32  *  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
33  *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
34  *  PURPOSE.
35  */
36 
37 
38 #include "flexdef.h"
39 #include "tables.h"
40 
/** Map an element width in bytes to the matching YYTD_DATA* flag.
 *  @param n element width; expected to be one of {1,2,4}
 *  @return YYTD_DATA8 or YYTD_DATA16 when n equals the size of the
 *          corresponding flex integer type, YYTD_DATA32 for anything else.
 */
#define BYTES2TFLAG(n) \
	((n) == sizeof (flex_int8_t) ? YYTD_DATA8 : \
	 (n) == sizeof (flex_int16_t) ? YYTD_DATA16 : \
	 YYTD_DATA32)
51 
/** Strip the element-width bits (YYTD_DATA8/16/32) from a flag word.
 *  @param flg the flag word to filter
 *  @return flg with every YYTD_DATA* bit cleared; other bits are untouched
 */
#define TFLAGS_CLRDATA(flg) \
	((flg) & ~(YYTD_DATA32 | YYTD_DATA16 | YYTD_DATA8))
56 
57 int     yytbl_write32 (struct yytbl_writer *wr, flex_uint32_t v);
58 int     yytbl_write16 (struct yytbl_writer *wr, flex_uint16_t v);
59 int     yytbl_write8 (struct yytbl_writer *wr, flex_uint8_t v);
60 int     yytbl_writen (struct yytbl_writer *wr, void *v, flex_int32_t len);
61 static flex_int32_t yytbl_data_geti (const struct yytbl_data *tbl, int i);
62 
63 /** Initialize the table writer.
64  *  @param wr an uninitialized writer
65  *  @param the output file
66  *  @return 0 on success
67  */
yytbl_writer_init(struct yytbl_writer * wr,FILE * out)68 int yytbl_writer_init (struct yytbl_writer *wr, FILE * out)
69 {
70 	wr->out = out;
71 	wr->total_written = 0;
72 	return 0;
73 }
74 
75 /** Initialize a table header.
76  *  @param th  The uninitialized structure
77  *  @param version_str the  version string
78  *  @param name the name of this table set
79  */
yytbl_hdr_init(struct yytbl_hdr * th,const char * version_str,const char * name)80 int yytbl_hdr_init (struct yytbl_hdr *th, const char *version_str,
81 		    const char *name)
82 {
83 	memset (th, 0, sizeof (struct yytbl_hdr));
84 
85 	th->th_magic = YYTBL_MAGIC;
86 	th->th_hsize = 14 + strlen (version_str) + 1 + strlen (name) + 1;
87 	th->th_hsize += yypad64 (th->th_hsize);
88 	th->th_ssize = 0;	// Not known at this point.
89 	th->th_flags = 0;
90 	th->th_version = copy_string (version_str);
91 	th->th_name = copy_string (name);
92 	return 0;
93 }
94 
95 /** Allocate and initialize a table data structure.
96  *  @param tbl a pointer to an uninitialized table
97  *  @param id  the table identifier
98  *  @return 0 on success
99  */
yytbl_data_init(struct yytbl_data * td,enum yytbl_id id)100 int yytbl_data_init (struct yytbl_data *td, enum yytbl_id id)
101 {
102 
103 	memset (td, 0, sizeof (struct yytbl_data));
104 	td->td_id = id;
105 	td->td_flags = YYTD_DATA32;
106 	return 0;
107 }
108 
109 /** Clean up table and data array.
110  *  @param td will be destroyed
111  *  @return 0 on success
112  */
yytbl_data_destroy(struct yytbl_data * td)113 int yytbl_data_destroy (struct yytbl_data *td)
114 {
115 	free(td->td_data);
116 	td->td_data = 0;
117 	free (td);
118 	return 0;
119 }
120 
121 /** Write enough padding to bring the file pointer to a 64-bit boundary. */
yytbl_write_pad64(struct yytbl_writer * wr)122 static int yytbl_write_pad64 (struct yytbl_writer *wr)
123 {
124 	int     pad, bwritten = 0;
125 
126 	pad = yypad64 (wr->total_written);
127 	while (pad-- > 0)
128 		if (yytbl_write8 (wr, 0) < 0)
129 			return -1;
130 		else
131 			bwritten++;
132 	return bwritten;
133 }
134 
135 /** write the header.
136  *  @param out the output stream
137  *  @param th table header to be written
138  *  @return -1 on error, or bytes written on success.
139  */
yytbl_hdr_fwrite(struct yytbl_writer * wr,const struct yytbl_hdr * th)140 int yytbl_hdr_fwrite (struct yytbl_writer *wr, const struct yytbl_hdr *th)
141 {
142 	int  sz, rv;
143 	int     bwritten = 0;
144 
145 	if (yytbl_write32 (wr, th->th_magic) < 0
146 	    || yytbl_write32 (wr, th->th_hsize) < 0)
147 		flex_die (_("th_magic|th_hsize write32 failed"));
148 	bwritten += 8;
149 
150 	if (fgetpos (wr->out, &(wr->th_ssize_pos)) != 0)
151 		flex_die (_("fgetpos failed"));
152 
153 	if (yytbl_write32 (wr, th->th_ssize) < 0
154 	    || yytbl_write16 (wr, th->th_flags) < 0)
155 		flex_die (_("th_ssize|th_flags write failed"));
156 	bwritten += 6;
157 
158 	sz = strlen (th->th_version) + 1;
159 	if ((rv = yytbl_writen (wr, th->th_version, sz)) != sz)
160 		flex_die (_("th_version written failed"));
161 	bwritten += rv;
162 
163 	sz = strlen (th->th_name) + 1;
164 	if ((rv = yytbl_writen (wr, th->th_name, sz)) != sz)
165 		flex_die (_("th_name written failed"));
166 	bwritten += rv;
167 
168 	/* add padding */
169 	if ((rv = yytbl_write_pad64 (wr)) < 0)
170 		flex_die (_("pad64 failed"));
171 	bwritten += rv;
172 
173 	/* Sanity check */
174 	if (bwritten != (int) th->th_hsize)
175 		flex_die (_("pad64 failed"));
176 
177 	return bwritten;
178 }
179 
180 
181 /** Write this table.
182  *  @param out the file writer
183  *  @param td table data to be written
184  *  @return -1 on error, or bytes written on success.
185  */
yytbl_data_fwrite(struct yytbl_writer * wr,struct yytbl_data * td)186 int yytbl_data_fwrite (struct yytbl_writer *wr, struct yytbl_data *td)
187 {
188 	int  rv;
189 	flex_int32_t bwritten = 0;
190 	flex_int32_t i, total_len;
191 	fpos_t  pos;
192 
193 	if ((rv = yytbl_write16 (wr, td->td_id)) < 0)
194 		return -1;
195 	bwritten += rv;
196 
197 	if ((rv = yytbl_write16 (wr, td->td_flags)) < 0)
198 		return -1;
199 	bwritten += rv;
200 
201 	if ((rv = yytbl_write32 (wr, td->td_hilen)) < 0)
202 		return -1;
203 	bwritten += rv;
204 
205 	if ((rv = yytbl_write32 (wr, td->td_lolen)) < 0)
206 		return -1;
207 	bwritten += rv;
208 
209 	total_len = yytbl_calc_total_len (td);
210 	for (i = 0; i < total_len; i++) {
211 		switch (YYTDFLAGS2BYTES (td->td_flags)) {
212 		case sizeof (flex_int8_t):
213 			rv = yytbl_write8 (wr, yytbl_data_geti (td, i));
214 			break;
215 		case sizeof (flex_int16_t):
216 			rv = yytbl_write16 (wr, yytbl_data_geti (td, i));
217 			break;
218 		case sizeof (flex_int32_t):
219 			rv = yytbl_write32 (wr, yytbl_data_geti (td, i));
220 			break;
221 		default:
222 			flex_die (_("invalid td_flags detected"));
223 		}
224 		if (rv < 0) {
225 			flex_die (_("error while writing tables"));
226 			return -1;
227 		}
228 		bwritten += rv;
229 	}
230 
231 	/* Sanity check */
232 	if (bwritten != (int) (12 + total_len * YYTDFLAGS2BYTES (td->td_flags))) {
233 		flex_die (_("insanity detected"));
234 		return -1;
235 	}
236 
237 	/* add padding */
238 	if ((rv = yytbl_write_pad64 (wr)) < 0) {
239 		flex_die (_("pad64 failed"));
240 		return -1;
241 	}
242 	bwritten += rv;
243 
244 	/* Now go back and update the th_hsize member */
245 	if (fgetpos (wr->out, &pos) != 0
246 	    || fsetpos (wr->out, &(wr->th_ssize_pos)) != 0
247 	    || yytbl_write32 (wr, wr->total_written) < 0
248 	    || fsetpos (wr->out, &pos)) {
249 		flex_die (_("get|set|fwrite32 failed"));
250 		return -1;
251 	}
252 	else
253 		/* Don't count the int we just wrote. */
254 		wr->total_written -= sizeof (flex_int32_t);
255 	return bwritten;
256 }
257 
258 /** Write n bytes.
259  *  @param  wr   the table writer
260  *  @param  v    data to be written
261  *  @param  len  number of bytes
262  *  @return  -1 on error. number of bytes written on success.
263  */
yytbl_writen(struct yytbl_writer * wr,void * v,flex_int32_t len)264 int yytbl_writen (struct yytbl_writer *wr, void *v, flex_int32_t len)
265 {
266 	int  rv;
267 
268 	rv = fwrite (v, 1, len, wr->out);
269 	if (rv != len)
270 		return -1;
271 	wr->total_written += len;
272 	return len;
273 }
274 
275 /** Write four bytes in network byte order
276  *  @param  wr  the table writer
277  *  @param  v    a dword in host byte order
278  *  @return  -1 on error. number of bytes written on success.
279  */
yytbl_write32(struct yytbl_writer * wr,flex_uint32_t v)280 int yytbl_write32 (struct yytbl_writer *wr, flex_uint32_t v)
281 {
282 	flex_uint32_t vnet;
283 	size_t  bytes, rv;
284 
285 	vnet = htonl (v);
286 	bytes = sizeof (flex_uint32_t);
287 	rv = fwrite (&vnet, bytes, 1, wr->out);
288 	if (rv != 1)
289 		return -1;
290 	wr->total_written += bytes;
291 	return bytes;
292 }
293 
294 /** Write two bytes in network byte order.
295  *  @param  wr  the table writer
296  *  @param  v    a word in host byte order
297  *  @return  -1 on error. number of bytes written on success.
298  */
yytbl_write16(struct yytbl_writer * wr,flex_uint16_t v)299 int yytbl_write16 (struct yytbl_writer *wr, flex_uint16_t v)
300 {
301 	flex_uint16_t vnet;
302 	size_t  bytes, rv;
303 
304 	vnet = htons (v);
305 	bytes = sizeof (flex_uint16_t);
306 	rv = fwrite (&vnet, bytes, 1, wr->out);
307 	if (rv != 1)
308 		return -1;
309 	wr->total_written += bytes;
310 	return bytes;
311 }
312 
313 /** Write a byte.
314  *  @param  wr  the table writer
315  *  @param  v    the value to be written
316  *  @return  -1 on error. number of bytes written on success.
317  */
yytbl_write8(struct yytbl_writer * wr,flex_uint8_t v)318 int yytbl_write8 (struct yytbl_writer *wr, flex_uint8_t v)
319 {
320 	size_t  bytes, rv;
321 
322 	bytes = sizeof (flex_uint8_t);
323 	rv = fwrite (&v, bytes, 1, wr->out);
324 	if (rv != 1)
325 		return -1;
326 	wr->total_written += bytes;
327 	return bytes;
328 }
329 
330 /** Extract data element [i] from array data tables treated as a single flat array of integers.
331  * Be careful for 2-dimensional arrays or for YYTD_ID_TRANSITION, which is an array
332  * of structs.
333  * @param tbl data table
334  * @param i index into array.
335  * @return data[i]
336  */
yytbl_data_geti(const struct yytbl_data * tbl,int i)337 static flex_int32_t yytbl_data_geti (const struct yytbl_data *tbl, int i)
338 {
339 
340 	switch (YYTDFLAGS2BYTES (tbl->td_flags)) {
341 	case sizeof (flex_int8_t):
342 		return ((flex_int8_t *) (tbl->td_data))[i];
343 	case sizeof (flex_int16_t):
344 		return ((flex_int16_t *) (tbl->td_data))[i];
345 	case sizeof (flex_int32_t):
346 		return ((flex_int32_t *) (tbl->td_data))[i];
347 	default:
348 		flex_die (_("invalid td_flags detected"));
349 		break;
350 	}
351 	return 0;
352 }
353 
354 /** Set data element [i] in array data tables treated as a single flat array of integers.
355  * Be careful for 2-dimensional arrays or for YYTD_ID_TRANSITION, which is an array
356  * of structs.
357  * @param tbl data table
358  * @param i index into array.
359  * @param newval new value for data[i]
360  */
yytbl_data_seti(const struct yytbl_data * tbl,int i,flex_int32_t newval)361 static void yytbl_data_seti (const struct yytbl_data *tbl, int i,
362 			     flex_int32_t newval)
363 {
364 
365 	switch (YYTDFLAGS2BYTES (tbl->td_flags)) {
366 	case sizeof (flex_int8_t):
367 		((flex_int8_t *) (tbl->td_data))[i] = (flex_int8_t) newval;
368 		break;
369 	case sizeof (flex_int16_t):
370 		((flex_int16_t *) (tbl->td_data))[i] = (flex_int16_t) newval;
371 		break;
372 	case sizeof (flex_int32_t):
373 		((flex_int32_t *) (tbl->td_data))[i] = (flex_int32_t) newval;
374 		break;
375 	default:
376 		flex_die (_("invalid td_flags detected"));
377 		break;
378 	}
379 }
380 
381 /** Calculate the number of bytes  needed to hold the largest
382  *  absolute value in this data array.
383  *  @param tbl  the data table
384  *  @return sizeof(n) where n in {flex_int8_t, flex_int16_t, flex_int32_t}
385  */
min_int_size(struct yytbl_data * tbl)386 static size_t min_int_size (struct yytbl_data *tbl)
387 {
388 	flex_uint32_t i, total_len;
389 	flex_int32_t max = 0;
390 
391 	total_len = yytbl_calc_total_len (tbl);
392 
393 	for (i = 0; i < total_len; i++) {
394 		flex_int32_t n;
395 
396 		n = abs (yytbl_data_geti (tbl, i));
397 
398 		if (n > max)
399 			max = n;
400 	}
401 
402 	if (max <= INT8_MAX)
403 		return sizeof (flex_int8_t);
404 	else if (max <= INT16_MAX)
405 		return sizeof (flex_int16_t);
406 	else
407 		return sizeof (flex_int32_t);
408 }
409 
410 /** Transform data to smallest possible of (int32, int16, int8).
411  * For example, we may have generated an int32 array due to user options
412  * (e.g., %option align), but if the maximum value in that array
413  * is 80 (for example), then we can serialize it with only 1 byte per int.
414  * This is NOT the same as compressed DFA tables. We're just trying
415  * to save storage space here.
416  *
417  * @param tbl the table to be compressed
418  */
yytbl_data_compress(struct yytbl_data * tbl)419 void yytbl_data_compress (struct yytbl_data *tbl)
420 {
421 	flex_int32_t i, newsz, total_len;
422 	struct yytbl_data newtbl;
423 
424 	yytbl_data_init (&newtbl, tbl->td_id);
425 	newtbl.td_hilen = tbl->td_hilen;
426 	newtbl.td_lolen = tbl->td_lolen;
427 	newtbl.td_flags = tbl->td_flags;
428 
429 	newsz = min_int_size (tbl);
430 
431 
432 	if (newsz == (int) YYTDFLAGS2BYTES (tbl->td_flags))
433 		/* No change in this table needed. */
434 		return;
435 
436 	if (newsz > (int) YYTDFLAGS2BYTES (tbl->td_flags)) {
437 		flex_die (_("detected negative compression"));
438 		return;
439 	}
440 
441 	total_len = yytbl_calc_total_len (tbl);
442 	newtbl.td_data = calloc (total_len, newsz);
443 	newtbl.td_flags =
444 		TFLAGS_CLRDATA (newtbl.td_flags) | BYTES2TFLAG (newsz);
445 
446 	for (i = 0; i < total_len; i++) {
447 		flex_int32_t g;
448 
449 		g = yytbl_data_geti (tbl, i);
450 		yytbl_data_seti (&newtbl, i, g);
451 	}
452 
453 
454 	/* Now copy over the old table */
455 	free (tbl->td_data);
456 	*tbl = newtbl;
457 }
458