1 /*
2 Copyright (C) 2015-2021, Dirk Krause
3 SPDX-License-Identifier: BSD-3-Clause
4 */
5 
6 /*
7 	WARNING: This file was generated by the dkct program (see
8 	http://dktools.sourceforge.net/ for details).
9 	Changes you make here will be lost if dkct is run again!
10 	You should modify the original source and run dkct on it.
11 	Original source: dk4tsp32.ctr
12 */
13 
14 /**	@file dk4tsp32.c The dk4tsp32 module.
15 */
16 
17 
18 #include "dk4conf.h"
19 #include <libdk4c/dk4tsp32.h>
20 #include <libdk4c/dk4enc.h>
21 #include <libdk4base/dk4mem.h>
22 #include <libdk4c/dk4ansi.h>
23 #include <libdk4c/dk4utf8.h>
24 #include <libdk4c/dk4utf16.h>
25 #include <libdk4c/dk4c32.h>
26 
27 #if DK4_HAVE_ASSERT_H
28 #ifndef	ASSERT_H_INCLUDED
29 #include <assert.h>
30 #define	ASSERT_H_INCLUDED 1
31 #endif
32 #endif
33 
34 
35 
36 
37 
38 
39 /**	Initialize text stream processing structure.
40 	@param	tsp	Text stream processor.
41 	@param	eie	Expected input encoding.
42 */
43 static
44 void
dk4tsp32_init(dk4_tsp32_t * tsp,int eie)45 dk4tsp32_init(dk4_tsp32_t *tsp, int eie)
46 {
47 #if	DK4_USE_ASSERT
48 	assert(NULL != tsp);
49 #endif
50   DK4_MEMRES(tsp, sizeof(dk4_tsp32_t));
51   dk4bom_detect_init(&(tsp->bomd), eie);
52   dk4error_init(&(tsp->er_en));
53   dk4error_init(&(tsp->er_pr));
54   tsp->inbuf = NULL;
55   tsp->obj = NULL;
56   tsp->in_sz = 0;
57   tsp->in_us = 0;
58   tsp->ief = eie;
59   tsp->iee = eie;
60   tsp->pst = 0;
61   (tsp->pos).bytes  = (dk4_um_t)0UL;
62   (tsp->pos).chars  = (dk4_um_t)1UL;
63   (tsp->pos).lineno = (dk4_um_t)1UL;
64   (tsp->pos).charil = (dk4_um_t)1UL;
65 }
66 
67 
68 
69 /**	Process a 32 bit character.
70 	@param	tsp	Text stream processor.
71 	@param	chr	Character to process.
72 	@return	DK4_TSP_RES_OK		if the characters were processed
73 					successfully,
74 		DK4_TSP_RES_ERROR	if there was an error but we can
75 					continue,
76 		DK4_TSP_RES_FATAL	if there was a fatal error so
77 					we should abort processing.
78 */
79 static
80 int
dk4tsp32_process_character(dk4_tsp32_t * tsp,dk4_c32_t chr)81 dk4tsp32_process_character(
82   dk4_tsp32_t	*tsp,
83   dk4_c32_t	 chr
84 )
85 {
86   int		 back	=	DK4_TSP_RES_FATAL;
87 
88 #if	DK4_USE_ASSERT
89 	assert(NULL != tsp);
90 #endif
91   /* Increase position */
92   (tsp->pos).chars  += (dk4_um_t)1UL;
93   (tsp->pos).charil += (dk4_um_t)1UL;
94   /* Check for line buffering or direct processing */
95   if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) {
96 
97     if (tsp->in_us < tsp->in_sz) {
98       (tsp->inbuf)[tsp->in_us] = chr;
99       tsp->in_us += 1;
100       back = DK4_TSP_RES_OK;
101       if ((dk4_c32_t)'\n' == chr) {
102         back = DK4_TSP_RES_FATAL;
103 	if (tsp->in_us < tsp->in_sz) {
104 	  (tsp->inbuf)[tsp->in_us] = (dk4_c32_t)0UL;
105 	  back = (*((tsp->fct).lh))(
106 	    tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr)
107 	  );
108 	} else {
109 	  dk4error_set_with_position(
110 	    &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
111 	    (tsp->pos).bytes, (tsp->pos).lineno,
112 	    (tsp->pos).chars, (tsp->pos).charil
113 	  );
114 	}
115 	/*	2015-07-18
116 		Bugfix: We must reset the number of used characters to 0
117 		after processing and flushing the line buffer.
118 	*/
119 	tsp->in_us = 0;
120       }
121     } else {
122       dk4error_set_with_position(
123         &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
124         (tsp->pos).bytes, (tsp->pos).lineno,
125         (tsp->pos).chars, (tsp->pos).charil
126       );
127     }
128   } else {
129 
130     if (NULL != (tsp->fct).ch) {
131       back = (*((tsp->fct).ch))(tsp->obj, chr, &(tsp->pos), &(tsp->er_pr));
132     }
133   }
134   if ((dk4_c32_t)('\n') == chr) {
135     (tsp->pos).lineno += (dk4_um_t)1UL;
136     (tsp->pos).charil  = (dk4_um_t)1UL;
137   }
138 
139   return back;
140 }
141 
142 
143 
144 /**	Normal processing for one byte.
145 	Retrieve a 32 bit character first, either by decoding directly
146 	or by adding to a decoder.
147 	@param	tsp	Text stream processor.
148 	@param	inbyte	Byte to process.
149 	@return	Operation result, one from DK4_TSP_RES_OK,
150 	DK4_TSP_RES_ERROR or DK4_TSP_RES_FATAL.
151 */
152 static
153 int
dk4tsp32_process_byte(dk4_tsp32_t * tsp,unsigned char inbyte)154 dk4tsp32_process_byte(
155   dk4_tsp32_t	*tsp,
156   unsigned char	 inbyte
157 )
158 {
159   dk4_c32_t	 c32	= (dk4_c32_t)0UL;	/* 32 bit character */
160   int		 back	= DK4_TSP_RES_FATAL;	/* Function result */
161   int		 cuc32	= 0;			/* Flag: Have 32 bit char */
162   int		 res	= 0;			/* Operation result */
163 #if	DK4_USE_ASSERT
164 	assert(NULL != tsp);
165 #endif
166   switch (tsp->ief) {
167     case DK4_FILE_ENCODING_PLAIN: {
168       c32 = (dk4_c32_t)inbyte;
169       cuc32 = 1;
170     } break;
171     case DK4_FILE_ENCODING_WIN1252: {
172       if (0 != dk4ansi_decode(&c32, inbyte)) {
173         cuc32 = 1;
174       } else {
175 	dk4error_set_with_position(
176 	  &(tsp->er_en), DK4_E_DECODING_FAILED,
177 	  (tsp->pos).bytes, (tsp->pos).lineno,
178 	  (tsp->pos).chars, (tsp->pos).charil
179 	);
180       }
181     } break;
182     case DK4_FILE_ENCODING_UTF8: {
183       res = dk4utf8_add(&((tsp->dec).u08), inbyte);
184       switch (res) {
185         case DK4_EDSTM_ERROR: {
186 	  dk4error_set_with_position(
187 	    &(tsp->er_en), DK4_E_DECODING_FAILED,
188 	    (tsp->pos).bytes, (tsp->pos).lineno,
189 	    (tsp->pos).chars, (tsp->pos).charil
190 	  );
191 	} break;
192 	case DK4_EDSTM_FINISHED: {
193 	  c32 = dk4utf8_get(&((tsp->dec).u08));
194 	  cuc32 = 1;
195 	  dk4utf8_init(&((tsp->dec).u08));
196 	} break;
197 	case DK4_EDSTM_ACCEPT: {
198 	  back = DK4_TSP_RES_OK;
199 	} break;
200       }
201     } break;
202     case DK4_FILE_ENCODING_UTF16_LE: {
203       res = dk4utf16_byte_add(&((tsp->dec).u16), inbyte);
204       switch (res) {
205         case DK4_EDSTM_ERROR: {
206 	  dk4error_set_with_position(
207 	    &(tsp->er_en), DK4_E_DECODING_FAILED,
208 	    (tsp->pos).bytes, (tsp->pos).lineno,
209 	    (tsp->pos).chars, (tsp->pos).charil
210 	  );
211 	} break;
212 	case DK4_EDSTM_FINISHED: {
213 	  c32 = dk4utf16_byte_get(&((tsp->dec).u16));
214 	  cuc32 = 1;
215 	  dk4utf16_byte_init(&((tsp->dec).u16), 0);
216 	} break;
217 	case DK4_EDSTM_ACCEPT: {
218 	  back = DK4_TSP_RES_OK;
219 	} break;
220       }
221     } break;
222     case DK4_FILE_ENCODING_UTF16_BE: {
223       res = dk4utf16_byte_add(&((tsp->dec).u16), inbyte);
224       switch (res) {
225         case DK4_EDSTM_ERROR: {
226 	  dk4error_set_with_position(
227 	    &(tsp->er_en), DK4_E_DECODING_FAILED,
228 	    (tsp->pos).bytes, (tsp->pos).lineno,
229 	    (tsp->pos).chars, (tsp->pos).charil
230 	  );
231 	} break;
232 	case DK4_EDSTM_FINISHED: {
233 	  c32 = dk4utf16_byte_get(&((tsp->dec).u16));
234 	  cuc32 = 1;
235 	  dk4utf16_byte_init(&((tsp->dec).u16), 1);
236 	} break;
237 	case DK4_EDSTM_ACCEPT: {
238 	  back = DK4_TSP_RES_OK;
239 	} break;
240       }
241     } break;
242     case DK4_FILE_ENCODING_32_LE: {
243       res = dk4c32_decoder_add(&((tsp->dec).c32), inbyte);
244       switch (res) {
245         case DK4_EDSTM_ERROR: {
246 	  dk4error_set_with_position(
247 	    &(tsp->er_en), DK4_E_DECODING_FAILED,
248 	    (tsp->pos).bytes, (tsp->pos).lineno,
249 	    (tsp->pos).chars, (tsp->pos).charil
250 	  );
251 	} break;
252 	case DK4_EDSTM_FINISHED: {
253 	  c32 = dk4c32_decoder_get(&((tsp->dec).c32));
254 	  cuc32 = 1;
255 	  dk4c32_decoder_init(&((tsp->dec).c32), 0);
256 	} break;
257 	case DK4_EDSTM_ACCEPT: {
258 	  back = DK4_TSP_RES_OK;
259 	} break;
260       }
261     } break;
262     case DK4_FILE_ENCODING_32_BE: {
263       res = dk4c32_decoder_add(&((tsp->dec).c32), inbyte);
264       switch (res) {
265         case DK4_EDSTM_ERROR: {
266 	  dk4error_set_with_position(
267 	    &(tsp->er_en), DK4_E_DECODING_FAILED,
268 	    (tsp->pos).bytes, (tsp->pos).lineno,
269 	    (tsp->pos).chars, (tsp->pos).charil
270 	  );
271 	} break;
272 	case DK4_EDSTM_FINISHED: {
273 	  c32 = dk4c32_decoder_get(&((tsp->dec).c32));
274 	  cuc32 = 1;
275 	  dk4c32_decoder_init(&((tsp->dec).c32), 1);
276 	} break;
277 	case DK4_EDSTM_ACCEPT: {
278 	  back = DK4_TSP_RES_OK;
279 	} break;
280       }
281     } break;
282   }
283   if (0 != cuc32) {
284     back = dk4tsp32_process_character(tsp, c32);
285   }
286   if (DK4_TSP_RES_FATAL == back) {
287     tsp->pst = 2;
288   }
289   return back;
290 }
291 
292 
293 
294 /**	Initialize decoder for found input encoding.
295 	@param	tsp	Text stream processor.
296 */
297 static
298 void
dk4tsp32_initialize_decoder(dk4_tsp32_t * tsp)299 dk4tsp32_initialize_decoder(dk4_tsp32_t *tsp)
300 {
301 #if	DK4_USE_ASSERT
302 	assert(NULL != tsp);
303 #endif
304   switch (tsp->ief) {
305     case DK4_FILE_ENCODING_UTF8: {
306       dk4utf8_init(&((tsp->dec).u08));
307     } break;
308     case DK4_FILE_ENCODING_UTF16_LE: {
309       dk4utf16_byte_init(&((tsp->dec).u16), 0);
310     } break;
311     case DK4_FILE_ENCODING_UTF16_BE: {
312       dk4utf16_byte_init(&((tsp->dec).u16), 1);
313     } break;
314     case DK4_FILE_ENCODING_32_LE: {
315       dk4c32_decoder_init(&((tsp->dec).c32), 0);
316     } break;
317     case DK4_FILE_ENCODING_32_BE: {
318       dk4c32_decoder_init(&((tsp->dec).c32), 1);
319     } break;
320   }
321 }
322 
323 
324 
325 #if TRACE_DEBUG
326 static unsigned long byte_number = 0UL;
327 #endif
328 
329 /**	Add one single byte.
330 	@param	tsp	Text stream processor.
331 	@param	inbyte	Byte to process.
332 	@return	DK4_TSP_RES_OK		if the character was processed
333 					successfully,
334 		DK4_TSP_RES_ERROR	if there was an error but we can
335 					continue,
336 		DK4_TSP_RES_FATAL	if there was a fata error so we
337 					should abort processing.
338 */
339 static
340 int
dk4tsp32_i_add_one_byte(dk4_tsp32_t * tsp,unsigned char inbyte)341 dk4tsp32_i_add_one_byte(
342   dk4_tsp32_t	*tsp,
343   unsigned char	 inbyte
344 )
345 {
346   size_t	 nrej;				/* Number of unused bytes */
347   size_t	 i;				/* Current unused byte index */
348   int		 back = DK4_TSP_RES_FATAL;	/* Function result */
349   int		 res;				/* Operation result */
350   unsigned char	 uc;				/* Current unused byte */
351 
352 #if	DK4_USE_ASSERT
353 	assert(NULL != tsp);
354 #endif
355   (tsp->pos).bytes += (dk4_um_t)1UL;
356   switch (tsp->pst) {
357     case 0: {
358       res = dk4bom_detect_add(&(tsp->bomd), inbyte);
359       switch (res) {
360         case DK4_EDSTM_ACCEPT: {
361 	  back = DK4_TSP_RES_OK;
362 	} break;
363 	case DK4_EDSTM_FINISHED: case DK4_EDSTM_FINISHED_WITH_UNUSED: {
364 	  back = DK4_TSP_RES_OK;
365 	  tsp->ief = dk4bom_detect_get_encoding(&(tsp->bomd));
366 	  dk4tsp32_initialize_decoder(tsp);
367 	  tsp->pst = 1;
368 	  if (DK4_EDSTM_FINISHED_WITH_UNUSED == res) {
369 	    nrej = dk4bom_detect_num_unused_bytes(&(tsp->bomd));
370 	    if (0 < nrej) {
371 	      for (i = 0; ((i < nrej) && (DK4_TSP_RES_FATAL != back)); i++) {
372 	        uc = dk4bom_detect_unused_byte(&(tsp->bomd), i);
373 		switch (dk4tsp32_process_byte(tsp, uc)) {
374 		  case DK4_TSP_RES_FATAL: {
375 		    back = DK4_TSP_RES_FATAL;
376 		  } break;
377 		  case DK4_TSP_RES_ERROR: {
378 		    if (DK4_TSP_RES_OK == back) {
379 		      back = DK4_TSP_RES_ERROR;
380 		    }
381 		  } break;
382 		}
383 	      }
384 	    }
385 	  }
386 	} break;
387       }
388     } break;
389     case 1: {
390       back = dk4tsp32_process_byte(tsp, inbyte);
391     } break;
392     /*
393     	Processing stage 2 indicates there was a serious error
394 	before, we must skip further processing.
395 	This situation is covered by the initialization value
396 	DK4_TSP_RES_FATAL, so we do not need a case branch here.
397     */
398   }
399 
400   return back;
401 }
402 
403 
404 
405 int
dk4tsp32_setup_char(dk4_tsp32_t * tsp,void * obj,dk4_c32_handler_t * fct,int eie,dk4_er_t * erp)406 dk4tsp32_setup_char(
407   dk4_tsp32_t		*tsp,
408   void			*obj,
409   dk4_c32_handler_t	*fct,
410   int			 eie,
411   dk4_er_t		*erp
412 )
413 {
414   int		 back = 0;
415 #if	DK4_USE_ASSERT
416 	assert(NULL != tsp);
417 #endif
418   if (NULL != tsp) {
419     dk4tsp32_init(tsp, eie);
420     if (NULL != fct) {
421       tsp->obj = obj;
422       (tsp->fct).ch = fct;
423       tsp->iee = eie;
424       tsp->ief = eie;
425       tsp->pst = 0;
426       back = 1;
427     } else {
428       dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
429       tsp->pst = 2;
430     }
431   } else {
432     dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
433   }
434   return back;
435 }
436 
437 
438 
439 int
dk4tsp32_setup_line(dk4_tsp32_t * tsp,void * obj,dk4_c32_line_handler_t * fct,dk4_c32_t * inbuf,size_t szin,int eie,dk4_er_t * erp)440 dk4tsp32_setup_line(
441   dk4_tsp32_t			*tsp,
442   void				*obj,
443   dk4_c32_line_handler_t	*fct,
444   dk4_c32_t			*inbuf,
445   size_t			 szin,
446   int				 eie,
447   dk4_er_t			*erp
448 )
449 {
450   int		 back	=	0;
451 #if	DK4_USE_ASSERT
452 	assert(NULL != tsp);
453 	assert(NULL != inbuf);
454 	assert(0 < szin);
455 #endif
456   if (NULL != tsp) {
457     dk4tsp32_init(tsp, eie);
458     if ((NULL != fct) && (NULL != inbuf) && (0 < szin)) {
459       tsp->obj = obj;
460       (tsp->fct).lh = fct;
461       tsp->inbuf = inbuf;
462       tsp->in_sz = szin;
463       tsp->in_us = 0;
464       tsp->iee = eie;
465       tsp->ief = eie;
466       tsp->pst = 0;
467       back = 1;
468     } else {
469       dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
470       tsp->pst = 2;
471     }
472   } else {
473     dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
474   }
475   return back;
476 }
477 
478 
479 
480 int
dk4tsp32_add_one_byte(dk4_tsp32_t * tsp,unsigned char inbyte)481 dk4tsp32_add_one_byte(
482   dk4_tsp32_t	*tsp,
483   unsigned char	 inbyte
484 )
485 {
486   int		 back = DK4_TSP_RES_FATAL;
487 
488 #if	DK4_USE_ASSERT
489 	assert(NULL != tsp);
490 #endif
491   if (NULL != tsp) {
492     if (2 > tsp->pst) {
493       back = dk4tsp32_i_add_one_byte(tsp, inbyte);
494     }
495   }
496 
497   return back;
498 }
499 
500 
501 
502 int
dk4tsp32_add_bytes(dk4_tsp32_t * tsp,const unsigned char * buffer,size_t sz)503 dk4tsp32_add_bytes(
504   dk4_tsp32_t		*tsp,
505   const unsigned char	*buffer,
506   size_t		 sz
507 )
508 {
509   int		back	=	DK4_TSP_RES_FATAL;	/* Function result */
510   int		res;					/* Processing result */
511 
512 #if	DK4_USE_ASSERT
513 	assert(NULL != tsp);
514 	assert(NULL != buffer);
515 	assert(0 < sz);
516 #endif
517   if ((NULL != tsp) && (NULL != buffer) && (0 < sz)) {
518     if (2 > tsp->pst) {
519       back = DK4_TSP_RES_OK;
520       while ((sz--) && (2 > tsp->pst)) {
521         res = dk4tsp32_i_add_one_byte(tsp, *(buffer++));
522 	switch (res) {
523 	  case DK4_TSP_RES_FATAL: {
524 	    back = DK4_TSP_RES_FATAL;
525 	  } break;
526 	  case DK4_TSP_RES_ERROR: {
527 	    if (DK4_TSP_RES_OK == back) {
528 	      back = DK4_TSP_RES_ERROR;
529 	    }
530 	  } break;
531 	}
532       }
533     }
534   }
535 
536   return back;
537 }
538 
539 
540 
541 int
dk4tsp32_finish(dk4_tsp32_t * tsp)542 dk4tsp32_finish(dk4_tsp32_t *tsp)
543 {
544   size_t	 nrej =	0;			/* Number of unused bytes */
545   size_t	 i    =	0;			/* Current unused byte index */
546   int		 back = DK4_TSP_RES_FATAL;	/* Function result */
547   int		 res;				/* Operation result */
548   unsigned char	 uc;				/* Current unused byte */
549 
550 #if	DK4_USE_ASSERT
551 	assert(NULL != tsp);
552 #endif
553   if (NULL != tsp) {
554     /*	Flush all unprocessed output
555 	1. If there are unprocessed bytes in the BOM detector as BOM
556 	   detection was not completed, process these bytes.
557 	2. If there are characters in the line buffer,
558 	   finalize the line buffer text and process it.
559     */
560     back = DK4_TSP_RES_OK;
561     /* Retrieve an process bytes stored in BOM detector */
562     if (0 == tsp->pst) {
563       tsp->pst = 1;
564       nrej = dk4bom_detect_num_unused_bytes(&(tsp->bomd));
565       if (0 < nrej) {
566         dk4tsp32_initialize_decoder(tsp);
567 	for (i = 0; ((i < nrej) && (2 > tsp->pst)); i++) {
568 	  uc = dk4bom_detect_unused_byte(&(tsp->bomd), i);
569 	  switch (dk4tsp32_process_byte(tsp, uc)) {
570 	    case DK4_TSP_RES_FATAL: {
571 	      back = DK4_TSP_RES_FATAL;
572 	      tsp->pst = 2;
573 	    } break;
574 	    case DK4_TSP_RES_ERROR: {
575 	      if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; }
576 	    } break;
577 	  }
578 	}
579       }
580     }
581     /* Process final line */
582     if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) {
583       if (0 < tsp->in_us) {
584         if (2 > tsp->pst) {
585 	  if (tsp->in_us < tsp->in_sz) {
586 	    (tsp->inbuf)[tsp->in_us] = (dk4_c32_t)0UL;
587 	    res =
588 	    (*((tsp->fct).lh))(
589 	      tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr)
590 	    );
591 	    switch (res) {
592 	      case DK4_TSP_RES_FATAL: {
593 	        back = DK4_TSP_RES_FATAL;
594 		tsp->pst = 2;
595 	      } break;
596 	      case DK4_TSP_RES_ERROR: {
597 	        if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; }
598 	      } break;
599 	    }
600 	  } else {
601             back = DK4_TSP_RES_FATAL;
602 	    tsp->pst = 2;
603             dk4error_set_with_position(
604               &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
605               (tsp->pos).bytes, (tsp->pos).lineno,
606               (tsp->pos).chars, (tsp->pos).charil
607             );
608 	  }
609         }
610       }
611     }
612   }
613 
614   return back;
615 }
616 
617 
618 
619 void
dk4tsp32_get_errors(dk4_er_t * er_en,dk4_er_t * er_pr,dk4_tsp32_t const * tsp)620 dk4tsp32_get_errors(dk4_er_t *er_en, dk4_er_t *er_pr, dk4_tsp32_t const *tsp)
621 {
622 #if	DK4_USE_ASSERT
623 	assert(NULL != tsp);
624 #endif
625   if (NULL != tsp) {
626     if (NULL != er_en) {
627       DK4_MEMCPY(er_en, &(tsp->er_en), sizeof(dk4_er_t));
628     }
629     if (NULL != er_pr) {
630       DK4_MEMCPY(er_pr, &(tsp->er_pr), sizeof(dk4_er_t));
631     }
632   }
633 }
634 
635 
636 
637