1%%	options
2
3copyright owner	=	Dirk Krause
4copyright year	=	2015-xxxx
5SPDX-License-Identifier:	BSD-3-Clause
6
7
8
9%%	header
10
11/**	@file
12	Text stream processing for 32 bit characters.
13
14	CRT on Windows: Optional.
15*/
16
17#ifndef DK4CONF_H_INCLUDED
18#if DK4_BUILDING_DKTOOLS4
19#include "dk4conf.h"
20#else
21#include <dktools-4/dk4conf.h>
22#endif
23#endif
24
25#ifndef DK4TYPES_H_INCLUDED
26#if DK4_BUILDING_DKTOOLS4
27#include <libdk4base/dk4types.h>
28#else
29#include <dktools-4/dk4types.h>
30#endif
31#endif
32
33#ifndef DK4ERROR_H_INCLUDED
34#if DK4_BUILDING_DKTOOLS4
35#include <libdk4base/dk4error.h>
36#else
37#include <dktools-4/dk4error.h>
38#endif
39#endif
40
41#ifndef DK4BOM_H_INCLUDED
42#if DK4_BUILDING_DKTOOLS4
43#include <libdk4c/dk4bom.h>
44#else
45#include <dktools-4/dk4bom.h>
46#endif
47#endif
48
49#ifndef DK4TSP_H_INCLUDED
50#if DK4_BUILDING_DKTOOLS4
51#include <libdk4c/dk4tsp.h>
52#else
53#include <dktools-4/dk4tsp.h>
54#endif
55#endif
56
57#ifndef DK4UTF8_H_INCLUDED
58#if DK4_BUILDING_DKTOOLS4
59#include <libdk4c/dk4utf8.h>
60#else
61#include <dktools-4/dk4utf8.h>
62#endif
63#endif
64
65#ifndef DK4UTF16_H_INCLUDED
66#if DK4_BUILDING_DKTOOLS4
67#include <libdk4c/dk4utf16.h>
68#else
69#include <dktools-4/dk4utf16.h>
70#endif
71#endif
72
73#ifndef DK4C32_H_INCLUDED
74#if DK4_BUILDING_DKTOOLS4
75#include <libdk4c/dk4c32.h>
76#else
77#include <dktools-4/dk4c32.h>
78#endif
79#endif
80
81/**	Handler function for single characters.
82	@param	obj	Object to modify while processing the character.
83	@param	c	Character to process.
84	@param	pos	Current position in file or data stream.
85	@param	erp	Error report, may be NULL.
86	@return	DK4_TSP_RES_OK		if the character was processed
87					successfully,
88		DK4_TSP_RES_ERROR	if there was an error but we can
89					continue,
90		DK4_TSP_RES_FATAL	if there was a fatal error so we
91					should abort processing.
92*/
93typedef int	dk4_c32_handler_t(
94  void				*obj,
95  dk4_c32_t 			 c,
96  dk4_text_stream_position_t	*pos,
97  dk4_er_t			*erp
98);
99
100/**	Handler function for text lines.
101	@param	obj	Object to modify while processing the character.
102	@param	line	Text line to process.
103	@param	lineno	Current line number.
104	@param	erp	Error report, may be NULL.
105	@return	DK4_TSP_RES_OK		if the character was processed
106					successfully,
107		DK4_TSP_RES_ERROR	if there was an error but we can
108					continue,
109		DK4_TSP_RES_FATAL	if there was a fata error so we
110					should abort processing.
111*/
112typedef int	dk4_c32_line_handler_t(
113  void		*obj,
114  dk4_c32_t	*line,
115  dk4_um_t	 lineno,
116  dk4_er_t	*erp
117);
118
119/**	Structure for 32 bit character processing.
120*/
121typedef struct {
122  union {
123    dk4_utf8_decoder_t		 u08;	/**< UTF-8 decoder. */
124    dk4_utf16_byte_decoder_t	 u16;	/**< UTF-16 decoder. */
125    dk4_c32_byte_decoder_t	 c32;	/**< 32 bit char decoder. */
126  } dec;				/**< Input decoder. */
127  dk4_bom_detector_t		 bomd;	/**< BOM detector. */
128  dk4_text_stream_position_t	 pos;	/**< Current position. */
129  dk4_er_t			 er_en;	/**< Errors in input decoding. */
130  dk4_er_t			 er_pr;	/**< Errors in processing. */
131  union {
132    dk4_c32_line_handler_t	*lh;	/**< Handler function for lines. */
133    dk4_c32_handler_t		*ch;	/**< Handler function for char. */
134  } fct;				/**< Handler function. */
135  dk4_c32_t			*inbuf;	/**< Buffer for input line. */
136  void				*obj;	/**< Object to modify in processing. */
137  size_t			 in_sz;	/**< Size of input line buffer. */
138  size_t			 in_us;	/**< Used bytes in input line buffer. */
139  int				 ief;	/**< Input encoding found. */
140  int				 iee;	/**< Input encoding expected. */
141  int				 pst;	/**< Processing stage. */
142} dk4_tsp32_t;
143
144
145
146#ifdef __cplusplus
147extern "C" {
148#endif
149
150/**	Set up processor for byte by byte processing.
151	@param	tsp	Processor to set up.
152	@param	obj	Object to modify when processing input, may be NULL.
153	@param	fct	Handler function to call for each character.
154	@param	eie	Expected input encoding.
155	@param	erp	Error report, may be NULL.
156	@return	1 on success, 0 on error.
157*/
158int
159dk4tsp32_setup_char(
160  dk4_tsp32_t		*tsp,
161  void			*obj,
162  dk4_c32_handler_t	*fct,
163  int			 eie,
164  dk4_er_t		*erp
165);
166
167/**	Set up processor for line processing.
168	@param	tsp	Processor to set up.
169	@param	obj	Object to modify when processing input, may be NULL.
170	@param	fct	Handler function to invoke for each line.
171	@param	inbuf	Input line buffer.
172	@param	szin	Size of input line buffer (number of dk4_c32_t).
173	@param	eie	Expected input encoding.
174	@param	erp	Error report, may be NULL.
175	@return	1 on success, 0 on error.
176*/
177int
178dk4tsp32_setup_line(
179  dk4_tsp32_t			*tsp,
180  void				*obj,
181  dk4_c32_line_handler_t	*fct,
182  dk4_c32_t			*inbuf,
183  size_t			 szin,
184  int				 eie,
185  dk4_er_t			*erp
186);
187
188/**	Add one single byte.
189	@param	tsp	Text stream processor.
190	@param	inbyte	Byte to process.
191	@return	DK4_TSP_RES_OK		if the character was processed
192					successfully,
193		DK4_TSP_RES_ERROR	if there was an error but we can
194					continue,
195		DK4_TSP_RES_FATAL	if there was a fata error so we
196					should abort processing.
197*/
198int
199dk4tsp32_add_one_byte(
200  dk4_tsp32_t	*tsp,
201  unsigned char	 inbyte
202);
203
204/**	Add multiple bytes.
205	@param	tsp	Text stream processor.
206	@param	buffer	Buffer start address.
207	@param	sz	Number of bytes in buffer.
208	@return	DK4_TSP_RES_OK		if the character was processed
209					successfully,
210		DK4_TSP_RES_ERROR	if there was an error but we can
211					continue,
212		DK4_TSP_RES_FATAL	if there was a fata error so we
213					should abort processing.
214*/
215int
216dk4tsp32_add_bytes(
217  dk4_tsp32_t		*tsp,
218  const unsigned char	*buffer,
219  size_t		 sz
220);
221
222/**	Finish processing.
223	@param	tsp	Text stream processor.
224	@return	DK4_TSP_RES_OK		if processing was finished
225					successfully,
226		DK4_TSP_RES_ERROR	if there was an error,
227		DK4_TSP_RES_FATAL	if there was a fatal error.
228*/
229int
230dk4tsp32_finish(dk4_tsp32_t *tsp);
231
232/**	Retrieve error reports for encoding/decoding and processing.
233	@param	er_en	Destination error report buffer for encoding/decoding.
234	@param	er_pr	Destination error report buffer for processing.
235	@param	tsp	Text stream processor to retrieve errors from.
236*/
237void
238dk4tsp32_get_errors(dk4_er_t *er_en, dk4_er_t *er_pr, dk4_tsp32_t const *tsp);
239
240#ifdef __cplusplus
241}
242#endif
243
244
245
246%%	module
247
248#include "dk4conf.h"
249#include <libdk4c/dk4tsp32.h>
250#include <libdk4c/dk4enc.h>
251#include <libdk4base/dk4mem.h>
252#include <libdk4c/dk4ansi.h>
253#include <libdk4c/dk4utf8.h>
254#include <libdk4c/dk4utf16.h>
255#include <libdk4c/dk4c32.h>
256
257#if DK4_HAVE_ASSERT_H
258#ifndef	ASSERT_H_INCLUDED
259#include <assert.h>
260#define	ASSERT_H_INCLUDED 1
261#endif
262#endif
263
264
265$!trace-include
266
267
268
269/**	Initialize text stream processing structure.
270	@param	tsp	Text stream processor.
271	@param	eie	Expected input encoding.
272*/
273static
274void
275dk4tsp32_init(dk4_tsp32_t *tsp, int eie)
276{
277#if	DK4_USE_ASSERT
278	assert(NULL != tsp);
279#endif
280  DK4_MEMRES(tsp, sizeof(dk4_tsp32_t));
281  dk4bom_detect_init(&(tsp->bomd), eie);
282  dk4error_init(&(tsp->er_en));
283  dk4error_init(&(tsp->er_pr));
284  tsp->inbuf = NULL;
285  tsp->obj = NULL;
286  tsp->in_sz = 0;
287  tsp->in_us = 0;
288  tsp->ief = eie;
289  tsp->iee = eie;
290  tsp->pst = 0;
291  (tsp->pos).bytes  = (dk4_um_t)0UL;
292  (tsp->pos).chars  = (dk4_um_t)1UL;
293  (tsp->pos).lineno = (dk4_um_t)1UL;
294  (tsp->pos).charil = (dk4_um_t)1UL;
295}
296
297
298
299/**	Process a 32 bit character.
300	@param	tsp	Text stream processor.
301	@param	chr	Character to process.
302	@return	DK4_TSP_RES_OK		if the characters were processed
303					successfully,
304		DK4_TSP_RES_ERROR	if there was an error but we can
305					continue,
306		DK4_TSP_RES_FATAL	if there was a fatal error so
307					we should abort processing.
308*/
309static
310int
311dk4tsp32_process_character(
312  dk4_tsp32_t	*tsp,
313  dk4_c32_t	 chr
314)
315{
316  int		 back	=	DK4_TSP_RES_FATAL;
317  $? "+ dk4tsp32_process_character"
318#if	DK4_USE_ASSERT
319	assert(NULL != tsp);
320#endif
321  /* Increase position */
322  (tsp->pos).chars  += (dk4_um_t)1UL;
323  (tsp->pos).charil += (dk4_um_t)1UL;
324  /* Check for line buffering or direct processing */
325  if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) {
326    $? ". line buffering %lu %lu", (unsigned long)(tsp->in_us), (unsigned long)(tsp->in_sz)
327    if (tsp->in_us < tsp->in_sz) {		$? ". used size < buffer size"
328      (tsp->inbuf)[tsp->in_us] = chr;
329      tsp->in_us += 1;
330      back = DK4_TSP_RES_OK;
331      if ((dk4_c32_t)'\n' == chr) {
332        back = DK4_TSP_RES_FATAL;
333	if (tsp->in_us < tsp->in_sz) {
334	  (tsp->inbuf)[tsp->in_us] = (dk4_c32_t)0UL;
335	  back = (*((tsp->fct).lh))(
336	    tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr)
337	  );
338	} else {
339	  dk4error_set_with_position(
340	    &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
341	    (tsp->pos).bytes, (tsp->pos).lineno,
342	    (tsp->pos).chars, (tsp->pos).charil
343	  );
344	}
345	/*	2015-07-18
346		Bugfix: We must reset the number of used characters to 0
347		after processing and flushing the line buffer.
348	*/
349	tsp->in_us = 0;
350      }
351    } else {					$? "! line buffer full"
352      dk4error_set_with_position(
353        &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
354        (tsp->pos).bytes, (tsp->pos).lineno,
355        (tsp->pos).chars, (tsp->pos).charil
356      );
357    }
358  } else {
359    $? ". direct char processing"
360    if (NULL != (tsp->fct).ch) {
361      back = (*((tsp->fct).ch))(tsp->obj, chr, &(tsp->pos), &(tsp->er_pr));
362    }
363  }
364  if ((dk4_c32_t)('\n') == chr) {
365    (tsp->pos).lineno += (dk4_um_t)1UL;
366    (tsp->pos).charil  = (dk4_um_t)1UL;
367  }
368  $? "- dk4tsp32_process_character %d", back
369  return back;
370}
371
372
373
374/**	Normal processing for one byte.
375	Retrieve a 32 bit character first, either by decoding directly
376	or by adding to a decoder.
377	@param	tsp	Text stream processor.
378	@param	inbyte	Byte to process.
379	@return	Operation result, one from DK4_TSP_RES_OK,
380	DK4_TSP_RES_ERROR or DK4_TSP_RES_FATAL.
381*/
382static
383int
384dk4tsp32_process_byte(
385  dk4_tsp32_t	*tsp,
386  unsigned char	 inbyte
387)
388{
389  dk4_c32_t	 c32	= (dk4_c32_t)0UL;	/* 32 bit character */
390  int		 back	= DK4_TSP_RES_FATAL;	/* Function result */
391  int		 cuc32	= 0;			/* Flag: Have 32 bit char */
392  int		 res	= 0;			/* Operation result */
393#if	DK4_USE_ASSERT
394	assert(NULL != tsp);
395#endif
396  switch (tsp->ief) {
397    case DK4_FILE_ENCODING_PLAIN: {
398      c32 = (dk4_c32_t)inbyte;
399      cuc32 = 1;
400    } break;
401    case DK4_FILE_ENCODING_WIN1252: {
402      if (0 != dk4ansi_decode(&c32, inbyte)) {
403        cuc32 = 1;
404      } else {
405	dk4error_set_with_position(
406	  &(tsp->er_en), DK4_E_DECODING_FAILED,
407	  (tsp->pos).bytes, (tsp->pos).lineno,
408	  (tsp->pos).chars, (tsp->pos).charil
409	);
410      }
411    } break;
412    case DK4_FILE_ENCODING_UTF8: {
413      res = dk4utf8_add(&((tsp->dec).u08), inbyte);
414      switch (res) {
415        case DK4_EDSTM_ERROR: {
416	  dk4error_set_with_position(
417	    &(tsp->er_en), DK4_E_DECODING_FAILED,
418	    (tsp->pos).bytes, (tsp->pos).lineno,
419	    (tsp->pos).chars, (tsp->pos).charil
420	  );
421	} break;
422	case DK4_EDSTM_FINISHED: {
423	  c32 = dk4utf8_get(&((tsp->dec).u08));
424	  cuc32 = 1;
425	  dk4utf8_init(&((tsp->dec).u08));
426	} break;
427	case DK4_EDSTM_ACCEPT: {
428	  back = DK4_TSP_RES_OK;
429	} break;
430      }
431    } break;
432    case DK4_FILE_ENCODING_UTF16_LE: {
433      res = dk4utf16_byte_add(&((tsp->dec).u16), inbyte);
434      switch (res) {
435        case DK4_EDSTM_ERROR: {
436	  dk4error_set_with_position(
437	    &(tsp->er_en), DK4_E_DECODING_FAILED,
438	    (tsp->pos).bytes, (tsp->pos).lineno,
439	    (tsp->pos).chars, (tsp->pos).charil
440	  );
441	} break;
442	case DK4_EDSTM_FINISHED: {
443	  c32 = dk4utf16_byte_get(&((tsp->dec).u16));
444	  cuc32 = 1;
445	  dk4utf16_byte_init(&((tsp->dec).u16), 0);
446	} break;
447	case DK4_EDSTM_ACCEPT: {
448	  back = DK4_TSP_RES_OK;
449	} break;
450      }
451    } break;
452    case DK4_FILE_ENCODING_UTF16_BE: {
453      res = dk4utf16_byte_add(&((tsp->dec).u16), inbyte);
454      switch (res) {
455        case DK4_EDSTM_ERROR: {
456	  dk4error_set_with_position(
457	    &(tsp->er_en), DK4_E_DECODING_FAILED,
458	    (tsp->pos).bytes, (tsp->pos).lineno,
459	    (tsp->pos).chars, (tsp->pos).charil
460	  );
461	} break;
462	case DK4_EDSTM_FINISHED: {
463	  c32 = dk4utf16_byte_get(&((tsp->dec).u16));
464	  cuc32 = 1;
465	  dk4utf16_byte_init(&((tsp->dec).u16), 1);
466	} break;
467	case DK4_EDSTM_ACCEPT: {
468	  back = DK4_TSP_RES_OK;
469	} break;
470      }
471    } break;
472    case DK4_FILE_ENCODING_32_LE: {
473      res = dk4c32_decoder_add(&((tsp->dec).c32), inbyte);
474      switch (res) {
475        case DK4_EDSTM_ERROR: {
476	  dk4error_set_with_position(
477	    &(tsp->er_en), DK4_E_DECODING_FAILED,
478	    (tsp->pos).bytes, (tsp->pos).lineno,
479	    (tsp->pos).chars, (tsp->pos).charil
480	  );
481	} break;
482	case DK4_EDSTM_FINISHED: {
483	  c32 = dk4c32_decoder_get(&((tsp->dec).c32));
484	  cuc32 = 1;
485	  dk4c32_decoder_init(&((tsp->dec).c32), 0);
486	} break;
487	case DK4_EDSTM_ACCEPT: {
488	  back = DK4_TSP_RES_OK;
489	} break;
490      }
491    } break;
492    case DK4_FILE_ENCODING_32_BE: {
493      res = dk4c32_decoder_add(&((tsp->dec).c32), inbyte);
494      switch (res) {
495        case DK4_EDSTM_ERROR: {
496	  dk4error_set_with_position(
497	    &(tsp->er_en), DK4_E_DECODING_FAILED,
498	    (tsp->pos).bytes, (tsp->pos).lineno,
499	    (tsp->pos).chars, (tsp->pos).charil
500	  );
501	} break;
502	case DK4_EDSTM_FINISHED: {
503	  c32 = dk4c32_decoder_get(&((tsp->dec).c32));
504	  cuc32 = 1;
505	  dk4c32_decoder_init(&((tsp->dec).c32), 1);
506	} break;
507	case DK4_EDSTM_ACCEPT: {
508	  back = DK4_TSP_RES_OK;
509	} break;
510      }
511    } break;
512  }
513  if (0 != cuc32) {
514    back = dk4tsp32_process_character(tsp, c32);
515  }
516  if (DK4_TSP_RES_FATAL == back) {
517    tsp->pst = 2;
518  }
519  return back;
520}
521
522
523
524/**	Initialize decoder for found input encoding.
525	@param	tsp	Text stream processor.
526*/
527static
528void
529dk4tsp32_initialize_decoder(dk4_tsp32_t *tsp)
530{
531#if	DK4_USE_ASSERT
532	assert(NULL != tsp);
533#endif
534  switch (tsp->ief) {
535    case DK4_FILE_ENCODING_UTF8: {
536      dk4utf8_init(&((tsp->dec).u08));
537    } break;
538    case DK4_FILE_ENCODING_UTF16_LE: {
539      dk4utf16_byte_init(&((tsp->dec).u16), 0);
540    } break;
541    case DK4_FILE_ENCODING_UTF16_BE: {
542      dk4utf16_byte_init(&((tsp->dec).u16), 1);
543    } break;
544    case DK4_FILE_ENCODING_32_LE: {
545      dk4c32_decoder_init(&((tsp->dec).c32), 0);
546    } break;
547    case DK4_FILE_ENCODING_32_BE: {
548      dk4c32_decoder_init(&((tsp->dec).c32), 1);
549    } break;
550  }
551}
552
553
554
#if TRACE_DEBUG
/**	Running number of the bytes passed to dk4tsp32_i_add_one_byte(),
	used in trace output only. As a file scope variable it is
	shared by all text stream processors.
*/
static unsigned long byte_number = 0UL;
#endif
558
559/**	Add one single byte.
560	@param	tsp	Text stream processor.
561	@param	inbyte	Byte to process.
562	@return	DK4_TSP_RES_OK		if the character was processed
563					successfully,
564		DK4_TSP_RES_ERROR	if there was an error but we can
565					continue,
566		DK4_TSP_RES_FATAL	if there was a fata error so we
567					should abort processing.
568*/
569static
570int
571dk4tsp32_i_add_one_byte(
572  dk4_tsp32_t	*tsp,
573  unsigned char	 inbyte
574)
575{
576  size_t	 nrej;				/* Number of unused bytes */
577  size_t	 i;				/* Current unused byte index */
578  int		 back = DK4_TSP_RES_FATAL;	/* Function result */
579  int		 res;				/* Operation result */
580  unsigned char	 uc;				/* Current unused byte */
581  $? "+ dk4tsp32_i_add_one_byte index=%lu '%!8c' 0x%02x", byte_number++, inbyte, (unsigned)inbyte
582#if	DK4_USE_ASSERT
583	assert(NULL != tsp);
584#endif
585  (tsp->pos).bytes += (dk4_um_t)1UL;
586  switch (tsp->pst) {
587    case 0: {
588      res = dk4bom_detect_add(&(tsp->bomd), inbyte);
589      switch (res) {
590        case DK4_EDSTM_ACCEPT: {
591	  back = DK4_TSP_RES_OK;
592	} break;
593	case DK4_EDSTM_FINISHED: case DK4_EDSTM_FINISHED_WITH_UNUSED: {
594	  back = DK4_TSP_RES_OK;
595	  tsp->ief = dk4bom_detect_get_encoding(&(tsp->bomd));
596	  dk4tsp32_initialize_decoder(tsp);
597	  tsp->pst = 1;
598	  if (DK4_EDSTM_FINISHED_WITH_UNUSED == res) {
599	    nrej = dk4bom_detect_num_unused_bytes(&(tsp->bomd));
600	    if (0 < nrej) {
601	      for (i = 0; ((i < nrej) && (DK4_TSP_RES_FATAL != back)); i++) {
602	        uc = dk4bom_detect_unused_byte(&(tsp->bomd), i);
603		switch (dk4tsp32_process_byte(tsp, uc)) {
604		  case DK4_TSP_RES_FATAL: {
605		    back = DK4_TSP_RES_FATAL;
606		  } break;
607		  case DK4_TSP_RES_ERROR: {
608		    if (DK4_TSP_RES_OK == back) {
609		      back = DK4_TSP_RES_ERROR;
610		    }
611		  } break;
612		}
613	      }
614	    }
615	  }
616	} break;
617      }
618    } break;
619    case 1: {
620      back = dk4tsp32_process_byte(tsp, inbyte);
621    } break;
622    /*
623    	Processing stage 2 indicates there was a serious error
624	before, we must skip further processing.
625	This situation is covered by the initialization value
626	DK4_TSP_RES_FATAL, so we do not need a case branch here.
627    */
628  }
629  $? "- dk4tsp32_i_add_one_byte %d", back
630  return back;
631}
632
633
634
635int
636dk4tsp32_setup_char(
637  dk4_tsp32_t		*tsp,
638  void			*obj,
639  dk4_c32_handler_t	*fct,
640  int			 eie,
641  dk4_er_t		*erp
642)
643{
644  int		 back = 0;
645#if	DK4_USE_ASSERT
646	assert(NULL != tsp);
647#endif
648  if (NULL != tsp) {
649    dk4tsp32_init(tsp, eie);
650    if (NULL != fct) {
651      tsp->obj = obj;
652      (tsp->fct).ch = fct;
653      tsp->iee = eie;
654      tsp->ief = eie;
655      tsp->pst = 0;
656      back = 1;
657    } else {
658      dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
659      tsp->pst = 2;
660    }
661  } else {
662    dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
663  }
664  return back;
665}
666
667
668
669int
670dk4tsp32_setup_line(
671  dk4_tsp32_t			*tsp,
672  void				*obj,
673  dk4_c32_line_handler_t	*fct,
674  dk4_c32_t			*inbuf,
675  size_t			 szin,
676  int				 eie,
677  dk4_er_t			*erp
678)
679{
680  int		 back	=	0;
681#if	DK4_USE_ASSERT
682	assert(NULL != tsp);
683	assert(NULL != inbuf);
684	assert(0 < szin);
685#endif
686  if (NULL != tsp) {
687    dk4tsp32_init(tsp, eie);
688    if ((NULL != fct) && (NULL != inbuf) && (0 < szin)) {
689      tsp->obj = obj;
690      (tsp->fct).lh = fct;
691      tsp->inbuf = inbuf;
692      tsp->in_sz = szin;
693      tsp->in_us = 0;
694      tsp->iee = eie;
695      tsp->ief = eie;
696      tsp->pst = 0;
697      back = 1;
698    } else {
699      dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
700      tsp->pst = 2;
701    }
702  } else {
703    dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
704  }
705  return back;
706}
707
708
709
710int
711dk4tsp32_add_one_byte(
712  dk4_tsp32_t	*tsp,
713  unsigned char	 inbyte
714)
715{
716  int		 back = DK4_TSP_RES_FATAL;
717  $? "+ dk4tsp32_add_one_byte"
718#if	DK4_USE_ASSERT
719	assert(NULL != tsp);
720#endif
721  if (NULL != tsp) {
722    if (2 > tsp->pst) {
723      back = dk4tsp32_i_add_one_byte(tsp, inbyte);
724    }
725  }
726  $? "- dk4tsp32_add_one_byte %d", back
727  return back;
728}
729
730
731
732int
733dk4tsp32_add_bytes(
734  dk4_tsp32_t		*tsp,
735  const unsigned char	*buffer,
736  size_t		 sz
737)
738{
739  int		back	=	DK4_TSP_RES_FATAL;	/* Function result */
740  int		res;					/* Processing result */
741  $? "+ dk4tsp32_add_bytes"
742#if	DK4_USE_ASSERT
743	assert(NULL != tsp);
744	assert(NULL != buffer);
745	assert(0 < sz);
746#endif
747  if ((NULL != tsp) && (NULL != buffer) && (0 < sz)) {
748    if (2 > tsp->pst) {
749      back = DK4_TSP_RES_OK;
750      while ((sz--) && (2 > tsp->pst)) {
751        res = dk4tsp32_i_add_one_byte(tsp, *(buffer++));
752	switch (res) {
753	  case DK4_TSP_RES_FATAL: {
754	    back = DK4_TSP_RES_FATAL;
755	  } break;
756	  case DK4_TSP_RES_ERROR: {
757	    if (DK4_TSP_RES_OK == back) {
758	      back = DK4_TSP_RES_ERROR;
759	    }
760	  } break;
761	}
762      }
763    }
764  }
765  $? "- dk4tsp32_add_bytes %d", back
766  return back;
767}
768
769
770
771int
772dk4tsp32_finish(dk4_tsp32_t *tsp)
773{
774  size_t	 nrej =	0;			/* Number of unused bytes */
775  size_t	 i    =	0;			/* Current unused byte index */
776  int		 back = DK4_TSP_RES_FATAL;	/* Function result */
777  int		 res;				/* Operation result */
778  unsigned char	 uc;				/* Current unused byte */
779  $? "+ dk4tsp32_finish"
780#if	DK4_USE_ASSERT
781	assert(NULL != tsp);
782#endif
783  if (NULL != tsp) {
784    /*	Flush all unprocessed output
785	1. If there are unprocessed bytes in the BOM detector as BOM
786	   detection was not completed, process these bytes.
787	2. If there are characters in the line buffer,
788	   finalize the line buffer text and process it.
789    */
790    back = DK4_TSP_RES_OK;
791    /* Retrieve an process bytes stored in BOM detector */
792    if (0 == tsp->pst) {
793      tsp->pst = 1;
794      nrej = dk4bom_detect_num_unused_bytes(&(tsp->bomd));
795      if (0 < nrej) {
796        dk4tsp32_initialize_decoder(tsp);
797	for (i = 0; ((i < nrej) && (2 > tsp->pst)); i++) {
798	  uc = dk4bom_detect_unused_byte(&(tsp->bomd), i);
799	  switch (dk4tsp32_process_byte(tsp, uc)) {
800	    case DK4_TSP_RES_FATAL: {
801	      back = DK4_TSP_RES_FATAL;
802	      tsp->pst = 2;
803	    } break;
804	    case DK4_TSP_RES_ERROR: {
805	      if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; }
806	    } break;
807	  }
808	}
809      }
810    }
811    /* Process final line */
812    if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) {
813      if (0 < tsp->in_us) {
814        if (2 > tsp->pst) {
815	  if (tsp->in_us < tsp->in_sz) {
816	    (tsp->inbuf)[tsp->in_us] = (dk4_c32_t)0UL;
817	    res =
818	    (*((tsp->fct).lh))(
819	      tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr)
820	    );
821	    switch (res) {
822	      case DK4_TSP_RES_FATAL: {
823	        back = DK4_TSP_RES_FATAL;
824		tsp->pst = 2;
825	      } break;
826	      case DK4_TSP_RES_ERROR: {
827	        if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; }
828	      } break;
829	    }
830	  } else {
831            back = DK4_TSP_RES_FATAL;
832	    tsp->pst = 2;
833            dk4error_set_with_position(
834              &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
835              (tsp->pos).bytes, (tsp->pos).lineno,
836              (tsp->pos).chars, (tsp->pos).charil
837            );
838	  }
839        }
840      }
841    }
842  }
843  $? "- dk4tsp32_finish %d", back
844  return back;
845}
846
847
848
849void
850dk4tsp32_get_errors(dk4_er_t *er_en, dk4_er_t *er_pr, dk4_tsp32_t const *tsp)
851{
852#if	DK4_USE_ASSERT
853	assert(NULL != tsp);
854#endif
855  if (NULL != tsp) {
856    if (NULL != er_en) {
857      DK4_MEMCPY(er_en, &(tsp->er_en), sizeof(dk4_er_t));
858    }
859    if (NULL != er_pr) {
860      DK4_MEMCPY(er_pr, &(tsp->er_pr), sizeof(dk4_er_t));
861    }
862  }
863}
864
865
866
867