1%%	options
2
3copyright owner	=	Dirk Krause
4copyright year	=	2015-xxxx
5SPDX-License-Identifier:	BSD-3-Clause
6
7
8%%	header
9
10/**	@file
11	Text stream processing for 16 bit characters.
12
13	CRT on Windows: Optional.
14*/
15
16#ifndef DK4CONF_H_INCLUDED
17#if DK4_BUILDING_DKTOOLS4
18#include "dk4conf.h"
19#else
20#include <dktools-4/dk4conf.h>
21#endif
22#endif
23
24#ifndef DK4TYPES_H_INCLUDED
25#if DK4_BUILDING_DKTOOLS4
26#include <libdk4base/dk4types.h>
27#else
28#include <dktools-4/dk4types.h>
29#endif
30#endif
31
32#ifndef DK4ERROR_H_INCLUDED
33#if DK4_BUILDING_DKTOOLS4
34#include <libdk4base/dk4error.h>
35#else
36#include <dktools-4/dk4error.h>
37#endif
38#endif
39
40#ifndef DK4BOM_H_INCLUDED
41#if DK4_BUILDING_DKTOOLS4
42#include <libdk4c/dk4bom.h>
43#else
44#include <dktools-4/dk4bom.h>
45#endif
46#endif
47
48#ifndef DK4TSP_H_INCLUDED
49#if DK4_BUILDING_DKTOOLS4
50#include <libdk4c/dk4tsp.h>
51#else
52#include <dktools-4/dk4tsp.h>
53#endif
54#endif
55
56#ifndef DK4UTF8_H_INCLUDED
57#if DK4_BUILDING_DKTOOLS4
58#include <libdk4c/dk4utf8.h>
59#else
60#include <dktools-4/dk4utf8.h>
61#endif
62#endif
63
64#ifndef DK4UTF16_H_INCLUDED
65#if DK4_BUILDING_DKTOOLS4
66#include <libdk4c/dk4utf16.h>
67#else
68#include <dktools-4/dk4utf16.h>
69#endif
70#endif
71
72#ifndef DK4C32_H_INCLUDED
73#if DK4_BUILDING_DKTOOLS4
74#include <libdk4c/dk4c32.h>
75#else
76#include <dktools-4/dk4c32.h>
77#endif
78#endif
79
/**	Handler function for single characters.
	In character mode the text stream processor invokes this function
	once for each 16 bit character produced from the input.
	An input character which needs two 16 bit units results in
	two consecutive calls.
	@param	obj	Object to modify while processing the character.
	@param	c	Character to process.
	@param	pos	Current position in file or data stream.
	@param	erp	Error report, may be NULL.
	@return	DK4_TSP_RES_OK		if the character was processed
					successfully,
		DK4_TSP_RES_ERROR	if there was an error but we can
					continue,
		DK4_TSP_RES_FATAL	if there was a fatal error so we
					should abort processing.
*/
typedef int	dk4_c16_handler_t(
  void				*obj,
  dk4_c16_t 			 c,
  dk4_text_stream_position_t	*pos,
  dk4_er_t			*erp
);
98
/**	Handler function for text lines.
	@param	obj	Object to modify while processing the line.
	@param	line	Text line to process, terminated by a 0 character.
			A line completed by a newline contains the newline
			as last character before the terminator.
	@param	lineno	Current line number.
	@param	erp	Error report, may be NULL.
	@return	DK4_TSP_RES_OK		if the line was processed
					successfully,
		DK4_TSP_RES_ERROR	if there was an error but we can
					continue,
		DK4_TSP_RES_FATAL	if there was a fatal error so we
					should abort processing.
*/
typedef int	dk4_c16_line_handler_t(
  void		*obj,
  dk4_c16_t	*line,
  dk4_um_t	 lineno,
  dk4_er_t	*erp
);
117
/**	Structure for 16 bit character processing.
	Set up by dk4tsp16_setup_char() or dk4tsp16_setup_line(),
	the handler union holds the function matching the chosen mode.
*/
typedef struct {
  union {
    dk4_utf8_decoder_t		 u08;	/**< UTF-8 decoder. */
    dk4_utf16_byte_decoder_t	 u16;	/**< UTF-16 decoder. */
    dk4_c32_byte_decoder_t	 c32;	/**< 32 bit char decoder. */
  } dec;				/**< Input decoder. */
  dk4_bom_detector_t		 bomd;	/**< BOM detector. */
  dk4_text_stream_position_t	 pos;	/**< Current position. */
  dk4_er_t			 er_en;	/**< Errors in input decoding. */
  dk4_er_t			 er_pr;	/**< Errors in processing. */
  union {
    dk4_c16_line_handler_t	*lh;	/**< Handler function for lines. */
    dk4_c16_handler_t		*ch;	/**< Handler function for char. */
  } fct;				/**< Handler function. */
  dk4_c16_t			*inbuf;	/**< Buffer for input line. */
  void				*obj;	/**< Object to modify in processing. */
  size_t			 in_sz;	/**< Size of input line buffer (number of dk4_c16_t). */
  size_t			 in_us;	/**< Number of dk4_c16_t used in input line buffer. */
  int				 ief;	/**< Input encoding found. */
  int				 iee;	/**< Input encoding expected. */
  int				 pst;	/**< Processing stage: 0=BOM detection, 1=normal processing, 2=fatal error, input is skipped. */
} dk4_tsp16_t;
142
143
144
145#ifdef __cplusplus
146extern "C" {
147#endif
148
/**	Set up processor for character by character processing.
	The handler function is invoked for each 16 bit character
	obtained from the input bytes.
	@param	tsp	Processor to set up.
	@param	obj	Object to modify when processing input, may be NULL.
	@param	fct	Handler function to call for each character.
	@param	eie	Expected input encoding.
	@param	erp	Error report, may be NULL.
	@return	1 on success, 0 on error (tsp or fct is NULL).
*/
int
dk4tsp16_setup_char(
  dk4_tsp16_t		*tsp,
  void			*obj,
  dk4_c16_handler_t	*fct,
  int			 eie,
  dk4_er_t		*erp
);
165
/**	Set up processor for line processing.
	Characters are collected in the input line buffer, the handler
	function is invoked for each completed line.
	@param	tsp	Processor to set up.
	@param	obj	Object to modify when processing input, may be NULL.
	@param	fct	Handler function to invoke for each line.
	@param	inbuf	Input line buffer.
	@param	szin	Size of input line buffer (number of dk4_c16_t).
	@param	eie	Expected input encoding.
	@param	erp	Error report, may be NULL.
	@return	1 on success, 0 on error (tsp, fct or inbuf is NULL
		or szin is 0).
*/
int
dk4tsp16_setup_line(
  dk4_tsp16_t			*tsp,
  void				*obj,
  dk4_c16_line_handler_t	*fct,
  dk4_c16_t			*inbuf,
  size_t			 szin,
  int				 eie,
  dk4_er_t			*erp
);
186
/**	Add one single byte.
	@param	tsp	Text stream processor.
	@param	inbyte	Byte to process.
	@return	DK4_TSP_RES_OK		if the byte was processed
					successfully,
		DK4_TSP_RES_ERROR	if there was an error but we can
					continue,
		DK4_TSP_RES_FATAL	if there was a fatal error so we
					should abort processing.
*/
int
dk4tsp16_add_one_byte(
  dk4_tsp16_t	*tsp,
  unsigned char	 inbyte
);
202
/**	Add multiple bytes.
	@param	tsp	Text stream processor.
	@param	buffer	Buffer start address.
	@param	sz	Number of bytes in buffer.
	@return	DK4_TSP_RES_OK		if all bytes were processed
					successfully,
		DK4_TSP_RES_ERROR	if there was an error but we can
					continue,
		DK4_TSP_RES_FATAL	if there was a fatal error so we
					should abort processing.
*/
int
dk4tsp16_add_bytes(
  dk4_tsp16_t		*tsp,
  const unsigned char	*buffer,
  size_t		 sz
);
220
/**	Finish processing.
	Bytes still held back by the BOM detector are processed and,
	in line mode, a final line without trailing newline is passed
	to the line handler.
	@param	tsp	Text stream processor.
	@return	DK4_TSP_RES_OK		if processing was finished
					successfully,
		DK4_TSP_RES_ERROR	if there was an error,
		DK4_TSP_RES_FATAL	if there was a fatal error.
*/
int
dk4tsp16_finish(dk4_tsp16_t *tsp);
230
/**	Retrieve error reports for encoding/decoding and processing.
	Each report is copied only if the corresponding destination
	pointer is not NULL.
	@param	er_en	Destination error report buffer for encoding/decoding,
			may be NULL.
	@param	er_pr	Destination error report buffer for processing,
			may be NULL.
	@param	tsp	Text stream processor to retrieve errors from.
*/
void
dk4tsp16_get_errors(dk4_er_t *er_en, dk4_er_t *er_pr, dk4_tsp16_t const *tsp);
238
239#ifdef __cplusplus
240}
241#endif
242
243
244
245%%	module
246
247#include "dk4conf.h"
248#include <libdk4c/dk4tsp16.h>
249#include <libdk4c/dk4enc.h>
250#include <libdk4base/dk4mem.h>
251#include <libdk4c/dk4ansi.h>
252#include <libdk4c/dk4utf8.h>
253#include <libdk4c/dk4utf16.h>
254#include <libdk4c/dk4c32.h>
255
256#if DK4_HAVE_ASSERT_H
257#ifndef	ASSERT_H_INCLUDED
258#include <assert.h>
259#define	ASSERT_H_INCLUDED 1
260#endif
261#endif
262
263
264$!trace-include
265
266
267
268/**	Initialize text stream processing structure.
269	@param	tsp	Text stream processor.
270	@param	eie	Expected input encoding.
271*/
272static
273void
274dk4tsp16_init(dk4_tsp16_t *tsp, int eie)
275{
276#if	DK4_USE_ASSERT
277	assert(NULL != tsp);
278#endif
279  DK4_MEMRES(tsp, sizeof(dk4_tsp16_t));
280  dk4bom_detect_init(&(tsp->bomd), eie);
281  dk4error_init(&(tsp->er_en));
282  dk4error_init(&(tsp->er_pr));
283  tsp->inbuf = NULL;
284  tsp->obj = NULL;
285  tsp->in_sz = 0;
286  tsp->in_us = 0;
287  tsp->ief = eie;
288  tsp->iee = eie;
289  tsp->pst = 0;
290  (tsp->pos).bytes  = (dk4_um_t)0UL;
291  (tsp->pos).chars  = (dk4_um_t)1UL;
292  (tsp->pos).lineno = (dk4_um_t)1UL;
293  (tsp->pos).charil = (dk4_um_t)1UL;
294}
295
296
297
298/**	Initialize decoder for found input encoding.
299	@param	tsp	Text stream processor.
300*/
301static
302void
303dk4tsp16_initialize_decoder(dk4_tsp16_t *tsp)
304{
305#if	DK4_USE_ASSERT
306	assert(NULL != tsp);
307#endif
308  switch (tsp->ief) {
309    case DK4_FILE_ENCODING_UTF8: {
310      dk4utf8_init(&((tsp->dec).u08));
311    } break;
312    case DK4_FILE_ENCODING_UTF16_LE: {
313      dk4utf16_byte_init(&((tsp->dec).u16), 0);
314    } break;
315    case DK4_FILE_ENCODING_UTF16_BE: {
316      dk4utf16_byte_init(&((tsp->dec).u16), 1);
317    } break;
318    case DK4_FILE_ENCODING_32_LE: {
319      dk4c32_decoder_init(&((tsp->dec).c32), 0);
320    } break;
321    case DK4_FILE_ENCODING_32_BE: {
322      dk4c32_decoder_init(&((tsp->dec).c32), 1);
323    } break;
324  }
325}
326
327
328
/*	Disabled code, excluded from compilation by the #if 0 below.
	This function processes one single 16 bit character at a time.
	The active code path in dk4tsp16_process_byte() calls
	dk4tsp16_process_group() instead.
*/
#if 0
static
int
dk4tsp16_process_character(
  dk4_tsp16_t	*tsp,
  dk4_c16_t	 chr
)
{
  int		 back	=	DK4_TSP_RES_FATAL;
  $? "+ dk4tsp16_process_character %04x", (unsigned)chr
  /* Increase position */
  (tsp->pos).chars  += (dk4_um_t)1UL;
  (tsp->pos).charil += (dk4_um_t)1UL;
  /* Check for line buffering or direct processing */
  if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) {
    $? ". line buffering"
    if (tsp->in_us < tsp->in_sz) {
      /* Append character to line buffer */
      (tsp->inbuf)[tsp->in_us] = chr;
      tsp->in_us += 1;
      back = DK4_TSP_RES_OK;
      if ((dk4_c16_t)'\n' == chr) {
        /* Newline: terminate line and pass it to the line handler */
        back = DK4_TSP_RES_FATAL;
        if (tsp->in_us < tsp->in_sz) {
	  (tsp->inbuf)[tsp->in_us] = (dk4_c16_t)'\0';
	  back = (*((tsp->fct).lh))(
	    tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr)
	  );
	} else {
	  /* No room left for the terminator */
	  dk4error_set_with_position(
	    &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
	    (tsp->pos).bytes, (tsp->pos).lineno,
	    (tsp->pos).chars, (tsp->pos).charil
	  );
	}
	tsp->in_us = 0;
      }
    } else {
      /* No room left for the character */
      dk4error_set_with_position(
        &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
        (tsp->pos).bytes, (tsp->pos).lineno,
        (tsp->pos).chars, (tsp->pos).charil
      );
    }
  } else {
    $? ". direct processing"
    if (NULL != (tsp->fct).ch) {
      back = (*((tsp->fct).ch))(tsp->obj, chr, &(tsp->pos), &(tsp->er_pr));
    }
  }
  /* Increase line number for newline characters */
  if ((dk4_c16_t)('\n') == chr) {
    (tsp->pos).lineno += (dk4_um_t)1UL;
    (tsp->pos).charil  = (dk4_um_t)1UL;
  } $? "- dk4tsp16_process_character %d", back
  return back;
}
#endif
386
387
388
389/**	Process a group of 16 bit characters, the group represents
390	one 32 bit character.
391	@param	tsp	Text stream processor.
392	@param	buf	Buffer of 16 bit characters.
393	@param	sz	Number of 16 bit characters.
394	@return	DK4_TSP_RES_OK		if the characters were processed
395					successfully,
396		DK4_TSP_RES_ERROR	if there was an error but we can
397					continue,
398		DK4_TSP_RES_FATAL	if there was a fatal error so
399					we should abort processing.
400*/
401static
402int
403dk4tsp16_process_group(dk4_tsp16_t *tsp, dk4_c16_t *buf, size_t sz)
404{
405  size_t	 i	= 	0;
406  int		 back	=	DK4_TSP_RES_FATAL;
407  int		 res	=	DK4_TSP_RES_FATAL;
408  $? "+ dk4tsp16_process_group"
409#if	DK4_USE_ASSERT
410	assert(NULL != tsp);
411	assert(NULL != buf);
412	assert(0 < sz);
413#endif
414  /* Increase position */
415  (tsp->pos).chars  += (dk4_um_t)1UL;
416  (tsp->pos).charil += (dk4_um_t)1UL;
417  /* Check for line buffering or direct processing */
418  if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) {
419    $? ". line buffering"
420    if (sz < tsp->in_sz) {
421      if (tsp->in_us < (tsp->in_sz - sz)) {
422        /* Append group to line and increase used size */
423        DK4_MEMCPY(&((tsp->inbuf)[tsp->in_us]),buf,(sizeof(dk4_c16_t)*sz));
424	tsp->in_us += sz;
425	back = DK4_TSP_RES_OK;
426	/* On newline, process the line buffer */
427	if ((1 == sz) && ((dk4_c16_t)'\n' == buf[0])) {
428	  back = DK4_TSP_RES_FATAL;
429	  if (tsp->in_us < tsp->in_sz) {
430	    (tsp->inbuf)[tsp->in_us] = (dk4_c16_t)'\0';
431	    back = (*((tsp->fct).lh))(
432	      tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr)
433	    );
434	  } else {
435	    /* ERROR: Buffer too small */
436	    dk4error_set_with_position(
437	      &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
438	      (tsp->pos).bytes, (tsp->pos).lineno,
439	      (tsp->pos).chars, (tsp->pos).charil
440	    );
441	  }
442	  tsp->in_us = 0;
443	}
444      } else {
445        /* ERROR: Buffer too small */
446	back = DK4_TSP_RES_FATAL;
447	dk4error_set_with_position(
448	  &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
449	  (tsp->pos).bytes, (tsp->pos).lineno,
450	  (tsp->pos).chars, (tsp->pos).charil
451	);
452      }
453    } else {
454      /* ERROR: Buffer too small */
455      back = DK4_TSP_RES_FATAL;
456      dk4error_set_with_position(
457        &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
458        (tsp->pos).bytes, (tsp->pos).lineno,
459        (tsp->pos).chars, (tsp->pos).charil
460      );
461    }
462  } else {
463    $? ". direct char processing"
464    if (NULL != (tsp->fct).ch) {
465      back = DK4_TSP_RES_OK;
466      for (i = 0; i < sz; i++) {
467        res = (*((tsp->fct).ch))(tsp->obj, buf[i], &(tsp->pos), &(tsp->er_pr));
468	switch (res) {
469	  case DK4_TSP_RES_FATAL: {
470	    back = DK4_TSP_RES_FATAL;
471	  } break;
472	  case DK4_TSP_RES_ERROR: {
473	    if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; }
474	  } break;
475	}
476      }
477    } else {
478      dk4error_set_with_position(
479        &(tsp->er_en), DK4_E_INVALID_ARGUMENTS,
480        (tsp->pos).bytes, (tsp->pos).lineno,
481        (tsp->pos).chars, (tsp->pos).charil
482      );
483    }
484  }
485  /* If we have a newline, increase line number and reset char in line */
486  if (1 == sz) {
487    if ((dk4_c16_t)('\n') == buf[0]) {
488      (tsp->pos).lineno += (dk4_um_t)1UL;
489      (tsp->pos).charil  = (dk4_um_t)1UL;
490    }
491  }
492  $? "- dk4tsp16_process_group %d", back
493  return back;
494}
495
496
497
498/**	Normal processing for one byte.
499	Retrieve a 32 bit character first, either by decoding directly
500	or by adding to a decoder. In the next step encode the 32 bit
501	character in one or two 16 bit characters and process these.
502	@param	tsp	Text stream processor.
503	@param	inbyte	Byte to process.
504	@return	Operation result, one from DK4_TSP_RES_OK,
505	DK4_TSP_RES_ERROR or DK4_TSP_RES_FATAL.
506*/
507static
508int
509dk4tsp16_process_byte(dk4_tsp16_t *tsp, unsigned char inbyte)
510{
511  dk4_c16_t	 buf[8];
512  dk4_c32_t	 c32	=	dkC32(0);
513  size_t	 sz;
514  int		 back	=	DK4_TSP_RES_FATAL;
515  int		 cuc32	=	0;
516  int		 res	=	0;
517  /* Add byte to decoder, attempt to retrieve a 32 bit character */
518#if	DK4_USE_ASSERT
519	assert(NULL != tsp);
520#endif
521  switch (tsp->ief) {
522    case DK4_FILE_ENCODING_PLAIN: {
523      c32 = (dk4_c32_t)inbyte;
524      cuc32 = 1;
525    } break;
526    case DK4_FILE_ENCODING_WIN1252: {
527      if (0 != dk4ansi_decode(&c32, inbyte)) {
528        cuc32 = 1;
529      } else {
530	dk4error_set_with_position(
531	  &(tsp->er_en), DK4_E_DECODING_FAILED,
532	  (tsp->pos).bytes, (tsp->pos).lineno,
533	  (tsp->pos).chars, (tsp->pos).charil
534	);
535      }
536    } break;
537    case DK4_FILE_ENCODING_UTF8: {
538      res = dk4utf8_add(&((tsp->dec).u08), inbyte);
539      switch (res) {
540        case DK4_EDSTM_ERROR: {
541	  dk4error_set_with_position(
542	    &(tsp->er_en), DK4_E_DECODING_FAILED,
543	    (tsp->pos).bytes, (tsp->pos).lineno,
544	    (tsp->pos).chars, (tsp->pos).charil
545	  );
546	} break;
547	case DK4_EDSTM_FINISHED: {
548	  c32 = dk4utf8_get(&((tsp->dec).u08));
549	  cuc32 = 1;
550	  dk4utf8_init(&((tsp->dec).u08));
551	} break;
552	case DK4_EDSTM_ACCEPT: {
553	  back = DK4_TSP_RES_OK;
554	} break;
555      }
556    } break;
557    case DK4_FILE_ENCODING_UTF16_LE: {
558      res = dk4utf16_byte_add(&((tsp->dec).u16), inbyte);
559      switch (res) {
560        case DK4_EDSTM_ERROR: {
561	  dk4error_set_with_position(
562	    &(tsp->er_en), DK4_E_DECODING_FAILED,
563	    (tsp->pos).bytes, (tsp->pos).lineno,
564	    (tsp->pos).chars, (tsp->pos).charil
565	  );
566	} break;
567	case DK4_EDSTM_FINISHED: {
568	  c32 = dk4utf16_byte_get(&((tsp->dec).u16));
569	  cuc32 = 1;
570	  dk4utf16_byte_init(&((tsp->dec).u16), 0);
571	} break;
572	case DK4_EDSTM_ACCEPT: {
573	  back = DK4_TSP_RES_OK;
574	} break;
575      }
576    } break;
577    case DK4_FILE_ENCODING_UTF16_BE: {
578      res = dk4utf16_byte_add(&((tsp->dec).u16), inbyte);
579      switch (res) {
580        case DK4_EDSTM_ERROR: {
581	  dk4error_set_with_position(
582	    &(tsp->er_en), DK4_E_DECODING_FAILED,
583	    (tsp->pos).bytes, (tsp->pos).lineno,
584	    (tsp->pos).chars, (tsp->pos).charil
585	  );
586	} break;
587	case DK4_EDSTM_FINISHED: {
588	  c32 = dk4utf16_byte_get(&((tsp->dec).u16));
589	  cuc32 = 1;
590	  dk4utf16_byte_init(&((tsp->dec).u16), 1);
591	} break;
592	case DK4_EDSTM_ACCEPT: {
593	  back = DK4_TSP_RES_OK;
594	} break;
595      }
596    } break;
597    case DK4_FILE_ENCODING_32_LE: {
598      res = dk4c32_decoder_add(&((tsp->dec).c32), inbyte);
599      switch (res) {
600        case DK4_EDSTM_ERROR: {
601	  dk4error_set_with_position(
602	    &(tsp->er_en), DK4_E_DECODING_FAILED,
603	    (tsp->pos).bytes, (tsp->pos).lineno,
604	    (tsp->pos).chars, (tsp->pos).charil
605	  );
606	} break;
607	case DK4_EDSTM_FINISHED: {
608	  c32 = dk4c32_decoder_get(&((tsp->dec).c32));
609	  cuc32 = 1;
610	  dk4c32_decoder_init(&((tsp->dec).c32), 0);
611	} break;
612	case DK4_EDSTM_ACCEPT: {
613	  back = DK4_TSP_RES_OK;
614	} break;
615      }
616    } break;
617    case DK4_FILE_ENCODING_32_BE: {
618      res = dk4c32_decoder_add(&((tsp->dec).c32), inbyte);
619      switch (res) {
620        case DK4_EDSTM_ERROR: {
621	  dk4error_set_with_position(
622	    &(tsp->er_en), DK4_E_DECODING_FAILED,
623	    (tsp->pos).bytes, (tsp->pos).lineno,
624	    (tsp->pos).chars, (tsp->pos).charil
625	  );
626	} break;
627	case DK4_EDSTM_FINISHED: {
628	  c32 = dk4c32_decoder_get(&((tsp->dec).c32));
629	  cuc32 = 1;
630	  dk4c32_decoder_init(&((tsp->dec).c32), 1);
631	} break;
632	case DK4_EDSTM_ACCEPT: {
633	  back = DK4_TSP_RES_OK;
634	} break;
635      }
636    } break;
637  }
638  /* Process 32 bit character if we have a 32 bit char to process */
639  if (0 != cuc32) {
640    sz = DK4_SIZEOF(buf,dk4_c16_t);
641    if (0 != dk4utf16_encode(buf, &sz, c32, NULL)) {
642#if 0
643      back = DK4_TSP_RES_OK;
644      for (i = 0; ((i < sz) && (DK4_TSP_RES_FATAL != back)); i++) {
645        switch (dk4tsp16_process_character(tsp, buf[i])) {
646	  case DK4_TSP_RES_FATAL: {
647	    back = DK4_TSP_RES_FATAL;
648	  } break;
649	  case DK4_TSP_RES_ERROR: {
650	    if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; }
651	  } break;
652	}
653      }
654#else
655      back = dk4tsp16_process_group(tsp, buf, sz);
656#endif
657    } else {
658      dk4error_set_with_position(
659        &(tsp->er_en), DK4_E_ENCODING_FAILED,
660        (tsp->pos).bytes, (tsp->pos).lineno,
661        (tsp->pos).chars, (tsp->pos).charil
662      );
663    }
664  }
665  if (DK4_TSP_RES_FATAL == back) {
666    tsp->pst = 2;
667  }
668  return back;
669}
670
671
672
673/**	Add one byte to internal data structures
674	(BOM detection and/or normal processing).
675	@param	tsp	Text stream processor.
676	@param	inbyte	Byte to add.
677	@return	Operation result, one from DK4_TSP_RES_OK,
678	DK4_TSP_RES_ERROR or DK4_TSP_RES_FATAL.
679*/
680static
681int
682dk4tsp16_i_add_one_byte(
683  dk4_tsp16_t	*tsp,
684  unsigned char	 inbyte
685)
686{
687  size_t	 nrej;
688  size_t	 i;
689  int		 back	=	DK4_TSP_RES_FATAL;
690  int		 res;
691  unsigned char	 uc;
692  $? "+ dk4tsp16_i_add_one_byte"
693#if	DK4_USE_ASSERT
694	assert(NULL != tsp);
695#endif
696  (tsp->pos).bytes += (dk4_um_t)1UL;
697  switch (tsp->pst) {
698    case 0: {			$? ". bom detection"
699      res = dk4bom_detect_add(&(tsp->bomd), inbyte);
700      switch (res) {
701        case DK4_EDSTM_ACCEPT: {
702	  back = DK4_TSP_RES_OK;
703	} break;
704	case DK4_EDSTM_FINISHED: case DK4_EDSTM_FINISHED_WITH_UNUSED: {
705	  /* Result is acceptable */
706	  back = DK4_TSP_RES_OK;
707	  /* Retrieve found encoding */
708	  tsp->ief = dk4bom_detect_get_encoding(&(tsp->bomd));
709	  /* If necessary, initialize decoder */
710	  dk4tsp16_initialize_decoder(tsp);
711	  /* Switch to normal processing */
712	  tsp->pst = 1;
713	  /* Process bytes stored in BOM detector */
714	  if (DK4_EDSTM_FINISHED_WITH_UNUSED == res) {
715	    nrej = dk4bom_detect_num_unused_bytes(&(tsp->bomd));
716	    if (0 < nrej) {
717	      for (i = 0; ((i < nrej) && (DK4_TSP_RES_FATAL != back)); i++) {
718	        uc = dk4bom_detect_unused_byte(&(tsp->bomd), i);
719		switch (dk4tsp16_process_byte(tsp, uc)) {
720		  case DK4_TSP_RES_FATAL: {
721		    back = DK4_TSP_RES_FATAL;
722		  } break;
723		  case DK4_TSP_RES_ERROR: {
724		    if (DK4_TSP_RES_OK == back) {
725		      back = DK4_TSP_RES_ERROR;
726		    }
727		  } break;
728		}
729	      }
730	    }
731	  }
732	} break;
733      }
734    } break;
735    case 1: {			$? ". normal processing"
736      back = dk4tsp16_process_byte(tsp, inbyte);
737    } break;
738    /*
739    	Processing stage 2 indicates there was a serious error
740	before, we must skip further processing.
741	This situation is covered by the initialization value
742	DK4_TSP_RES_FATAL, so we do not need a case branch here.
743    */
744  }
745  $? "- dk4tsp16_i_add_one_byte %d", back
746  return back;
747}
748
749
750
751int
752dk4tsp16_setup_char(
753  dk4_tsp16_t		*tsp,
754  void			*obj,
755  dk4_c16_handler_t	*fct,
756  int			 eie,
757  dk4_er_t		*erp
758)
759{
760  int		 back	=	0;
761#if	DK4_USE_ASSERT
762	assert(NULL != tsp);
763#endif
764  if (NULL != tsp) {
765    dk4tsp16_init(tsp, eie);
766    if (NULL != fct) {
767      tsp->obj = obj;
768      (tsp->fct).ch = fct;
769      tsp->iee = eie;
770      tsp->ief = eie;
771      tsp->pst = 0;
772      back = 1;
773    } else {
774      dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
775      tsp->pst = 2;
776    }
777  } else {
778    dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
779  }
780  return back;
781}
782
783
784
785int
786dk4tsp16_setup_line(
787  dk4_tsp16_t			*tsp,
788  void				*obj,
789  dk4_c16_line_handler_t	*fct,
790  dk4_c16_t			*inbuf,
791  size_t			 szin,
792  int				 eie,
793  dk4_er_t			*erp
794)
795{
796  int		 back	=	0;
797#if	DK4_USE_ASSERT
798	assert(NULL != tsp);
799	assert(NULL != inbuf);
800	assert(0 < szin);
801#endif
802  if (NULL != tsp) {
803    dk4tsp16_init(tsp, eie);
804    if ((NULL != fct) && (NULL != inbuf) && (0 < szin)) {
805      tsp->obj = obj;
806      (tsp->fct).lh = fct;
807      tsp->inbuf = inbuf;
808      tsp->in_sz = szin;
809      tsp->in_us = 0;
810      tsp->iee = eie;
811      tsp->ief = eie;
812      tsp->pst = 0;
813      back = 1;
814    } else {
815      dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
816      tsp->pst = 2;
817    }
818  } else {
819    dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
820  }
821  return back;
822}
823
824
825
826int
827dk4tsp16_add_one_byte(
828  dk4_tsp16_t	*tsp,
829  unsigned char	 inbyte
830)
831{
832  int		 back	=	DK4_TSP_RES_FATAL;
833#if	DK4_USE_ASSERT
834	assert(NULL != tsp);
835#endif
836  if (NULL != tsp) {
837    if (2 > tsp->pst) {
838      back = dk4tsp16_i_add_one_byte(tsp, inbyte);
839    }
840  }
841  return back;
842}
843
844
845
846int
847dk4tsp16_add_bytes(
848  dk4_tsp16_t		*tsp,
849  const unsigned char	*buffer,
850  size_t		 sz
851)
852{
853  int		back	=	DK4_TSP_RES_FATAL;	/* Function result */
854  int		res;					/* Processing result */
855#if	DK4_USE_ASSERT
856	assert(NULL != tsp);
857	assert(NULL != buffer);
858	assert(0 < sz);
859#endif
860  if ((NULL != tsp) && (NULL != buffer) && (0 < sz)) {
861    if (2 > tsp->pst) {
862      back = DK4_TSP_RES_OK;
863      while ((sz--) && (2 > tsp->pst)) {
864        res = dk4tsp16_i_add_one_byte(tsp, *(buffer++));
865	switch (res) {
866	  case DK4_TSP_RES_FATAL: {
867	    back = DK4_TSP_RES_FATAL;
868	  } break;
869	  case DK4_TSP_RES_ERROR: {
870	    if (DK4_TSP_RES_OK == back) {
871	      back = DK4_TSP_RES_ERROR;
872	    }
873	  } break;
874	}
875      }
876    }
877  }
878  return back;
879}
880
881
882
883int
884dk4tsp16_finish(dk4_tsp16_t *tsp)
885{
886  size_t	 nrej =	0;			/* Number of unused bytes */
887  size_t	 i    =	0;			/* Current unused byte index */
888  int		 back =	DK4_TSP_RES_FATAL;	/* Function result */
889  int		 res;				/* Operation result */
890  unsigned char	 uc;				/* Current unused byte */
891#if	DK4_USE_ASSERT
892	assert(NULL != tsp);
893#endif
894  if (NULL != tsp) {
895    /*	Flush all unprocessed output
896	1. If there are unprocessed bytes in the BOM detector as BOM
897	   detection was not completed, process these bytes.
898	2. If there are characters in the line buffer,
899	   finalize the line buffer text and process it.
900    */
901    back = DK4_TSP_RES_OK;
902    /* Retrieve an process bytes stored in BOM detector */
903    if (0 == tsp->pst) {
904      tsp->pst = 1;
905      nrej = dk4bom_detect_num_unused_bytes(&(tsp->bomd));
906      if (0 < nrej) {
907        dk4tsp16_initialize_decoder(tsp);
908	for (i = 0; ((i < nrej) && (2 > tsp->pst)); i++) {
909	  uc = dk4bom_detect_unused_byte(&(tsp->bomd), i);
910	  switch (dk4tsp16_process_byte(tsp, uc)) {
911	    case DK4_TSP_RES_FATAL: {
912	      back = DK4_TSP_RES_FATAL;
913	      tsp->pst = 2;
914	    } break;
915	    case DK4_TSP_RES_ERROR: {
916	      if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; }
917	    } break;
918	  }
919	}
920      }
921    }
922    /* Process final line */
923    if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) {
924      if (0 < tsp->in_us) {
925        if (2 > tsp->pst) {
926	  if (tsp->in_us < tsp->in_sz) {
927	    (tsp->inbuf)[tsp->in_us] = (dk4_c16_t)'\0';
928	    res =
929	    (*((tsp->fct).lh))(
930	      tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr)
931	    );
932	    switch (res) {
933	      case DK4_TSP_RES_FATAL: {
934	        back = DK4_TSP_RES_FATAL;
935		tsp->pst = 2;
936	      } break;
937	      case DK4_TSP_RES_ERROR: {
938	        if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; }
939	      } break;
940	    }
941	  } else {
942            back = DK4_TSP_RES_FATAL;
943	    tsp->pst = 2;
944            dk4error_set_with_position(
945              &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
946              (tsp->pos).bytes, (tsp->pos).lineno,
947              (tsp->pos).chars, (tsp->pos).charil
948            );
949	  }
950        }
951      }
952    }
953  }
954  return back;
955}
956
957
958
959void
960dk4tsp16_get_errors(dk4_er_t *er_en, dk4_er_t *er_pr, dk4_tsp16_t const *tsp)
961{
962#if	DK4_USE_ASSERT
963	assert(NULL != tsp);
964#endif
965  if (NULL != tsp) {
966    if (NULL != er_en) {
967      DK4_MEMCPY(er_en, &(tsp->er_en), sizeof(dk4_er_t));
968    }
969    if (NULL != er_pr) {
970      DK4_MEMCPY(er_pr, &(tsp->er_pr), sizeof(dk4_er_t));
971    }
972  }
973}
974
975
976