1 /*
2 Copyright (C) 2015-2021, Dirk Krause
3 SPDX-License-Identifier: BSD-3-Clause
4 */
5
6 /*
7 WARNING: This file was generated by the dkct program (see
8 http://dktools.sourceforge.net/ for details).
9 Changes you make here will be lost if dkct is run again!
10 You should modify the original source and run dkct on it.
11 Original source: dk4tsp32.ctr
12 */
13
14 /** @file dk4tsp32.c The dk4tsp32 module.
15 */
16
17
18 #include "dk4conf.h"
19 #include <libdk4c/dk4tsp32.h>
20 #include <libdk4c/dk4enc.h>
21 #include <libdk4base/dk4mem.h>
22 #include <libdk4c/dk4ansi.h>
23 #include <libdk4c/dk4utf8.h>
24 #include <libdk4c/dk4utf16.h>
25 #include <libdk4c/dk4c32.h>
26
27 #if DK4_HAVE_ASSERT_H
28 #ifndef ASSERT_H_INCLUDED
29 #include <assert.h>
30 #define ASSERT_H_INCLUDED 1
31 #endif
32 #endif
33
34
35
36
37
38
39 /** Initialize text stream processing structure.
40 @param tsp Text stream processor.
41 @param eie Expected input encoding.
42 */
43 static
44 void
dk4tsp32_init(dk4_tsp32_t * tsp,int eie)45 dk4tsp32_init(dk4_tsp32_t *tsp, int eie)
46 {
47 #if DK4_USE_ASSERT
48 assert(NULL != tsp);
49 #endif
50 DK4_MEMRES(tsp, sizeof(dk4_tsp32_t));
51 dk4bom_detect_init(&(tsp->bomd), eie);
52 dk4error_init(&(tsp->er_en));
53 dk4error_init(&(tsp->er_pr));
54 tsp->inbuf = NULL;
55 tsp->obj = NULL;
56 tsp->in_sz = 0;
57 tsp->in_us = 0;
58 tsp->ief = eie;
59 tsp->iee = eie;
60 tsp->pst = 0;
61 (tsp->pos).bytes = (dk4_um_t)0UL;
62 (tsp->pos).chars = (dk4_um_t)1UL;
63 (tsp->pos).lineno = (dk4_um_t)1UL;
64 (tsp->pos).charil = (dk4_um_t)1UL;
65 }
66
67
68
69 /** Process a 32 bit character.
70 @param tsp Text stream processor.
71 @param chr Character to process.
72 @return DK4_TSP_RES_OK if the characters were processed
73 successfully,
74 DK4_TSP_RES_ERROR if there was an error but we can
75 continue,
76 DK4_TSP_RES_FATAL if there was a fatal error so
77 we should abort processing.
78 */
79 static
80 int
dk4tsp32_process_character(dk4_tsp32_t * tsp,dk4_c32_t chr)81 dk4tsp32_process_character(
82 dk4_tsp32_t *tsp,
83 dk4_c32_t chr
84 )
85 {
86 int back = DK4_TSP_RES_FATAL;
87
88 #if DK4_USE_ASSERT
89 assert(NULL != tsp);
90 #endif
91 /* Increase position */
92 (tsp->pos).chars += (dk4_um_t)1UL;
93 (tsp->pos).charil += (dk4_um_t)1UL;
94 /* Check for line buffering or direct processing */
95 if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) {
96
97 if (tsp->in_us < tsp->in_sz) {
98 (tsp->inbuf)[tsp->in_us] = chr;
99 tsp->in_us += 1;
100 back = DK4_TSP_RES_OK;
101 if ((dk4_c32_t)'\n' == chr) {
102 back = DK4_TSP_RES_FATAL;
103 if (tsp->in_us < tsp->in_sz) {
104 (tsp->inbuf)[tsp->in_us] = (dk4_c32_t)0UL;
105 back = (*((tsp->fct).lh))(
106 tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr)
107 );
108 } else {
109 dk4error_set_with_position(
110 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
111 (tsp->pos).bytes, (tsp->pos).lineno,
112 (tsp->pos).chars, (tsp->pos).charil
113 );
114 }
115 /* 2015-07-18
116 Bugfix: We must reset the number of used characters to 0
117 after processing and flushing the line buffer.
118 */
119 tsp->in_us = 0;
120 }
121 } else {
122 dk4error_set_with_position(
123 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
124 (tsp->pos).bytes, (tsp->pos).lineno,
125 (tsp->pos).chars, (tsp->pos).charil
126 );
127 }
128 } else {
129
130 if (NULL != (tsp->fct).ch) {
131 back = (*((tsp->fct).ch))(tsp->obj, chr, &(tsp->pos), &(tsp->er_pr));
132 }
133 }
134 if ((dk4_c32_t)('\n') == chr) {
135 (tsp->pos).lineno += (dk4_um_t)1UL;
136 (tsp->pos).charil = (dk4_um_t)1UL;
137 }
138
139 return back;
140 }
141
142
143
144 /** Normal processing for one byte.
145 Retrieve a 32 bit character first, either by decoding directly
146 or by adding to a decoder.
147 @param tsp Text stream processor.
148 @param inbyte Byte to process.
149 @return Operation result, one from DK4_TSP_RES_OK,
150 DK4_TSP_RES_ERROR or DK4_TSP_RES_FATAL.
151 */
152 static
153 int
dk4tsp32_process_byte(dk4_tsp32_t * tsp,unsigned char inbyte)154 dk4tsp32_process_byte(
155 dk4_tsp32_t *tsp,
156 unsigned char inbyte
157 )
158 {
159 dk4_c32_t c32 = (dk4_c32_t)0UL; /* 32 bit character */
160 int back = DK4_TSP_RES_FATAL; /* Function result */
161 int cuc32 = 0; /* Flag: Have 32 bit char */
162 int res = 0; /* Operation result */
163 #if DK4_USE_ASSERT
164 assert(NULL != tsp);
165 #endif
166 switch (tsp->ief) {
167 case DK4_FILE_ENCODING_PLAIN: {
168 c32 = (dk4_c32_t)inbyte;
169 cuc32 = 1;
170 } break;
171 case DK4_FILE_ENCODING_WIN1252: {
172 if (0 != dk4ansi_decode(&c32, inbyte)) {
173 cuc32 = 1;
174 } else {
175 dk4error_set_with_position(
176 &(tsp->er_en), DK4_E_DECODING_FAILED,
177 (tsp->pos).bytes, (tsp->pos).lineno,
178 (tsp->pos).chars, (tsp->pos).charil
179 );
180 }
181 } break;
182 case DK4_FILE_ENCODING_UTF8: {
183 res = dk4utf8_add(&((tsp->dec).u08), inbyte);
184 switch (res) {
185 case DK4_EDSTM_ERROR: {
186 dk4error_set_with_position(
187 &(tsp->er_en), DK4_E_DECODING_FAILED,
188 (tsp->pos).bytes, (tsp->pos).lineno,
189 (tsp->pos).chars, (tsp->pos).charil
190 );
191 } break;
192 case DK4_EDSTM_FINISHED: {
193 c32 = dk4utf8_get(&((tsp->dec).u08));
194 cuc32 = 1;
195 dk4utf8_init(&((tsp->dec).u08));
196 } break;
197 case DK4_EDSTM_ACCEPT: {
198 back = DK4_TSP_RES_OK;
199 } break;
200 }
201 } break;
202 case DK4_FILE_ENCODING_UTF16_LE: {
203 res = dk4utf16_byte_add(&((tsp->dec).u16), inbyte);
204 switch (res) {
205 case DK4_EDSTM_ERROR: {
206 dk4error_set_with_position(
207 &(tsp->er_en), DK4_E_DECODING_FAILED,
208 (tsp->pos).bytes, (tsp->pos).lineno,
209 (tsp->pos).chars, (tsp->pos).charil
210 );
211 } break;
212 case DK4_EDSTM_FINISHED: {
213 c32 = dk4utf16_byte_get(&((tsp->dec).u16));
214 cuc32 = 1;
215 dk4utf16_byte_init(&((tsp->dec).u16), 0);
216 } break;
217 case DK4_EDSTM_ACCEPT: {
218 back = DK4_TSP_RES_OK;
219 } break;
220 }
221 } break;
222 case DK4_FILE_ENCODING_UTF16_BE: {
223 res = dk4utf16_byte_add(&((tsp->dec).u16), inbyte);
224 switch (res) {
225 case DK4_EDSTM_ERROR: {
226 dk4error_set_with_position(
227 &(tsp->er_en), DK4_E_DECODING_FAILED,
228 (tsp->pos).bytes, (tsp->pos).lineno,
229 (tsp->pos).chars, (tsp->pos).charil
230 );
231 } break;
232 case DK4_EDSTM_FINISHED: {
233 c32 = dk4utf16_byte_get(&((tsp->dec).u16));
234 cuc32 = 1;
235 dk4utf16_byte_init(&((tsp->dec).u16), 1);
236 } break;
237 case DK4_EDSTM_ACCEPT: {
238 back = DK4_TSP_RES_OK;
239 } break;
240 }
241 } break;
242 case DK4_FILE_ENCODING_32_LE: {
243 res = dk4c32_decoder_add(&((tsp->dec).c32), inbyte);
244 switch (res) {
245 case DK4_EDSTM_ERROR: {
246 dk4error_set_with_position(
247 &(tsp->er_en), DK4_E_DECODING_FAILED,
248 (tsp->pos).bytes, (tsp->pos).lineno,
249 (tsp->pos).chars, (tsp->pos).charil
250 );
251 } break;
252 case DK4_EDSTM_FINISHED: {
253 c32 = dk4c32_decoder_get(&((tsp->dec).c32));
254 cuc32 = 1;
255 dk4c32_decoder_init(&((tsp->dec).c32), 0);
256 } break;
257 case DK4_EDSTM_ACCEPT: {
258 back = DK4_TSP_RES_OK;
259 } break;
260 }
261 } break;
262 case DK4_FILE_ENCODING_32_BE: {
263 res = dk4c32_decoder_add(&((tsp->dec).c32), inbyte);
264 switch (res) {
265 case DK4_EDSTM_ERROR: {
266 dk4error_set_with_position(
267 &(tsp->er_en), DK4_E_DECODING_FAILED,
268 (tsp->pos).bytes, (tsp->pos).lineno,
269 (tsp->pos).chars, (tsp->pos).charil
270 );
271 } break;
272 case DK4_EDSTM_FINISHED: {
273 c32 = dk4c32_decoder_get(&((tsp->dec).c32));
274 cuc32 = 1;
275 dk4c32_decoder_init(&((tsp->dec).c32), 1);
276 } break;
277 case DK4_EDSTM_ACCEPT: {
278 back = DK4_TSP_RES_OK;
279 } break;
280 }
281 } break;
282 }
283 if (0 != cuc32) {
284 back = dk4tsp32_process_character(tsp, c32);
285 }
286 if (DK4_TSP_RES_FATAL == back) {
287 tsp->pst = 2;
288 }
289 return back;
290 }
291
292
293
294 /** Initialize decoder for found input encoding.
295 @param tsp Text stream processor.
296 */
297 static
298 void
dk4tsp32_initialize_decoder(dk4_tsp32_t * tsp)299 dk4tsp32_initialize_decoder(dk4_tsp32_t *tsp)
300 {
301 #if DK4_USE_ASSERT
302 assert(NULL != tsp);
303 #endif
304 switch (tsp->ief) {
305 case DK4_FILE_ENCODING_UTF8: {
306 dk4utf8_init(&((tsp->dec).u08));
307 } break;
308 case DK4_FILE_ENCODING_UTF16_LE: {
309 dk4utf16_byte_init(&((tsp->dec).u16), 0);
310 } break;
311 case DK4_FILE_ENCODING_UTF16_BE: {
312 dk4utf16_byte_init(&((tsp->dec).u16), 1);
313 } break;
314 case DK4_FILE_ENCODING_32_LE: {
315 dk4c32_decoder_init(&((tsp->dec).c32), 0);
316 } break;
317 case DK4_FILE_ENCODING_32_BE: {
318 dk4c32_decoder_init(&((tsp->dec).c32), 1);
319 } break;
320 }
321 }
322
323
324
325 #if TRACE_DEBUG
326 static unsigned long byte_number = 0UL;
327 #endif
328
329 /** Add one single byte.
330 @param tsp Text stream processor.
331 @param inbyte Byte to process.
332 @return DK4_TSP_RES_OK if the character was processed
333 successfully,
334 DK4_TSP_RES_ERROR if there was an error but we can
335 continue,
336 DK4_TSP_RES_FATAL if there was a fata error so we
337 should abort processing.
338 */
339 static
340 int
dk4tsp32_i_add_one_byte(dk4_tsp32_t * tsp,unsigned char inbyte)341 dk4tsp32_i_add_one_byte(
342 dk4_tsp32_t *tsp,
343 unsigned char inbyte
344 )
345 {
346 size_t nrej; /* Number of unused bytes */
347 size_t i; /* Current unused byte index */
348 int back = DK4_TSP_RES_FATAL; /* Function result */
349 int res; /* Operation result */
350 unsigned char uc; /* Current unused byte */
351
352 #if DK4_USE_ASSERT
353 assert(NULL != tsp);
354 #endif
355 (tsp->pos).bytes += (dk4_um_t)1UL;
356 switch (tsp->pst) {
357 case 0: {
358 res = dk4bom_detect_add(&(tsp->bomd), inbyte);
359 switch (res) {
360 case DK4_EDSTM_ACCEPT: {
361 back = DK4_TSP_RES_OK;
362 } break;
363 case DK4_EDSTM_FINISHED: case DK4_EDSTM_FINISHED_WITH_UNUSED: {
364 back = DK4_TSP_RES_OK;
365 tsp->ief = dk4bom_detect_get_encoding(&(tsp->bomd));
366 dk4tsp32_initialize_decoder(tsp);
367 tsp->pst = 1;
368 if (DK4_EDSTM_FINISHED_WITH_UNUSED == res) {
369 nrej = dk4bom_detect_num_unused_bytes(&(tsp->bomd));
370 if (0 < nrej) {
371 for (i = 0; ((i < nrej) && (DK4_TSP_RES_FATAL != back)); i++) {
372 uc = dk4bom_detect_unused_byte(&(tsp->bomd), i);
373 switch (dk4tsp32_process_byte(tsp, uc)) {
374 case DK4_TSP_RES_FATAL: {
375 back = DK4_TSP_RES_FATAL;
376 } break;
377 case DK4_TSP_RES_ERROR: {
378 if (DK4_TSP_RES_OK == back) {
379 back = DK4_TSP_RES_ERROR;
380 }
381 } break;
382 }
383 }
384 }
385 }
386 } break;
387 }
388 } break;
389 case 1: {
390 back = dk4tsp32_process_byte(tsp, inbyte);
391 } break;
392 /*
393 Processing stage 2 indicates there was a serious error
394 before, we must skip further processing.
395 This situation is covered by the initialization value
396 DK4_TSP_RES_FATAL, so we do not need a case branch here.
397 */
398 }
399
400 return back;
401 }
402
403
404
405 int
dk4tsp32_setup_char(dk4_tsp32_t * tsp,void * obj,dk4_c32_handler_t * fct,int eie,dk4_er_t * erp)406 dk4tsp32_setup_char(
407 dk4_tsp32_t *tsp,
408 void *obj,
409 dk4_c32_handler_t *fct,
410 int eie,
411 dk4_er_t *erp
412 )
413 {
414 int back = 0;
415 #if DK4_USE_ASSERT
416 assert(NULL != tsp);
417 #endif
418 if (NULL != tsp) {
419 dk4tsp32_init(tsp, eie);
420 if (NULL != fct) {
421 tsp->obj = obj;
422 (tsp->fct).ch = fct;
423 tsp->iee = eie;
424 tsp->ief = eie;
425 tsp->pst = 0;
426 back = 1;
427 } else {
428 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
429 tsp->pst = 2;
430 }
431 } else {
432 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
433 }
434 return back;
435 }
436
437
438
439 int
dk4tsp32_setup_line(dk4_tsp32_t * tsp,void * obj,dk4_c32_line_handler_t * fct,dk4_c32_t * inbuf,size_t szin,int eie,dk4_er_t * erp)440 dk4tsp32_setup_line(
441 dk4_tsp32_t *tsp,
442 void *obj,
443 dk4_c32_line_handler_t *fct,
444 dk4_c32_t *inbuf,
445 size_t szin,
446 int eie,
447 dk4_er_t *erp
448 )
449 {
450 int back = 0;
451 #if DK4_USE_ASSERT
452 assert(NULL != tsp);
453 assert(NULL != inbuf);
454 assert(0 < szin);
455 #endif
456 if (NULL != tsp) {
457 dk4tsp32_init(tsp, eie);
458 if ((NULL != fct) && (NULL != inbuf) && (0 < szin)) {
459 tsp->obj = obj;
460 (tsp->fct).lh = fct;
461 tsp->inbuf = inbuf;
462 tsp->in_sz = szin;
463 tsp->in_us = 0;
464 tsp->iee = eie;
465 tsp->ief = eie;
466 tsp->pst = 0;
467 back = 1;
468 } else {
469 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
470 tsp->pst = 2;
471 }
472 } else {
473 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
474 }
475 return back;
476 }
477
478
479
480 int
dk4tsp32_add_one_byte(dk4_tsp32_t * tsp,unsigned char inbyte)481 dk4tsp32_add_one_byte(
482 dk4_tsp32_t *tsp,
483 unsigned char inbyte
484 )
485 {
486 int back = DK4_TSP_RES_FATAL;
487
488 #if DK4_USE_ASSERT
489 assert(NULL != tsp);
490 #endif
491 if (NULL != tsp) {
492 if (2 > tsp->pst) {
493 back = dk4tsp32_i_add_one_byte(tsp, inbyte);
494 }
495 }
496
497 return back;
498 }
499
500
501
502 int
dk4tsp32_add_bytes(dk4_tsp32_t * tsp,const unsigned char * buffer,size_t sz)503 dk4tsp32_add_bytes(
504 dk4_tsp32_t *tsp,
505 const unsigned char *buffer,
506 size_t sz
507 )
508 {
509 int back = DK4_TSP_RES_FATAL; /* Function result */
510 int res; /* Processing result */
511
512 #if DK4_USE_ASSERT
513 assert(NULL != tsp);
514 assert(NULL != buffer);
515 assert(0 < sz);
516 #endif
517 if ((NULL != tsp) && (NULL != buffer) && (0 < sz)) {
518 if (2 > tsp->pst) {
519 back = DK4_TSP_RES_OK;
520 while ((sz--) && (2 > tsp->pst)) {
521 res = dk4tsp32_i_add_one_byte(tsp, *(buffer++));
522 switch (res) {
523 case DK4_TSP_RES_FATAL: {
524 back = DK4_TSP_RES_FATAL;
525 } break;
526 case DK4_TSP_RES_ERROR: {
527 if (DK4_TSP_RES_OK == back) {
528 back = DK4_TSP_RES_ERROR;
529 }
530 } break;
531 }
532 }
533 }
534 }
535
536 return back;
537 }
538
539
540
541 int
dk4tsp32_finish(dk4_tsp32_t * tsp)542 dk4tsp32_finish(dk4_tsp32_t *tsp)
543 {
544 size_t nrej = 0; /* Number of unused bytes */
545 size_t i = 0; /* Current unused byte index */
546 int back = DK4_TSP_RES_FATAL; /* Function result */
547 int res; /* Operation result */
548 unsigned char uc; /* Current unused byte */
549
550 #if DK4_USE_ASSERT
551 assert(NULL != tsp);
552 #endif
553 if (NULL != tsp) {
554 /* Flush all unprocessed output
555 1. If there are unprocessed bytes in the BOM detector as BOM
556 detection was not completed, process these bytes.
557 2. If there are characters in the line buffer,
558 finalize the line buffer text and process it.
559 */
560 back = DK4_TSP_RES_OK;
561 /* Retrieve an process bytes stored in BOM detector */
562 if (0 == tsp->pst) {
563 tsp->pst = 1;
564 nrej = dk4bom_detect_num_unused_bytes(&(tsp->bomd));
565 if (0 < nrej) {
566 dk4tsp32_initialize_decoder(tsp);
567 for (i = 0; ((i < nrej) && (2 > tsp->pst)); i++) {
568 uc = dk4bom_detect_unused_byte(&(tsp->bomd), i);
569 switch (dk4tsp32_process_byte(tsp, uc)) {
570 case DK4_TSP_RES_FATAL: {
571 back = DK4_TSP_RES_FATAL;
572 tsp->pst = 2;
573 } break;
574 case DK4_TSP_RES_ERROR: {
575 if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; }
576 } break;
577 }
578 }
579 }
580 }
581 /* Process final line */
582 if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) {
583 if (0 < tsp->in_us) {
584 if (2 > tsp->pst) {
585 if (tsp->in_us < tsp->in_sz) {
586 (tsp->inbuf)[tsp->in_us] = (dk4_c32_t)0UL;
587 res =
588 (*((tsp->fct).lh))(
589 tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr)
590 );
591 switch (res) {
592 case DK4_TSP_RES_FATAL: {
593 back = DK4_TSP_RES_FATAL;
594 tsp->pst = 2;
595 } break;
596 case DK4_TSP_RES_ERROR: {
597 if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; }
598 } break;
599 }
600 } else {
601 back = DK4_TSP_RES_FATAL;
602 tsp->pst = 2;
603 dk4error_set_with_position(
604 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
605 (tsp->pos).bytes, (tsp->pos).lineno,
606 (tsp->pos).chars, (tsp->pos).charil
607 );
608 }
609 }
610 }
611 }
612 }
613
614 return back;
615 }
616
617
618
619 void
dk4tsp32_get_errors(dk4_er_t * er_en,dk4_er_t * er_pr,dk4_tsp32_t const * tsp)620 dk4tsp32_get_errors(dk4_er_t *er_en, dk4_er_t *er_pr, dk4_tsp32_t const *tsp)
621 {
622 #if DK4_USE_ASSERT
623 assert(NULL != tsp);
624 #endif
625 if (NULL != tsp) {
626 if (NULL != er_en) {
627 DK4_MEMCPY(er_en, &(tsp->er_en), sizeof(dk4_er_t));
628 }
629 if (NULL != er_pr) {
630 DK4_MEMCPY(er_pr, &(tsp->er_pr), sizeof(dk4_er_t));
631 }
632 }
633 }
634
635
636
637