1 /*
2 Copyright (C) 2015-2021, Dirk Krause
3 SPDX-License-Identifier: BSD-3-Clause
4 */
5
6 /*
7 WARNING: This file was generated by the dkct program (see
8 http://dktools.sourceforge.net/ for details).
9 Changes you make here will be lost if dkct is run again!
10 You should modify the original source and run dkct on it.
11 Original source: dk4tsp08.ctr
12 */
13
14 /** @file dk4tsp08.c The dk4tsp08 module.
15 */
16
17
18 #include "dk4conf.h"
19 #include <libdk4c/dk4tsp.h>
20 #include <libdk4c/dk4tsp08.h>
21 #include <libdk4base/dk4mem.h>
22 #include <libdk4c/dk4enc.h>
23 #include <libdk4c/dk4ansi.h>
24 #include <libdk4c/dk4utf8.h>
25 #include <libdk4c/dk4utf16.h>
26 #include <libdk4c/dk4c32.h>
27 #include <libdk4c/dk4bom.h>
28 #include <libdk4c/dk4rec26.h>
29
30 #if DK4_HAVE_ASSERT_H
31 #ifndef ASSERT_H_INCLUDED
32 #include <assert.h>
33 #define ASSERT_H_INCLUDED 1
34 #endif
35 #endif
36
37
38
39
40
41
42 /** Initialize text stream processor structure.
43 @param tsp Text stream processor to set up.
44 @param eie Expected input encoding.
45 */
46 static
47 void
dk4tsp08_init(dk4_tsp08_t * tsp,int eie)48 dk4tsp08_init(dk4_tsp08_t *tsp, int eie)
49 {
50
51 #if DK4_USE_ASSERT
52 assert(NULL != tsp);
53 #endif
54 DK4_MEMRES(tsp, sizeof(dk4_tsp08_t));
55 dk4bom_detect_init(&(tsp->bomd), eie);
56 dk4error_init(&(tsp->er_en));
57 dk4error_init(&(tsp->er_pr));
58 tsp->inbuf = NULL;
59 tsp->obj = NULL;
60 tsp->in_sz = 0;
61 tsp->in_us = 0;
62 tsp->ief = eie;
63 tsp->iee = eie;
64 tsp->upt = 0;
65 tsp->pen = 0;
66 tsp->pst = 0;
67 tsp->pth = 0;
68 (tsp->pos).bytes = (dk4_um_t)0UL;
69 (tsp->pos).chars = (dk4_um_t)1UL;
70 (tsp->pos).lineno = (dk4_um_t)1UL;
71 (tsp->pos).charil = (dk4_um_t)1UL;
72
73 }
74
75
76
77 /** Initialize decoder for found input encoding.
78 @param tsp Text stream processor.
79 */
80 static
81 void
dk4tsp08_initialize_decoder(dk4_tsp08_t * tsp)82 dk4tsp08_initialize_decoder(dk4_tsp08_t *tsp)
83 {
84
85 #if DK4_USE_ASSERT
86 assert(NULL != tsp);
87 #endif
88 if (0 == tsp->pth) {
89 switch (tsp->ief) {
90 case DK4_FILE_ENCODING_UTF8: {
91 dk4utf8_init(&((tsp->dec).u08));
92 } break;
93 case DK4_FILE_ENCODING_UTF16_LE: {
94 dk4utf16_byte_init(&((tsp->dec).u16), 0);
95 } break;
96 case DK4_FILE_ENCODING_UTF16_BE: {
97 dk4utf16_byte_init(&((tsp->dec).u16), 1);
98 } break;
99 case DK4_FILE_ENCODING_32_LE: {
100 dk4c32_decoder_init(&((tsp->dec).c32), 0);
101 } break;
102 case DK4_FILE_ENCODING_32_BE: {
103 dk4c32_decoder_init(&((tsp->dec).c32), 1);
104 } break;
105 }
106 }
107 }
108
109
110
111 /** Process group of characters (one UTF-8 encoded 32 bit character).
112 @param tsp Text stream processor.
113 @param buf Buffer containing characters to process.
114 @param sz Number of characters in buffer.
115 @return DK4_TSP_RES_OK if the characters were processed
116 successfully,
117 DK4_TSP_RES_ERROR if there was an error but we can
118 continue,
119 DK4_TSP_RES_FATAL if there was a fatal error so we
120 should abort processing.
121 */
122 static
123 int
dk4tsp08_process_group(dk4_tsp08_t * tsp,char * buf,size_t sz)124 dk4tsp08_process_group(dk4_tsp08_t *tsp, char *buf, size_t sz)
125 {
126 size_t i;
127 int back = DK4_TSP_RES_FATAL;
128 int res;
129
130 #if DK4_USE_ASSERT
131 assert(NULL != tsp);
132 assert(NULL != buf);
133 assert(0 < sz);
134 #endif
135 /* Increase character numbers
136 */
137 (tsp->pos).chars += (dk4_um_t)1UL;
138 (tsp->pos).charil += (dk4_um_t)1UL;
139 /* Decide between line by line or char by char processing */
140 if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) {
141 /* Line processing */
142 if (sz < tsp->in_sz) {
143 if ((tsp->in_sz - sz) > tsp->in_us) {
144 back = DK4_TSP_RES_OK;
145 DK4_MEMCPY( &((tsp->inbuf)[tsp->in_us]), buf, sz );
146 tsp->in_us += sz;
147 if ((1 == sz) && ('\n' == buf[0])) {
148 back = DK4_TSP_RES_FATAL;
149 if (tsp->in_us < tsp->in_sz) {
150 (tsp->inbuf)[tsp->in_us] = '\0';
151 back = (*((tsp->fct).lh))(
152 tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr)
153 );
154 } else {
155 /* ERROR: Line too long! */
156 dk4error_set_with_position(
157 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
158 (tsp->pos).bytes, (tsp->pos).lineno,
159 (tsp->pos).chars, (tsp->pos).charil
160 );
161 }
162 tsp->in_us = 0;
163 }
164 } else {
165 /* ERROR: Line too long! */
166 dk4error_set_with_position(
167 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
168 (tsp->pos).bytes, (tsp->pos).lineno,
169 (tsp->pos).chars, (tsp->pos).charil
170 );
171 }
172 } else {
173 /* ERROR: Line too long! */
174 dk4error_set_with_position(
175 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
176 (tsp->pos).bytes, (tsp->pos).lineno,
177 (tsp->pos).chars, (tsp->pos).charil
178 );
179 }
180 } else {
181 /* Direct char processing */
182 if (NULL != (tsp->fct).ch) {
183 back = DK4_TSP_RES_OK;
184 for (i = 0; i < sz; i++) {
185 res = (*((tsp->fct).ch))(tsp->obj, buf[i], &(tsp->pos), &(tsp->er_pr));
186 switch (res) {
187 case DK4_TSP_RES_FATAL: {
188 back = DK4_TSP_RES_FATAL;
189 } break;
190 case DK4_TSP_RES_ERROR: {
191 if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; }
192 } break;
193 }
194 }
195 } else {
196 dk4error_set_with_position(
197 &(tsp->er_en), DK4_E_INVALID_ARGUMENTS,
198 (tsp->pos).bytes, (tsp->pos).lineno,
199 (tsp->pos).chars, (tsp->pos).charil
200 );
201 }
202 }
203 /* After newline correct line number and position in line
204 */
205 if (1 == sz) {
206 if ('\n' == buf[0]) {
207 (tsp->pos).lineno += (dk4_um_t)1UL;
208 (tsp->pos).charil = (dk4_um_t)1UL;
209 }
210 }
211 return back;
212 }
213
214
215 /** Normal processing for one byte.
216 @param tsp Text stream processor.
217 @param inbyte Byte to process.
218 @return Operation result, one from DK4_TSP_RES_OK,
219 DK4_TSP_RES_ERROR or DK4_TSP_RES_FATAL.
220 */
221 static
222 int
dk4tsp08_process_byte(dk4_tsp08_t * tsp,unsigned char inbyte)223 dk4tsp08_process_byte(
224 dk4_tsp08_t *tsp,
225 unsigned char inbyte
226 )
227 {
228 unsigned char buf[8];
229 dk4_c32_t c32 = dkC32(0);
230 size_t sz;
231 int back = DK4_TSP_RES_FATAL;
232 int cuc32 = 0;
233 int res = 0;
234 unsigned char uc = '\0';
235 char c08;
236
237 #if DK4_USE_ASSERT
238 assert(NULL != tsp);
239 #endif
240 if (0 != tsp->pth) {
241 back = dk4tsp08_process_group(tsp, (char *)(&inbyte), 1);
242 } else {
243 /* Decode and process */
244 switch (tsp->ief) {
245 case DK4_FILE_ENCODING_PLAIN: {
246 c32 = (dk4_c32_t)inbyte;
247 cuc32 = 1;
248 } break;
249 case DK4_FILE_ENCODING_WIN1252: {
250 if (0 != dk4ansi_decode(&c32, inbyte)) {
251 cuc32 = 1;
252 } else {
253 dk4error_set_with_position(
254 &(tsp->er_en), DK4_E_DECODING_FAILED,
255 (tsp->pos).bytes, (tsp->pos).lineno,
256 (tsp->pos).chars, (tsp->pos).charil
257 );
258 }
259 } break;
260 case DK4_FILE_ENCODING_UTF8: {
261 res = dk4utf8_add(&((tsp->dec).u08), inbyte);
262 switch (res) {
263 case DK4_EDSTM_ERROR: {
264 /* Decoding error */
265 dk4error_set_with_position(
266 &(tsp->er_en), DK4_E_DECODING_FAILED,
267 (tsp->pos).bytes, (tsp->pos).lineno,
268 (tsp->pos).chars, (tsp->pos).charil
269 );
270 } break;
271 case DK4_EDSTM_FINISHED: {
272 c32 = dk4utf8_get(&((tsp->dec).u08));
273 cuc32 = 1;
274 dk4utf8_init(&((tsp->dec).u08));
275 } break;
276 case DK4_EDSTM_ACCEPT: {
277 back = DK4_TSP_RES_OK;
278 } break;
279 }
280 } break;
281 case DK4_FILE_ENCODING_UTF16_LE: {
282 res = dk4utf16_byte_add(&((tsp->dec).u16), inbyte);
283 switch (res) {
284 case DK4_EDSTM_ERROR: {
285 /* Decoding error */
286 dk4error_set_with_position(
287 &(tsp->er_en), DK4_E_DECODING_FAILED,
288 (tsp->pos).bytes, (tsp->pos).lineno,
289 (tsp->pos).chars, (tsp->pos).charil
290 );
291 } break;
292 case DK4_EDSTM_FINISHED: {
293 c32 = dk4utf16_byte_get(&((tsp->dec).u16));
294 cuc32 = 1;
295 dk4utf16_byte_init(&((tsp->dec).u16), 0);
296 } break;
297 case DK4_EDSTM_ACCEPT: {
298 back = DK4_TSP_RES_OK;
299 } break;
300 }
301 } break;
302 case DK4_FILE_ENCODING_UTF16_BE: {
303 res = dk4utf16_byte_add(&((tsp->dec).u16), inbyte);
304 switch (res) {
305 case DK4_EDSTM_ERROR: {
306 /* Decoding error */
307 dk4error_set_with_position(
308 &(tsp->er_en), DK4_E_DECODING_FAILED,
309 (tsp->pos).bytes, (tsp->pos).lineno,
310 (tsp->pos).chars, (tsp->pos).charil
311 );
312 } break;
313 case DK4_EDSTM_FINISHED: {
314 c32 = dk4utf16_byte_get(&((tsp->dec).u16));
315 cuc32 = 1;
316 dk4utf16_byte_init(&((tsp->dec).u16), 1);
317 } break;
318 case DK4_EDSTM_ACCEPT: {
319 back = DK4_TSP_RES_OK;
320 } break;
321 }
322 } break;
323 case DK4_FILE_ENCODING_32_LE: {
324 res = dk4c32_decoder_add(&((tsp->dec).c32), inbyte);
325 switch (res) {
326 case DK4_EDSTM_ERROR: {
327 /* Decoding error */
328 dk4error_set_with_position(
329 &(tsp->er_en), DK4_E_DECODING_FAILED,
330 (tsp->pos).bytes, (tsp->pos).lineno,
331 (tsp->pos).chars, (tsp->pos).charil
332 );
333 } break;
334 case DK4_EDSTM_FINISHED: {
335 c32 = dk4c32_decoder_get(&((tsp->dec).c32));
336 cuc32 = 1;
337 dk4c32_decoder_init(&((tsp->dec).c32), 0);
338 } break;
339 case DK4_EDSTM_ACCEPT: {
340 back = DK4_TSP_RES_OK;
341 } break;
342 }
343 } break;
344 case DK4_FILE_ENCODING_32_BE: {
345 res = dk4c32_decoder_add(&((tsp->dec).c32), inbyte);
346 switch (res) {
347 case DK4_EDSTM_ERROR: {
348 /* Decoding error */
349 dk4error_set_with_position(
350 &(tsp->er_en), DK4_E_DECODING_FAILED,
351 (tsp->pos).bytes, (tsp->pos).lineno,
352 (tsp->pos).chars, (tsp->pos).charil
353 );
354 } break;
355 case DK4_EDSTM_FINISHED: {
356 c32 = dk4c32_decoder_get(&((tsp->dec).c32));
357 cuc32 = 1;
358 dk4c32_decoder_init(&((tsp->dec).c32), 1);
359 } break;
360 case DK4_EDSTM_ACCEPT: {
361 back = DK4_TSP_RES_OK;
362 } break;
363 }
364 } break;
365 }
366 if (0 != cuc32) {
367 switch (tsp->pen) {
368 case DK4_ENCODING_PLAIN: {
369 if (dkC32(0x00000100) > c32) {
370 c08 = (char)c32;
371 back = dk4tsp08_process_group(tsp, &c08, 1);
372 } else {
373 /* Encoding error */
374 dk4error_set_with_position(
375 &(tsp->er_en), DK4_E_ENCODING_FAILED,
376 (tsp->pos).bytes, (tsp->pos).lineno,
377 (tsp->pos).chars, (tsp->pos).charil
378 );
379 }
380 } break;
381 case DK4_ENCODING_WIN1252: {
382 if (0 != dk4ansi_encode(&uc, c32)) {
383 c08 = (char)uc;
384 back = dk4tsp08_process_group(tsp, &c08, 1);
385 } else {
386 /* Encoding error */
387 dk4error_set_with_position(
388 &(tsp->er_en), DK4_E_ENCODING_FAILED,
389 (tsp->pos).bytes, (tsp->pos).lineno,
390 (tsp->pos).chars, (tsp->pos).charil
391 );
392 }
393 } break;
394 case DK4_ENCODING_UTF8: {
395 sz = sizeof(buf);
396 if (0 != dk4utf8_encode(buf, &sz, c32, NULL)) {
397 back = dk4tsp08_process_group(tsp, (char *)buf, sz);
398 } else {
399 /* Encoding error */
400 dk4error_set_with_position(
401 &(tsp->er_en), DK4_E_ENCODING_FAILED,
402 (tsp->pos).bytes, (tsp->pos).lineno,
403 (tsp->pos).chars, (tsp->pos).charil
404 );
405 }
406 } break;
407 }
408 }
409 }
410 if (DK4_TSP_RES_FATAL == back) {
411 tsp->pst = 2;
412 }
413
414 return back;
415 }
416
417
418
419 /** Add one byte to internal data structures
420 (BOM detection and/or normal processing).
421 @param tsp Text stream processor.
422 @param inbyte Byte to add.
423 @return Operation result, one from DK4_TSP_RES_OK,
424 DK4_TSP_RES_ERROR or DK4_TSP_RES_FATAL.
425 */
426 static
427 int
dk4tsp08_i_add_one_byte(dk4_tsp08_t * tsp,unsigned char inbyte)428 dk4tsp08_i_add_one_byte(
429 dk4_tsp08_t *tsp,
430 unsigned char inbyte
431 )
432 {
433 size_t nrej = 0;
434 size_t i = 0;
435 int back = DK4_TSP_RES_FATAL;
436 int res;
437 unsigned char uc;
438
439 #if DK4_USE_ASSERT
440 assert(NULL != tsp);
441 #endif
442 (tsp->pos).bytes += (dk4_um_t)1UL;
443 switch (tsp->pst) {
444 case 0: { /* BOM detection */
445
446 res = dk4bom_detect_add(&(tsp->bomd), inbyte);
447 switch (res) {
448 case DK4_EDSTM_ACCEPT: {
449 back = DK4_TSP_RES_OK;
450 } break;
451 case DK4_EDSTM_FINISHED:
452 case DK4_EDSTM_FINISHED_WITH_UNUSED: {
453
454 /* This result is ok. */
455 back = DK4_TSP_RES_OK;
456 /* Retrieve found encoding */
457 tsp->ief = dk4bom_detect_get_encoding(&(tsp->bomd));
458 /* Check whether we can pass through data unchanged */
459 if (tsp->ief == tsp->pen) {
460 switch (tsp->ief) {
461
462 #if VERSION_BEFORE_20150321
463 /* 2015-30-21 We do not hard-code the decision whether or
464 not to enable pass-through for UTF-8
465 encoded bytes, better use a run-time
466 decision.
467 */
468 case DK4_ENCODING_PLAIN:
469 case DK4_ENCODING_WIN1252:
470 #if 0
471 /* 2014-12-01
472 We must make sure to process the complete UTF-8
473 representation of a glyph completely.
474 */
475 case DK4_ENCODING_UTF8:
476 #endif
477 {
478 tsp->pth = 1;
479 } break;
480 #endif
481
482 case DK4_ENCODING_PLAIN :
483 case DK4_ENCODING_WIN1252 : {
484 tsp->pth = 1;
485 } break;
486 case DK4_ENCODING_UTF8 : {
487 if (0 != tsp->upt) { tsp->pth = 1; }
488 } break;
489 }
490 }
491 /* If necessary, initialize decoder */
492 dk4tsp08_initialize_decoder(tsp);
493 /* Switch to normal processing */
494 tsp->pst = 1;
495 /* Process bytes stored in BOM detector */
496 if (DK4_EDSTM_FINISHED_WITH_UNUSED == res) {
497 nrej = dk4bom_detect_num_unused_bytes(&(tsp->bomd));
498 if (0 < nrej) {
499
500 for (i = 0; ((i < nrej) && (DK4_TSP_RES_FATAL != back)); i++) {
501 uc = dk4bom_detect_unused_byte(&(tsp->bomd), i);
502
503 switch (dk4tsp08_process_byte(tsp, uc)) {
504 case DK4_TSP_RES_FATAL: {
505 back = DK4_TSP_RES_FATAL;
506 } break;
507 case DK4_TSP_RES_ERROR: {
508 if (DK4_TSP_RES_OK == back) {
509 back = DK4_TSP_RES_ERROR;
510 }
511 } break;
512 }
513 }
514 }
515 }
516 } break;
517 case DK4_EDSTM_ERROR: {
518 } break;
519 }
520 } break;
521 case 1: { /* Normal processing */
522
523 back = dk4tsp08_process_byte(tsp, inbyte);
524 } break;
525 /*
526 Processing stage 2 indicates there was a serious error
527 before, we must skip further processing.
528 This situation is covered by the initialization value
529 DK4_TSP_RES_FATAL, so we do not need a case branch here.
530 */
531 }
532
533 return back;
534 }
535
536
537
538 int
dk4tsp08_setup_char(dk4_tsp08_t * tsp,void * obj,dk4_c8_handler_t * fct,int pre,int eie,dk4_er_t * erp)539 dk4tsp08_setup_char(
540 dk4_tsp08_t *tsp,
541 void *obj,
542 dk4_c8_handler_t *fct,
543 int pre,
544 int eie,
545 dk4_er_t *erp
546 )
547 {
548 int back = 0;
549
550 #if DK4_USE_ASSERT
551 assert(NULL != tsp);
552 #endif
553 if (NULL != tsp) {
554 dk4tsp08_init(tsp, eie);
555 if (NULL != fct) {
556 switch (pre) {
557 case DK4_ENCODING_PLAIN:
558 case DK4_ENCODING_WIN1252:
559 case DK4_ENCODING_UTF8:
560 {
561 tsp->obj = obj;
562 (tsp->fct).ch = fct;
563 tsp->pen = pre;
564 tsp->iee = eie;
565 tsp->ief = eie;
566 back = 1;
567 } break;
568 default: {
569 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
570 tsp->pst = 2;
571 } break;
572 }
573 } else {
574 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
575 tsp->pst = 2;
576 }
577 } else {
578 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
579 }
580 return back;
581 }
582
583
584 int
dk4tsp08_setup_line(dk4_tsp08_t * tsp,void * obj,dk4_c8_line_handler_t * fct,char * inbuf,size_t szin,int pre,int eie,dk4_er_t * erp)585 dk4tsp08_setup_line(
586 dk4_tsp08_t *tsp,
587 void *obj,
588 dk4_c8_line_handler_t *fct,
589 char *inbuf,
590 size_t szin,
591 int pre,
592 int eie,
593 dk4_er_t *erp
594 )
595 {
596 int back = 0;
597
598 #if DK4_USE_ASSERT
599 assert(NULL != tsp);
600 assert(NULL != inbuf);
601 assert(0 < szin);
602 #endif
603 if (NULL != tsp) {
604 dk4tsp08_init(tsp, eie);
605 if ((NULL != fct) && (NULL != inbuf) && (0 < szin)) {
606
607 switch (pre) {
608 case DK4_ENCODING_PLAIN:
609 case DK4_ENCODING_WIN1252:
610 case DK4_ENCODING_UTF8:
611 {
612 tsp->obj = obj;
613 (tsp->fct).lh = fct;
614 tsp->inbuf = inbuf;
615 tsp->in_sz = szin;
616 tsp->in_us = 0;
617 tsp->pen = pre;
618 tsp->iee = eie;
619 tsp->ief = eie;
620 back = 1;
621 } break;
622 default: {
623 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
624 tsp->pst = 2;
625 } break;
626 }
627 } else {
628 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
629 tsp->pst = 2;
630 }
631 } else {
632 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS);
633 }
634 return back;
635 }
636
637
638
639 int
dk4tsp08_add_one_byte(dk4_tsp08_t * tsp,unsigned char inbyte)640 dk4tsp08_add_one_byte(
641 dk4_tsp08_t *tsp,
642 unsigned char inbyte
643 )
644 {
645 int back = DK4_TSP_RES_FATAL;
646
647 #if DK4_USE_ASSERT
648 assert(NULL != tsp);
649 #endif
650 if (NULL != tsp) {
651 if (2 > tsp->pst) {
652 back = dk4tsp08_i_add_one_byte(tsp, inbyte);
653 }
654 }
655 return back;
656 }
657
658
659
660 int
dk4tsp08_add_bytes(dk4_tsp08_t * tsp,const unsigned char * buffer,size_t sz)661 dk4tsp08_add_bytes(
662 dk4_tsp08_t *tsp,
663 const unsigned char *buffer,
664 size_t sz
665 )
666 {
667 int back = DK4_TSP_RES_FATAL;
668 int res;
669
670 #if DK4_USE_ASSERT
671 assert(NULL != tsp);
672 assert(NULL != buffer);
673 assert(0 < sz);
674 #endif
675 if ((NULL != tsp) && (NULL != buffer) && (0 < sz)) {
676 if (2 > tsp->pst) {
677 back = DK4_TSP_RES_OK;
678 while((sz--) && (2 > tsp->pst)) {
679 res = dk4tsp08_i_add_one_byte(tsp, *(buffer++));
680 switch (res) {
681 case DK4_TSP_RES_FATAL: {
682 back = DK4_TSP_RES_FATAL;
683 } break;
684 case DK4_TSP_RES_ERROR: {
685 if (DK4_TSP_RES_OK == back) {
686 back = DK4_TSP_RES_ERROR;
687 }
688 } break;
689 }
690 }
691 }
692 }
693 return back;
694 }
695
696
697
698 int
dk4tsp08_finish(dk4_tsp08_t * tsp)699 dk4tsp08_finish(dk4_tsp08_t *tsp)
700 {
701 size_t nrej = 0;
702 size_t i = 0;
703 int back = DK4_TSP_RES_FATAL;
704 int res;
705 unsigned char uc;
706
707 #if DK4_USE_ASSERT
708 assert(NULL != tsp);
709 #endif
710 if (NULL != tsp) {
711 /* Retrieve rejected bytes from BOM detector */
712 back = DK4_TSP_RES_OK;
713 if (0 == tsp->pst) {
714 tsp->pst = 1;
715 nrej = dk4bom_detect_num_unused_bytes(&(tsp->bomd));
716 if (0 < nrej) {
717 dk4tsp08_initialize_decoder(tsp);
718 for (i = 0; ((i < nrej) && (2 > tsp->pst)); i++) {
719 uc = dk4bom_detect_unused_byte(&(tsp->bomd), i);
720
721 switch (dk4tsp08_process_byte(tsp, uc)) {
722 case DK4_TSP_RES_FATAL: {
723 back = DK4_TSP_RES_FATAL;
724 tsp->pst = 2;
725 } break;
726 case DK4_TSP_RES_ERROR: {
727 if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; }
728 } break;
729 }
730 }
731 }
732 } else {
733 }
734 /* Process final line */
735 if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) {
736 if (0 < tsp->in_us) {
737
738 if (2 > tsp->pst) {
739 if (tsp->in_us < tsp->in_sz) {
740 (tsp->inbuf)[tsp->in_us] = '\0';
741 res =
742 (*((tsp->fct).lh))(
743 tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr)
744 );
745 switch (res) {
746 case DK4_TSP_RES_FATAL: {
747 back = DK4_TSP_RES_FATAL;
748 tsp->pst = 2;
749 } break;
750 case DK4_TSP_RES_ERROR: {
751 if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; }
752 } break;
753 }
754 } else {
755 back = DK4_TSP_RES_FATAL;
756 tsp->pst = 2;
757 /* ERROR: Line too long! */
758 dk4error_set_with_position(
759 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL,
760 (tsp->pos).bytes, (tsp->pos).lineno,
761 (tsp->pos).chars, (tsp->pos).charil
762 );
763 }
764 } else {
765 }
766 }
767 }
768 }
769
770 return back;
771 }
772
773
774
775 void
dk4tsp08_get_errors(dk4_er_t * er_en,dk4_er_t * er_pr,dk4_tsp08_t const * tsp)776 dk4tsp08_get_errors(dk4_er_t *er_en, dk4_er_t *er_pr, dk4_tsp08_t const *tsp)
777 {
778 #if DK4_USE_ASSERT
779 assert(NULL != tsp);
780 #endif
781 if (NULL != tsp) {
782 if (NULL != er_en) {
783 DK4_MEMCPY(er_en, &(tsp->er_en), sizeof(dk4_er_t));
784 }
785 if (NULL != er_pr) {
786 DK4_MEMCPY(er_pr, &(tsp->er_pr), sizeof(dk4_er_t));
787 }
788 }
789 }
790
791
792
793 void
dk4tsp08_enable_utf8_pass_through(dk4_tsp08_t * tsp,int fl)794 dk4tsp08_enable_utf8_pass_through(dk4_tsp08_t *tsp, int fl)
795 {
796 #if DK4_USE_ASSERT
797 assert(NULL != tsp);
798 #endif
799 if (NULL != tsp) {
800 tsp->upt = ((0 != fl) ? 1 : 0);
801 }
802 }
803
804
805