1%% options 2 3copyright owner = Dirk Krause 4copyright year = 2015-xxxx 5SPDX-License-Identifier: BSD-3-Clause 6 7 8 9%% header 10 11/** @file 12 Text stream processing for 32 bit characters. 13 14 CRT on Windows: Optional. 15*/ 16 17#ifndef DK4CONF_H_INCLUDED 18#if DK4_BUILDING_DKTOOLS4 19#include "dk4conf.h" 20#else 21#include <dktools-4/dk4conf.h> 22#endif 23#endif 24 25#ifndef DK4TYPES_H_INCLUDED 26#if DK4_BUILDING_DKTOOLS4 27#include <libdk4base/dk4types.h> 28#else 29#include <dktools-4/dk4types.h> 30#endif 31#endif 32 33#ifndef DK4ERROR_H_INCLUDED 34#if DK4_BUILDING_DKTOOLS4 35#include <libdk4base/dk4error.h> 36#else 37#include <dktools-4/dk4error.h> 38#endif 39#endif 40 41#ifndef DK4BOM_H_INCLUDED 42#if DK4_BUILDING_DKTOOLS4 43#include <libdk4c/dk4bom.h> 44#else 45#include <dktools-4/dk4bom.h> 46#endif 47#endif 48 49#ifndef DK4TSP_H_INCLUDED 50#if DK4_BUILDING_DKTOOLS4 51#include <libdk4c/dk4tsp.h> 52#else 53#include <dktools-4/dk4tsp.h> 54#endif 55#endif 56 57#ifndef DK4UTF8_H_INCLUDED 58#if DK4_BUILDING_DKTOOLS4 59#include <libdk4c/dk4utf8.h> 60#else 61#include <dktools-4/dk4utf8.h> 62#endif 63#endif 64 65#ifndef DK4UTF16_H_INCLUDED 66#if DK4_BUILDING_DKTOOLS4 67#include <libdk4c/dk4utf16.h> 68#else 69#include <dktools-4/dk4utf16.h> 70#endif 71#endif 72 73#ifndef DK4C32_H_INCLUDED 74#if DK4_BUILDING_DKTOOLS4 75#include <libdk4c/dk4c32.h> 76#else 77#include <dktools-4/dk4c32.h> 78#endif 79#endif 80 81/** Handler function for single characters. 82 @param obj Object to modify while processing the character. 83 @param c Character to process. 84 @param pos Current position in file or data stream. 85 @param erp Error report, may be NULL. 86 @return DK4_TSP_RES_OK if the character was processed 87 successfully, 88 DK4_TSP_RES_ERROR if there was an error but we can 89 continue, 90 DK4_TSP_RES_FATAL if there was a fatal error so we 91 should abort processing. 92*/ 93typedef int dk4_c32_handler_t( 94 void *obj, 95 dk4_c32_t c, 96 dk4_text_stream_position_t *pos, 97 dk4_er_t *erp 98); 99 100/** Handler function for text lines. 101 @param obj Object to modify while processing the character. 102 @param line Text line to process. 103 @param lineno Current line number. 104 @param erp Error report, may be NULL. 105 @return DK4_TSP_RES_OK if the character was processed 106 successfully, 107 DK4_TSP_RES_ERROR if there was an error but we can 108 continue, 109 DK4_TSP_RES_FATAL if there was a fata error so we 110 should abort processing. 111*/ 112typedef int dk4_c32_line_handler_t( 113 void *obj, 114 dk4_c32_t *line, 115 dk4_um_t lineno, 116 dk4_er_t *erp 117); 118 119/** Structure for 32 bit character processing. 120*/ 121typedef struct { 122 union { 123 dk4_utf8_decoder_t u08; /**< UTF-8 decoder. */ 124 dk4_utf16_byte_decoder_t u16; /**< UTF-16 decoder. */ 125 dk4_c32_byte_decoder_t c32; /**< 32 bit char decoder. */ 126 } dec; /**< Input decoder. */ 127 dk4_bom_detector_t bomd; /**< BOM detector. */ 128 dk4_text_stream_position_t pos; /**< Current position. */ 129 dk4_er_t er_en; /**< Errors in input decoding. */ 130 dk4_er_t er_pr; /**< Errors in processing. */ 131 union { 132 dk4_c32_line_handler_t *lh; /**< Handler function for lines. */ 133 dk4_c32_handler_t *ch; /**< Handler function for char. */ 134 } fct; /**< Handler function. */ 135 dk4_c32_t *inbuf; /**< Buffer for input line. */ 136 void *obj; /**< Object to modify in processing. */ 137 size_t in_sz; /**< Size of input line buffer. */ 138 size_t in_us; /**< Used bytes in input line buffer. */ 139 int ief; /**< Input encoding found. */ 140 int iee; /**< Input encoding expected. */ 141 int pst; /**< Processing stage. */ 142} dk4_tsp32_t; 143 144 145 146#ifdef __cplusplus 147extern "C" { 148#endif 149 150/** Set up processor for byte by byte processing. 151 @param tsp Processor to set up. 152 @param obj Object to modify when processing input, may be NULL. 153 @param fct Handler function to call for each character. 154 @param eie Expected input encoding. 155 @param erp Error report, may be NULL. 156 @return 1 on success, 0 on error. 157*/ 158int 159dk4tsp32_setup_char( 160 dk4_tsp32_t *tsp, 161 void *obj, 162 dk4_c32_handler_t *fct, 163 int eie, 164 dk4_er_t *erp 165); 166 167/** Set up processor for line processing. 168 @param tsp Processor to set up. 169 @param obj Object to modify when processing input, may be NULL. 170 @param fct Handler function to invoke for each line. 171 @param inbuf Input line buffer. 172 @param szin Size of input line buffer (number of dk4_c32_t). 173 @param eie Expected input encoding. 174 @param erp Error report, may be NULL. 175 @return 1 on success, 0 on error. 176*/ 177int 178dk4tsp32_setup_line( 179 dk4_tsp32_t *tsp, 180 void *obj, 181 dk4_c32_line_handler_t *fct, 182 dk4_c32_t *inbuf, 183 size_t szin, 184 int eie, 185 dk4_er_t *erp 186); 187 188/** Add one single byte. 189 @param tsp Text stream processor. 190 @param inbyte Byte to process. 191 @return DK4_TSP_RES_OK if the character was processed 192 successfully, 193 DK4_TSP_RES_ERROR if there was an error but we can 194 continue, 195 DK4_TSP_RES_FATAL if there was a fata error so we 196 should abort processing. 197*/ 198int 199dk4tsp32_add_one_byte( 200 dk4_tsp32_t *tsp, 201 unsigned char inbyte 202); 203 204/** Add multiple bytes. 205 @param tsp Text stream processor. 206 @param buffer Buffer start address. 207 @param sz Number of bytes in buffer. 208 @return DK4_TSP_RES_OK if the character was processed 209 successfully, 210 DK4_TSP_RES_ERROR if there was an error but we can 211 continue, 212 DK4_TSP_RES_FATAL if there was a fata error so we 213 should abort processing. 214*/ 215int 216dk4tsp32_add_bytes( 217 dk4_tsp32_t *tsp, 218 const unsigned char *buffer, 219 size_t sz 220); 221 222/** Finish processing. 223 @param tsp Text stream processor. 224 @return DK4_TSP_RES_OK if processing was finished 225 successfully, 226 DK4_TSP_RES_ERROR if there was an error, 227 DK4_TSP_RES_FATAL if there was a fatal error. 228*/ 229int 230dk4tsp32_finish(dk4_tsp32_t *tsp); 231 232/** Retrieve error reports for encoding/decoding and processing. 233 @param er_en Destination error report buffer for encoding/decoding. 234 @param er_pr Destination error report buffer for processing. 235 @param tsp Text stream processor to retrieve errors from. 236*/ 237void 238dk4tsp32_get_errors(dk4_er_t *er_en, dk4_er_t *er_pr, dk4_tsp32_t const *tsp); 239 240#ifdef __cplusplus 241} 242#endif 243 244 245 246%% module 247 248#include "dk4conf.h" 249#include <libdk4c/dk4tsp32.h> 250#include <libdk4c/dk4enc.h> 251#include <libdk4base/dk4mem.h> 252#include <libdk4c/dk4ansi.h> 253#include <libdk4c/dk4utf8.h> 254#include <libdk4c/dk4utf16.h> 255#include <libdk4c/dk4c32.h> 256 257#if DK4_HAVE_ASSERT_H 258#ifndef ASSERT_H_INCLUDED 259#include <assert.h> 260#define ASSERT_H_INCLUDED 1 261#endif 262#endif 263 264 265$!trace-include 266 267 268 269/** Initialize text stream processing structure. 270 @param tsp Text stream processor. 271 @param eie Expected input encoding. 272*/ 273static 274void 275dk4tsp32_init(dk4_tsp32_t *tsp, int eie) 276{ 277#if DK4_USE_ASSERT 278 assert(NULL != tsp); 279#endif 280 DK4_MEMRES(tsp, sizeof(dk4_tsp32_t)); 281 dk4bom_detect_init(&(tsp->bomd), eie); 282 dk4error_init(&(tsp->er_en)); 283 dk4error_init(&(tsp->er_pr)); 284 tsp->inbuf = NULL; 285 tsp->obj = NULL; 286 tsp->in_sz = 0; 287 tsp->in_us = 0; 288 tsp->ief = eie; 289 tsp->iee = eie; 290 tsp->pst = 0; 291 (tsp->pos).bytes = (dk4_um_t)0UL; 292 (tsp->pos).chars = (dk4_um_t)1UL; 293 (tsp->pos).lineno = (dk4_um_t)1UL; 294 (tsp->pos).charil = (dk4_um_t)1UL; 295} 296 297 298 299/** Process a 32 bit character. 300 @param tsp Text stream processor. 301 @param chr Character to process. 302 @return DK4_TSP_RES_OK if the characters were processed 303 successfully, 304 DK4_TSP_RES_ERROR if there was an error but we can 305 continue, 306 DK4_TSP_RES_FATAL if there was a fatal error so 307 we should abort processing. 308*/ 309static 310int 311dk4tsp32_process_character( 312 dk4_tsp32_t *tsp, 313 dk4_c32_t chr 314) 315{ 316 int back = DK4_TSP_RES_FATAL; 317 $? "+ dk4tsp32_process_character" 318#if DK4_USE_ASSERT 319 assert(NULL != tsp); 320#endif 321 /* Increase position */ 322 (tsp->pos).chars += (dk4_um_t)1UL; 323 (tsp->pos).charil += (dk4_um_t)1UL; 324 /* Check for line buffering or direct processing */ 325 if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) { 326 $? ". line buffering %lu %lu", (unsigned long)(tsp->in_us), (unsigned long)(tsp->in_sz) 327 if (tsp->in_us < tsp->in_sz) { $? ". used size < buffer size" 328 (tsp->inbuf)[tsp->in_us] = chr; 329 tsp->in_us += 1; 330 back = DK4_TSP_RES_OK; 331 if ((dk4_c32_t)'\n' == chr) { 332 back = DK4_TSP_RES_FATAL; 333 if (tsp->in_us < tsp->in_sz) { 334 (tsp->inbuf)[tsp->in_us] = (dk4_c32_t)0UL; 335 back = (*((tsp->fct).lh))( 336 tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr) 337 ); 338 } else { 339 dk4error_set_with_position( 340 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL, 341 (tsp->pos).bytes, (tsp->pos).lineno, 342 (tsp->pos).chars, (tsp->pos).charil 343 ); 344 } 345 /* 2015-07-18 346 Bugfix: We must reset the number of used characters to 0 347 after processing and flushing the line buffer. 348 */ 349 tsp->in_us = 0; 350 } 351 } else { $? "! line buffer full" 352 dk4error_set_with_position( 353 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL, 354 (tsp->pos).bytes, (tsp->pos).lineno, 355 (tsp->pos).chars, (tsp->pos).charil 356 ); 357 } 358 } else { 359 $? ". direct char processing" 360 if (NULL != (tsp->fct).ch) { 361 back = (*((tsp->fct).ch))(tsp->obj, chr, &(tsp->pos), &(tsp->er_pr)); 362 } 363 } 364 if ((dk4_c32_t)('\n') == chr) { 365 (tsp->pos).lineno += (dk4_um_t)1UL; 366 (tsp->pos).charil = (dk4_um_t)1UL; 367 } 368 $? "- dk4tsp32_process_character %d", back 369 return back; 370} 371 372 373 374/** Normal processing for one byte. 375 Retrieve a 32 bit character first, either by decoding directly 376 or by adding to a decoder. 377 @param tsp Text stream processor. 378 @param inbyte Byte to process. 379 @return Operation result, one from DK4_TSP_RES_OK, 380 DK4_TSP_RES_ERROR or DK4_TSP_RES_FATAL. 381*/ 382static 383int 384dk4tsp32_process_byte( 385 dk4_tsp32_t *tsp, 386 unsigned char inbyte 387) 388{ 389 dk4_c32_t c32 = (dk4_c32_t)0UL; /* 32 bit character */ 390 int back = DK4_TSP_RES_FATAL; /* Function result */ 391 int cuc32 = 0; /* Flag: Have 32 bit char */ 392 int res = 0; /* Operation result */ 393#if DK4_USE_ASSERT 394 assert(NULL != tsp); 395#endif 396 switch (tsp->ief) { 397 case DK4_FILE_ENCODING_PLAIN: { 398 c32 = (dk4_c32_t)inbyte; 399 cuc32 = 1; 400 } break; 401 case DK4_FILE_ENCODING_WIN1252: { 402 if (0 != dk4ansi_decode(&c32, inbyte)) { 403 cuc32 = 1; 404 } else { 405 dk4error_set_with_position( 406 &(tsp->er_en), DK4_E_DECODING_FAILED, 407 (tsp->pos).bytes, (tsp->pos).lineno, 408 (tsp->pos).chars, (tsp->pos).charil 409 ); 410 } 411 } break; 412 case DK4_FILE_ENCODING_UTF8: { 413 res = dk4utf8_add(&((tsp->dec).u08), inbyte); 414 switch (res) { 415 case DK4_EDSTM_ERROR: { 416 dk4error_set_with_position( 417 &(tsp->er_en), DK4_E_DECODING_FAILED, 418 (tsp->pos).bytes, (tsp->pos).lineno, 419 (tsp->pos).chars, (tsp->pos).charil 420 ); 421 } break; 422 case DK4_EDSTM_FINISHED: { 423 c32 = dk4utf8_get(&((tsp->dec).u08)); 424 cuc32 = 1; 425 dk4utf8_init(&((tsp->dec).u08)); 426 } break; 427 case DK4_EDSTM_ACCEPT: { 428 back = DK4_TSP_RES_OK; 429 } break; 430 } 431 } break; 432 case DK4_FILE_ENCODING_UTF16_LE: { 433 res = dk4utf16_byte_add(&((tsp->dec).u16), inbyte); 434 switch (res) { 435 case DK4_EDSTM_ERROR: { 436 dk4error_set_with_position( 437 &(tsp->er_en), DK4_E_DECODING_FAILED, 438 (tsp->pos).bytes, (tsp->pos).lineno, 439 (tsp->pos).chars, (tsp->pos).charil 440 ); 441 } break; 442 case DK4_EDSTM_FINISHED: { 443 c32 = dk4utf16_byte_get(&((tsp->dec).u16)); 444 cuc32 = 1; 445 dk4utf16_byte_init(&((tsp->dec).u16), 0); 446 } break; 447 case DK4_EDSTM_ACCEPT: { 448 back = DK4_TSP_RES_OK; 449 } break; 450 } 451 } break; 452 case DK4_FILE_ENCODING_UTF16_BE: { 453 res = dk4utf16_byte_add(&((tsp->dec).u16), inbyte); 454 switch (res) { 455 case DK4_EDSTM_ERROR: { 456 dk4error_set_with_position( 457 &(tsp->er_en), DK4_E_DECODING_FAILED, 458 (tsp->pos).bytes, (tsp->pos).lineno, 459 (tsp->pos).chars, (tsp->pos).charil 460 ); 461 } break; 462 case DK4_EDSTM_FINISHED: { 463 c32 = dk4utf16_byte_get(&((tsp->dec).u16)); 464 cuc32 = 1; 465 dk4utf16_byte_init(&((tsp->dec).u16), 1); 466 } break; 467 case DK4_EDSTM_ACCEPT: { 468 back = DK4_TSP_RES_OK; 469 } break; 470 } 471 } break; 472 case DK4_FILE_ENCODING_32_LE: { 473 res = dk4c32_decoder_add(&((tsp->dec).c32), inbyte); 474 switch (res) { 475 case DK4_EDSTM_ERROR: { 476 dk4error_set_with_position( 477 &(tsp->er_en), DK4_E_DECODING_FAILED, 478 (tsp->pos).bytes, (tsp->pos).lineno, 479 (tsp->pos).chars, (tsp->pos).charil 480 ); 481 } break; 482 case DK4_EDSTM_FINISHED: { 483 c32 = dk4c32_decoder_get(&((tsp->dec).c32)); 484 cuc32 = 1; 485 dk4c32_decoder_init(&((tsp->dec).c32), 0); 486 } break; 487 case DK4_EDSTM_ACCEPT: { 488 back = DK4_TSP_RES_OK; 489 } break; 490 } 491 } break; 492 case DK4_FILE_ENCODING_32_BE: { 493 res = dk4c32_decoder_add(&((tsp->dec).c32), inbyte); 494 switch (res) { 495 case DK4_EDSTM_ERROR: { 496 dk4error_set_with_position( 497 &(tsp->er_en), DK4_E_DECODING_FAILED, 498 (tsp->pos).bytes, (tsp->pos).lineno, 499 (tsp->pos).chars, (tsp->pos).charil 500 ); 501 } break; 502 case DK4_EDSTM_FINISHED: { 503 c32 = dk4c32_decoder_get(&((tsp->dec).c32)); 504 cuc32 = 1; 505 dk4c32_decoder_init(&((tsp->dec).c32), 1); 506 } break; 507 case DK4_EDSTM_ACCEPT: { 508 back = DK4_TSP_RES_OK; 509 } break; 510 } 511 } break; 512 } 513 if (0 != cuc32) { 514 back = dk4tsp32_process_character(tsp, c32); 515 } 516 if (DK4_TSP_RES_FATAL == back) { 517 tsp->pst = 2; 518 } 519 return back; 520} 521 522 523 524/** Initialize decoder for found input encoding. 525 @param tsp Text stream processor. 526*/ 527static 528void 529dk4tsp32_initialize_decoder(dk4_tsp32_t *tsp) 530{ 531#if DK4_USE_ASSERT 532 assert(NULL != tsp); 533#endif 534 switch (tsp->ief) { 535 case DK4_FILE_ENCODING_UTF8: { 536 dk4utf8_init(&((tsp->dec).u08)); 537 } break; 538 case DK4_FILE_ENCODING_UTF16_LE: { 539 dk4utf16_byte_init(&((tsp->dec).u16), 0); 540 } break; 541 case DK4_FILE_ENCODING_UTF16_BE: { 542 dk4utf16_byte_init(&((tsp->dec).u16), 1); 543 } break; 544 case DK4_FILE_ENCODING_32_LE: { 545 dk4c32_decoder_init(&((tsp->dec).c32), 0); 546 } break; 547 case DK4_FILE_ENCODING_32_BE: { 548 dk4c32_decoder_init(&((tsp->dec).c32), 1); 549 } break; 550 } 551} 552 553 554 555#if TRACE_DEBUG 556static unsigned long byte_number = 0UL; 557#endif 558 559/** Add one single byte. 560 @param tsp Text stream processor. 561 @param inbyte Byte to process. 562 @return DK4_TSP_RES_OK if the character was processed 563 successfully, 564 DK4_TSP_RES_ERROR if there was an error but we can 565 continue, 566 DK4_TSP_RES_FATAL if there was a fata error so we 567 should abort processing. 568*/ 569static 570int 571dk4tsp32_i_add_one_byte( 572 dk4_tsp32_t *tsp, 573 unsigned char inbyte 574) 575{ 576 size_t nrej; /* Number of unused bytes */ 577 size_t i; /* Current unused byte index */ 578 int back = DK4_TSP_RES_FATAL; /* Function result */ 579 int res; /* Operation result */ 580 unsigned char uc; /* Current unused byte */ 581 $? "+ dk4tsp32_i_add_one_byte index=%lu '%!8c' 0x%02x", byte_number++, inbyte, (unsigned)inbyte 582#if DK4_USE_ASSERT 583 assert(NULL != tsp); 584#endif 585 (tsp->pos).bytes += (dk4_um_t)1UL; 586 switch (tsp->pst) { 587 case 0: { 588 res = dk4bom_detect_add(&(tsp->bomd), inbyte); 589 switch (res) { 590 case DK4_EDSTM_ACCEPT: { 591 back = DK4_TSP_RES_OK; 592 } break; 593 case DK4_EDSTM_FINISHED: case DK4_EDSTM_FINISHED_WITH_UNUSED: { 594 back = DK4_TSP_RES_OK; 595 tsp->ief = dk4bom_detect_get_encoding(&(tsp->bomd)); 596 dk4tsp32_initialize_decoder(tsp); 597 tsp->pst = 1; 598 if (DK4_EDSTM_FINISHED_WITH_UNUSED == res) { 599 nrej = dk4bom_detect_num_unused_bytes(&(tsp->bomd)); 600 if (0 < nrej) { 601 for (i = 0; ((i < nrej) && (DK4_TSP_RES_FATAL != back)); i++) { 602 uc = dk4bom_detect_unused_byte(&(tsp->bomd), i); 603 switch (dk4tsp32_process_byte(tsp, uc)) { 604 case DK4_TSP_RES_FATAL: { 605 back = DK4_TSP_RES_FATAL; 606 } break; 607 case DK4_TSP_RES_ERROR: { 608 if (DK4_TSP_RES_OK == back) { 609 back = DK4_TSP_RES_ERROR; 610 } 611 } break; 612 } 613 } 614 } 615 } 616 } break; 617 } 618 } break; 619 case 1: { 620 back = dk4tsp32_process_byte(tsp, inbyte); 621 } break; 622 /* 623 Processing stage 2 indicates there was a serious error 624 before, we must skip further processing. 625 This situation is covered by the initialization value 626 DK4_TSP_RES_FATAL, so we do not need a case branch here. 627 */ 628 } 629 $? "- dk4tsp32_i_add_one_byte %d", back 630 return back; 631} 632 633 634 635int 636dk4tsp32_setup_char( 637 dk4_tsp32_t *tsp, 638 void *obj, 639 dk4_c32_handler_t *fct, 640 int eie, 641 dk4_er_t *erp 642) 643{ 644 int back = 0; 645#if DK4_USE_ASSERT 646 assert(NULL != tsp); 647#endif 648 if (NULL != tsp) { 649 dk4tsp32_init(tsp, eie); 650 if (NULL != fct) { 651 tsp->obj = obj; 652 (tsp->fct).ch = fct; 653 tsp->iee = eie; 654 tsp->ief = eie; 655 tsp->pst = 0; 656 back = 1; 657 } else { 658 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS); 659 tsp->pst = 2; 660 } 661 } else { 662 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS); 663 } 664 return back; 665} 666 667 668 669int 670dk4tsp32_setup_line( 671 dk4_tsp32_t *tsp, 672 void *obj, 673 dk4_c32_line_handler_t *fct, 674 dk4_c32_t *inbuf, 675 size_t szin, 676 int eie, 677 dk4_er_t *erp 678) 679{ 680 int back = 0; 681#if DK4_USE_ASSERT 682 assert(NULL != tsp); 683 assert(NULL != inbuf); 684 assert(0 < szin); 685#endif 686 if (NULL != tsp) { 687 dk4tsp32_init(tsp, eie); 688 if ((NULL != fct) && (NULL != inbuf) && (0 < szin)) { 689 tsp->obj = obj; 690 (tsp->fct).lh = fct; 691 tsp->inbuf = inbuf; 692 tsp->in_sz = szin; 693 tsp->in_us = 0; 694 tsp->iee = eie; 695 tsp->ief = eie; 696 tsp->pst = 0; 697 back = 1; 698 } else { 699 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS); 700 tsp->pst = 2; 701 } 702 } else { 703 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS); 704 } 705 return back; 706} 707 708 709 710int 711dk4tsp32_add_one_byte( 712 dk4_tsp32_t *tsp, 713 unsigned char inbyte 714) 715{ 716 int back = DK4_TSP_RES_FATAL; 717 $? "+ dk4tsp32_add_one_byte" 718#if DK4_USE_ASSERT 719 assert(NULL != tsp); 720#endif 721 if (NULL != tsp) { 722 if (2 > tsp->pst) { 723 back = dk4tsp32_i_add_one_byte(tsp, inbyte); 724 } 725 } 726 $? "- dk4tsp32_add_one_byte %d", back 727 return back; 728} 729 730 731 732int 733dk4tsp32_add_bytes( 734 dk4_tsp32_t *tsp, 735 const unsigned char *buffer, 736 size_t sz 737) 738{ 739 int back = DK4_TSP_RES_FATAL; /* Function result */ 740 int res; /* Processing result */ 741 $? "+ dk4tsp32_add_bytes" 742#if DK4_USE_ASSERT 743 assert(NULL != tsp); 744 assert(NULL != buffer); 745 assert(0 < sz); 746#endif 747 if ((NULL != tsp) && (NULL != buffer) && (0 < sz)) { 748 if (2 > tsp->pst) { 749 back = DK4_TSP_RES_OK; 750 while ((sz--) && (2 > tsp->pst)) { 751 res = dk4tsp32_i_add_one_byte(tsp, *(buffer++)); 752 switch (res) { 753 case DK4_TSP_RES_FATAL: { 754 back = DK4_TSP_RES_FATAL; 755 } break; 756 case DK4_TSP_RES_ERROR: { 757 if (DK4_TSP_RES_OK == back) { 758 back = DK4_TSP_RES_ERROR; 759 } 760 } break; 761 } 762 } 763 } 764 } 765 $? "- dk4tsp32_add_bytes %d", back 766 return back; 767} 768 769 770 771int 772dk4tsp32_finish(dk4_tsp32_t *tsp) 773{ 774 size_t nrej = 0; /* Number of unused bytes */ 775 size_t i = 0; /* Current unused byte index */ 776 int back = DK4_TSP_RES_FATAL; /* Function result */ 777 int res; /* Operation result */ 778 unsigned char uc; /* Current unused byte */ 779 $? "+ dk4tsp32_finish" 780#if DK4_USE_ASSERT 781 assert(NULL != tsp); 782#endif 783 if (NULL != tsp) { 784 /* Flush all unprocessed output 785 1. If there are unprocessed bytes in the BOM detector as BOM 786 detection was not completed, process these bytes. 787 2. If there are characters in the line buffer, 788 finalize the line buffer text and process it. 789 */ 790 back = DK4_TSP_RES_OK; 791 /* Retrieve an process bytes stored in BOM detector */ 792 if (0 == tsp->pst) { 793 tsp->pst = 1; 794 nrej = dk4bom_detect_num_unused_bytes(&(tsp->bomd)); 795 if (0 < nrej) { 796 dk4tsp32_initialize_decoder(tsp); 797 for (i = 0; ((i < nrej) && (2 > tsp->pst)); i++) { 798 uc = dk4bom_detect_unused_byte(&(tsp->bomd), i); 799 switch (dk4tsp32_process_byte(tsp, uc)) { 800 case DK4_TSP_RES_FATAL: { 801 back = DK4_TSP_RES_FATAL; 802 tsp->pst = 2; 803 } break; 804 case DK4_TSP_RES_ERROR: { 805 if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; } 806 } break; 807 } 808 } 809 } 810 } 811 /* Process final line */ 812 if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) { 813 if (0 < tsp->in_us) { 814 if (2 > tsp->pst) { 815 if (tsp->in_us < tsp->in_sz) { 816 (tsp->inbuf)[tsp->in_us] = (dk4_c32_t)0UL; 817 res = 818 (*((tsp->fct).lh))( 819 tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr) 820 ); 821 switch (res) { 822 case DK4_TSP_RES_FATAL: { 823 back = DK4_TSP_RES_FATAL; 824 tsp->pst = 2; 825 } break; 826 case DK4_TSP_RES_ERROR: { 827 if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; } 828 } break; 829 } 830 } else { 831 back = DK4_TSP_RES_FATAL; 832 tsp->pst = 2; 833 dk4error_set_with_position( 834 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL, 835 (tsp->pos).bytes, (tsp->pos).lineno, 836 (tsp->pos).chars, (tsp->pos).charil 837 ); 838 } 839 } 840 } 841 } 842 } 843 $? "- dk4tsp32_finish %d", back 844 return back; 845} 846 847 848 849void 850dk4tsp32_get_errors(dk4_er_t *er_en, dk4_er_t *er_pr, dk4_tsp32_t const *tsp) 851{ 852#if DK4_USE_ASSERT 853 assert(NULL != tsp); 854#endif 855 if (NULL != tsp) { 856 if (NULL != er_en) { 857 DK4_MEMCPY(er_en, &(tsp->er_en), sizeof(dk4_er_t)); 858 } 859 if (NULL != er_pr) { 860 DK4_MEMCPY(er_pr, &(tsp->er_pr), sizeof(dk4_er_t)); 861 } 862 } 863} 864 865 866 867