1%% options 2 3copyright owner = Dirk Krause 4copyright year = 2015-xxxx 5SPDX-License-Identifier: BSD-3-Clause 6 7 8%% header 9 10/** @file 11 Text stream processing for 16 bit characters. 12 13 CRT on Windows: Optional. 14*/ 15 16#ifndef DK4CONF_H_INCLUDED 17#if DK4_BUILDING_DKTOOLS4 18#include "dk4conf.h" 19#else 20#include <dktools-4/dk4conf.h> 21#endif 22#endif 23 24#ifndef DK4TYPES_H_INCLUDED 25#if DK4_BUILDING_DKTOOLS4 26#include <libdk4base/dk4types.h> 27#else 28#include <dktools-4/dk4types.h> 29#endif 30#endif 31 32#ifndef DK4ERROR_H_INCLUDED 33#if DK4_BUILDING_DKTOOLS4 34#include <libdk4base/dk4error.h> 35#else 36#include <dktools-4/dk4error.h> 37#endif 38#endif 39 40#ifndef DK4BOM_H_INCLUDED 41#if DK4_BUILDING_DKTOOLS4 42#include <libdk4c/dk4bom.h> 43#else 44#include <dktools-4/dk4bom.h> 45#endif 46#endif 47 48#ifndef DK4TSP_H_INCLUDED 49#if DK4_BUILDING_DKTOOLS4 50#include <libdk4c/dk4tsp.h> 51#else 52#include <dktools-4/dk4tsp.h> 53#endif 54#endif 55 56#ifndef DK4UTF8_H_INCLUDED 57#if DK4_BUILDING_DKTOOLS4 58#include <libdk4c/dk4utf8.h> 59#else 60#include <dktools-4/dk4utf8.h> 61#endif 62#endif 63 64#ifndef DK4UTF16_H_INCLUDED 65#if DK4_BUILDING_DKTOOLS4 66#include <libdk4c/dk4utf16.h> 67#else 68#include <dktools-4/dk4utf16.h> 69#endif 70#endif 71 72#ifndef DK4C32_H_INCLUDED 73#if DK4_BUILDING_DKTOOLS4 74#include <libdk4c/dk4c32.h> 75#else 76#include <dktools-4/dk4c32.h> 77#endif 78#endif 79 80/** Handler function for single characters. 81 @param obj Object to modify while processing the character. 82 @param c Character to process. 83 @param pos Current position in file or data stream. 84 @param erp Error report, may be NULL. 85 @return DK4_TSP_RES_OK if the character was processed 86 successfully, 87 DK4_TSP_RES_ERROR if there was an error but we can 88 continue, 89 DK4_TSP_RES_FATAL if there was a fatal error so we 90 should abort processing. 91*/ 92typedef int dk4_c16_handler_t( 93 void *obj, 94 dk4_c16_t c, 95 dk4_text_stream_position_t *pos, 96 dk4_er_t *erp 97); 98 99/** Handler function for text lines. 100 @param obj Object to modify while processing the character. 101 @param line Text line to process. 102 @param lineno Current line number. 103 @param erp Error report, may be NULL. 104 @return DK4_TSP_RES_OK if the character was processed 105 successfully, 106 DK4_TSP_RES_ERROR if there was an error but we can 107 continue, 108 DK4_TSP_RES_FATAL if there was a fata error so we 109 should abort processing. 110*/ 111typedef int dk4_c16_line_handler_t( 112 void *obj, 113 dk4_c16_t *line, 114 dk4_um_t lineno, 115 dk4_er_t *erp 116); 117 118/** Structure for 16 bit character processing. 119*/ 120typedef struct { 121 union { 122 dk4_utf8_decoder_t u08; /**< UTF-8 decoder. */ 123 dk4_utf16_byte_decoder_t u16; /**< UTF-16 decoder. */ 124 dk4_c32_byte_decoder_t c32; /**< 32 bit char decoder. */ 125 } dec; /**< Input decoder. */ 126 dk4_bom_detector_t bomd; /**< BOM detector. */ 127 dk4_text_stream_position_t pos; /**< Current position. */ 128 dk4_er_t er_en; /**< Errors in input decoding. */ 129 dk4_er_t er_pr; /**< Errors in processing. */ 130 union { 131 dk4_c16_line_handler_t *lh; /**< Handler function for lines. */ 132 dk4_c16_handler_t *ch; /**< Handler function for char. */ 133 } fct; /**< Handler function. */ 134 dk4_c16_t *inbuf; /**< Buffer for input line. */ 135 void *obj; /**< Object to modify in processing. */ 136 size_t in_sz; /**< Size of input line buffer. */ 137 size_t in_us; /**< Used bytes in input line buffer. */ 138 int ief; /**< Input encoding found. */ 139 int iee; /**< Input encoding expected. */ 140 int pst; /**< Processing stage. */ 141} dk4_tsp16_t; 142 143 144 145#ifdef __cplusplus 146extern "C" { 147#endif 148 149/** Set up processor for byte by byte processing. 150 @param tsp Processor to set up. 151 @param obj Object to modify when processing input, may be NULL. 152 @param fct Handler function to call for each character. 153 @param eie Expected input encoding. 154 @param erp Error report, may be NULL. 155 @return 1 on success, 0 on error. 156*/ 157int 158dk4tsp16_setup_char( 159 dk4_tsp16_t *tsp, 160 void *obj, 161 dk4_c16_handler_t *fct, 162 int eie, 163 dk4_er_t *erp 164); 165 166/** Set up processor for line processing. 167 @param tsp Processor to set up. 168 @param obj Object to modify when processing input, may be NULL. 169 @param fct Handler function to invoke for each line. 170 @param inbuf Input line buffer. 171 @param szin Size of input line buffer (number of dk4_c16_t). 172 @param eie Expected input encoding. 173 @param erp Error report, may be NULL. 174 @return 1 on success, 0 on error. 175*/ 176int 177dk4tsp16_setup_line( 178 dk4_tsp16_t *tsp, 179 void *obj, 180 dk4_c16_line_handler_t *fct, 181 dk4_c16_t *inbuf, 182 size_t szin, 183 int eie, 184 dk4_er_t *erp 185); 186 187/** Add one single byte. 188 @param tsp Text stream processor. 189 @param inbyte Byte to process. 190 @return DK4_TSP_RES_OK if the character was processed 191 successfully, 192 DK4_TSP_RES_ERROR if there was an error but we can 193 continue, 194 DK4_TSP_RES_FATAL if there was a fata error so we 195 should abort processing. 196*/ 197int 198dk4tsp16_add_one_byte( 199 dk4_tsp16_t *tsp, 200 unsigned char inbyte 201); 202 203/** Add multiple bytes. 204 @param tsp Text stream processor. 205 @param buffer Buffer start address. 206 @param sz Number of bytes in buffer. 207 @return DK4_TSP_RES_OK if the character was processed 208 successfully, 209 DK4_TSP_RES_ERROR if there was an error but we can 210 continue, 211 DK4_TSP_RES_FATAL if there was a fata error so we 212 should abort processing. 213*/ 214int 215dk4tsp16_add_bytes( 216 dk4_tsp16_t *tsp, 217 const unsigned char *buffer, 218 size_t sz 219); 220 221/** Finish processing. 222 @param tsp Text stream processor. 223 @return DK4_TSP_RES_OK if processing was finished 224 successfully, 225 DK4_TSP_RES_ERROR if there was an error, 226 DK4_TSP_RES_FATAL if there was a fatal error. 227*/ 228int 229dk4tsp16_finish(dk4_tsp16_t *tsp); 230 231/** Retrieve error reports for encoding/decoding and processing. 232 @param er_en Destination error report buffer for encoding/decoding. 233 @param er_pr Destination error report buffer for processing. 234 @param tsp Text stream processor to retrieve errors from. 235*/ 236void 237dk4tsp16_get_errors(dk4_er_t *er_en, dk4_er_t *er_pr, dk4_tsp16_t const *tsp); 238 239#ifdef __cplusplus 240} 241#endif 242 243 244 245%% module 246 247#include "dk4conf.h" 248#include <libdk4c/dk4tsp16.h> 249#include <libdk4c/dk4enc.h> 250#include <libdk4base/dk4mem.h> 251#include <libdk4c/dk4ansi.h> 252#include <libdk4c/dk4utf8.h> 253#include <libdk4c/dk4utf16.h> 254#include <libdk4c/dk4c32.h> 255 256#if DK4_HAVE_ASSERT_H 257#ifndef ASSERT_H_INCLUDED 258#include <assert.h> 259#define ASSERT_H_INCLUDED 1 260#endif 261#endif 262 263 264$!trace-include 265 266 267 268/** Initialize text stream processing structure. 269 @param tsp Text stream processor. 270 @param eie Expected input encoding. 271*/ 272static 273void 274dk4tsp16_init(dk4_tsp16_t *tsp, int eie) 275{ 276#if DK4_USE_ASSERT 277 assert(NULL != tsp); 278#endif 279 DK4_MEMRES(tsp, sizeof(dk4_tsp16_t)); 280 dk4bom_detect_init(&(tsp->bomd), eie); 281 dk4error_init(&(tsp->er_en)); 282 dk4error_init(&(tsp->er_pr)); 283 tsp->inbuf = NULL; 284 tsp->obj = NULL; 285 tsp->in_sz = 0; 286 tsp->in_us = 0; 287 tsp->ief = eie; 288 tsp->iee = eie; 289 tsp->pst = 0; 290 (tsp->pos).bytes = (dk4_um_t)0UL; 291 (tsp->pos).chars = (dk4_um_t)1UL; 292 (tsp->pos).lineno = (dk4_um_t)1UL; 293 (tsp->pos).charil = (dk4_um_t)1UL; 294} 295 296 297 298/** Initialize decoder for found input encoding. 299 @param tsp Text stream processor. 300*/ 301static 302void 303dk4tsp16_initialize_decoder(dk4_tsp16_t *tsp) 304{ 305#if DK4_USE_ASSERT 306 assert(NULL != tsp); 307#endif 308 switch (tsp->ief) { 309 case DK4_FILE_ENCODING_UTF8: { 310 dk4utf8_init(&((tsp->dec).u08)); 311 } break; 312 case DK4_FILE_ENCODING_UTF16_LE: { 313 dk4utf16_byte_init(&((tsp->dec).u16), 0); 314 } break; 315 case DK4_FILE_ENCODING_UTF16_BE: { 316 dk4utf16_byte_init(&((tsp->dec).u16), 1); 317 } break; 318 case DK4_FILE_ENCODING_32_LE: { 319 dk4c32_decoder_init(&((tsp->dec).c32), 0); 320 } break; 321 case DK4_FILE_ENCODING_32_BE: { 322 dk4c32_decoder_init(&((tsp->dec).c32), 1); 323 } break; 324 } 325} 326 327 328 329#if 0 330static 331int 332dk4tsp16_process_character( 333 dk4_tsp16_t *tsp, 334 dk4_c16_t chr 335) 336{ 337 int back = DK4_TSP_RES_FATAL; 338 $? "+ dk4tsp16_process_character %04x", (unsigned)chr 339 /* Increase position */ 340 (tsp->pos).chars += (dk4_um_t)1UL; 341 (tsp->pos).charil += (dk4_um_t)1UL; 342 /* Check for line buffering or direct processing */ 343 if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) { 344 $? ". line buffering" 345 if (tsp->in_us < tsp->in_sz) { 346 (tsp->inbuf)[tsp->in_us] = chr; 347 tsp->in_us += 1; 348 back = DK4_TSP_RES_OK; 349 if ((dk4_c16_t)'\n' == chr) { 350 back = DK4_TSP_RES_FATAL; 351 if (tsp->in_us < tsp->in_sz) { 352 (tsp->inbuf)[tsp->in_us] = (dk4_c16_t)'\0'; 353 back = (*((tsp->fct).lh))( 354 tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr) 355 ); 356 } else { 357 dk4error_set_with_position( 358 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL, 359 (tsp->pos).bytes, (tsp->pos).lineno, 360 (tsp->pos).chars, (tsp->pos).charil 361 ); 362 } 363 tsp->in_us = 0; 364 } 365 } else { 366 dk4error_set_with_position( 367 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL, 368 (tsp->pos).bytes, (tsp->pos).lineno, 369 (tsp->pos).chars, (tsp->pos).charil 370 ); 371 } 372 } else { 373 $? ". direct processing" 374 if (NULL != (tsp->fct).ch) { 375 back = (*((tsp->fct).ch))(tsp->obj, chr, &(tsp->pos), &(tsp->er_pr)); 376 } 377 } 378 /* Increase line number for newline characters */ 379 if ((dk4_c16_t)('\n') == chr) { 380 (tsp->pos).lineno += (dk4_um_t)1UL; 381 (tsp->pos).charil = (dk4_um_t)1UL; 382 } $? "- dk4tsp16_process_character %d", back 383 return back; 384} 385#endif 386 387 388 389/** Process a group of 16 bit characters, the group represents 390 one 32 bit character. 391 @param tsp Text stream processor. 392 @param buf Buffer of 16 bit characters. 393 @param sz Number of 16 bit characters. 394 @return DK4_TSP_RES_OK if the characters were processed 395 successfully, 396 DK4_TSP_RES_ERROR if there was an error but we can 397 continue, 398 DK4_TSP_RES_FATAL if there was a fatal error so 399 we should abort processing. 400*/ 401static 402int 403dk4tsp16_process_group(dk4_tsp16_t *tsp, dk4_c16_t *buf, size_t sz) 404{ 405 size_t i = 0; 406 int back = DK4_TSP_RES_FATAL; 407 int res = DK4_TSP_RES_FATAL; 408 $? "+ dk4tsp16_process_group" 409#if DK4_USE_ASSERT 410 assert(NULL != tsp); 411 assert(NULL != buf); 412 assert(0 < sz); 413#endif 414 /* Increase position */ 415 (tsp->pos).chars += (dk4_um_t)1UL; 416 (tsp->pos).charil += (dk4_um_t)1UL; 417 /* Check for line buffering or direct processing */ 418 if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) { 419 $? ". line buffering" 420 if (sz < tsp->in_sz) { 421 if (tsp->in_us < (tsp->in_sz - sz)) { 422 /* Append group to line and increase used size */ 423 DK4_MEMCPY(&((tsp->inbuf)[tsp->in_us]),buf,(sizeof(dk4_c16_t)*sz)); 424 tsp->in_us += sz; 425 back = DK4_TSP_RES_OK; 426 /* On newline, process the line buffer */ 427 if ((1 == sz) && ((dk4_c16_t)'\n' == buf[0])) { 428 back = DK4_TSP_RES_FATAL; 429 if (tsp->in_us < tsp->in_sz) { 430 (tsp->inbuf)[tsp->in_us] = (dk4_c16_t)'\0'; 431 back = (*((tsp->fct).lh))( 432 tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr) 433 ); 434 } else { 435 /* ERROR: Buffer too small */ 436 dk4error_set_with_position( 437 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL, 438 (tsp->pos).bytes, (tsp->pos).lineno, 439 (tsp->pos).chars, (tsp->pos).charil 440 ); 441 } 442 tsp->in_us = 0; 443 } 444 } else { 445 /* ERROR: Buffer too small */ 446 back = DK4_TSP_RES_FATAL; 447 dk4error_set_with_position( 448 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL, 449 (tsp->pos).bytes, (tsp->pos).lineno, 450 (tsp->pos).chars, (tsp->pos).charil 451 ); 452 } 453 } else { 454 /* ERROR: Buffer too small */ 455 back = DK4_TSP_RES_FATAL; 456 dk4error_set_with_position( 457 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL, 458 (tsp->pos).bytes, (tsp->pos).lineno, 459 (tsp->pos).chars, (tsp->pos).charil 460 ); 461 } 462 } else { 463 $? ". direct char processing" 464 if (NULL != (tsp->fct).ch) { 465 back = DK4_TSP_RES_OK; 466 for (i = 0; i < sz; i++) { 467 res = (*((tsp->fct).ch))(tsp->obj, buf[i], &(tsp->pos), &(tsp->er_pr)); 468 switch (res) { 469 case DK4_TSP_RES_FATAL: { 470 back = DK4_TSP_RES_FATAL; 471 } break; 472 case DK4_TSP_RES_ERROR: { 473 if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; } 474 } break; 475 } 476 } 477 } else { 478 dk4error_set_with_position( 479 &(tsp->er_en), DK4_E_INVALID_ARGUMENTS, 480 (tsp->pos).bytes, (tsp->pos).lineno, 481 (tsp->pos).chars, (tsp->pos).charil 482 ); 483 } 484 } 485 /* If we have a newline, increase line number and reset char in line */ 486 if (1 == sz) { 487 if ((dk4_c16_t)('\n') == buf[0]) { 488 (tsp->pos).lineno += (dk4_um_t)1UL; 489 (tsp->pos).charil = (dk4_um_t)1UL; 490 } 491 } 492 $? "- dk4tsp16_process_group %d", back 493 return back; 494} 495 496 497 498/** Normal processing for one byte. 499 Retrieve a 32 bit character first, either by decoding directly 500 or by adding to a decoder. In the next step encode the 32 bit 501 character in one or two 16 bit characters and process these. 502 @param tsp Text stream processor. 503 @param inbyte Byte to process. 504 @return Operation result, one from DK4_TSP_RES_OK, 505 DK4_TSP_RES_ERROR or DK4_TSP_RES_FATAL. 506*/ 507static 508int 509dk4tsp16_process_byte(dk4_tsp16_t *tsp, unsigned char inbyte) 510{ 511 dk4_c16_t buf[8]; 512 dk4_c32_t c32 = dkC32(0); 513 size_t sz; 514 int back = DK4_TSP_RES_FATAL; 515 int cuc32 = 0; 516 int res = 0; 517 /* Add byte to decoder, attempt to retrieve a 32 bit character */ 518#if DK4_USE_ASSERT 519 assert(NULL != tsp); 520#endif 521 switch (tsp->ief) { 522 case DK4_FILE_ENCODING_PLAIN: { 523 c32 = (dk4_c32_t)inbyte; 524 cuc32 = 1; 525 } break; 526 case DK4_FILE_ENCODING_WIN1252: { 527 if (0 != dk4ansi_decode(&c32, inbyte)) { 528 cuc32 = 1; 529 } else { 530 dk4error_set_with_position( 531 &(tsp->er_en), DK4_E_DECODING_FAILED, 532 (tsp->pos).bytes, (tsp->pos).lineno, 533 (tsp->pos).chars, (tsp->pos).charil 534 ); 535 } 536 } break; 537 case DK4_FILE_ENCODING_UTF8: { 538 res = dk4utf8_add(&((tsp->dec).u08), inbyte); 539 switch (res) { 540 case DK4_EDSTM_ERROR: { 541 dk4error_set_with_position( 542 &(tsp->er_en), DK4_E_DECODING_FAILED, 543 (tsp->pos).bytes, (tsp->pos).lineno, 544 (tsp->pos).chars, (tsp->pos).charil 545 ); 546 } break; 547 case DK4_EDSTM_FINISHED: { 548 c32 = dk4utf8_get(&((tsp->dec).u08)); 549 cuc32 = 1; 550 dk4utf8_init(&((tsp->dec).u08)); 551 } break; 552 case DK4_EDSTM_ACCEPT: { 553 back = DK4_TSP_RES_OK; 554 } break; 555 } 556 } break; 557 case DK4_FILE_ENCODING_UTF16_LE: { 558 res = dk4utf16_byte_add(&((tsp->dec).u16), inbyte); 559 switch (res) { 560 case DK4_EDSTM_ERROR: { 561 dk4error_set_with_position( 562 &(tsp->er_en), DK4_E_DECODING_FAILED, 563 (tsp->pos).bytes, (tsp->pos).lineno, 564 (tsp->pos).chars, (tsp->pos).charil 565 ); 566 } break; 567 case DK4_EDSTM_FINISHED: { 568 c32 = dk4utf16_byte_get(&((tsp->dec).u16)); 569 cuc32 = 1; 570 dk4utf16_byte_init(&((tsp->dec).u16), 0); 571 } break; 572 case DK4_EDSTM_ACCEPT: { 573 back = DK4_TSP_RES_OK; 574 } break; 575 } 576 } break; 577 case DK4_FILE_ENCODING_UTF16_BE: { 578 res = dk4utf16_byte_add(&((tsp->dec).u16), inbyte); 579 switch (res) { 580 case DK4_EDSTM_ERROR: { 581 dk4error_set_with_position( 582 &(tsp->er_en), DK4_E_DECODING_FAILED, 583 (tsp->pos).bytes, (tsp->pos).lineno, 584 (tsp->pos).chars, (tsp->pos).charil 585 ); 586 } break; 587 case DK4_EDSTM_FINISHED: { 588 c32 = dk4utf16_byte_get(&((tsp->dec).u16)); 589 cuc32 = 1; 590 dk4utf16_byte_init(&((tsp->dec).u16), 1); 591 } break; 592 case DK4_EDSTM_ACCEPT: { 593 back = DK4_TSP_RES_OK; 594 } break; 595 } 596 } break; 597 case DK4_FILE_ENCODING_32_LE: { 598 res = dk4c32_decoder_add(&((tsp->dec).c32), inbyte); 599 switch (res) { 600 case DK4_EDSTM_ERROR: { 601 dk4error_set_with_position( 602 &(tsp->er_en), DK4_E_DECODING_FAILED, 603 (tsp->pos).bytes, (tsp->pos).lineno, 604 (tsp->pos).chars, (tsp->pos).charil 605 ); 606 } break; 607 case DK4_EDSTM_FINISHED: { 608 c32 = dk4c32_decoder_get(&((tsp->dec).c32)); 609 cuc32 = 1; 610 dk4c32_decoder_init(&((tsp->dec).c32), 0); 611 } break; 612 case DK4_EDSTM_ACCEPT: { 613 back = DK4_TSP_RES_OK; 614 } break; 615 } 616 } break; 617 case DK4_FILE_ENCODING_32_BE: { 618 res = dk4c32_decoder_add(&((tsp->dec).c32), inbyte); 619 switch (res) { 620 case DK4_EDSTM_ERROR: { 621 dk4error_set_with_position( 622 &(tsp->er_en), DK4_E_DECODING_FAILED, 623 (tsp->pos).bytes, (tsp->pos).lineno, 624 (tsp->pos).chars, (tsp->pos).charil 625 ); 626 } break; 627 case DK4_EDSTM_FINISHED: { 628 c32 = dk4c32_decoder_get(&((tsp->dec).c32)); 629 cuc32 = 1; 630 dk4c32_decoder_init(&((tsp->dec).c32), 1); 631 } break; 632 case DK4_EDSTM_ACCEPT: { 633 back = DK4_TSP_RES_OK; 634 } break; 635 } 636 } break; 637 } 638 /* Process 32 bit character if we have a 32 bit char to process */ 639 if (0 != cuc32) { 640 sz = DK4_SIZEOF(buf,dk4_c16_t); 641 if (0 != dk4utf16_encode(buf, &sz, c32, NULL)) { 642#if 0 643 back = DK4_TSP_RES_OK; 644 for (i = 0; ((i < sz) && (DK4_TSP_RES_FATAL != back)); i++) { 645 switch (dk4tsp16_process_character(tsp, buf[i])) { 646 case DK4_TSP_RES_FATAL: { 647 back = DK4_TSP_RES_FATAL; 648 } break; 649 case DK4_TSP_RES_ERROR: { 650 if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; } 651 } break; 652 } 653 } 654#else 655 back = dk4tsp16_process_group(tsp, buf, sz); 656#endif 657 } else { 658 dk4error_set_with_position( 659 &(tsp->er_en), DK4_E_ENCODING_FAILED, 660 (tsp->pos).bytes, (tsp->pos).lineno, 661 (tsp->pos).chars, (tsp->pos).charil 662 ); 663 } 664 } 665 if (DK4_TSP_RES_FATAL == back) { 666 tsp->pst = 2; 667 } 668 return back; 669} 670 671 672 673/** Add one byte to internal data structures 674 (BOM detection and/or normal processing). 675 @param tsp Text stream processor. 676 @param inbyte Byte to add. 677 @return Operation result, one from DK4_TSP_RES_OK, 678 DK4_TSP_RES_ERROR or DK4_TSP_RES_FATAL. 679*/ 680static 681int 682dk4tsp16_i_add_one_byte( 683 dk4_tsp16_t *tsp, 684 unsigned char inbyte 685) 686{ 687 size_t nrej; 688 size_t i; 689 int back = DK4_TSP_RES_FATAL; 690 int res; 691 unsigned char uc; 692 $? "+ dk4tsp16_i_add_one_byte" 693#if DK4_USE_ASSERT 694 assert(NULL != tsp); 695#endif 696 (tsp->pos).bytes += (dk4_um_t)1UL; 697 switch (tsp->pst) { 698 case 0: { $? ". bom detection" 699 res = dk4bom_detect_add(&(tsp->bomd), inbyte); 700 switch (res) { 701 case DK4_EDSTM_ACCEPT: { 702 back = DK4_TSP_RES_OK; 703 } break; 704 case DK4_EDSTM_FINISHED: case DK4_EDSTM_FINISHED_WITH_UNUSED: { 705 /* Result is acceptable */ 706 back = DK4_TSP_RES_OK; 707 /* Retrieve found encoding */ 708 tsp->ief = dk4bom_detect_get_encoding(&(tsp->bomd)); 709 /* If necessary, initialize decoder */ 710 dk4tsp16_initialize_decoder(tsp); 711 /* Switch to normal processing */ 712 tsp->pst = 1; 713 /* Process bytes stored in BOM detector */ 714 if (DK4_EDSTM_FINISHED_WITH_UNUSED == res) { 715 nrej = dk4bom_detect_num_unused_bytes(&(tsp->bomd)); 716 if (0 < nrej) { 717 for (i = 0; ((i < nrej) && (DK4_TSP_RES_FATAL != back)); i++) { 718 uc = dk4bom_detect_unused_byte(&(tsp->bomd), i); 719 switch (dk4tsp16_process_byte(tsp, uc)) { 720 case DK4_TSP_RES_FATAL: { 721 back = DK4_TSP_RES_FATAL; 722 } break; 723 case DK4_TSP_RES_ERROR: { 724 if (DK4_TSP_RES_OK == back) { 725 back = DK4_TSP_RES_ERROR; 726 } 727 } break; 728 } 729 } 730 } 731 } 732 } break; 733 } 734 } break; 735 case 1: { $? ". normal processing" 736 back = dk4tsp16_process_byte(tsp, inbyte); 737 } break; 738 /* 739 Processing stage 2 indicates there was a serious error 740 before, we must skip further processing. 741 This situation is covered by the initialization value 742 DK4_TSP_RES_FATAL, so we do not need a case branch here. 743 */ 744 } 745 $? "- dk4tsp16_i_add_one_byte %d", back 746 return back; 747} 748 749 750 751int 752dk4tsp16_setup_char( 753 dk4_tsp16_t *tsp, 754 void *obj, 755 dk4_c16_handler_t *fct, 756 int eie, 757 dk4_er_t *erp 758) 759{ 760 int back = 0; 761#if DK4_USE_ASSERT 762 assert(NULL != tsp); 763#endif 764 if (NULL != tsp) { 765 dk4tsp16_init(tsp, eie); 766 if (NULL != fct) { 767 tsp->obj = obj; 768 (tsp->fct).ch = fct; 769 tsp->iee = eie; 770 tsp->ief = eie; 771 tsp->pst = 0; 772 back = 1; 773 } else { 774 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS); 775 tsp->pst = 2; 776 } 777 } else { 778 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS); 779 } 780 return back; 781} 782 783 784 785int 786dk4tsp16_setup_line( 787 dk4_tsp16_t *tsp, 788 void *obj, 789 dk4_c16_line_handler_t *fct, 790 dk4_c16_t *inbuf, 791 size_t szin, 792 int eie, 793 dk4_er_t *erp 794) 795{ 796 int back = 0; 797#if DK4_USE_ASSERT 798 assert(NULL != tsp); 799 assert(NULL != inbuf); 800 assert(0 < szin); 801#endif 802 if (NULL != tsp) { 803 dk4tsp16_init(tsp, eie); 804 if ((NULL != fct) && (NULL != inbuf) && (0 < szin)) { 805 tsp->obj = obj; 806 (tsp->fct).lh = fct; 807 tsp->inbuf = inbuf; 808 tsp->in_sz = szin; 809 tsp->in_us = 0; 810 tsp->iee = eie; 811 tsp->ief = eie; 812 tsp->pst = 0; 813 back = 1; 814 } else { 815 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS); 816 tsp->pst = 2; 817 } 818 } else { 819 dk4error_set_simple_error_code(erp, DK4_E_INVALID_ARGUMENTS); 820 } 821 return back; 822} 823 824 825 826int 827dk4tsp16_add_one_byte( 828 dk4_tsp16_t *tsp, 829 unsigned char inbyte 830) 831{ 832 int back = DK4_TSP_RES_FATAL; 833#if DK4_USE_ASSERT 834 assert(NULL != tsp); 835#endif 836 if (NULL != tsp) { 837 if (2 > tsp->pst) { 838 back = dk4tsp16_i_add_one_byte(tsp, inbyte); 839 } 840 } 841 return back; 842} 843 844 845 846int 847dk4tsp16_add_bytes( 848 dk4_tsp16_t *tsp, 849 const unsigned char *buffer, 850 size_t sz 851) 852{ 853 int back = DK4_TSP_RES_FATAL; /* Function result */ 854 int res; /* Processing result */ 855#if DK4_USE_ASSERT 856 assert(NULL != tsp); 857 assert(NULL != buffer); 858 assert(0 < sz); 859#endif 860 if ((NULL != tsp) && (NULL != buffer) && (0 < sz)) { 861 if (2 > tsp->pst) { 862 back = DK4_TSP_RES_OK; 863 while ((sz--) && (2 > tsp->pst)) { 864 res = dk4tsp16_i_add_one_byte(tsp, *(buffer++)); 865 switch (res) { 866 case DK4_TSP_RES_FATAL: { 867 back = DK4_TSP_RES_FATAL; 868 } break; 869 case DK4_TSP_RES_ERROR: { 870 if (DK4_TSP_RES_OK == back) { 871 back = DK4_TSP_RES_ERROR; 872 } 873 } break; 874 } 875 } 876 } 877 } 878 return back; 879} 880 881 882 883int 884dk4tsp16_finish(dk4_tsp16_t *tsp) 885{ 886 size_t nrej = 0; /* Number of unused bytes */ 887 size_t i = 0; /* Current unused byte index */ 888 int back = DK4_TSP_RES_FATAL; /* Function result */ 889 int res; /* Operation result */ 890 unsigned char uc; /* Current unused byte */ 891#if DK4_USE_ASSERT 892 assert(NULL != tsp); 893#endif 894 if (NULL != tsp) { 895 /* Flush all unprocessed output 896 1. If there are unprocessed bytes in the BOM detector as BOM 897 detection was not completed, process these bytes. 898 2. If there are characters in the line buffer, 899 finalize the line buffer text and process it. 900 */ 901 back = DK4_TSP_RES_OK; 902 /* Retrieve an process bytes stored in BOM detector */ 903 if (0 == tsp->pst) { 904 tsp->pst = 1; 905 nrej = dk4bom_detect_num_unused_bytes(&(tsp->bomd)); 906 if (0 < nrej) { 907 dk4tsp16_initialize_decoder(tsp); 908 for (i = 0; ((i < nrej) && (2 > tsp->pst)); i++) { 909 uc = dk4bom_detect_unused_byte(&(tsp->bomd), i); 910 switch (dk4tsp16_process_byte(tsp, uc)) { 911 case DK4_TSP_RES_FATAL: { 912 back = DK4_TSP_RES_FATAL; 913 tsp->pst = 2; 914 } break; 915 case DK4_TSP_RES_ERROR: { 916 if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; } 917 } break; 918 } 919 } 920 } 921 } 922 /* Process final line */ 923 if ((NULL != tsp->inbuf) && (0 < tsp->in_sz) && (NULL != (tsp->fct).lh)) { 924 if (0 < tsp->in_us) { 925 if (2 > tsp->pst) { 926 if (tsp->in_us < tsp->in_sz) { 927 (tsp->inbuf)[tsp->in_us] = (dk4_c16_t)'\0'; 928 res = 929 (*((tsp->fct).lh))( 930 tsp->obj,tsp->inbuf,(tsp->pos).lineno,&(tsp->er_pr) 931 ); 932 switch (res) { 933 case DK4_TSP_RES_FATAL: { 934 back = DK4_TSP_RES_FATAL; 935 tsp->pst = 2; 936 } break; 937 case DK4_TSP_RES_ERROR: { 938 if (DK4_TSP_RES_OK == back) { back = DK4_TSP_RES_ERROR; } 939 } break; 940 } 941 } else { 942 back = DK4_TSP_RES_FATAL; 943 tsp->pst = 2; 944 dk4error_set_with_position( 945 &(tsp->er_en), DK4_E_BUFFER_TOO_SMALL, 946 (tsp->pos).bytes, (tsp->pos).lineno, 947 (tsp->pos).chars, (tsp->pos).charil 948 ); 949 } 950 } 951 } 952 } 953 } 954 return back; 955} 956 957 958 959void 960dk4tsp16_get_errors(dk4_er_t *er_en, dk4_er_t *er_pr, dk4_tsp16_t const *tsp) 961{ 962#if DK4_USE_ASSERT 963 assert(NULL != tsp); 964#endif 965 if (NULL != tsp) { 966 if (NULL != er_en) { 967 DK4_MEMCPY(er_en, &(tsp->er_en), sizeof(dk4_er_t)); 968 } 969 if (NULL != er_pr) { 970 DK4_MEMCPY(er_pr, &(tsp->er_pr), sizeof(dk4_er_t)); 971 } 972 } 973} 974 975 976