1 /*
2 * UTF-8 string functions
3 *
4 * Copyright (C) 2008-2020, Joachim Metz <joachim.metz@gmail.com>
5 *
6 * Refer to AUTHORS for acknowledgements.
7 *
8 * This program is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU Lesser General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this program. If not, see <https://www.gnu.org/licenses/>.
20 */
21
22 #include <common.h>
23 #include <types.h>
24
25 #include "libuna_definitions.h"
26 #include "libuna_libcerror.h"
27 #include "libuna_scsu.h"
28 #include "libuna_types.h"
29 #include "libuna_unicode_character.h"
30 #include "libuna_utf8_string.h"
31
32 /* Determines the size of an UTF-8 string from a byte stream
33 * Returns 1 if successful or -1 on error
34 */
libuna_utf8_string_size_from_byte_stream(const uint8_t * byte_stream,size_t byte_stream_size,int codepage,size_t * utf8_string_size,libcerror_error_t ** error)35 int libuna_utf8_string_size_from_byte_stream(
36 const uint8_t *byte_stream,
37 size_t byte_stream_size,
38 int codepage,
39 size_t *utf8_string_size,
40 libcerror_error_t **error )
41 {
42 static char *function = "libuna_utf8_string_size_from_byte_stream";
43 size_t byte_stream_index = 0;
44 libuna_unicode_character_t unicode_character = 0;
45
46 if( byte_stream == NULL )
47 {
48 libcerror_error_set(
49 error,
50 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
51 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
52 "%s: invalid byte stream.",
53 function );
54
55 return( -1 );
56 }
57 if( byte_stream_size > (size_t) SSIZE_MAX )
58 {
59 libcerror_error_set(
60 error,
61 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
62 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
63 "%s: invalid byte stream size value exceeds maximum.",
64 function );
65
66 return( -1 );
67 }
68 if( utf8_string_size == NULL )
69 {
70 libcerror_error_set(
71 error,
72 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
73 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
74 "%s: invalid UTF-8 string size.",
75 function );
76
77 return( -1 );
78 }
79 *utf8_string_size = 0;
80
81 if( byte_stream_size == 0 )
82 {
83 return( 1 );
84 }
85 while( byte_stream_index < byte_stream_size )
86 {
87 /* Convert the byte stream bytes into an Unicode character
88 */
89 if( libuna_unicode_character_copy_from_byte_stream(
90 &unicode_character,
91 byte_stream,
92 byte_stream_size,
93 &byte_stream_index,
94 codepage,
95 error ) != 1 )
96 {
97 libcerror_error_set(
98 error,
99 LIBCERROR_ERROR_DOMAIN_CONVERSION,
100 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
101 "%s: unable to copy Unicode character from byte stream.",
102 function );
103
104 return( -1 );
105 }
106 /* Determine how many UTF-8 character bytes are required
107 */
108 if( libuna_unicode_character_size_to_utf8(
109 unicode_character,
110 utf8_string_size,
111 error ) != 1 )
112 {
113 libcerror_error_set(
114 error,
115 LIBCERROR_ERROR_DOMAIN_CONVERSION,
116 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
117 "%s: unable to unable to determine size of Unicode character in UTF-8.",
118 function );
119
120 return( -1 );
121 }
122 if( unicode_character == 0 )
123 {
124 break;
125 }
126 }
127 /* Check if the string is terminated with an end-of-string character
128 */
129 if( unicode_character != 0 )
130 {
131 *utf8_string_size += 1;
132 }
133 return( 1 );
134 }
135
136 /* Copies an UTF-8 string from a byte stream
137 * Returns 1 if successful or -1 on error
138 */
libuna_utf8_string_copy_from_byte_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * byte_stream,size_t byte_stream_size,int codepage,libcerror_error_t ** error)139 int libuna_utf8_string_copy_from_byte_stream(
140 libuna_utf8_character_t *utf8_string,
141 size_t utf8_string_size,
142 const uint8_t *byte_stream,
143 size_t byte_stream_size,
144 int codepage,
145 libcerror_error_t **error )
146 {
147 static char *function = "libuna_utf8_string_copy_from_byte_stream";
148 size_t utf8_string_index = 0;
149
150 if( libuna_utf8_string_with_index_copy_from_byte_stream(
151 utf8_string,
152 utf8_string_size,
153 &utf8_string_index,
154 byte_stream,
155 byte_stream_size,
156 codepage,
157 error ) != 1 )
158 {
159 libcerror_error_set(
160 error,
161 LIBCERROR_ERROR_DOMAIN_RUNTIME,
162 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
163 "%s: unable to copy byte stream to UTF-8 string.",
164 function );
165
166 return( -1 );
167 }
168 return( 1 );
169 }
170
171 /* Copies an UTF-8 string from a byte stream
172 * Returns 1 if successful or -1 on error
173 */
libuna_utf8_string_with_index_copy_from_byte_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const uint8_t * byte_stream,size_t byte_stream_size,int codepage,libcerror_error_t ** error)174 int libuna_utf8_string_with_index_copy_from_byte_stream(
175 libuna_utf8_character_t *utf8_string,
176 size_t utf8_string_size,
177 size_t *utf8_string_index,
178 const uint8_t *byte_stream,
179 size_t byte_stream_size,
180 int codepage,
181 libcerror_error_t **error )
182 {
183 static char *function = "libuna_utf8_string_with_index_copy_from_byte_stream";
184 size_t byte_stream_index = 0;
185 libuna_unicode_character_t unicode_character = 0;
186
187 if( utf8_string == NULL )
188 {
189 libcerror_error_set(
190 error,
191 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
192 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
193 "%s: invalid UTF-8 string.",
194 function );
195
196 return( -1 );
197 }
198 if( utf8_string_size > (size_t) SSIZE_MAX )
199 {
200 libcerror_error_set(
201 error,
202 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
203 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
204 "%s: invalid UTF-8 string size value exceeds maximum.",
205 function );
206
207 return( -1 );
208 }
209 if( utf8_string_index == NULL )
210 {
211 libcerror_error_set(
212 error,
213 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
214 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
215 "%s: invalid UTF-8 string index.",
216 function );
217
218 return( -1 );
219 }
220 if( byte_stream == NULL )
221 {
222 libcerror_error_set(
223 error,
224 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
225 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
226 "%s: invalid byte stream.",
227 function );
228
229 return( -1 );
230 }
231 if( byte_stream_size > (size_t) SSIZE_MAX )
232 {
233 libcerror_error_set(
234 error,
235 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
236 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
237 "%s: invalid byte stream size value exceeds maximum.",
238 function );
239
240 return( -1 );
241 }
242 if( byte_stream_size == 0 )
243 {
244 libcerror_error_set(
245 error,
246 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
247 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
248 "%s: missing byte stream value.",
249 function );
250
251 return( -1 );
252 }
253 while( byte_stream_index < byte_stream_size )
254 {
255 /* Convert the byte stream bytes into an Unicode character
256 */
257 if( libuna_unicode_character_copy_from_byte_stream(
258 &unicode_character,
259 byte_stream,
260 byte_stream_size,
261 &byte_stream_index,
262 codepage,
263 error ) != 1 )
264 {
265 libcerror_error_set(
266 error,
267 LIBCERROR_ERROR_DOMAIN_CONVERSION,
268 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
269 "%s: unable to copy Unicode character from byte stream.",
270 function );
271
272 return( -1 );
273 }
274 /* Convert the Unicode character into UTF-8 character bytes
275 */
276 if( libuna_unicode_character_copy_to_utf8(
277 unicode_character,
278 utf8_string,
279 utf8_string_size,
280 utf8_string_index,
281 error ) != 1 )
282 {
283 libcerror_error_set(
284 error,
285 LIBCERROR_ERROR_DOMAIN_CONVERSION,
286 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
287 "%s: unable to copy Unicode character to UTF-8.",
288 function );
289
290 return( -1 );
291 }
292 if( unicode_character == 0 )
293 {
294 break;
295 }
296 }
297 /* Check if the string is terminated with an end-of-string character
298 */
299 if( unicode_character != 0 )
300 {
301 if( *utf8_string_index >= utf8_string_size )
302 {
303 libcerror_error_set(
304 error,
305 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
306 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
307 "%s: UTF-8 string too small.",
308 function );
309
310 return( -1 );
311 }
312 utf8_string[ *utf8_string_index ] = 0;
313
314 *utf8_string_index += 1;
315 }
316 return( 1 );
317 }
318
319 /* Compares an UTF-8 string with a byte stream
320 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
321 */
libuna_utf8_string_compare_with_byte_stream(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * byte_stream,size_t byte_stream_size,int codepage,libcerror_error_t ** error)322 int libuna_utf8_string_compare_with_byte_stream(
323 const libuna_utf8_character_t *utf8_string,
324 size_t utf8_string_size,
325 const uint8_t *byte_stream,
326 size_t byte_stream_size,
327 int codepage,
328 libcerror_error_t **error )
329 {
330 static char *function = "libuna_utf8_string_compare_with_byte_stream";
331 size_t byte_stream_index = 0;
332 size_t utf8_string_index = 0;
333 libuna_unicode_character_t utf8_unicode_character = 0;
334 libuna_unicode_character_t byte_stream_unicode_character = 0;
335
336 if( utf8_string == NULL )
337 {
338 libcerror_error_set(
339 error,
340 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
341 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
342 "%s: invalid UTF-8 string.",
343 function );
344
345 return( -1 );
346 }
347 if( utf8_string_size > (size_t) SSIZE_MAX )
348 {
349 libcerror_error_set(
350 error,
351 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
352 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
353 "%s: invalid UTF-8 string size value exceeds maximum.",
354 function );
355
356 return( -1 );
357 }
358 if( byte_stream == NULL )
359 {
360 libcerror_error_set(
361 error,
362 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
363 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
364 "%s: invalid byte stream.",
365 function );
366
367 return( -1 );
368 }
369 if( byte_stream_size > (size_t) SSIZE_MAX )
370 {
371 libcerror_error_set(
372 error,
373 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
374 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
375 "%s: invalid byte stream size value exceeds maximum.",
376 function );
377
378 return( -1 );
379 }
380 if( byte_stream_size == 0 )
381 {
382 libcerror_error_set(
383 error,
384 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
385 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
386 "%s: missing byte stream value.",
387 function );
388
389 return( -1 );
390 }
391 if( ( utf8_string_size >= 1 )
392 && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
393 {
394 utf8_string_size -= 1;
395 }
396 /* Check if the byte stream is terminated with zero bytes
397 */
398 if( ( byte_stream_size >= 1 )
399 && ( byte_stream[ byte_stream_size - 1 ] == 0 ) )
400 {
401 byte_stream_size -= 1;
402 }
403 while( ( utf8_string_index < utf8_string_size )
404 && ( byte_stream_index < byte_stream_size ) )
405 {
406 /* Convert the UTF-8 character bytes into an Unicode character
407 */
408 if( libuna_unicode_character_copy_from_utf8(
409 &utf8_unicode_character,
410 utf8_string,
411 utf8_string_size,
412 &utf8_string_index,
413 error ) != 1 )
414 {
415 libcerror_error_set(
416 error,
417 LIBCERROR_ERROR_DOMAIN_CONVERSION,
418 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
419 "%s: unable to copy Unicode character from UTF-8.",
420 function );
421
422 return( -1 );
423 }
424 /* Convert the byte stream bytes into an Unicode character
425 */
426 if( libuna_unicode_character_copy_from_byte_stream(
427 &byte_stream_unicode_character,
428 byte_stream,
429 byte_stream_size,
430 &byte_stream_index,
431 codepage,
432 error ) != 1 )
433 {
434 libcerror_error_set(
435 error,
436 LIBCERROR_ERROR_DOMAIN_CONVERSION,
437 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
438 "%s: unable to copy Unicode character from byte stream.",
439 function );
440
441 return( -1 );
442 }
443 if( utf8_unicode_character < byte_stream_unicode_character )
444 {
445 return( LIBUNA_COMPARE_LESS );
446 }
447 else if( utf8_unicode_character > byte_stream_unicode_character )
448 {
449 return( LIBUNA_COMPARE_GREATER );
450 }
451 }
452 /* Check if both strings were entirely processed
453 */
454 if( utf8_string_index < utf8_string_size )
455 {
456 return( LIBUNA_COMPARE_GREATER );
457 }
458 else if( byte_stream_index < byte_stream_size )
459 {
460 return( LIBUNA_COMPARE_LESS );
461 }
462 return( LIBUNA_COMPARE_EQUAL );
463 }
464
465 /* Determines the size of an UTF-8 string from an UTF-7 stream
466 * Returns 1 if successful or -1 on error
467 */
libuna_utf8_string_size_from_utf7_stream(const uint8_t * utf7_stream,size_t utf7_stream_size,size_t * utf8_string_size,libcerror_error_t ** error)468 int libuna_utf8_string_size_from_utf7_stream(
469 const uint8_t *utf7_stream,
470 size_t utf7_stream_size,
471 size_t *utf8_string_size,
472 libcerror_error_t **error )
473 {
474 static char *function = "libuna_utf8_string_size_from_utf7_stream";
475 size_t utf7_stream_index = 0;
476 libuna_unicode_character_t unicode_character = 0;
477 uint32_t utf7_stream_base64_data = 0;
478
479 if( utf7_stream == NULL )
480 {
481 libcerror_error_set(
482 error,
483 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
484 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
485 "%s: invalid UTF-7 stream.",
486 function );
487
488 return( -1 );
489 }
490 if( utf7_stream_size > (size_t) SSIZE_MAX )
491 {
492 libcerror_error_set(
493 error,
494 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
495 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
496 "%s: invalid UTF-7 stream size value exceeds maximum.",
497 function );
498
499 return( -1 );
500 }
501 if( utf8_string_size == NULL )
502 {
503 libcerror_error_set(
504 error,
505 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
506 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
507 "%s: invalid UTF-8 string size.",
508 function );
509
510 return( -1 );
511 }
512 *utf8_string_size = 0;
513
514 if( utf7_stream_size == 0 )
515 {
516 return( 1 );
517 }
518 while( utf7_stream_index < utf7_stream_size )
519 {
520 /* Convert the UTF-7 stream bytes into an Unicode character
521 */
522 if( libuna_unicode_character_copy_from_utf7_stream(
523 &unicode_character,
524 utf7_stream,
525 utf7_stream_size,
526 &utf7_stream_index,
527 &utf7_stream_base64_data,
528 error ) != 1 )
529 {
530 libcerror_error_set(
531 error,
532 LIBCERROR_ERROR_DOMAIN_CONVERSION,
533 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
534 "%s: unable to copy Unicode character from UTF-7 stream.",
535 function );
536
537 return( -1 );
538 }
539 /* Determine how many UTF-8 character bytes are required
540 */
541 if( libuna_unicode_character_size_to_utf8(
542 unicode_character,
543 utf8_string_size,
544 error ) != 1 )
545 {
546 libcerror_error_set(
547 error,
548 LIBCERROR_ERROR_DOMAIN_CONVERSION,
549 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
550 "%s: unable to unable to determine size of Unicode character in UTF-8.",
551 function );
552
553 return( -1 );
554 }
555 if( unicode_character == 0 )
556 {
557 break;
558 }
559 }
560 /* Check if the string is terminated with an end-of-string character
561 */
562 if( unicode_character != 0 )
563 {
564 *utf8_string_size += 1;
565 }
566 return( 1 );
567 }
568
569 /* Copies an UTF-8 string from an UTF-7 stream
570 * Returns 1 if successful or -1 on error
571 */
libuna_utf8_string_copy_from_utf7_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf7_stream,size_t utf7_stream_size,libcerror_error_t ** error)572 int libuna_utf8_string_copy_from_utf7_stream(
573 libuna_utf8_character_t *utf8_string,
574 size_t utf8_string_size,
575 const uint8_t *utf7_stream,
576 size_t utf7_stream_size,
577 libcerror_error_t **error )
578 {
579 static char *function = "libuna_utf8_string_copy_from_utf7_stream";
580 size_t utf8_string_index = 0;
581
582 if( libuna_utf8_string_with_index_copy_from_utf7_stream(
583 utf8_string,
584 utf8_string_size,
585 &utf8_string_index,
586 utf7_stream,
587 utf7_stream_size,
588 error ) != 1 )
589 {
590 libcerror_error_set(
591 error,
592 LIBCERROR_ERROR_DOMAIN_RUNTIME,
593 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
594 "%s: unable to UTF-7 stream to UTF-8 string.",
595 function );
596
597 return( -1 );
598 }
599 return( 1 );
600 }
601
602 /* Copies an UTF-8 string from an UTF-7 stream
603 * Returns 1 if successful or -1 on error
604 */
libuna_utf8_string_with_index_copy_from_utf7_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const uint8_t * utf7_stream,size_t utf7_stream_size,libcerror_error_t ** error)605 int libuna_utf8_string_with_index_copy_from_utf7_stream(
606 libuna_utf8_character_t *utf8_string,
607 size_t utf8_string_size,
608 size_t *utf8_string_index,
609 const uint8_t *utf7_stream,
610 size_t utf7_stream_size,
611 libcerror_error_t **error )
612 {
613 static char *function = "libuna_utf8_string_with_index_copy_from_utf7_stream";
614 size_t utf7_stream_index = 0;
615 libuna_unicode_character_t unicode_character = 0;
616 uint32_t utf7_stream_base64_data = 0;
617
618 if( utf8_string == NULL )
619 {
620 libcerror_error_set(
621 error,
622 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
623 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
624 "%s: invalid UTF-8 string.",
625 function );
626
627 return( -1 );
628 }
629 if( utf8_string_size > (size_t) SSIZE_MAX )
630 {
631 libcerror_error_set(
632 error,
633 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
634 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
635 "%s: invalid UTF-8 string size value exceeds maximum.",
636 function );
637
638 return( -1 );
639 }
640 if( utf8_string_index == NULL )
641 {
642 libcerror_error_set(
643 error,
644 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
645 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
646 "%s: invalid UTF-8 string index.",
647 function );
648
649 return( -1 );
650 }
651 if( utf7_stream == NULL )
652 {
653 libcerror_error_set(
654 error,
655 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
656 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
657 "%s: invalid UTF-7 stream.",
658 function );
659
660 return( -1 );
661 }
662 if( utf7_stream_size > (size_t) SSIZE_MAX )
663 {
664 libcerror_error_set(
665 error,
666 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
667 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
668 "%s: invalid UTF-7 stream size value exceeds maximum.",
669 function );
670
671 return( -1 );
672 }
673 if( utf7_stream_size == 0 )
674 {
675 libcerror_error_set(
676 error,
677 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
678 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
679 "%s: missing UTF-7 stream value.",
680 function );
681
682 return( -1 );
683 }
684 while( utf7_stream_index < utf7_stream_size )
685 {
686 /* Convert the UTF-7 stream bytes into an Unicode character
687 */
688 if( libuna_unicode_character_copy_from_utf7_stream(
689 &unicode_character,
690 utf7_stream,
691 utf7_stream_size,
692 &utf7_stream_index,
693 &utf7_stream_base64_data,
694 error ) != 1 )
695 {
696 libcerror_error_set(
697 error,
698 LIBCERROR_ERROR_DOMAIN_CONVERSION,
699 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
700 "%s: unable to copy Unicode character from UTF-7 stream.",
701 function );
702
703 return( -1 );
704 }
705 /* Convert the Unicode character into UTF-8 character bytes
706 */
707 if( libuna_unicode_character_copy_to_utf8(
708 unicode_character,
709 utf8_string,
710 utf8_string_size,
711 utf8_string_index,
712 error ) != 1 )
713 {
714 libcerror_error_set(
715 error,
716 LIBCERROR_ERROR_DOMAIN_CONVERSION,
717 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
718 "%s: unable to copy Unicode character to UTF-8.",
719 function );
720
721 return( -1 );
722 }
723 if( unicode_character == 0 )
724 {
725 break;
726 }
727 }
728 /* Check if the string is terminated with an end-of-string character
729 */
730 if( unicode_character != 0 )
731 {
732 if( *utf8_string_index >= utf8_string_size )
733 {
734 libcerror_error_set(
735 error,
736 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
737 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
738 "%s: UTF-8 string too small.",
739 function );
740
741 return( -1 );
742 }
743 utf8_string[ *utf8_string_index ] = 0;
744
745 *utf8_string_index += 1;
746 }
747 return( 1 );
748 }
749
750 /* Compares an UTF-8 string with an UTF-7 stream
751 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
752 */
libuna_utf8_string_compare_with_utf7_stream(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf7_stream,size_t utf7_stream_size,libcerror_error_t ** error)753 int libuna_utf8_string_compare_with_utf7_stream(
754 const libuna_utf8_character_t *utf8_string,
755 size_t utf8_string_size,
756 const uint8_t *utf7_stream,
757 size_t utf7_stream_size,
758 libcerror_error_t **error )
759 {
760 static char *function = "libuna_utf8_string_compare_with_utf7_stream";
761 size_t utf7_stream_index = 0;
762 size_t utf8_string_index = 0;
763 libuna_unicode_character_t utf8_unicode_character = 0;
764 libuna_unicode_character_t utf7_stream_unicode_character = 0;
765 uint32_t utf7_stream_base64_data = 0;
766
767 if( utf8_string == NULL )
768 {
769 libcerror_error_set(
770 error,
771 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
772 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
773 "%s: invalid UTF-8 string.",
774 function );
775
776 return( -1 );
777 }
778 if( utf8_string_size > (size_t) SSIZE_MAX )
779 {
780 libcerror_error_set(
781 error,
782 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
783 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
784 "%s: invalid UTF-8 string size value exceeds maximum.",
785 function );
786
787 return( -1 );
788 }
789 if( utf7_stream == NULL )
790 {
791 libcerror_error_set(
792 error,
793 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
794 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
795 "%s: invalid UTF-7 stream.",
796 function );
797
798 return( -1 );
799 }
800 if( utf7_stream_size > (size_t) SSIZE_MAX )
801 {
802 libcerror_error_set(
803 error,
804 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
805 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
806 "%s: invalid UTF-7 stream size value exceeds maximum.",
807 function );
808
809 return( -1 );
810 }
811 if( utf7_stream_size == 0 )
812 {
813 libcerror_error_set(
814 error,
815 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
816 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
817 "%s: missing UTF-7 stream value.",
818 function );
819
820 return( -1 );
821 }
822 if( ( utf8_string_size >= 1 )
823 && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
824 {
825 utf8_string_size -= 1;
826 }
827 /* Check if the UTF-7 stream is terminated with zero bytes
828 */
829 if( ( utf7_stream_size >= 1 )
830 && ( utf7_stream[ utf7_stream_size - 1 ] == 0 ) )
831 {
832 utf7_stream_size -= 1;
833 }
834 while( ( utf8_string_index < utf8_string_size )
835 && ( utf7_stream_index < utf7_stream_size ) )
836 {
837 /* Convert the UTF-8 character bytes into an Unicode character
838 */
839 if( libuna_unicode_character_copy_from_utf8(
840 &utf8_unicode_character,
841 utf8_string,
842 utf8_string_size,
843 &utf8_string_index,
844 error ) != 1 )
845 {
846 libcerror_error_set(
847 error,
848 LIBCERROR_ERROR_DOMAIN_CONVERSION,
849 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
850 "%s: unable to copy Unicode character from UTF-8.",
851 function );
852
853 return( -1 );
854 }
855 /* Convert the UTF-7 character bytes into an Unicode character
856 */
857 if( libuna_unicode_character_copy_from_utf7_stream(
858 &utf7_stream_unicode_character,
859 utf7_stream,
860 utf7_stream_size,
861 &utf7_stream_index,
862 &utf7_stream_base64_data,
863 error ) != 1 )
864 {
865 libcerror_error_set(
866 error,
867 LIBCERROR_ERROR_DOMAIN_CONVERSION,
868 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
869 "%s: unable to copy Unicode character from UTF-7 stream.",
870 function );
871
872 return( -1 );
873 }
874 if( utf8_unicode_character < utf7_stream_unicode_character )
875 {
876 return( LIBUNA_COMPARE_LESS );
877 }
878 else if( utf8_unicode_character > utf7_stream_unicode_character )
879 {
880 return( LIBUNA_COMPARE_GREATER );
881 }
882 }
883 /* Check if both strings were entirely processed
884 */
885 if( utf8_string_index < utf8_string_size )
886 {
887 return( LIBUNA_COMPARE_GREATER );
888 }
889 else if( utf7_stream_index < utf7_stream_size )
890 {
891 return( LIBUNA_COMPARE_LESS );
892 }
893 return( LIBUNA_COMPARE_EQUAL );
894 }
895
896 /* Determines the size of an UTF-8 string from an UTF-8 stream
897 * Returns 1 if successful or -1 on error
898 */
libuna_utf8_string_size_from_utf8_stream(const uint8_t * utf8_stream,size_t utf8_stream_size,size_t * utf8_string_size,libcerror_error_t ** error)899 int libuna_utf8_string_size_from_utf8_stream(
900 const uint8_t *utf8_stream,
901 size_t utf8_stream_size,
902 size_t *utf8_string_size,
903 libcerror_error_t **error )
904 {
905 static char *function = "libuna_utf8_string_size_from_utf8_stream";
906 size_t utf8_stream_index = 0;
907 libuna_unicode_character_t unicode_character = 0;
908
909 if( utf8_stream == NULL )
910 {
911 libcerror_error_set(
912 error,
913 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
914 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
915 "%s: invalid UTF-8 stream.",
916 function );
917
918 return( -1 );
919 }
920 if( utf8_stream_size > (size_t) SSIZE_MAX )
921 {
922 libcerror_error_set(
923 error,
924 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
925 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
926 "%s: invalid UTF-8 stream size value exceeds maximum.",
927 function );
928
929 return( -1 );
930 }
931 if( utf8_string_size == NULL )
932 {
933 libcerror_error_set(
934 error,
935 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
936 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
937 "%s: invalid UTF-8 string size.",
938 function );
939
940 return( -1 );
941 }
942 *utf8_string_size = 0;
943
944 if( utf8_stream_size == 0 )
945 {
946 return( 1 );
947 }
948 /* Check if UTF-8 stream starts with a byte order mark (BOM)
949 */
950 if( utf8_stream_size >= 3 )
951 {
952 if( ( utf8_stream[ 0 ] == 0x0ef )
953 && ( utf8_stream[ 1 ] == 0x0bb )
954 && ( utf8_stream[ 2 ] == 0x0bf ) )
955 {
956 utf8_stream_index += 3;
957 }
958 }
959 while( utf8_stream_index < utf8_stream_size )
960 {
961 /* Convert the UTF-8 stream bytes into an Unicode character
962 */
963 if( libuna_unicode_character_copy_from_utf8(
964 &unicode_character,
965 utf8_stream,
966 utf8_stream_size,
967 &utf8_stream_index,
968 error ) != 1 )
969 {
970 libcerror_error_set(
971 error,
972 LIBCERROR_ERROR_DOMAIN_CONVERSION,
973 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
974 "%s: unable to copy Unicode character from UTF-8 stream.",
975 function );
976
977 return( -1 );
978 }
979 /* Determine how many UTF-8 character bytes are required
980 */
981 if( libuna_unicode_character_size_to_utf8(
982 unicode_character,
983 utf8_string_size,
984 error ) != 1 )
985 {
986 libcerror_error_set(
987 error,
988 LIBCERROR_ERROR_DOMAIN_CONVERSION,
989 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
990 "%s: unable to unable to determine size of Unicode character in UTF-8.",
991 function );
992
993 return( -1 );
994 }
995 if( unicode_character == 0 )
996 {
997 break;
998 }
999 }
1000 /* Check if the string is terminated with an end-of-string character
1001 */
1002 if( unicode_character != 0 )
1003 {
1004 *utf8_string_size += 1;
1005 }
1006 return( 1 );
1007 }
1008
1009 /* Copies an UTF-8 string from an UTF-8 stream
1010 * Returns 1 if successful or -1 on error
1011 */
libuna_utf8_string_copy_from_utf8_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf8_stream,size_t utf8_stream_size,libcerror_error_t ** error)1012 int libuna_utf8_string_copy_from_utf8_stream(
1013 libuna_utf8_character_t *utf8_string,
1014 size_t utf8_string_size,
1015 const uint8_t *utf8_stream,
1016 size_t utf8_stream_size,
1017 libcerror_error_t **error )
1018 {
1019 static char *function = "libuna_utf8_string_copy_from_utf8_stream";
1020 size_t utf8_string_index = 0;
1021
1022 if( libuna_utf8_string_with_index_copy_from_utf8_stream(
1023 utf8_string,
1024 utf8_string_size,
1025 &utf8_string_index,
1026 utf8_stream,
1027 utf8_stream_size,
1028 error ) != 1 )
1029 {
1030 libcerror_error_set(
1031 error,
1032 LIBCERROR_ERROR_DOMAIN_RUNTIME,
1033 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
1034 "%s: unable to UTF-8 stream to UTF-8 string.",
1035 function );
1036
1037 return( -1 );
1038 }
1039 return( 1 );
1040 }
1041
1042 /* Copies an UTF-8 string from an UTF-8 stream
1043 * Returns 1 if successful or -1 on error
1044 */
libuna_utf8_string_with_index_copy_from_utf8_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const uint8_t * utf8_stream,size_t utf8_stream_size,libcerror_error_t ** error)1045 int libuna_utf8_string_with_index_copy_from_utf8_stream(
1046 libuna_utf8_character_t *utf8_string,
1047 size_t utf8_string_size,
1048 size_t *utf8_string_index,
1049 const uint8_t *utf8_stream,
1050 size_t utf8_stream_size,
1051 libcerror_error_t **error )
1052 {
1053 static char *function = "libuna_utf8_string_with_index_copy_from_utf8_stream";
1054 size_t utf8_stream_index = 0;
1055 libuna_unicode_character_t unicode_character = 0;
1056
1057 if( utf8_string == NULL )
1058 {
1059 libcerror_error_set(
1060 error,
1061 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1062 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1063 "%s: invalid UTF-8 string.",
1064 function );
1065
1066 return( -1 );
1067 }
1068 if( utf8_string_size > (size_t) SSIZE_MAX )
1069 {
1070 libcerror_error_set(
1071 error,
1072 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1073 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1074 "%s: invalid UTF-8 string size value exceeds maximum.",
1075 function );
1076
1077 return( -1 );
1078 }
1079 if( utf8_string_index == NULL )
1080 {
1081 libcerror_error_set(
1082 error,
1083 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1084 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1085 "%s: invalid UTF-8 string index.",
1086 function );
1087
1088 return( -1 );
1089 }
1090 if( utf8_stream == NULL )
1091 {
1092 libcerror_error_set(
1093 error,
1094 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1095 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1096 "%s: invalid UTF-8 stream.",
1097 function );
1098
1099 return( -1 );
1100 }
1101 if( utf8_stream_size > (size_t) SSIZE_MAX )
1102 {
1103 libcerror_error_set(
1104 error,
1105 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1106 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1107 "%s: invalid UTF-8 stream size value exceeds maximum.",
1108 function );
1109
1110 return( -1 );
1111 }
1112 if( utf8_stream_size == 0 )
1113 {
1114 libcerror_error_set(
1115 error,
1116 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1117 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1118 "%s: missing UTF-8 stream value.",
1119 function );
1120
1121 return( -1 );
1122 }
1123 /* Check if UTF-8 stream starts with a byte order mark (BOM)
1124 */
1125 if( utf8_stream_size >= 3 )
1126 {
1127 if( ( utf8_stream[ 0 ] == 0x0ef )
1128 && ( utf8_stream[ 1 ] == 0x0bb )
1129 && ( utf8_stream[ 2 ] == 0x0bf ) )
1130 {
1131 utf8_stream_index += 3;
1132 }
1133 }
1134 while( utf8_stream_index < utf8_stream_size )
1135 {
1136 /* Convert the UTF-8 stream bytes into an Unicode character
1137 */
1138 if( libuna_unicode_character_copy_from_utf8(
1139 &unicode_character,
1140 utf8_stream,
1141 utf8_stream_size,
1142 &utf8_stream_index,
1143 error ) != 1 )
1144 {
1145 libcerror_error_set(
1146 error,
1147 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1148 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1149 "%s: unable to copy Unicode character from UTF-8 stream.",
1150 function );
1151
1152 return( -1 );
1153 }
1154 /* Convert the Unicode character into UTF-8 character bytes
1155 */
1156 if( libuna_unicode_character_copy_to_utf8(
1157 unicode_character,
1158 utf8_string,
1159 utf8_string_size,
1160 utf8_string_index,
1161 error ) != 1 )
1162 {
1163 libcerror_error_set(
1164 error,
1165 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1166 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1167 "%s: unable to copy Unicode character to UTF-8.",
1168 function );
1169
1170 return( -1 );
1171 }
1172 if( unicode_character == 0 )
1173 {
1174 break;
1175 }
1176 }
1177 /* Check if the string is terminated with an end-of-string character
1178 */
1179 if( unicode_character != 0 )
1180 {
1181 if( *utf8_string_index >= utf8_string_size )
1182 {
1183 libcerror_error_set(
1184 error,
1185 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1186 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
1187 "%s: UTF-8 string too small.",
1188 function );
1189
1190 return( -1 );
1191 }
1192 utf8_string[ *utf8_string_index ] = 0;
1193
1194 *utf8_string_index += 1;
1195 }
1196 return( 1 );
1197 }
1198
1199 /* Compares an UTF-8 string with an UTF-8 stream
1200 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
1201 */
libuna_utf8_string_compare_with_utf8_stream(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf8_stream,size_t utf8_stream_size,libcerror_error_t ** error)1202 int libuna_utf8_string_compare_with_utf8_stream(
1203 const libuna_utf8_character_t *utf8_string,
1204 size_t utf8_string_size,
1205 const uint8_t *utf8_stream,
1206 size_t utf8_stream_size,
1207 libcerror_error_t **error )
1208 {
1209 static char *function = "libuna_utf8_string_compare_with_utf8_stream";
1210 size_t utf8_stream_index = 0;
1211 size_t utf8_string_index = 0;
1212 libuna_unicode_character_t utf8_unicode_character = 0;
1213 libuna_unicode_character_t utf8_stream_unicode_character = 0;
1214
1215 if( utf8_string == NULL )
1216 {
1217 libcerror_error_set(
1218 error,
1219 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1220 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1221 "%s: invalid UTF-8 string.",
1222 function );
1223
1224 return( -1 );
1225 }
1226 if( utf8_string_size > (size_t) SSIZE_MAX )
1227 {
1228 libcerror_error_set(
1229 error,
1230 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1231 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1232 "%s: invalid UTF-8 string size value exceeds maximum.",
1233 function );
1234
1235 return( -1 );
1236 }
1237 if( utf8_stream == NULL )
1238 {
1239 libcerror_error_set(
1240 error,
1241 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1242 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1243 "%s: invalid UTF-8 stream.",
1244 function );
1245
1246 return( -1 );
1247 }
1248 if( utf8_stream_size > (size_t) SSIZE_MAX )
1249 {
1250 libcerror_error_set(
1251 error,
1252 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1253 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1254 "%s: invalid UTF-8 stream size value exceeds maximum.",
1255 function );
1256
1257 return( -1 );
1258 }
1259 if( utf8_stream_size == 0 )
1260 {
1261 libcerror_error_set(
1262 error,
1263 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1264 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1265 "%s: missing UTF-8 stream value.",
1266 function );
1267
1268 return( -1 );
1269 }
1270 /* Check if UTF-8 stream starts with a byte order mark (BOM)
1271 */
1272 if( utf8_stream_size >= 3 )
1273 {
1274 if( ( utf8_stream[ 0 ] == 0x0ef )
1275 && ( utf8_stream[ 1 ] == 0x0bb )
1276 && ( utf8_stream[ 2 ] == 0x0bf ) )
1277 {
1278 utf8_stream_index += 3;
1279 }
1280 }
1281 if( ( utf8_string_size >= 1 )
1282 && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
1283 {
1284 utf8_string_size -= 1;
1285 }
1286 /* Check if the UTF-8 stream is terminated with zero bytes
1287 */
1288 if( ( utf8_stream_size >= 1 )
1289 && ( utf8_stream[ utf8_stream_size - 1 ] == 0 ) )
1290 {
1291 utf8_stream_size -= 1;
1292 }
1293 while( ( utf8_string_index < utf8_string_size )
1294 && ( utf8_stream_index < utf8_stream_size ) )
1295 {
1296 /* Convert the UTF-8 character bytes into an Unicode character
1297 */
1298 if( libuna_unicode_character_copy_from_utf8(
1299 &utf8_unicode_character,
1300 utf8_string,
1301 utf8_string_size,
1302 &utf8_string_index,
1303 error ) != 1 )
1304 {
1305 libcerror_error_set(
1306 error,
1307 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1308 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1309 "%s: unable to copy Unicode character from UTF-8.",
1310 function );
1311
1312 return( -1 );
1313 }
1314 /* Convert the UTF-8 character bytes into an Unicode character
1315 */
1316 if( libuna_unicode_character_copy_from_utf8(
1317 &utf8_stream_unicode_character,
1318 utf8_stream,
1319 utf8_stream_size,
1320 &utf8_stream_index,
1321 error ) != 1 )
1322 {
1323 libcerror_error_set(
1324 error,
1325 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1326 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1327 "%s: unable to copy Unicode character from UTF-8 stream.",
1328 function );
1329
1330 return( -1 );
1331 }
1332 if( utf8_unicode_character < utf8_stream_unicode_character )
1333 {
1334 return( LIBUNA_COMPARE_LESS );
1335 }
1336 else if( utf8_unicode_character > utf8_stream_unicode_character )
1337 {
1338 return( LIBUNA_COMPARE_GREATER );
1339 }
1340 }
1341 /* Check if both strings were entirely processed
1342 */
1343 if( utf8_string_index < utf8_string_size )
1344 {
1345 return( LIBUNA_COMPARE_GREATER );
1346 }
1347 else if( utf8_stream_index < utf8_stream_size )
1348 {
1349 return( LIBUNA_COMPARE_LESS );
1350 }
1351 return( LIBUNA_COMPARE_EQUAL );
1352 }
1353
1354 /* Determines the size of an UTF-8 string from an UTF-16 string
1355 * Returns 1 if successful or -1 on error
1356 */
libuna_utf8_string_size_from_utf16(const libuna_utf16_character_t * utf16_string,size_t utf16_string_size,size_t * utf8_string_size,libcerror_error_t ** error)1357 int libuna_utf8_string_size_from_utf16(
1358 const libuna_utf16_character_t *utf16_string,
1359 size_t utf16_string_size,
1360 size_t *utf8_string_size,
1361 libcerror_error_t **error )
1362 {
1363 static char *function = "libuna_utf8_string_size_from_utf16";
1364 size_t utf16_string_index = 0;
1365 libuna_unicode_character_t unicode_character = 0;
1366
1367 if( utf16_string == NULL )
1368 {
1369 libcerror_error_set(
1370 error,
1371 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1372 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1373 "%s: invalid UTF-16 string.",
1374 function );
1375
1376 return( -1 );
1377 }
1378 if( utf16_string_size > (size_t) SSIZE_MAX )
1379 {
1380 libcerror_error_set(
1381 error,
1382 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1383 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1384 "%s: invalid UTF-16 string size value exceeds maximum.",
1385 function );
1386
1387 return( -1 );
1388 }
1389 if( utf8_string_size == NULL )
1390 {
1391 libcerror_error_set(
1392 error,
1393 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1394 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1395 "%s: invalid UTF-8 string size.",
1396 function );
1397
1398 return( -1 );
1399 }
1400 *utf8_string_size = 0;
1401
1402 if( utf16_string_size == 0 )
1403 {
1404 return( 1 );
1405 }
1406 while( utf16_string_index < utf16_string_size )
1407 {
1408 /* Convert the UTF-16 character bytes into an Unicode character
1409 */
1410 if( libuna_unicode_character_copy_from_utf16(
1411 &unicode_character,
1412 utf16_string,
1413 utf16_string_size,
1414 &utf16_string_index,
1415 error ) != 1 )
1416 {
1417 libcerror_error_set(
1418 error,
1419 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1420 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1421 "%s: unable to copy Unicode character from UTF-16.",
1422 function );
1423
1424 return( -1 );
1425 }
1426 /* Determine how many UTF-8 character bytes are required
1427 */
1428 if( libuna_unicode_character_size_to_utf8(
1429 unicode_character,
1430 utf8_string_size,
1431 error ) != 1 )
1432 {
1433 libcerror_error_set(
1434 error,
1435 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1436 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1437 "%s: unable to unable to determine size of Unicode character in UTF-8.",
1438 function );
1439
1440 return( -1 );
1441 }
1442 }
1443 /* Check if the string is terminated with an end-of-string character
1444 */
1445 if( unicode_character != 0 )
1446 {
1447 *utf8_string_size += 1;
1448 }
1449 return( 1 );
1450 }
1451
1452 /* Copies an UTF-8 string from an UTF-16 string
1453 * Returns 1 if successful or -1 on error
1454 */
libuna_utf8_string_copy_from_utf16(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const libuna_utf16_character_t * utf16_string,size_t utf16_string_size,libcerror_error_t ** error)1455 int libuna_utf8_string_copy_from_utf16(
1456 libuna_utf8_character_t *utf8_string,
1457 size_t utf8_string_size,
1458 const libuna_utf16_character_t *utf16_string,
1459 size_t utf16_string_size,
1460 libcerror_error_t **error )
1461 {
1462 static char *function = "libuna_utf8_string_copy_from_utf16";
1463 size_t utf8_string_index = 0;
1464
1465 if( libuna_utf8_string_with_index_copy_from_utf16(
1466 utf8_string,
1467 utf8_string_size,
1468 &utf8_string_index,
1469 utf16_string,
1470 utf16_string_size,
1471 error ) != 1 )
1472 {
1473 libcerror_error_set(
1474 error,
1475 LIBCERROR_ERROR_DOMAIN_RUNTIME,
1476 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
1477 "%s: unable to copy UTF-16 string to UTF-8 string.",
1478 function );
1479
1480 return( -1 );
1481 }
1482 return( 1 );
1483 }
1484
1485 /* Copies an UTF-8 string from an UTF-16 string
1486 * Returns 1 if successful or -1 on error
1487 */
libuna_utf8_string_with_index_copy_from_utf16(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const libuna_utf16_character_t * utf16_string,size_t utf16_string_size,libcerror_error_t ** error)1488 int libuna_utf8_string_with_index_copy_from_utf16(
1489 libuna_utf8_character_t *utf8_string,
1490 size_t utf8_string_size,
1491 size_t *utf8_string_index,
1492 const libuna_utf16_character_t *utf16_string,
1493 size_t utf16_string_size,
1494 libcerror_error_t **error )
1495 {
1496 static char *function = "libuna_utf8_string_with_index_copy_from_utf16";
1497 size_t utf16_string_index = 0;
1498 libuna_unicode_character_t unicode_character = 0;
1499
1500 if( utf8_string == NULL )
1501 {
1502 libcerror_error_set(
1503 error,
1504 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1505 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1506 "%s: invalid UTF-8 string.",
1507 function );
1508
1509 return( -1 );
1510 }
1511 if( utf8_string_size > (size_t) SSIZE_MAX )
1512 {
1513 libcerror_error_set(
1514 error,
1515 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1516 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1517 "%s: invalid UTF-8 string size value exceeds maximum.",
1518 function );
1519
1520 return( -1 );
1521 }
1522 if( utf8_string_index == NULL )
1523 {
1524 libcerror_error_set(
1525 error,
1526 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1527 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1528 "%s: invalid UTF-8 string index.",
1529 function );
1530
1531 return( -1 );
1532 }
1533 if( utf16_string == NULL )
1534 {
1535 libcerror_error_set(
1536 error,
1537 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1538 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1539 "%s: invalid UTF-16 string.",
1540 function );
1541
1542 return( -1 );
1543 }
1544 if( utf16_string_size > (size_t) SSIZE_MAX )
1545 {
1546 libcerror_error_set(
1547 error,
1548 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1549 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1550 "%s: invalid UTF-16 string size value exceeds maximum.",
1551 function );
1552
1553 return( -1 );
1554 }
1555 if( utf16_string_size == 0 )
1556 {
1557 libcerror_error_set(
1558 error,
1559 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1560 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1561 "%s: missing UTF-16 string value.",
1562 function );
1563
1564 return( -1 );
1565 }
1566 while( utf16_string_index < utf16_string_size )
1567 {
1568 /* Convert the UTF-16 character bytes into an Unicode character
1569 */
1570 if( libuna_unicode_character_copy_from_utf16(
1571 &unicode_character,
1572 utf16_string,
1573 utf16_string_size,
1574 &utf16_string_index,
1575 error ) != 1 )
1576 {
1577 libcerror_error_set(
1578 error,
1579 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1580 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1581 "%s: unable to copy Unicode character from UTF-16.",
1582 function );
1583
1584 return( -1 );
1585 }
1586 /* Convert the Unicode character into UTF-8 character bytes
1587 */
1588 if( libuna_unicode_character_copy_to_utf8(
1589 unicode_character,
1590 utf8_string,
1591 utf8_string_size,
1592 utf8_string_index,
1593 error ) != 1 )
1594 {
1595 libcerror_error_set(
1596 error,
1597 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1598 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1599 "%s: unable to copy Unicode character to UTF-8.",
1600 function );
1601
1602 return( -1 );
1603 }
1604 }
1605 /* Check if the string is terminated with an end-of-string character
1606 */
1607 if( unicode_character != 0 )
1608 {
1609 if( *utf8_string_index >= utf8_string_size )
1610 {
1611 libcerror_error_set(
1612 error,
1613 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1614 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
1615 "%s: UTF-8 string too small.",
1616 function );
1617
1618 return( -1 );
1619 }
1620 utf8_string[ *utf8_string_index ] = 0;
1621
1622 *utf8_string_index += 1;
1623 }
1624 return( 1 );
1625 }
1626
1627 /* Compares an UTF-8 string with an UTF-16 string
1628 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
1629 */
libuna_utf8_string_compare_with_utf16(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const libuna_utf16_character_t * utf16_string,size_t utf16_string_size,libcerror_error_t ** error)1630 int libuna_utf8_string_compare_with_utf16(
1631 const libuna_utf8_character_t *utf8_string,
1632 size_t utf8_string_size,
1633 const libuna_utf16_character_t *utf16_string,
1634 size_t utf16_string_size,
1635 libcerror_error_t **error )
1636 {
1637 static char *function = "libuna_utf8_string_compare_with_utf16";
1638 size_t utf16_string_index = 0;
1639 size_t utf8_string_index = 0;
1640 libuna_unicode_character_t utf8_unicode_character = 0;
1641 libuna_unicode_character_t utf16_unicode_character = 0;
1642
1643 if( utf8_string == NULL )
1644 {
1645 libcerror_error_set(
1646 error,
1647 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1648 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1649 "%s: invalid UTF-8 string.",
1650 function );
1651
1652 return( -1 );
1653 }
1654 if( utf8_string_size > (size_t) SSIZE_MAX )
1655 {
1656 libcerror_error_set(
1657 error,
1658 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1659 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1660 "%s: invalid UTF-8 string size value exceeds maximum.",
1661 function );
1662
1663 return( -1 );
1664 }
1665 if( utf16_string == NULL )
1666 {
1667 libcerror_error_set(
1668 error,
1669 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1670 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1671 "%s: invalid UTF-16 string.",
1672 function );
1673
1674 return( -1 );
1675 }
1676 if( utf16_string_size > (size_t) SSIZE_MAX )
1677 {
1678 libcerror_error_set(
1679 error,
1680 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1681 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1682 "%s: invalid UTF-16 string size value exceeds maximum.",
1683 function );
1684
1685 return( -1 );
1686 }
1687 if( utf16_string_size == 0 )
1688 {
1689 libcerror_error_set(
1690 error,
1691 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1692 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1693 "%s: missing UTF-16 string value.",
1694 function );
1695
1696 return( -1 );
1697 }
1698 if( ( utf8_string_size >= 1 )
1699 && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
1700 {
1701 utf8_string_size -= 1;
1702 }
1703 if( ( utf16_string_size >= 1 )
1704 && ( utf16_string[ utf16_string_size - 1 ] == 0 ) )
1705 {
1706 utf16_string_size -= 1;
1707 }
1708 while( ( utf8_string_index < utf8_string_size )
1709 && ( utf16_string_index < utf16_string_size ) )
1710 {
1711 /* Convert the UTF-8 character bytes into an Unicode character
1712 */
1713 if( libuna_unicode_character_copy_from_utf8(
1714 &utf8_unicode_character,
1715 utf8_string,
1716 utf8_string_size,
1717 &utf8_string_index,
1718 error ) != 1 )
1719 {
1720 libcerror_error_set(
1721 error,
1722 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1723 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1724 "%s: unable to copy Unicode character from UTF-8.",
1725 function );
1726
1727 return( -1 );
1728 }
1729 /* Convert the UTF-16 character bytes into an Unicode character
1730 */
1731 if( libuna_unicode_character_copy_from_utf16(
1732 &utf16_unicode_character,
1733 utf16_string,
1734 utf16_string_size,
1735 &utf16_string_index,
1736 error ) != 1 )
1737 {
1738 libcerror_error_set(
1739 error,
1740 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1741 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1742 "%s: unable to copy Unicode character from UTF-16.",
1743 function );
1744
1745 return( -1 );
1746 }
1747 if( utf8_unicode_character < utf16_unicode_character )
1748 {
1749 return( LIBUNA_COMPARE_LESS );
1750 }
1751 else if( utf8_unicode_character > utf16_unicode_character )
1752 {
1753 return( LIBUNA_COMPARE_GREATER );
1754 }
1755 }
1756 /* Check if both strings were entirely processed
1757 */
1758 if( utf8_string_index < utf8_string_size )
1759 {
1760 return( LIBUNA_COMPARE_GREATER );
1761 }
1762 else if( utf16_string_index < utf16_string_size )
1763 {
1764 return( LIBUNA_COMPARE_LESS );
1765 }
1766 return( LIBUNA_COMPARE_EQUAL );
1767 }
1768
1769 /* Determines the size of an UTF-8 string from an UTF-16 stream
1770 * Returns 1 if successful or -1 on error
1771 */
libuna_utf8_string_size_from_utf16_stream(const uint8_t * utf16_stream,size_t utf16_stream_size,int byte_order,size_t * utf8_string_size,libcerror_error_t ** error)1772 int libuna_utf8_string_size_from_utf16_stream(
1773 const uint8_t *utf16_stream,
1774 size_t utf16_stream_size,
1775 int byte_order,
1776 size_t *utf8_string_size,
1777 libcerror_error_t **error )
1778 {
1779 static char *function = "libuna_utf8_string_size_from_utf16_stream";
1780 size_t utf16_stream_index = 0;
1781 libuna_unicode_character_t unicode_character = 0;
1782 int read_byte_order = 0;
1783
1784 if( utf16_stream == NULL )
1785 {
1786 libcerror_error_set(
1787 error,
1788 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1789 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1790 "%s: invalid UTF-16 stream.",
1791 function );
1792
1793 return( -1 );
1794 }
1795 if( utf16_stream_size > (size_t) SSIZE_MAX )
1796 {
1797 libcerror_error_set(
1798 error,
1799 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1800 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1801 "%s: invalid UTF-16 stream size value exceeds maximum.",
1802 function );
1803
1804 return( -1 );
1805 }
1806 if( ( utf16_stream_size % 2 ) != 0 )
1807 {
1808 libcerror_error_set(
1809 error,
1810 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1811 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
1812 "%s: missing UTF-16 stream value.",
1813 function );
1814
1815 return( -1 );
1816 }
1817 if( utf8_string_size == NULL )
1818 {
1819 libcerror_error_set(
1820 error,
1821 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1822 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1823 "%s: invalid UTF-8 string size.",
1824 function );
1825
1826 return( -1 );
1827 }
1828 *utf8_string_size = 0;
1829
1830 if( utf16_stream_size == 0 )
1831 {
1832 return( 1 );
1833 }
1834 /* Check if UTF-16 stream is in big or little endian
1835 */
1836 if( utf16_stream_size >= 2 )
1837 {
1838 if( ( utf16_stream[ 0 ] == 0xfe )
1839 && ( utf16_stream[ 1 ] == 0xff ) )
1840 {
1841 read_byte_order = LIBUNA_ENDIAN_BIG;
1842 utf16_stream_index = 2;
1843 }
1844 else if( ( utf16_stream[ 0 ] == 0xff )
1845 && ( utf16_stream[ 1 ] == 0xfe ) )
1846 {
1847 read_byte_order = LIBUNA_ENDIAN_LITTLE;
1848 utf16_stream_index = 2;
1849 }
1850 if( byte_order == 0 )
1851 {
1852 byte_order = read_byte_order;
1853 }
1854 }
1855 if( ( byte_order != LIBUNA_ENDIAN_BIG )
1856 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
1857 {
1858 libcerror_error_set(
1859 error,
1860 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1861 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
1862 "%s: unsupported byte order.",
1863 function );
1864
1865 return( -1 );
1866 }
1867 while( ( utf16_stream_index + 1 ) < utf16_stream_size )
1868 {
1869 /* Convert the UTF-16 stream bytes into an Unicode character
1870 */
1871 if( libuna_unicode_character_copy_from_utf16_stream(
1872 &unicode_character,
1873 utf16_stream,
1874 utf16_stream_size,
1875 &utf16_stream_index,
1876 byte_order,
1877 error ) != 1 )
1878 {
1879 libcerror_error_set(
1880 error,
1881 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1882 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1883 "%s: unable to copy Unicode character from UTF-16 stream.",
1884 function );
1885
1886 return( -1 );
1887 }
1888 /* Determine how many UTF-8 character bytes are required
1889 */
1890 if( libuna_unicode_character_size_to_utf8(
1891 unicode_character,
1892 utf8_string_size,
1893 error ) != 1 )
1894 {
1895 libcerror_error_set(
1896 error,
1897 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1898 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1899 "%s: unable to unable to determine size of Unicode character in UTF-8.",
1900 function );
1901
1902 return( -1 );
1903 }
1904 if( unicode_character == 0 )
1905 {
1906 break;
1907 }
1908 }
1909 /* Check if the string is terminated with an end-of-string character
1910 */
1911 if( unicode_character != 0 )
1912 {
1913 *utf8_string_size += 1;
1914 }
1915 return( 1 );
1916 }
1917
1918 /* Copies an UTF-8 string from an UTF-16 stream
1919 * Returns 1 if successful or -1 on error
1920 */
libuna_utf8_string_copy_from_utf16_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf16_stream,size_t utf16_stream_size,int byte_order,libcerror_error_t ** error)1921 int libuna_utf8_string_copy_from_utf16_stream(
1922 libuna_utf8_character_t *utf8_string,
1923 size_t utf8_string_size,
1924 const uint8_t *utf16_stream,
1925 size_t utf16_stream_size,
1926 int byte_order,
1927 libcerror_error_t **error )
1928 {
1929 static char *function = "libuna_utf8_string_copy_from_utf16_stream";
1930 size_t utf8_string_index = 0;
1931
1932 if( libuna_utf8_string_with_index_copy_from_utf16_stream(
1933 utf8_string,
1934 utf8_string_size,
1935 &utf8_string_index,
1936 utf16_stream,
1937 utf16_stream_size,
1938 byte_order,
1939 error ) != 1 )
1940 {
1941 libcerror_error_set(
1942 error,
1943 LIBCERROR_ERROR_DOMAIN_RUNTIME,
1944 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
1945 "%s: unable to copy UTF-16 stream to UTF-8 string.",
1946 function );
1947
1948 return( -1 );
1949 }
1950 return( 1 );
1951 }
1952
1953 /* Copies an UTF-8 string from an UTF-16 stream
1954 * Returns 1 if successful or -1 on error
1955 */
libuna_utf8_string_with_index_copy_from_utf16_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const uint8_t * utf16_stream,size_t utf16_stream_size,int byte_order,libcerror_error_t ** error)1956 int libuna_utf8_string_with_index_copy_from_utf16_stream(
1957 libuna_utf8_character_t *utf8_string,
1958 size_t utf8_string_size,
1959 size_t *utf8_string_index,
1960 const uint8_t *utf16_stream,
1961 size_t utf16_stream_size,
1962 int byte_order,
1963 libcerror_error_t **error )
1964 {
1965 static char *function = "libuna_utf8_string_with_index_copy_from_utf16_stream";
1966 size_t utf16_stream_index = 0;
1967 libuna_unicode_character_t unicode_character = 0;
1968 int read_byte_order = 0;
1969
1970 if( utf8_string == NULL )
1971 {
1972 libcerror_error_set(
1973 error,
1974 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1975 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1976 "%s: invalid UTF-8 string.",
1977 function );
1978
1979 return( -1 );
1980 }
1981 if( utf8_string_size > (size_t) SSIZE_MAX )
1982 {
1983 libcerror_error_set(
1984 error,
1985 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1986 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1987 "%s: invalid UTF-8 string size value exceeds maximum.",
1988 function );
1989
1990 return( -1 );
1991 }
1992 if( utf8_string_index == NULL )
1993 {
1994 libcerror_error_set(
1995 error,
1996 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1997 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1998 "%s: invalid UTF-8 string index.",
1999 function );
2000
2001 return( -1 );
2002 }
2003 if( utf16_stream == NULL )
2004 {
2005 libcerror_error_set(
2006 error,
2007 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2008 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2009 "%s: invalid UTF-16 stream.",
2010 function );
2011
2012 return( -1 );
2013 }
2014 if( utf16_stream_size > (size_t) SSIZE_MAX )
2015 {
2016 libcerror_error_set(
2017 error,
2018 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2019 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2020 "%s: invalid UTF-16 stream size value exceeds maximum.",
2021 function );
2022
2023 return( -1 );
2024 }
2025 if( ( utf16_stream_size == 0 )
2026 || ( ( utf16_stream_size % 2 ) != 0 ) )
2027 {
2028 libcerror_error_set(
2029 error,
2030 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2031 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2032 "%s: missing UTF-16 stream value.",
2033 function );
2034
2035 return( -1 );
2036 }
2037 /* Check if UTF-16 stream is in big or little endian
2038 */
2039 if( utf16_stream_size >= 2 )
2040 {
2041 if( ( utf16_stream[ 0 ] == 0xfe )
2042 && ( utf16_stream[ 1 ] == 0xff ) )
2043 {
2044 read_byte_order = LIBUNA_ENDIAN_BIG;
2045 utf16_stream_index = 2;
2046 }
2047 else if( ( utf16_stream[ 0 ] == 0xff )
2048 && ( utf16_stream[ 1 ] == 0xfe ) )
2049 {
2050 read_byte_order = LIBUNA_ENDIAN_LITTLE;
2051 utf16_stream_index = 2;
2052 }
2053 if( byte_order == 0 )
2054 {
2055 byte_order = read_byte_order;
2056 }
2057 }
2058 if( ( byte_order != LIBUNA_ENDIAN_BIG )
2059 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
2060 {
2061 libcerror_error_set(
2062 error,
2063 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2064 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2065 "%s: unsupported byte order.",
2066 function );
2067
2068 return( -1 );
2069 }
2070 while( ( utf16_stream_index + 1 ) < utf16_stream_size )
2071 {
2072 /* Convert the UTF-16 stream bytes into an Unicode character
2073 */
2074 if( libuna_unicode_character_copy_from_utf16_stream(
2075 &unicode_character,
2076 utf16_stream,
2077 utf16_stream_size,
2078 &utf16_stream_index,
2079 byte_order,
2080 error ) != 1 )
2081 {
2082 libcerror_error_set(
2083 error,
2084 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2085 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2086 "%s: unable to copy Unicode character from UTF-16 stream.",
2087 function );
2088
2089 return( -1 );
2090 }
2091 /* Convert the Unicode character into UTF-8 character bytes
2092 */
2093 if( libuna_unicode_character_copy_to_utf8(
2094 unicode_character,
2095 utf8_string,
2096 utf8_string_size,
2097 utf8_string_index,
2098 error ) != 1 )
2099 {
2100 libcerror_error_set(
2101 error,
2102 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2103 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2104 "%s: unable to copy Unicode character to UTF-8.",
2105 function );
2106
2107 return( -1 );
2108 }
2109 if( unicode_character == 0 )
2110 {
2111 break;
2112 }
2113 }
2114 /* Check if the string is terminated with an end-of-string character
2115 */
2116 if( unicode_character != 0 )
2117 {
2118 if( *utf8_string_index >= utf8_string_size )
2119 {
2120 libcerror_error_set(
2121 error,
2122 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2123 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2124 "%s: UTF-8 string too small.",
2125 function );
2126
2127 return( -1 );
2128 }
2129 utf8_string[ *utf8_string_index ] = 0;
2130
2131 *utf8_string_index += 1;
2132 }
2133 return( 1 );
2134 }
2135
2136 /* Compares an UTF-8 string with an UTF-16 stream
2137 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
2138 */
libuna_utf8_string_compare_with_utf16_stream(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf16_stream,size_t utf16_stream_size,int byte_order,libcerror_error_t ** error)2139 int libuna_utf8_string_compare_with_utf16_stream(
2140 const libuna_utf8_character_t *utf8_string,
2141 size_t utf8_string_size,
2142 const uint8_t *utf16_stream,
2143 size_t utf16_stream_size,
2144 int byte_order,
2145 libcerror_error_t **error )
2146 {
2147 static char *function = "libuna_utf8_string_compare_with_utf16_stream";
2148 size_t utf16_stream_index = 0;
2149 size_t utf8_string_index = 0;
2150 libuna_unicode_character_t utf8_unicode_character = 0;
2151 libuna_unicode_character_t utf16_stream_unicode_character = 0;
2152 int read_byte_order = 0;
2153
2154 if( utf8_string == NULL )
2155 {
2156 libcerror_error_set(
2157 error,
2158 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2159 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2160 "%s: invalid UTF-8 string.",
2161 function );
2162
2163 return( -1 );
2164 }
2165 if( utf8_string_size > (size_t) SSIZE_MAX )
2166 {
2167 libcerror_error_set(
2168 error,
2169 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2170 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2171 "%s: invalid UTF-8 string size value exceeds maximum.",
2172 function );
2173
2174 return( -1 );
2175 }
2176 if( utf16_stream == NULL )
2177 {
2178 libcerror_error_set(
2179 error,
2180 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2181 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2182 "%s: invalid UTF-16 stream.",
2183 function );
2184
2185 return( -1 );
2186 }
2187 if( utf16_stream_size > (size_t) SSIZE_MAX )
2188 {
2189 libcerror_error_set(
2190 error,
2191 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2192 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2193 "%s: invalid UTF-16 stream size value exceeds maximum.",
2194 function );
2195
2196 return( -1 );
2197 }
2198 if( ( utf16_stream_size == 0 )
2199 || ( ( utf16_stream_size % 2 ) != 0 ) )
2200 {
2201 libcerror_error_set(
2202 error,
2203 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2204 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2205 "%s: missing UTF-16 stream value.",
2206 function );
2207
2208 return( -1 );
2209 }
2210 /* Check if UTF-16 stream is in big or little endian
2211 */
2212 if( utf16_stream_size >= 2 )
2213 {
2214 if( ( utf16_stream[ 0 ] == 0xfe )
2215 && ( utf16_stream[ 1 ] == 0xff ) )
2216 {
2217 read_byte_order = LIBUNA_ENDIAN_BIG;
2218 utf16_stream_index = 2;
2219 }
2220 else if( ( utf16_stream[ 0 ] == 0xff )
2221 && ( utf16_stream[ 1 ] == 0xfe ) )
2222 {
2223 read_byte_order = LIBUNA_ENDIAN_LITTLE;
2224 utf16_stream_index = 2;
2225 }
2226 if( byte_order == 0 )
2227 {
2228 byte_order = read_byte_order;
2229 }
2230 }
2231 if( ( byte_order != LIBUNA_ENDIAN_BIG )
2232 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
2233 {
2234 libcerror_error_set(
2235 error,
2236 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2237 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2238 "%s: unsupported byte order.",
2239 function );
2240
2241 return( -1 );
2242 }
2243 if( ( utf8_string_size >= 1 )
2244 && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
2245 {
2246 utf8_string_size -= 1;
2247 }
2248 /* Check if the UTF-16 stream is terminated with zero bytes
2249 */
2250 if( ( utf16_stream_size >= 2 )
2251 && ( utf16_stream[ utf16_stream_size - 2 ] == 0 )
2252 && ( utf16_stream[ utf16_stream_size - 1 ] == 0 ) )
2253 {
2254 utf16_stream_size -= 2;
2255 }
2256 while( ( utf8_string_index < utf8_string_size )
2257 && ( utf16_stream_index < utf16_stream_size ) )
2258 {
2259 /* Convert the UTF-8 character bytes into an Unicode character
2260 */
2261 if( libuna_unicode_character_copy_from_utf8(
2262 &utf8_unicode_character,
2263 utf8_string,
2264 utf8_string_size,
2265 &utf8_string_index,
2266 error ) != 1 )
2267 {
2268 libcerror_error_set(
2269 error,
2270 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2271 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2272 "%s: unable to copy Unicode character from UTF-8.",
2273 function );
2274
2275 return( -1 );
2276 }
2277 /* Convert the UTF-16 stream bytes into an Unicode character
2278 */
2279 if( libuna_unicode_character_copy_from_utf16_stream(
2280 &utf16_stream_unicode_character,
2281 utf16_stream,
2282 utf16_stream_size,
2283 &utf16_stream_index,
2284 byte_order,
2285 error ) != 1 )
2286 {
2287 libcerror_error_set(
2288 error,
2289 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2290 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2291 "%s: unable to copy Unicode character from UTF-16 stream.",
2292 function );
2293
2294 return( -1 );
2295 }
2296 if( utf8_unicode_character < utf16_stream_unicode_character )
2297 {
2298 return( LIBUNA_COMPARE_LESS );
2299 }
2300 else if( utf8_unicode_character > utf16_stream_unicode_character )
2301 {
2302 return( LIBUNA_COMPARE_GREATER );
2303 }
2304 }
2305 /* Check if both strings were entirely processed
2306 */
2307 if( utf8_string_index < utf8_string_size )
2308 {
2309 return( LIBUNA_COMPARE_GREATER );
2310 }
2311 else if( utf16_stream_index < utf16_stream_size )
2312 {
2313 return( LIBUNA_COMPARE_LESS );
2314 }
2315 return( LIBUNA_COMPARE_EQUAL );
2316 }
2317
2318 /* Determines the size of an UTF-8 string from an UTF-32 string
2319 * Returns 1 if successful or -1 on error
2320 */
libuna_utf8_string_size_from_utf32(const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,size_t * utf8_string_size,libcerror_error_t ** error)2321 int libuna_utf8_string_size_from_utf32(
2322 const libuna_utf32_character_t *utf32_string,
2323 size_t utf32_string_size,
2324 size_t *utf8_string_size,
2325 libcerror_error_t **error )
2326 {
2327 static char *function = "libuna_utf8_string_size_from_utf32";
2328 size_t utf32_string_index = 0;
2329 libuna_unicode_character_t unicode_character = 0;
2330
2331 if( utf32_string == NULL )
2332 {
2333 libcerror_error_set(
2334 error,
2335 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2336 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2337 "%s: invalid UTF-32 string.",
2338 function );
2339
2340 return( -1 );
2341 }
2342 if( utf32_string_size > (size_t) SSIZE_MAX )
2343 {
2344 libcerror_error_set(
2345 error,
2346 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2347 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2348 "%s: invalid UTF-32 string size value exceeds maximum.",
2349 function );
2350
2351 return( -1 );
2352 }
2353 if( utf8_string_size == NULL )
2354 {
2355 libcerror_error_set(
2356 error,
2357 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2358 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2359 "%s: invalid UTF-8 string size.",
2360 function );
2361
2362 return( -1 );
2363 }
2364 *utf8_string_size = 0;
2365
2366 if( utf32_string_size == 0 )
2367 {
2368 return( 1 );
2369 }
2370 while( utf32_string_index < utf32_string_size )
2371 {
2372 /* Convert the UTF-32 character bytes into an Unicode character
2373 */
2374 if( libuna_unicode_character_copy_from_utf32(
2375 &unicode_character,
2376 utf32_string,
2377 utf32_string_size,
2378 &utf32_string_index,
2379 error ) != 1 )
2380 {
2381 libcerror_error_set(
2382 error,
2383 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2384 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2385 "%s: unable to copy Unicode character from UTF-32.",
2386 function );
2387
2388 return( -1 );
2389 }
2390 /* Determine how many UTF-8 character bytes are required
2391 */
2392 if( libuna_unicode_character_size_to_utf8(
2393 unicode_character,
2394 utf8_string_size,
2395 error ) != 1 )
2396 {
2397 libcerror_error_set(
2398 error,
2399 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2400 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2401 "%s: unable to unable to determine size of Unicode character in UTF-8.",
2402 function );
2403
2404 return( -1 );
2405 }
2406 }
2407 /* Check if the string is terminated with an end-of-string character
2408 */
2409 if( unicode_character != 0 )
2410 {
2411 *utf8_string_size += 1;
2412 }
2413 return( 1 );
2414 }
2415
2416 /* Copies an UTF-8 string from an UTF-32 string
2417 * Returns 1 if successful or -1 on error
2418 */
libuna_utf8_string_copy_from_utf32(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,libcerror_error_t ** error)2419 int libuna_utf8_string_copy_from_utf32(
2420 libuna_utf8_character_t *utf8_string,
2421 size_t utf8_string_size,
2422 const libuna_utf32_character_t *utf32_string,
2423 size_t utf32_string_size,
2424 libcerror_error_t **error )
2425 {
2426 static char *function = "libuna_utf8_string_copy_from_utf32";
2427 size_t utf8_string_index = 0;
2428
2429 if( libuna_utf8_string_with_index_copy_from_utf32(
2430 utf8_string,
2431 utf8_string_size,
2432 &utf8_string_index,
2433 utf32_string,
2434 utf32_string_size,
2435 error ) != 1 )
2436 {
2437 libcerror_error_set(
2438 error,
2439 LIBCERROR_ERROR_DOMAIN_RUNTIME,
2440 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
2441 "%s: unable to copy UTF-32 string to UTF-8 string.",
2442 function );
2443
2444 return( -1 );
2445 }
2446 return( 1 );
2447 }
2448
2449 /* Copies an UTF-8 string from an UTF-32 string
2450 * Returns 1 if successful or -1 on error
2451 */
libuna_utf8_string_with_index_copy_from_utf32(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,libcerror_error_t ** error)2452 int libuna_utf8_string_with_index_copy_from_utf32(
2453 libuna_utf8_character_t *utf8_string,
2454 size_t utf8_string_size,
2455 size_t *utf8_string_index,
2456 const libuna_utf32_character_t *utf32_string,
2457 size_t utf32_string_size,
2458 libcerror_error_t **error )
2459 {
2460 static char *function = "libuna_utf8_string_with_index_copy_from_utf32";
2461 size_t utf32_string_index = 0;
2462 libuna_unicode_character_t unicode_character = 0;
2463
2464 if( utf8_string == NULL )
2465 {
2466 libcerror_error_set(
2467 error,
2468 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2469 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2470 "%s: invalid UTF-8 string.",
2471 function );
2472
2473 return( -1 );
2474 }
2475 if( utf8_string_size > (size_t) SSIZE_MAX )
2476 {
2477 libcerror_error_set(
2478 error,
2479 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2480 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2481 "%s: invalid UTF-8 string size value exceeds maximum.",
2482 function );
2483
2484 return( -1 );
2485 }
2486 if( utf8_string_index == NULL )
2487 {
2488 libcerror_error_set(
2489 error,
2490 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2491 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2492 "%s: invalid UTF-8 string index.",
2493 function );
2494
2495 return( -1 );
2496 }
2497 if( utf32_string == NULL )
2498 {
2499 libcerror_error_set(
2500 error,
2501 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2502 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2503 "%s: invalid UTF-32 string.",
2504 function );
2505
2506 return( -1 );
2507 }
2508 if( utf32_string_size > (size_t) SSIZE_MAX )
2509 {
2510 libcerror_error_set(
2511 error,
2512 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2513 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2514 "%s: invalid UTF-32 string size value exceeds maximum.",
2515 function );
2516
2517 return( -1 );
2518 }
2519 if( utf32_string_size == 0 )
2520 {
2521 libcerror_error_set(
2522 error,
2523 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2524 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
2525 "%s: missing UTF-32 string value.",
2526 function );
2527
2528 return( -1 );
2529 }
2530 while( utf32_string_index < utf32_string_size )
2531 {
2532 /* Convert the UTF-32 character bytes into an Unicode character
2533 */
2534 if( libuna_unicode_character_copy_from_utf32(
2535 &unicode_character,
2536 utf32_string,
2537 utf32_string_size,
2538 &utf32_string_index,
2539 error ) != 1 )
2540 {
2541 libcerror_error_set(
2542 error,
2543 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2544 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2545 "%s: unable to copy Unicode character from UTF-32.",
2546 function );
2547
2548 return( -1 );
2549 }
2550 /* Convert the Unicode character into UTF-8 character bytes
2551 */
2552 if( libuna_unicode_character_copy_to_utf8(
2553 unicode_character,
2554 utf8_string,
2555 utf8_string_size,
2556 utf8_string_index,
2557 error ) != 1 )
2558 {
2559 libcerror_error_set(
2560 error,
2561 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2562 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2563 "%s: unable to copy Unicode character to UTF-8.",
2564 function );
2565
2566 return( -1 );
2567 }
2568 }
2569 /* Check if the string is terminated with an end-of-string character
2570 */
2571 if( unicode_character != 0 )
2572 {
2573 if( *utf8_string_index >= utf8_string_size )
2574 {
2575 libcerror_error_set(
2576 error,
2577 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2578 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2579 "%s: UTF-8 string too small.",
2580 function );
2581
2582 return( -1 );
2583 }
2584 utf8_string[ *utf8_string_index ] = 0;
2585
2586 *utf8_string_index += 1;
2587 }
2588 return( 1 );
2589 }
2590
2591 /* Compares an UTF-8 string with an UTF-32 string
2592 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
2593 */
libuna_utf8_string_compare_with_utf32(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,libcerror_error_t ** error)2594 int libuna_utf8_string_compare_with_utf32(
2595 const libuna_utf8_character_t *utf8_string,
2596 size_t utf8_string_size,
2597 const libuna_utf32_character_t *utf32_string,
2598 size_t utf32_string_size,
2599 libcerror_error_t **error )
2600 {
2601 static char *function = "libuna_utf8_string_compare_with_utf32";
2602 size_t utf32_string_index = 0;
2603 size_t utf8_string_index = 0;
2604 libuna_unicode_character_t utf8_unicode_character = 0;
2605 libuna_unicode_character_t utf32_unicode_character = 0;
2606
2607 if( utf8_string == NULL )
2608 {
2609 libcerror_error_set(
2610 error,
2611 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2612 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2613 "%s: invalid UTF-8 string.",
2614 function );
2615
2616 return( -1 );
2617 }
2618 if( utf8_string_size > (size_t) SSIZE_MAX )
2619 {
2620 libcerror_error_set(
2621 error,
2622 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2623 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2624 "%s: invalid UTF-8 string size value exceeds maximum.",
2625 function );
2626
2627 return( -1 );
2628 }
2629 if( utf32_string == NULL )
2630 {
2631 libcerror_error_set(
2632 error,
2633 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2634 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2635 "%s: invalid UTF-32 string.",
2636 function );
2637
2638 return( -1 );
2639 }
2640 if( utf32_string_size > (size_t) SSIZE_MAX )
2641 {
2642 libcerror_error_set(
2643 error,
2644 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2645 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2646 "%s: invalid UTF-32 string size value exceeds maximum.",
2647 function );
2648
2649 return( -1 );
2650 }
2651 if( utf32_string_size == 0 )
2652 {
2653 libcerror_error_set(
2654 error,
2655 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2656 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
2657 "%s: missing UTF-32 string value.",
2658 function );
2659
2660 return( -1 );
2661 }
2662 if( ( utf8_string_size >= 1 )
2663 && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
2664 {
2665 utf8_string_size -= 1;
2666 }
2667 if( ( utf32_string_size >= 1 )
2668 && ( utf32_string[ utf32_string_size - 1 ] == 0 ) )
2669 {
2670 utf32_string_size -= 1;
2671 }
2672 while( ( utf8_string_index < utf8_string_size )
2673 && ( utf32_string_index < utf32_string_size ) )
2674 {
2675 /* Convert the UTF-8 character bytes into an Unicode character
2676 */
2677 if( libuna_unicode_character_copy_from_utf8(
2678 &utf8_unicode_character,
2679 utf8_string,
2680 utf8_string_size,
2681 &utf8_string_index,
2682 error ) != 1 )
2683 {
2684 libcerror_error_set(
2685 error,
2686 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2687 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2688 "%s: unable to copy Unicode character from UTF-8.",
2689 function );
2690
2691 return( -1 );
2692 }
2693 /* Convert the UTF-32 character bytes into an Unicode character
2694 */
2695 if( libuna_unicode_character_copy_from_utf32(
2696 &utf32_unicode_character,
2697 utf32_string,
2698 utf32_string_size,
2699 &utf32_string_index,
2700 error ) != 1 )
2701 {
2702 libcerror_error_set(
2703 error,
2704 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2705 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2706 "%s: unable to copy Unicode character from UTF-32.",
2707 function );
2708
2709 return( -1 );
2710 }
2711 if( utf8_unicode_character < utf32_unicode_character )
2712 {
2713 return( LIBUNA_COMPARE_LESS );
2714 }
2715 else if( utf8_unicode_character > utf32_unicode_character )
2716 {
2717 return( LIBUNA_COMPARE_GREATER );
2718 }
2719 }
2720 /* Check if both strings were entirely processed
2721 */
2722 if( utf8_string_index < utf8_string_size )
2723 {
2724 return( LIBUNA_COMPARE_GREATER );
2725 }
2726 else if( utf32_string_index < utf32_string_size )
2727 {
2728 return( LIBUNA_COMPARE_LESS );
2729 }
2730 return( LIBUNA_COMPARE_EQUAL );
2731 }
2732
2733 /* Determines the size of an UTF-8 string from an UTF-32 stream
2734 * Returns 1 if successful or -1 on error
2735 */
libuna_utf8_string_size_from_utf32_stream(const uint8_t * utf32_stream,size_t utf32_stream_size,int byte_order,size_t * utf8_string_size,libcerror_error_t ** error)2736 int libuna_utf8_string_size_from_utf32_stream(
2737 const uint8_t *utf32_stream,
2738 size_t utf32_stream_size,
2739 int byte_order,
2740 size_t *utf8_string_size,
2741 libcerror_error_t **error )
2742 {
2743 static char *function = "libuna_utf8_string_size_from_utf32_stream";
2744 size_t utf32_stream_index = 0;
2745 libuna_unicode_character_t unicode_character = 0;
2746 int read_byte_order = 0;
2747
2748 if( utf32_stream == NULL )
2749 {
2750 libcerror_error_set(
2751 error,
2752 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2753 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2754 "%s: invalid UTF-32 stream.",
2755 function );
2756
2757 return( -1 );
2758 }
2759 if( utf32_stream_size > (size_t) SSIZE_MAX )
2760 {
2761 libcerror_error_set(
2762 error,
2763 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2764 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2765 "%s: invalid UTF-32 stream size value exceeds maximum.",
2766 function );
2767
2768 return( -1 );
2769 }
2770 if( ( utf32_stream_size % 4 ) != 0 )
2771 {
2772 libcerror_error_set(
2773 error,
2774 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2775 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2776 "%s: missing UTF-32 stream value.",
2777 function );
2778
2779 return( -1 );
2780 }
2781 if( utf8_string_size == NULL )
2782 {
2783 libcerror_error_set(
2784 error,
2785 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2786 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2787 "%s: invalid UTF-8 string size.",
2788 function );
2789
2790 return( -1 );
2791 }
2792 *utf8_string_size = 0;
2793
2794 if( utf32_stream_size == 0 )
2795 {
2796 return( 1 );
2797 }
2798 /* Check if UTF-32 stream is in big or little endian
2799 */
2800 if( utf32_stream_size >= 4 )
2801 {
2802 if( ( utf32_stream[ 0 ] == 0x00 )
2803 && ( utf32_stream[ 1 ] == 0x00 )
2804 && ( utf32_stream[ 2 ] == 0xfe )
2805 && ( utf32_stream[ 3 ] == 0xff ) )
2806 {
2807 read_byte_order = LIBUNA_ENDIAN_BIG;
2808 utf32_stream_index = 4;
2809 }
2810 else if( ( utf32_stream[ 0 ] == 0xff )
2811 && ( utf32_stream[ 1 ] == 0xfe )
2812 && ( utf32_stream[ 2 ] == 0x00 )
2813 && ( utf32_stream[ 3 ] == 0x00 ) )
2814 {
2815 read_byte_order = LIBUNA_ENDIAN_LITTLE;
2816 utf32_stream_index = 4;
2817 }
2818 if( byte_order == 0 )
2819 {
2820 byte_order = read_byte_order;
2821 }
2822 }
2823 if( ( byte_order != LIBUNA_ENDIAN_BIG )
2824 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
2825 {
2826 libcerror_error_set(
2827 error,
2828 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2829 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2830 "%s: unsupported byte order.",
2831 function );
2832
2833 return( -1 );
2834 }
2835 while( ( utf32_stream_index + 3 ) < utf32_stream_size )
2836 {
2837 /* Convert the UTF-32 stream bytes into an Unicode character
2838 */
2839 if( libuna_unicode_character_copy_from_utf32_stream(
2840 &unicode_character,
2841 utf32_stream,
2842 utf32_stream_size,
2843 &utf32_stream_index,
2844 byte_order,
2845 error ) != 1 )
2846 {
2847 libcerror_error_set(
2848 error,
2849 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2850 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2851 "%s: unable to copy Unicode character from UTF-32 stream.",
2852 function );
2853
2854 return( -1 );
2855 }
2856 /* Determine how many UTF-8 character bytes are required
2857 */
2858 if( libuna_unicode_character_size_to_utf8(
2859 unicode_character,
2860 utf8_string_size,
2861 error ) != 1 )
2862 {
2863 libcerror_error_set(
2864 error,
2865 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2866 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2867 "%s: unable to unable to determine size of Unicode character in UTF-8.",
2868 function );
2869
2870 return( -1 );
2871 }
2872 if( unicode_character == 0 )
2873 {
2874 break;
2875 }
2876 }
2877 /* Check if the string is terminated with an end-of-string character
2878 */
2879 if( unicode_character != 0 )
2880 {
2881 *utf8_string_size += 1;
2882 }
2883 return( 1 );
2884 }
2885
2886 /* Copies an UTF-8 string from an UTF-32 stream
2887 * Returns 1 if successful or -1 on error
2888 */
libuna_utf8_string_copy_from_utf32_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf32_stream,size_t utf32_stream_size,int byte_order,libcerror_error_t ** error)2889 int libuna_utf8_string_copy_from_utf32_stream(
2890 libuna_utf8_character_t *utf8_string,
2891 size_t utf8_string_size,
2892 const uint8_t *utf32_stream,
2893 size_t utf32_stream_size,
2894 int byte_order,
2895 libcerror_error_t **error )
2896 {
2897 static char *function = "libuna_utf8_string_copy_from_utf32_stream";
2898 size_t utf8_string_index = 0;
2899
2900 if( libuna_utf8_string_with_index_copy_from_utf32_stream(
2901 utf8_string,
2902 utf8_string_size,
2903 &utf8_string_index,
2904 utf32_stream,
2905 utf32_stream_size,
2906 byte_order,
2907 error ) != 1 )
2908 {
2909 libcerror_error_set(
2910 error,
2911 LIBCERROR_ERROR_DOMAIN_RUNTIME,
2912 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
2913 "%s: unable to copy UTF-32 stream to UTF-8 string.",
2914 function );
2915
2916 return( -1 );
2917 }
2918 return( 1 );
2919 }
2920
2921 /* Copies an UTF-8 string from an UTF-32 stream
2922 * Returns 1 if successful or -1 on error
2923 */
libuna_utf8_string_with_index_copy_from_utf32_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const uint8_t * utf32_stream,size_t utf32_stream_size,int byte_order,libcerror_error_t ** error)2924 int libuna_utf8_string_with_index_copy_from_utf32_stream(
2925 libuna_utf8_character_t *utf8_string,
2926 size_t utf8_string_size,
2927 size_t *utf8_string_index,
2928 const uint8_t *utf32_stream,
2929 size_t utf32_stream_size,
2930 int byte_order,
2931 libcerror_error_t **error )
2932 {
2933 static char *function = "libuna_utf8_string_with_index_copy_from_utf32_stream";
2934 size_t utf32_stream_index = 0;
2935 libuna_unicode_character_t unicode_character = 0;
2936 int read_byte_order = 0;
2937
2938 if( utf8_string == NULL )
2939 {
2940 libcerror_error_set(
2941 error,
2942 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2943 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2944 "%s: invalid UTF-8 string.",
2945 function );
2946
2947 return( -1 );
2948 }
2949 if( utf8_string_size > (size_t) SSIZE_MAX )
2950 {
2951 libcerror_error_set(
2952 error,
2953 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2954 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2955 "%s: invalid UTF-8 string size value exceeds maximum.",
2956 function );
2957
2958 return( -1 );
2959 }
2960 if( utf8_string_index == NULL )
2961 {
2962 libcerror_error_set(
2963 error,
2964 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2965 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2966 "%s: invalid UTF-8 string index.",
2967 function );
2968
2969 return( -1 );
2970 }
2971 if( utf32_stream == NULL )
2972 {
2973 libcerror_error_set(
2974 error,
2975 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2976 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2977 "%s: invalid UTF-32 stream.",
2978 function );
2979
2980 return( -1 );
2981 }
2982 if( utf32_stream_size > (size_t) SSIZE_MAX )
2983 {
2984 libcerror_error_set(
2985 error,
2986 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2987 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2988 "%s: invalid UTF-32 stream size value exceeds maximum.",
2989 function );
2990
2991 return( -1 );
2992 }
2993 if( ( utf32_stream_size == 0 )
2994 || ( ( utf32_stream_size % 4 ) != 0 ) )
2995 {
2996 libcerror_error_set(
2997 error,
2998 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2999 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3000 "%s: missing UTF-32 stream value.",
3001 function );
3002
3003 return( -1 );
3004 }
3005 /* Check if UTF-32 stream is in big or little endian
3006 */
3007 if( utf32_stream_size >= 4 )
3008 {
3009 if( ( utf32_stream[ 0 ] == 0x00 )
3010 && ( utf32_stream[ 1 ] == 0x00 )
3011 && ( utf32_stream[ 2 ] == 0xfe )
3012 && ( utf32_stream[ 3 ] == 0xff ) )
3013 {
3014 read_byte_order = LIBUNA_ENDIAN_BIG;
3015 utf32_stream_index = 4;
3016 }
3017 else if( ( utf32_stream[ 0 ] == 0xff )
3018 && ( utf32_stream[ 1 ] == 0xfe )
3019 && ( utf32_stream[ 2 ] == 0x00 )
3020 && ( utf32_stream[ 3 ] == 0x00 ) )
3021 {
3022 read_byte_order = LIBUNA_ENDIAN_LITTLE;
3023 utf32_stream_index = 4;
3024 }
3025 if( byte_order == 0 )
3026 {
3027 byte_order = read_byte_order;
3028 }
3029 }
3030 if( ( byte_order != LIBUNA_ENDIAN_BIG )
3031 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
3032 {
3033 libcerror_error_set(
3034 error,
3035 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3036 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3037 "%s: unsupported byte order.",
3038 function );
3039
3040 return( -1 );
3041 }
3042 while( ( utf32_stream_index + 3 ) < utf32_stream_size )
3043 {
3044 /* Convert the UTF-32 stream bytes into an Unicode character
3045 */
3046 if( libuna_unicode_character_copy_from_utf32_stream(
3047 &unicode_character,
3048 utf32_stream,
3049 utf32_stream_size,
3050 &utf32_stream_index,
3051 byte_order,
3052 error ) != 1 )
3053 {
3054 libcerror_error_set(
3055 error,
3056 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3057 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
3058 "%s: unable to copy Unicode character from UTF-32 stream.",
3059 function );
3060
3061 return( -1 );
3062 }
3063 /* Convert the Unicode character into UTF-8 character bytes
3064 */
3065 if( libuna_unicode_character_copy_to_utf8(
3066 unicode_character,
3067 utf8_string,
3068 utf8_string_size,
3069 utf8_string_index,
3070 error ) != 1 )
3071 {
3072 libcerror_error_set(
3073 error,
3074 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3075 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3076 "%s: unable to copy Unicode character to UTF-8.",
3077 function );
3078
3079 return( -1 );
3080 }
3081 if( unicode_character == 0 )
3082 {
3083 break;
3084 }
3085 }
3086 /* Check if the string is terminated with an end-of-string character
3087 */
3088 if( unicode_character != 0 )
3089 {
3090 if( *utf8_string_index >= utf8_string_size )
3091 {
3092 libcerror_error_set(
3093 error,
3094 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3095 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3096 "%s: UTF-8 string too small.",
3097 function );
3098
3099 return( -1 );
3100 }
3101 utf8_string[ *utf8_string_index ] = 0;
3102
3103 *utf8_string_index += 1;
3104 }
3105 return( 1 );
3106 }
3107
3108 /* Compares an UTF-8 string with an UTF-32 stream
3109 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
3110 */
libuna_utf8_string_compare_with_utf32_stream(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf32_stream,size_t utf32_stream_size,int byte_order,libcerror_error_t ** error)3111 int libuna_utf8_string_compare_with_utf32_stream(
3112 const libuna_utf8_character_t *utf8_string,
3113 size_t utf8_string_size,
3114 const uint8_t *utf32_stream,
3115 size_t utf32_stream_size,
3116 int byte_order,
3117 libcerror_error_t **error )
3118 {
3119 static char *function = "libuna_utf8_string_compare_with_utf32_stream";
3120 size_t utf32_stream_index = 0;
3121 size_t utf8_string_index = 0;
3122 libuna_unicode_character_t utf8_unicode_character = 0;
3123 libuna_unicode_character_t utf32_stream_unicode_character = 0;
3124 int read_byte_order = 0;
3125
3126 if( utf8_string == NULL )
3127 {
3128 libcerror_error_set(
3129 error,
3130 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3131 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3132 "%s: invalid UTF-8 string.",
3133 function );
3134
3135 return( -1 );
3136 }
3137 if( utf8_string_size > (size_t) SSIZE_MAX )
3138 {
3139 libcerror_error_set(
3140 error,
3141 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3142 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3143 "%s: invalid UTF-8 string size value exceeds maximum.",
3144 function );
3145
3146 return( -1 );
3147 }
3148 if( utf32_stream == NULL )
3149 {
3150 libcerror_error_set(
3151 error,
3152 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3153 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3154 "%s: invalid UTF-32 stream.",
3155 function );
3156
3157 return( -1 );
3158 }
3159 if( utf32_stream_size > (size_t) SSIZE_MAX )
3160 {
3161 libcerror_error_set(
3162 error,
3163 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3164 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3165 "%s: invalid UTF-32 stream size value exceeds maximum.",
3166 function );
3167
3168 return( -1 );
3169 }
3170 if( ( utf32_stream_size == 0 )
3171 || ( ( utf32_stream_size % 4 ) != 0 ) )
3172 {
3173 libcerror_error_set(
3174 error,
3175 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3176 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3177 "%s: missing UTF-32 stream value.",
3178 function );
3179
3180 return( -1 );
3181 }
3182 /* Check if UTF-32 stream is in big or little endian
3183 */
3184 if( utf32_stream_size >= 4 )
3185 {
3186 if( ( utf32_stream[ 0 ] == 0x00 )
3187 && ( utf32_stream[ 1 ] == 0x00 )
3188 && ( utf32_stream[ 2 ] == 0xfe )
3189 && ( utf32_stream[ 3 ] == 0xff ) )
3190 {
3191 read_byte_order = LIBUNA_ENDIAN_BIG;
3192 utf32_stream_index = 4;
3193 }
3194 else if( ( utf32_stream[ 0 ] == 0xff )
3195 && ( utf32_stream[ 1 ] == 0xfe )
3196 && ( utf32_stream[ 2 ] == 0x00 )
3197 && ( utf32_stream[ 3 ] == 0x00 ) )
3198 {
3199 read_byte_order = LIBUNA_ENDIAN_LITTLE;
3200 utf32_stream_index = 4;
3201 }
3202 if( byte_order == 0 )
3203 {
3204 byte_order = read_byte_order;
3205 }
3206 }
3207 if( ( byte_order != LIBUNA_ENDIAN_BIG )
3208 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
3209 {
3210 libcerror_error_set(
3211 error,
3212 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3213 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3214 "%s: unsupported byte order.",
3215 function );
3216
3217 return( -1 );
3218 }
3219 if( ( utf8_string_size >= 1 )
3220 && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
3221 {
3222 utf8_string_size -= 1;
3223 }
3224 /* Check if the UTF-32 stream is terminated with zero bytes
3225 */
3226 if( ( utf32_stream_size >= 4 )
3227 && ( utf32_stream[ utf32_stream_size - 4 ] == 0 )
3228 && ( utf32_stream[ utf32_stream_size - 3 ] == 0 )
3229 && ( utf32_stream[ utf32_stream_size - 2 ] == 0 )
3230 && ( utf32_stream[ utf32_stream_size - 1 ] == 0 ) )
3231 {
3232 utf32_stream_size -= 1;
3233 }
3234 while( ( utf8_string_index < utf8_string_size )
3235 && ( utf32_stream_index < utf32_stream_size ) )
3236 {
3237 /* Convert the UTF-8 character bytes into an Unicode character
3238 */
3239 if( libuna_unicode_character_copy_from_utf8(
3240 &utf8_unicode_character,
3241 utf8_string,
3242 utf8_string_size,
3243 &utf8_string_index,
3244 error ) != 1 )
3245 {
3246 libcerror_error_set(
3247 error,
3248 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3249 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3250 "%s: unable to copy Unicode character from UTF-8.",
3251 function );
3252
3253 return( -1 );
3254 }
3255 /* Convert the UTF-32 stream bytes into an Unicode character
3256 */
3257 if( libuna_unicode_character_copy_from_utf32_stream(
3258 &utf32_stream_unicode_character,
3259 utf32_stream,
3260 utf32_stream_size,
3261 &utf32_stream_index,
3262 byte_order,
3263 error ) != 1 )
3264 {
3265 libcerror_error_set(
3266 error,
3267 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3268 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
3269 "%s: unable to copy Unicode character from UTF-32 stream.",
3270 function );
3271
3272 return( -1 );
3273 }
3274 if( utf8_unicode_character < utf32_stream_unicode_character )
3275 {
3276 return( LIBUNA_COMPARE_LESS );
3277 }
3278 else if( utf8_unicode_character > utf32_stream_unicode_character )
3279 {
3280 return( LIBUNA_COMPARE_GREATER );
3281 }
3282 }
3283 /* Check if both strings were entirely processed
3284 */
3285 if( utf8_string_index < utf8_string_size )
3286 {
3287 return( LIBUNA_COMPARE_GREATER );
3288 }
3289 else if( utf32_stream_index < utf32_stream_size )
3290 {
3291 return( LIBUNA_COMPARE_LESS );
3292 }
3293 return( LIBUNA_COMPARE_EQUAL );
3294 }
3295
3296 /* Determines the size of an UTF-8 string from a Standard Compression Scheme for Unicode (SCSU) stream
3297 * Returns 1 if successful or -1 on error
3298 */
libuna_utf8_string_size_from_scsu_stream(const uint8_t * scsu_stream,size_t scsu_stream_size,size_t * utf8_string_size,libcerror_error_t ** error)3299 int libuna_utf8_string_size_from_scsu_stream(
3300 const uint8_t *scsu_stream,
3301 size_t scsu_stream_size,
3302 size_t *utf8_string_size,
3303 libcerror_error_t **error )
3304 {
3305 uint32_t scsu_dynamic_window_positions[ 8 ] = {
3306 0x0080, 0x00c0, 0x0400, 0x0600, 0x0900, 0x3040, 0x30a0, 0xff00 };
3307
3308 static char *function = "libuna_utf8_string_size_from_scsu_stream";
3309 libuna_unicode_character_t unicode_character = 0;
3310 size_t scsu_stream_index = 0;
3311 uint32_t scsu_window_position = 0;
3312 uint8_t byte_value1 = 0;
3313 uint8_t byte_value2 = 0;
3314 uint8_t byte_value3 = 0;
3315 uint8_t dynamic_window_position_index = 0;
3316 uint8_t in_unicode_mode = 0;
3317 uint8_t unicode_character_set = 0;
3318
3319 if( scsu_stream == NULL )
3320 {
3321 libcerror_error_set(
3322 error,
3323 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3324 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3325 "%s: invalid SCSU stream.",
3326 function );
3327
3328 return( -1 );
3329 }
3330 if( scsu_stream_size > (size_t) SSIZE_MAX )
3331 {
3332 libcerror_error_set(
3333 error,
3334 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3335 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3336 "%s: invalid SCSU stream size value exceeds maximum.",
3337 function );
3338
3339 return( -1 );
3340 }
3341 if( utf8_string_size == NULL )
3342 {
3343 libcerror_error_set(
3344 error,
3345 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3346 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3347 "%s: invalid UTF-8 string size.",
3348 function );
3349
3350 return( -1 );
3351 }
3352 *utf8_string_size = 0;
3353
3354 if( scsu_stream_size == 0 )
3355 {
3356 return( 1 );
3357 }
3358 while( scsu_stream_index < scsu_stream_size )
3359 {
3360 unicode_character_set = 0;
3361
3362 if( scsu_stream_index >= scsu_stream_size )
3363 {
3364 libcerror_error_set(
3365 error,
3366 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3367 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3368 "%s: SCSU stream too small.",
3369 function );
3370
3371 return( -1 );
3372 }
3373 byte_value1 = scsu_stream[ scsu_stream_index++ ];
3374
3375 if( in_unicode_mode != 0 )
3376 {
3377 if( ( byte_value1 <= 0xdf )
3378 || ( byte_value1 >= 0xf3 ) )
3379 {
3380 if( scsu_stream_index >= scsu_stream_size )
3381 {
3382 libcerror_error_set(
3383 error,
3384 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3385 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3386 "%s: SCSU stream too small.",
3387 function );
3388
3389 return( -1 );
3390 }
3391 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3392
3393 unicode_character = byte_value1;
3394 unicode_character <<= 8;
3395 unicode_character |= byte_value2;
3396
3397 unicode_character_set = 1;
3398 }
3399 /* UCn tags
3400 */
3401 else if( ( byte_value1 >= 0xe0 )
3402 && ( byte_value1 <= 0xe7 ) )
3403 {
3404 dynamic_window_position_index = byte_value1 - 0xe0;
3405
3406 in_unicode_mode = 0;
3407 }
3408 /* UDn tags
3409 */
3410 else if( ( byte_value1 >= 0xe8 )
3411 && ( byte_value1 <= 0xef ) )
3412 {
3413 if( scsu_stream_index >= scsu_stream_size )
3414 {
3415 libcerror_error_set(
3416 error,
3417 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3418 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3419 "%s: SCSU stream too small.",
3420 function );
3421
3422 return( -1 );
3423 }
3424 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3425
3426 dynamic_window_position_index = byte_value1 - 0xe8;
3427 scsu_window_position = libuna_scsu_window_offset_table[ byte_value2 ];
3428
3429 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3430
3431 in_unicode_mode = 0;
3432 }
3433 /* UQU tag
3434 */
3435 else if( byte_value1 == 0xf0 )
3436 {
3437 if( ( scsu_stream_size < 2 )
3438 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3439 {
3440 libcerror_error_set(
3441 error,
3442 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3443 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3444 "%s: SCSU stream too small.",
3445 function );
3446
3447 return( -1 );
3448 }
3449 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3450 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3451
3452 unicode_character = byte_value2;
3453 unicode_character <<= 8;
3454 unicode_character |= byte_value3;
3455
3456 unicode_character_set = 1;
3457 }
3458 /* UDX tag
3459 */
3460 else if( byte_value1 == 0xf1 )
3461 {
3462 if( ( scsu_stream_size < 2 )
3463 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3464 {
3465 libcerror_error_set(
3466 error,
3467 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3468 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3469 "%s: SCSU stream too small.",
3470 function );
3471
3472 return( -1 );
3473 }
3474 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3475 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3476
3477 dynamic_window_position_index = byte_value2 >> 5;
3478 scsu_window_position = byte_value2 & 0x1f;
3479 scsu_window_position <<= 8;
3480 scsu_window_position |= byte_value3;
3481 scsu_window_position <<= 7;
3482 scsu_window_position += 0x00010000UL;
3483
3484 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3485
3486 in_unicode_mode = 0;
3487 }
3488 }
3489 else
3490 {
3491 if( ( byte_value1 == 0x00 )
3492 || ( byte_value1 == 0x09 )
3493 || ( byte_value1 == 0x0a )
3494 || ( byte_value1 == 0x0c )
3495 || ( byte_value1 == 0x0d )
3496 || ( ( byte_value1 >= 0x20 )
3497 && ( byte_value1 <= 0x7f ) ) )
3498 {
3499 unicode_character = byte_value1;
3500
3501 unicode_character_set = 1;
3502 }
3503 /* SQn tags
3504 */
3505 else if( ( byte_value1 >= 0x01 )
3506 && ( byte_value1 <= 0x08 ) )
3507 {
3508 if( scsu_stream_index >= scsu_stream_size )
3509 {
3510 libcerror_error_set(
3511 error,
3512 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3513 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3514 "%s: SCSU stream too small.",
3515 function );
3516
3517 return( -1 );
3518 }
3519 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3520
3521 unicode_character = byte_value2;
3522
3523 if( byte_value2 < 0x80 )
3524 {
3525 unicode_character += libuna_scsu_static_window_positions[ byte_value1 - 0x01 ];
3526 }
3527 else
3528 {
3529 unicode_character -= 0x80;
3530 unicode_character += scsu_dynamic_window_positions[ byte_value1 - 0x01 ];
3531 }
3532 unicode_character_set = 1;
3533 }
3534 /* SDX tag
3535 */
3536 else if( byte_value1 == 0x0b )
3537 {
3538 if( ( scsu_stream_size < 2 )
3539 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3540 {
3541 libcerror_error_set(
3542 error,
3543 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3544 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3545 "%s: SCSU stream too small.",
3546 function );
3547
3548 return( -1 );
3549 }
3550 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3551 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3552
3553 dynamic_window_position_index = byte_value2 >> 5;
3554 scsu_window_position = byte_value2 & 0x1f;
3555 scsu_window_position <<= 8;
3556 scsu_window_position |= byte_value3;
3557 scsu_window_position <<= 7;
3558 scsu_window_position += 0x00010000UL;
3559
3560 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3561 }
3562 /* SQU tag
3563 */
3564 else if( byte_value1 == 0x0e )
3565 {
3566 if( ( scsu_stream_size < 2 )
3567 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3568 {
3569 libcerror_error_set(
3570 error,
3571 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3572 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3573 "%s: SCSU stream too small.",
3574 function );
3575
3576 return( -1 );
3577 }
3578 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3579 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3580
3581 unicode_character = byte_value2;
3582 unicode_character <<= 8;
3583 unicode_character |= byte_value3;
3584
3585 unicode_character_set = 1;
3586 }
3587 /* SCU tag
3588 */
3589 else if( byte_value1 == 0x0f )
3590 {
3591 in_unicode_mode = 1;
3592 }
3593 /* SCn tags
3594 */
3595 else if( ( byte_value1 >= 0x10 )
3596 && ( byte_value1 <= 0x17 ) )
3597 {
3598 dynamic_window_position_index = byte_value1 - 0x10;
3599 }
3600 /* SDn tags
3601 */
3602 else if( ( byte_value1 >= 0x18 )
3603 && ( byte_value1 <= 0x1f ) )
3604 {
3605 if( scsu_stream_index >= scsu_stream_size )
3606 {
3607 libcerror_error_set(
3608 error,
3609 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3610 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3611 "%s: SCSU stream too small.",
3612 function );
3613
3614 return( -1 );
3615 }
3616 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3617
3618 dynamic_window_position_index = byte_value1 - 0x18;
3619 scsu_window_position = libuna_scsu_window_offset_table[ byte_value2 ];
3620
3621 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3622 }
3623 else if( byte_value1 >= 0x80 )
3624 {
3625 unicode_character = byte_value1 - 0x80;
3626 unicode_character += scsu_dynamic_window_positions[ dynamic_window_position_index ];
3627
3628 unicode_character_set = 1;
3629 }
3630 }
3631 if( unicode_character_set != 0 )
3632 {
3633 /* Determine how many UTF-8 character bytes are required
3634 */
3635 if( libuna_unicode_character_size_to_utf8(
3636 unicode_character,
3637 utf8_string_size,
3638 error ) != 1 )
3639 {
3640 libcerror_error_set(
3641 error,
3642 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3643 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
3644 "%s: unable to unable to determine size of Unicode character in UTF-8.",
3645 function );
3646
3647 return( -1 );
3648 }
3649 if( unicode_character == 0 )
3650 {
3651 break;
3652 }
3653 }
3654 }
3655 /* Check if the string is terminated with an end-of-string character
3656 */
3657 if( unicode_character != 0 )
3658 {
3659 *utf8_string_size += 1;
3660 }
3661 return( 1 );
3662 }
3663
3664 /* Copies an UTF-8 string from a Standard Compression Scheme for Unicode (SCSU) stream
3665 * Returns 1 if successful or -1 on error
3666 */
libuna_utf8_string_copy_from_scsu_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * scsu_stream,size_t scsu_stream_size,libcerror_error_t ** error)3667 int libuna_utf8_string_copy_from_scsu_stream(
3668 libuna_utf8_character_t *utf8_string,
3669 size_t utf8_string_size,
3670 const uint8_t *scsu_stream,
3671 size_t scsu_stream_size,
3672 libcerror_error_t **error )
3673 {
3674 static char *function = "libuna_utf8_string_copy_from_scsu_stream";
3675 size_t utf8_string_index = 0;
3676
3677 if( libuna_utf8_string_with_index_copy_from_scsu_stream(
3678 utf8_string,
3679 utf8_string_size,
3680 &utf8_string_index,
3681 scsu_stream,
3682 scsu_stream_size,
3683 error ) != 1 )
3684 {
3685 libcerror_error_set(
3686 error,
3687 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3688 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
3689 "%s: unable to SCSU stream to UTF-8 string.",
3690 function );
3691
3692 return( -1 );
3693 }
3694 return( 1 );
3695 }
3696
3697 /* Copies an UTF-8 string from a Standard Compression Scheme for Unicode (SCSU) stream
3698 * Returns 1 if successful or -1 on error
3699 */
libuna_utf8_string_with_index_copy_from_scsu_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const uint8_t * scsu_stream,size_t scsu_stream_size,libcerror_error_t ** error)3700 int libuna_utf8_string_with_index_copy_from_scsu_stream(
3701 libuna_utf8_character_t *utf8_string,
3702 size_t utf8_string_size,
3703 size_t *utf8_string_index,
3704 const uint8_t *scsu_stream,
3705 size_t scsu_stream_size,
3706 libcerror_error_t **error )
3707 {
3708 uint32_t scsu_dynamic_window_positions[ 8 ] = {
3709 0x0080, 0x00c0, 0x0400, 0x0600, 0x0900, 0x3040, 0x30a0, 0xff00 };
3710
3711 static char *function = "libuna_utf8_string_with_index_copy_from_scsu_stream";
3712 libuna_unicode_character_t unicode_character = 0;
3713 size_t scsu_stream_index = 0;
3714 uint32_t scsu_window_position = 0;
3715 uint8_t byte_value1 = 0;
3716 uint8_t byte_value2 = 0;
3717 uint8_t byte_value3 = 0;
3718 uint8_t dynamic_window_position_index = 0;
3719 uint8_t in_unicode_mode = 0;
3720 uint8_t unicode_character_set = 0;
3721
3722 if( utf8_string == NULL )
3723 {
3724 libcerror_error_set(
3725 error,
3726 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3727 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3728 "%s: invalid UTF-8 string.",
3729 function );
3730
3731 return( -1 );
3732 }
3733 if( utf8_string_size > (size_t) SSIZE_MAX )
3734 {
3735 libcerror_error_set(
3736 error,
3737 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3738 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3739 "%s: invalid UTF-8 string size value exceeds maximum.",
3740 function );
3741
3742 return( -1 );
3743 }
3744 if( utf8_string_index == NULL )
3745 {
3746 libcerror_error_set(
3747 error,
3748 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3749 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3750 "%s: invalid UTF-8 string index.",
3751 function );
3752
3753 return( -1 );
3754 }
3755 if( scsu_stream == NULL )
3756 {
3757 libcerror_error_set(
3758 error,
3759 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3760 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3761 "%s: invalid SCSU stream.",
3762 function );
3763
3764 return( -1 );
3765 }
3766 if( scsu_stream_size > (size_t) SSIZE_MAX )
3767 {
3768 libcerror_error_set(
3769 error,
3770 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3771 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3772 "%s: invalid SCSU stream size value exceeds maximum.",
3773 function );
3774
3775 return( -1 );
3776 }
3777 if( scsu_stream_size == 0 )
3778 {
3779 libcerror_error_set(
3780 error,
3781 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3782 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
3783 "%s: missing SCSU stream value.",
3784 function );
3785
3786 return( -1 );
3787 }
3788 while( scsu_stream_index < scsu_stream_size )
3789 {
3790 unicode_character_set = 0;
3791
3792 if( scsu_stream_index >= scsu_stream_size )
3793 {
3794 libcerror_error_set(
3795 error,
3796 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3797 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3798 "%s: SCSU stream too small.",
3799 function );
3800
3801 return( -1 );
3802 }
3803 byte_value1 = scsu_stream[ scsu_stream_index++ ];
3804
3805 if( in_unicode_mode != 0 )
3806 {
3807 if( ( byte_value1 <= 0xdf )
3808 || ( byte_value1 >= 0xf3 ) )
3809 {
3810 if( scsu_stream_index >= scsu_stream_size )
3811 {
3812 libcerror_error_set(
3813 error,
3814 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3815 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3816 "%s: SCSU stream too small.",
3817 function );
3818
3819 return( -1 );
3820 }
3821 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3822
3823 unicode_character = byte_value1;
3824 unicode_character <<= 8;
3825 unicode_character |= byte_value2;
3826
3827 unicode_character_set = 1;
3828 }
3829 /* UCn tags
3830 */
3831 else if( ( byte_value1 >= 0xe0 )
3832 && ( byte_value1 <= 0xe7 ) )
3833 {
3834 dynamic_window_position_index = byte_value1 - 0xe0;
3835
3836 in_unicode_mode = 0;
3837 }
3838 /* UDn tags
3839 */
3840 else if( ( byte_value1 >= 0xe8 )
3841 && ( byte_value1 <= 0xef ) )
3842 {
3843 if( scsu_stream_index >= scsu_stream_size )
3844 {
3845 libcerror_error_set(
3846 error,
3847 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3848 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3849 "%s: SCSU stream too small.",
3850 function );
3851
3852 return( -1 );
3853 }
3854 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3855
3856 dynamic_window_position_index = byte_value1 - 0xe8;
3857 scsu_window_position = libuna_scsu_window_offset_table[ byte_value2 ];
3858
3859 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3860
3861 in_unicode_mode = 0;
3862 }
3863 /* UQU tag
3864 */
3865 else if( byte_value1 == 0xf0 )
3866 {
3867 if( ( scsu_stream_size < 2 )
3868 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3869 {
3870 libcerror_error_set(
3871 error,
3872 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3873 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3874 "%s: SCSU stream too small.",
3875 function );
3876
3877 return( -1 );
3878 }
3879 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3880 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3881
3882 unicode_character = byte_value2;
3883 unicode_character <<= 8;
3884 unicode_character |= byte_value3;
3885
3886 unicode_character_set = 1;
3887 }
3888 /* UDX tag
3889 */
3890 else if( byte_value1 == 0xf1 )
3891 {
3892 if( ( scsu_stream_size < 2 )
3893 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3894 {
3895 libcerror_error_set(
3896 error,
3897 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3898 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3899 "%s: SCSU stream too small.",
3900 function );
3901
3902 return( -1 );
3903 }
3904 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3905 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3906
3907 dynamic_window_position_index = byte_value2 >> 5;
3908 scsu_window_position = byte_value2 & 0x1f;
3909 scsu_window_position <<= 8;
3910 scsu_window_position |= byte_value3;
3911 scsu_window_position <<= 7;
3912 scsu_window_position += 0x00010000UL;
3913
3914 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3915
3916 in_unicode_mode = 0;
3917 }
3918 }
3919 else
3920 {
3921 if( ( byte_value1 == 0x00 )
3922 || ( byte_value1 == 0x09 )
3923 || ( byte_value1 == 0x0a )
3924 || ( byte_value1 == 0x0c )
3925 || ( byte_value1 == 0x0d )
3926 || ( ( byte_value1 >= 0x20 )
3927 && ( byte_value1 <= 0x7f ) ) )
3928 {
3929 unicode_character = byte_value1;
3930
3931 unicode_character_set = 1;
3932 }
3933 /* SQn tags
3934 */
3935 else if( ( byte_value1 >= 0x01 )
3936 && ( byte_value1 <= 0x08 ) )
3937 {
3938 if( scsu_stream_index >= scsu_stream_size )
3939 {
3940 libcerror_error_set(
3941 error,
3942 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3943 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3944 "%s: SCSU stream too small.",
3945 function );
3946
3947 return( -1 );
3948 }
3949 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3950
3951 unicode_character = byte_value2;
3952
3953 if( byte_value2 < 0x80 )
3954 {
3955 unicode_character += libuna_scsu_static_window_positions[ byte_value1 - 0x01 ];
3956 }
3957 else
3958 {
3959 unicode_character -= 0x80;
3960 unicode_character += scsu_dynamic_window_positions[ byte_value1 - 0x01 ];
3961 }
3962 unicode_character_set = 1;
3963 }
3964 /* SDX tag
3965 */
3966 else if( byte_value1 == 0x0b )
3967 {
3968 if( ( scsu_stream_size < 2 )
3969 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3970 {
3971 libcerror_error_set(
3972 error,
3973 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3974 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3975 "%s: SCSU stream too small.",
3976 function );
3977
3978 return( -1 );
3979 }
3980 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3981 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3982
3983 dynamic_window_position_index = byte_value2 >> 5;
3984 scsu_window_position = byte_value2 & 0x1f;
3985 scsu_window_position <<= 8;
3986 scsu_window_position |= byte_value3;
3987 scsu_window_position <<= 7;
3988 scsu_window_position += 0x00010000UL;
3989
3990 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3991 }
3992 /* SQU tag
3993 */
3994 else if( byte_value1 == 0x0e )
3995 {
3996 if( ( scsu_stream_size < 2 )
3997 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3998 {
3999 libcerror_error_set(
4000 error,
4001 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4002 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4003 "%s: SCSU stream too small.",
4004 function );
4005
4006 return( -1 );
4007 }
4008 byte_value2 = scsu_stream[ scsu_stream_index++ ];
4009 byte_value3 = scsu_stream[ scsu_stream_index++ ];
4010
4011 unicode_character = byte_value2;
4012 unicode_character <<= 8;
4013 unicode_character |= byte_value3;
4014
4015 unicode_character_set = 1;
4016 }
4017 /* SCU tag
4018 */
4019 else if( byte_value1 == 0x0f )
4020 {
4021 in_unicode_mode = 1;
4022 }
4023 /* SCn tags
4024 */
4025 else if( ( byte_value1 >= 0x10 )
4026 && ( byte_value1 <= 0x17 ) )
4027 {
4028 dynamic_window_position_index = byte_value1 - 0x10;
4029 }
4030 /* SDn tags
4031 */
4032 else if( ( byte_value1 >= 0x18 )
4033 && ( byte_value1 <= 0x1f ) )
4034 {
4035 if( scsu_stream_index >= scsu_stream_size )
4036 {
4037 libcerror_error_set(
4038 error,
4039 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4040 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4041 "%s: SCSU stream too small.",
4042 function );
4043
4044 return( -1 );
4045 }
4046 byte_value2 = scsu_stream[ scsu_stream_index++ ];
4047
4048 dynamic_window_position_index = byte_value1 - 0x18;
4049 scsu_window_position = libuna_scsu_window_offset_table[ byte_value2 ];
4050
4051 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
4052 }
4053 else if( byte_value1 >= 0x80 )
4054 {
4055 unicode_character = byte_value1 - 0x80;
4056 unicode_character += scsu_dynamic_window_positions[ dynamic_window_position_index ];
4057
4058 unicode_character_set = 1;
4059 }
4060 }
4061 if( unicode_character_set != 0 )
4062 {
4063 /* Convert the Unicode character into UTF-8 character bytes
4064 */
4065 if( libuna_unicode_character_copy_to_utf8(
4066 unicode_character,
4067 utf8_string,
4068 utf8_string_size,
4069 utf8_string_index,
4070 error ) != 1 )
4071 {
4072 libcerror_error_set(
4073 error,
4074 LIBCERROR_ERROR_DOMAIN_CONVERSION,
4075 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
4076 "%s: unable to copy Unicode character to UTF-8.",
4077 function );
4078
4079 return( -1 );
4080 }
4081 if( unicode_character == 0 )
4082 {
4083 break;
4084 }
4085 }
4086 }
4087 /* Check if the string is terminated with an end-of-string character
4088 */
4089 if( unicode_character != 0 )
4090 {
4091 if( *utf8_string_index >= utf8_string_size )
4092 {
4093 libcerror_error_set(
4094 error,
4095 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4096 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4097 "%s: UTF-8 string too small.",
4098 function );
4099
4100 return( -1 );
4101 }
4102 utf8_string[ *utf8_string_index ] = 0;
4103
4104 *utf8_string_index += 1;
4105 }
4106 return( 1 );
4107 }
4108
4109