1 /*
2 * UTF-32 string functions
3 *
4 * Copyright (C) 2008-2020, Joachim Metz <joachim.metz@gmail.com>
5 *
6 * Refer to AUTHORS for acknowledgements.
7 *
8 * This program is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU Lesser General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this program. If not, see <https://www.gnu.org/licenses/>.
20 */
21
22 #include <common.h>
23 #include <types.h>
24
25 #include "libuna_definitions.h"
26 #include "libuna_libcerror.h"
27 #include "libuna_scsu.h"
28 #include "libuna_types.h"
29 #include "libuna_unicode_character.h"
30 #include "libuna_utf32_string.h"
31
32 /* Determines the size of an UTF-32 string from a byte stream
33 * Returns 1 if successful or -1 on error
34 */
libuna_utf32_string_size_from_byte_stream(const uint8_t * byte_stream,size_t byte_stream_size,int codepage,size_t * utf32_string_size,libcerror_error_t ** error)35 int libuna_utf32_string_size_from_byte_stream(
36 const uint8_t *byte_stream,
37 size_t byte_stream_size,
38 int codepage,
39 size_t *utf32_string_size,
40 libcerror_error_t **error )
41 {
42 static char *function = "libuna_utf32_string_size_from_byte_stream";
43 size_t byte_stream_index = 0;
44 libuna_unicode_character_t unicode_character = 0;
45
46 if( byte_stream == NULL )
47 {
48 libcerror_error_set(
49 error,
50 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
51 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
52 "%s: invalid byte stream.",
53 function );
54
55 return( -1 );
56 }
57 if( byte_stream_size > (size_t) SSIZE_MAX )
58 {
59 libcerror_error_set(
60 error,
61 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
62 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
63 "%s: invalid byte stream size value exceeds maximum.",
64 function );
65
66 return( -1 );
67 }
68 if( utf32_string_size == NULL )
69 {
70 libcerror_error_set(
71 error,
72 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
73 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
74 "%s: invalid UTF-32 string size.",
75 function );
76
77 return( -1 );
78 }
79 *utf32_string_size = 0;
80
81 if( byte_stream_size == 0 )
82 {
83 return( 1 );
84 }
85 while( byte_stream_index < byte_stream_size )
86 {
87 /* Convert the byte stream bytes into an Unicode character
88 */
89 if( libuna_unicode_character_copy_from_byte_stream(
90 &unicode_character,
91 byte_stream,
92 byte_stream_size,
93 &byte_stream_index,
94 codepage,
95 error ) != 1 )
96 {
97 libcerror_error_set(
98 error,
99 LIBCERROR_ERROR_DOMAIN_CONVERSION,
100 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
101 "%s: unable to copy Unicode character from byte stream.",
102 function );
103
104 return( -1 );
105 }
106 /* Determine how many UTF-32 character byte double words are required
107 */
108 if( libuna_unicode_character_size_to_utf32(
109 unicode_character,
110 utf32_string_size,
111 error ) != 1 )
112 {
113 libcerror_error_set(
114 error,
115 LIBCERROR_ERROR_DOMAIN_CONVERSION,
116 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
117 "%s: unable to unable to determine size of Unicode character in UTF-32.",
118 function );
119
120 return( -1 );
121 }
122 }
123 /* Check if the string is terminated with an end-of-string character
124 */
125 if( unicode_character != 0 )
126 {
127 *utf32_string_size += 1;
128 }
129 return( 1 );
130 }
131
132 /* Copies an UTF-32 string from a byte stream
133 * Returns 1 if successful or -1 on error
134 */
libuna_utf32_string_copy_from_byte_stream(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,const uint8_t * byte_stream,size_t byte_stream_size,int codepage,libcerror_error_t ** error)135 int libuna_utf32_string_copy_from_byte_stream(
136 libuna_utf32_character_t *utf32_string,
137 size_t utf32_string_size,
138 const uint8_t *byte_stream,
139 size_t byte_stream_size,
140 int codepage,
141 libcerror_error_t **error )
142 {
143 static char *function = "libuna_utf32_string_copy_from_byte_stream";
144 size_t utf32_string_index = 0;
145
146 if( libuna_utf32_string_with_index_copy_from_byte_stream(
147 utf32_string,
148 utf32_string_size,
149 &utf32_string_index,
150 byte_stream,
151 byte_stream_size,
152 codepage,
153 error ) != 1 )
154 {
155 libcerror_error_set(
156 error,
157 LIBCERROR_ERROR_DOMAIN_RUNTIME,
158 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
159 "%s: unable to copy byte stream to UTF-32 string.",
160 function );
161
162 return( -1 );
163 }
164 return( 1 );
165 }
166
167 /* Copies an UTF-32 string from a byte stream
168 * Returns 1 if successful or -1 on error
169 */
libuna_utf32_string_with_index_copy_from_byte_stream(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,size_t * utf32_string_index,const uint8_t * byte_stream,size_t byte_stream_size,int codepage,libcerror_error_t ** error)170 int libuna_utf32_string_with_index_copy_from_byte_stream(
171 libuna_utf32_character_t *utf32_string,
172 size_t utf32_string_size,
173 size_t *utf32_string_index,
174 const uint8_t *byte_stream,
175 size_t byte_stream_size,
176 int codepage,
177 libcerror_error_t **error )
178 {
179 static char *function = "libuna_utf32_string_with_index_copy_from_byte_stream";
180 size_t byte_stream_index = 0;
181 libuna_unicode_character_t unicode_character = 0;
182
183 if( utf32_string == NULL )
184 {
185 libcerror_error_set(
186 error,
187 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
188 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
189 "%s: invalid UTF-32 string.",
190 function );
191
192 return( -1 );
193 }
194 if( utf32_string_size > (size_t) SSIZE_MAX )
195 {
196 libcerror_error_set(
197 error,
198 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
199 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
200 "%s: invalid UTF-32 string size value exceeds maximum.",
201 function );
202
203 return( -1 );
204 }
205 if( utf32_string_index == NULL )
206 {
207 libcerror_error_set(
208 error,
209 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
210 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
211 "%s: invalid UTF-32 string index.",
212 function );
213
214 return( -1 );
215 }
216 if( byte_stream == NULL )
217 {
218 libcerror_error_set(
219 error,
220 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
221 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
222 "%s: invalid byte stream.",
223 function );
224
225 return( -1 );
226 }
227 if( byte_stream_size > (size_t) SSIZE_MAX )
228 {
229 libcerror_error_set(
230 error,
231 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
232 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
233 "%s: invalid byte stream size value exceeds maximum.",
234 function );
235
236 return( -1 );
237 }
238 if( byte_stream_size == 0 )
239 {
240 libcerror_error_set(
241 error,
242 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
243 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
244 "%s: missing byte stream value.",
245 function );
246
247 return( -1 );
248 }
249 while( byte_stream_index < byte_stream_size )
250 {
251 /* Convert the byte stream bytes into an Unicode character
252 */
253 if( libuna_unicode_character_copy_from_byte_stream(
254 &unicode_character,
255 byte_stream,
256 byte_stream_size,
257 &byte_stream_index,
258 codepage,
259 error ) != 1 )
260 {
261 libcerror_error_set(
262 error,
263 LIBCERROR_ERROR_DOMAIN_CONVERSION,
264 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
265 "%s: unable to copy Unicode character from byte stream.",
266 function );
267
268 return( -1 );
269 }
270 /* Convert the Unicode character into UTF-32 character byte double words
271 */
272 if( libuna_unicode_character_copy_to_utf32(
273 unicode_character,
274 utf32_string,
275 utf32_string_size,
276 utf32_string_index,
277 error ) != 1 )
278 {
279 libcerror_error_set(
280 error,
281 LIBCERROR_ERROR_DOMAIN_CONVERSION,
282 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
283 "%s: unable to copy Unicode character to UTF-32.",
284 function );
285
286 return( -1 );
287 }
288 }
289 /* Check if the string is terminated with an end-of-string character
290 */
291 if( unicode_character != 0 )
292 {
293 if( *utf32_string_index >= utf32_string_size )
294 {
295 libcerror_error_set(
296 error,
297 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
298 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
299 "%s: UTF-32 string too small.",
300 function );
301
302 return( -1 );
303 }
304 utf32_string[ *utf32_string_index ] = 0;
305
306 *utf32_string_index += 1;
307 }
308 return( 1 );
309 }
310
311 /* Compares an UTF-32 string with a byte stream
312 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
313 */
libuna_utf32_string_compare_with_byte_stream(const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,const uint8_t * byte_stream,size_t byte_stream_size,int codepage,libcerror_error_t ** error)314 int libuna_utf32_string_compare_with_byte_stream(
315 const libuna_utf32_character_t *utf32_string,
316 size_t utf32_string_size,
317 const uint8_t *byte_stream,
318 size_t byte_stream_size,
319 int codepage,
320 libcerror_error_t **error )
321 {
322 static char *function = "libuna_utf32_string_compare_with_byte_stream";
323 size_t byte_stream_index = 0;
324 size_t utf32_string_index = 0;
325 libuna_unicode_character_t utf32_unicode_character = 0;
326 libuna_unicode_character_t byte_stream_unicode_character = 0;
327
328 if( utf32_string == NULL )
329 {
330 libcerror_error_set(
331 error,
332 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
333 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
334 "%s: invalid UTF-32 string.",
335 function );
336
337 return( -1 );
338 }
339 if( utf32_string_size > (size_t) SSIZE_MAX )
340 {
341 libcerror_error_set(
342 error,
343 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
344 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
345 "%s: invalid UTF-32 string size value exceeds maximum.",
346 function );
347
348 return( -1 );
349 }
350 if( byte_stream == NULL )
351 {
352 libcerror_error_set(
353 error,
354 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
355 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
356 "%s: invalid byte stream.",
357 function );
358
359 return( -1 );
360 }
361 if( byte_stream_size > (size_t) SSIZE_MAX )
362 {
363 libcerror_error_set(
364 error,
365 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
366 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
367 "%s: invalid byte stream size value exceeds maximum.",
368 function );
369
370 return( -1 );
371 }
372 if( byte_stream_size == 0 )
373 {
374 libcerror_error_set(
375 error,
376 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
377 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
378 "%s: missing byte stream value.",
379 function );
380
381 return( -1 );
382 }
383 if( ( utf32_string_size >= 1 )
384 && ( utf32_string[ utf32_string_size - 1 ] == 0 ) )
385 {
386 utf32_string_size -= 1;
387 }
388 /* Check if the byte stream is terminated with zero bytes
389 */
390 if( ( byte_stream_size >= 1 )
391 && ( byte_stream[ byte_stream_size - 1 ] == 0 ) )
392 {
393 byte_stream_size -= 1;
394 }
395 while( ( utf32_string_index < utf32_string_size )
396 && ( byte_stream_index < byte_stream_size ) )
397 {
398 /* Convert the UTF-32 character bytes into an Unicode character
399 */
400 if( libuna_unicode_character_copy_from_utf32(
401 &utf32_unicode_character,
402 utf32_string,
403 utf32_string_size,
404 &utf32_string_index,
405 error ) != 1 )
406 {
407 libcerror_error_set(
408 error,
409 LIBCERROR_ERROR_DOMAIN_CONVERSION,
410 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
411 "%s: unable to copy Unicode character from UTF-32.",
412 function );
413
414 return( -1 );
415 }
416 /* Convert the byte stream bytes into an Unicode character
417 */
418 if( libuna_unicode_character_copy_from_byte_stream(
419 &byte_stream_unicode_character,
420 byte_stream,
421 byte_stream_size,
422 &byte_stream_index,
423 codepage,
424 error ) != 1 )
425 {
426 libcerror_error_set(
427 error,
428 LIBCERROR_ERROR_DOMAIN_CONVERSION,
429 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
430 "%s: unable to copy Unicode character from byte stream.",
431 function );
432
433 return( -1 );
434 }
435 if( utf32_unicode_character < byte_stream_unicode_character )
436 {
437 return( LIBUNA_COMPARE_LESS );
438 }
439 else if( utf32_unicode_character > byte_stream_unicode_character )
440 {
441 return( LIBUNA_COMPARE_GREATER );
442 }
443 }
444 /* Check if both strings were entirely processed
445 */
446 if( utf32_string_index < utf32_string_size )
447 {
448 return( LIBUNA_COMPARE_GREATER );
449 }
450 else if( byte_stream_index < byte_stream_size )
451 {
452 return( LIBUNA_COMPARE_LESS );
453 }
454 return( LIBUNA_COMPARE_EQUAL );
455 }
456
457 /* Determines the size of an UTF-32 string from an UTF-7 stream
458 * Returns 1 if successful or -1 on error
459 */
libuna_utf32_string_size_from_utf7_stream(const uint8_t * utf7_stream,size_t utf7_stream_size,size_t * utf32_string_size,libcerror_error_t ** error)460 int libuna_utf32_string_size_from_utf7_stream(
461 const uint8_t *utf7_stream,
462 size_t utf7_stream_size,
463 size_t *utf32_string_size,
464 libcerror_error_t **error )
465 {
466 static char *function = "libuna_utf32_string_size_from_utf7_stream";
467 size_t utf7_stream_index = 0;
468 libuna_unicode_character_t unicode_character = 0;
469 uint32_t utf7_stream_base64_data = 0;
470
471 if( utf7_stream == NULL )
472 {
473 libcerror_error_set(
474 error,
475 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
476 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
477 "%s: invalid UTF-7 stream.",
478 function );
479
480 return( -1 );
481 }
482 if( utf7_stream_size > (size_t) SSIZE_MAX )
483 {
484 libcerror_error_set(
485 error,
486 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
487 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
488 "%s: invalid UTF-7 stream size value exceeds maximum.",
489 function );
490
491 return( -1 );
492 }
493 if( utf32_string_size == NULL )
494 {
495 libcerror_error_set(
496 error,
497 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
498 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
499 "%s: invalid UTF-32 string size.",
500 function );
501
502 return( -1 );
503 }
504 *utf32_string_size = 0;
505
506 if( utf7_stream_size == 0 )
507 {
508 return( 1 );
509 }
510 while( utf7_stream_index < utf7_stream_size )
511 {
512 /* Convert the UTF-7 stream bytes into an Unicode character
513 */
514 if( libuna_unicode_character_copy_from_utf7_stream(
515 &unicode_character,
516 utf7_stream,
517 utf7_stream_size,
518 &utf7_stream_index,
519 &utf7_stream_base64_data,
520 error ) != 1 )
521 {
522 libcerror_error_set(
523 error,
524 LIBCERROR_ERROR_DOMAIN_CONVERSION,
525 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
526 "%s: unable to copy Unicode character from UTF-7 stream.",
527 function );
528
529 return( -1 );
530 }
531 /* Determine how many UTF-32 character bytes are required
532 */
533 if( libuna_unicode_character_size_to_utf32(
534 unicode_character,
535 utf32_string_size,
536 error ) != 1 )
537 {
538 libcerror_error_set(
539 error,
540 LIBCERROR_ERROR_DOMAIN_CONVERSION,
541 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
542 "%s: unable to unable to determine size of Unicode character in UTF-32.",
543 function );
544
545 return( -1 );
546 }
547 }
548 /* Check if the string is terminated with an end-of-string character
549 */
550 if( unicode_character != 0 )
551 {
552 *utf32_string_size += 1;
553 }
554 return( 1 );
555 }
556
557 /* Copies an UTF-32 string from an UTF-7 stream
558 * Returns 1 if successful or -1 on error
559 */
libuna_utf32_string_copy_from_utf7_stream(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,const uint8_t * utf7_stream,size_t utf7_stream_size,libcerror_error_t ** error)560 int libuna_utf32_string_copy_from_utf7_stream(
561 libuna_utf32_character_t *utf32_string,
562 size_t utf32_string_size,
563 const uint8_t *utf7_stream,
564 size_t utf7_stream_size,
565 libcerror_error_t **error )
566 {
567 static char *function = "libuna_utf32_string_copy_from_utf7_stream";
568 size_t utf32_string_index = 0;
569
570 if( libuna_utf32_string_with_index_copy_from_utf7_stream(
571 utf32_string,
572 utf32_string_size,
573 &utf32_string_index,
574 utf7_stream,
575 utf7_stream_size,
576 error ) != 1 )
577 {
578 libcerror_error_set(
579 error,
580 LIBCERROR_ERROR_DOMAIN_RUNTIME,
581 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
582 "%s: unable to UTF-7 stream to UTF-32 string.",
583 function );
584
585 return( -1 );
586 }
587 return( 1 );
588 }
589
590 /* Copies an UTF-32 string from an UTF-7 stream
591 * Returns 1 if successful or -1 on error
592 */
libuna_utf32_string_with_index_copy_from_utf7_stream(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,size_t * utf32_string_index,const uint8_t * utf7_stream,size_t utf7_stream_size,libcerror_error_t ** error)593 int libuna_utf32_string_with_index_copy_from_utf7_stream(
594 libuna_utf32_character_t *utf32_string,
595 size_t utf32_string_size,
596 size_t *utf32_string_index,
597 const uint8_t *utf7_stream,
598 size_t utf7_stream_size,
599 libcerror_error_t **error )
600 {
601 static char *function = "libuna_utf32_string_with_index_copy_from_utf7_stream";
602 size_t utf7_stream_index = 0;
603 libuna_unicode_character_t unicode_character = 0;
604 uint32_t utf7_stream_base64_data = 0;
605
606 if( utf32_string == NULL )
607 {
608 libcerror_error_set(
609 error,
610 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
611 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
612 "%s: invalid UTF-32 string.",
613 function );
614
615 return( -1 );
616 }
617 if( utf32_string_size > (size_t) SSIZE_MAX )
618 {
619 libcerror_error_set(
620 error,
621 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
622 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
623 "%s: invalid UTF-32 string size value exceeds maximum.",
624 function );
625
626 return( -1 );
627 }
628 if( utf32_string_index == NULL )
629 {
630 libcerror_error_set(
631 error,
632 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
633 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
634 "%s: invalid UTF-32 string index.",
635 function );
636
637 return( -1 );
638 }
639 if( utf7_stream == NULL )
640 {
641 libcerror_error_set(
642 error,
643 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
644 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
645 "%s: invalid UTF-7 stream.",
646 function );
647
648 return( -1 );
649 }
650 if( utf7_stream_size > (size_t) SSIZE_MAX )
651 {
652 libcerror_error_set(
653 error,
654 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
655 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
656 "%s: invalid UTF-7 stream size value exceeds maximum.",
657 function );
658
659 return( -1 );
660 }
661 if( utf7_stream_size == 0 )
662 {
663 libcerror_error_set(
664 error,
665 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
666 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
667 "%s: missing UTF-7 stream value.",
668 function );
669
670 return( -1 );
671 }
672 while( utf7_stream_index < utf7_stream_size )
673 {
674 /* Convert the UTF-7 stream bytes into an Unicode character
675 */
676 if( libuna_unicode_character_copy_from_utf7_stream(
677 &unicode_character,
678 utf7_stream,
679 utf7_stream_size,
680 &utf7_stream_index,
681 &utf7_stream_base64_data,
682 error ) != 1 )
683 {
684 libcerror_error_set(
685 error,
686 LIBCERROR_ERROR_DOMAIN_CONVERSION,
687 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
688 "%s: unable to copy Unicode character from UTF-7 stream.",
689 function );
690
691 return( -1 );
692 }
693 /* Convert the Unicode character into UTF-32 character bytes
694 */
695 if( libuna_unicode_character_copy_to_utf32(
696 unicode_character,
697 utf32_string,
698 utf32_string_size,
699 utf32_string_index,
700 error ) != 1 )
701 {
702 libcerror_error_set(
703 error,
704 LIBCERROR_ERROR_DOMAIN_CONVERSION,
705 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
706 "%s: unable to copy Unicode character to UTF-32.",
707 function );
708
709 return( -1 );
710 }
711 }
712 /* Check if the string is terminated with an end-of-string character
713 */
714 if( unicode_character != 0 )
715 {
716 if( *utf32_string_index >= utf32_string_size )
717 {
718 libcerror_error_set(
719 error,
720 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
721 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
722 "%s: UTF-32 string too small.",
723 function );
724
725 return( -1 );
726 }
727 utf32_string[ *utf32_string_index ] = 0;
728
729 *utf32_string_index += 1;
730 }
731 return( 1 );
732 }
733
734 /* Compares an UTF-32 string with an UTF-7 stream
735 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
736 */
libuna_utf32_string_compare_with_utf7_stream(const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,const uint8_t * utf7_stream,size_t utf7_stream_size,libcerror_error_t ** error)737 int libuna_utf32_string_compare_with_utf7_stream(
738 const libuna_utf32_character_t *utf32_string,
739 size_t utf32_string_size,
740 const uint8_t *utf7_stream,
741 size_t utf7_stream_size,
742 libcerror_error_t **error )
743 {
744 static char *function = "libuna_utf32_string_compare_with_utf7_stream";
745 size_t utf32_string_index = 0;
746 size_t utf7_stream_index = 0;
747 libuna_unicode_character_t utf32_unicode_character = 0;
748 libuna_unicode_character_t utf7_stream_unicode_character = 0;
749 uint32_t utf7_stream_base64_data = 0;
750
751 if( utf32_string == NULL )
752 {
753 libcerror_error_set(
754 error,
755 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
756 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
757 "%s: invalid UTF-32 string.",
758 function );
759
760 return( -1 );
761 }
762 if( utf32_string_size > (size_t) SSIZE_MAX )
763 {
764 libcerror_error_set(
765 error,
766 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
767 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
768 "%s: invalid UTF-32 string size value exceeds maximum.",
769 function );
770
771 return( -1 );
772 }
773 if( utf7_stream == NULL )
774 {
775 libcerror_error_set(
776 error,
777 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
778 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
779 "%s: invalid UTF-7 stream.",
780 function );
781
782 return( -1 );
783 }
784 if( utf7_stream_size > (size_t) SSIZE_MAX )
785 {
786 libcerror_error_set(
787 error,
788 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
789 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
790 "%s: invalid UTF-7 stream size value exceeds maximum.",
791 function );
792
793 return( -1 );
794 }
795 if( utf7_stream_size == 0 )
796 {
797 libcerror_error_set(
798 error,
799 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
800 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
801 "%s: missing UTF-7 stream value.",
802 function );
803
804 return( -1 );
805 }
806 if( ( utf32_string_size >= 1 )
807 && ( utf32_string[ utf32_string_size - 1 ] == 0 ) )
808 {
809 utf32_string_size -= 1;
810 }
811 /* Check if the UTF-7 stream is terminated with zero bytes
812 */
813 if( ( utf7_stream_size >= 1 )
814 && ( utf7_stream[ utf7_stream_size - 1 ] == 0 ) )
815 {
816 utf7_stream_size -= 1;
817 }
818 while( ( utf32_string_index < utf32_string_size )
819 && ( utf7_stream_index < utf7_stream_size ) )
820 {
821 /* Convert the UTF-32 character bytes into an Unicode character
822 */
823 if( libuna_unicode_character_copy_from_utf32(
824 &utf32_unicode_character,
825 utf32_string,
826 utf32_string_size,
827 &utf32_string_index,
828 error ) != 1 )
829 {
830 libcerror_error_set(
831 error,
832 LIBCERROR_ERROR_DOMAIN_CONVERSION,
833 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
834 "%s: unable to copy Unicode character from UTF-32.",
835 function );
836
837 return( -1 );
838 }
839 /* Convert the UTF-7 character bytes into an Unicode character
840 */
841 if( libuna_unicode_character_copy_from_utf7_stream(
842 &utf7_stream_unicode_character,
843 utf7_stream,
844 utf7_stream_size,
845 &utf7_stream_index,
846 &utf7_stream_base64_data,
847 error ) != 1 )
848 {
849 libcerror_error_set(
850 error,
851 LIBCERROR_ERROR_DOMAIN_CONVERSION,
852 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
853 "%s: unable to copy Unicode character from UTF-7 stream.",
854 function );
855
856 return( -1 );
857 }
858 if( utf32_unicode_character < utf7_stream_unicode_character )
859 {
860 return( LIBUNA_COMPARE_LESS );
861 }
862 else if( utf32_unicode_character > utf7_stream_unicode_character )
863 {
864 return( LIBUNA_COMPARE_GREATER );
865 }
866 }
867 /* Check if both strings were entirely processed
868 */
869 if( utf32_string_index < utf32_string_size )
870 {
871 return( LIBUNA_COMPARE_GREATER );
872 }
873 else if( utf7_stream_index < utf7_stream_size )
874 {
875 return( LIBUNA_COMPARE_LESS );
876 }
877 return( LIBUNA_COMPARE_EQUAL );
878 }
879
880 /* Determines the size of an UTF-32 string from an UTF-8 string
881 * Returns 1 if successful or -1 on error
882 */
libuna_utf32_string_size_from_utf8(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf32_string_size,libcerror_error_t ** error)883 int libuna_utf32_string_size_from_utf8(
884 const libuna_utf8_character_t *utf8_string,
885 size_t utf8_string_size,
886 size_t *utf32_string_size,
887 libcerror_error_t **error )
888 {
889 static char *function = "libuna_utf32_string_size_from_utf8";
890 size_t utf8_string_index = 0;
891 libuna_unicode_character_t unicode_character = 0;
892
893 if( utf8_string == NULL )
894 {
895 libcerror_error_set(
896 error,
897 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
898 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
899 "%s: invalid UTF-8 string.",
900 function );
901
902 return( -1 );
903 }
904 if( utf8_string_size > (size_t) SSIZE_MAX )
905 {
906 libcerror_error_set(
907 error,
908 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
909 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
910 "%s: invalid UTF-8 string size value exceeds maximum.",
911 function );
912
913 return( -1 );
914 }
915 if( utf32_string_size == NULL )
916 {
917 libcerror_error_set(
918 error,
919 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
920 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
921 "%s: invalid UTF-32 string size.",
922 function );
923
924 return( -1 );
925 }
926 *utf32_string_size = 0;
927
928 if( utf8_string_size == 0 )
929 {
930 return( 1 );
931 }
932 while( utf8_string_index < utf8_string_size )
933 {
934 /* Convert the UTF-8 character bytes into an Unicode character
935 */
936 if( libuna_unicode_character_copy_from_utf8(
937 &unicode_character,
938 utf8_string,
939 utf8_string_size,
940 &utf8_string_index,
941 error ) != 1 )
942 {
943 libcerror_error_set(
944 error,
945 LIBCERROR_ERROR_DOMAIN_CONVERSION,
946 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
947 "%s: unable to copy Unicode character from UTF-8.",
948 function );
949
950 return( -1 );
951 }
952 /* Determine how many UTF-32 character bytes are required
953 */
954 if( libuna_unicode_character_size_to_utf32(
955 unicode_character,
956 utf32_string_size,
957 error ) != 1 )
958 {
959 libcerror_error_set(
960 error,
961 LIBCERROR_ERROR_DOMAIN_CONVERSION,
962 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
963 "%s: unable to unable to determine size of Unicode character in UTF-32.",
964 function );
965
966 return( -1 );
967 }
968 }
969 /* Check if the string is terminated with an end-of-string character
970 */
971 if( unicode_character != 0 )
972 {
973 *utf32_string_size += 1;
974 }
975 return( 1 );
976 }
977
978 /* Copies an UTF-32 string from an UTF-8 string
979 * Returns 1 if successful or -1 on error
980 */
libuna_utf32_string_copy_from_utf8(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,libcerror_error_t ** error)981 int libuna_utf32_string_copy_from_utf8(
982 libuna_utf32_character_t *utf32_string,
983 size_t utf32_string_size,
984 const libuna_utf8_character_t *utf8_string,
985 size_t utf8_string_size,
986 libcerror_error_t **error )
987 {
988 static char *function = "libuna_utf32_string_copy_from_utf8";
989 size_t utf32_string_index = 0;
990
991 if( libuna_utf32_string_with_index_copy_from_utf8(
992 utf32_string,
993 utf32_string_size,
994 &utf32_string_index,
995 utf8_string,
996 utf8_string_size,
997 error ) != 1 )
998 {
999 libcerror_error_set(
1000 error,
1001 LIBCERROR_ERROR_DOMAIN_RUNTIME,
1002 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
1003 "%s: unable to copy UTF-8 string to UTF-32 string.",
1004 function );
1005
1006 return( -1 );
1007 }
1008 return( 1 );
1009 }
1010
1011 /* Copies an UTF-32 string from an UTF-8 string
1012 * Returns 1 if successful or -1 on error
1013 */
libuna_utf32_string_with_index_copy_from_utf8(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,size_t * utf32_string_index,const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,libcerror_error_t ** error)1014 int libuna_utf32_string_with_index_copy_from_utf8(
1015 libuna_utf32_character_t *utf32_string,
1016 size_t utf32_string_size,
1017 size_t *utf32_string_index,
1018 const libuna_utf8_character_t *utf8_string,
1019 size_t utf8_string_size,
1020 libcerror_error_t **error )
1021 {
1022 static char *function = "libuna_utf32_string_with_index_copy_from_utf8";
1023 size_t utf8_string_index = 0;
1024 libuna_unicode_character_t unicode_character = 0;
1025
1026 if( utf32_string == NULL )
1027 {
1028 libcerror_error_set(
1029 error,
1030 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1031 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1032 "%s: invalid UTF-32 string.",
1033 function );
1034
1035 return( -1 );
1036 }
1037 if( utf32_string_size > (size_t) SSIZE_MAX )
1038 {
1039 libcerror_error_set(
1040 error,
1041 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1042 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1043 "%s: invalid UTF-32 string size value exceeds maximum.",
1044 function );
1045
1046 return( -1 );
1047 }
1048 if( utf32_string_index == NULL )
1049 {
1050 libcerror_error_set(
1051 error,
1052 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1053 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1054 "%s: invalid UTF-32 string index.",
1055 function );
1056
1057 return( -1 );
1058 }
1059 if( utf8_string == NULL )
1060 {
1061 libcerror_error_set(
1062 error,
1063 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1064 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1065 "%s: invalid UTF-8 string.",
1066 function );
1067
1068 return( -1 );
1069 }
1070 if( utf8_string_size > (size_t) SSIZE_MAX )
1071 {
1072 libcerror_error_set(
1073 error,
1074 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1075 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1076 "%s: invalid UTF-8 string size value exceeds maximum.",
1077 function );
1078
1079 return( -1 );
1080 }
1081 if( utf8_string_size == 0 )
1082 {
1083 libcerror_error_set(
1084 error,
1085 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1086 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1087 "%s: missing UTF-8 string value.",
1088 function );
1089
1090 return( -1 );
1091 }
1092 while( utf8_string_index < utf8_string_size )
1093 {
1094 /* Convert the UTF-8 character bytes into an Unicode character
1095 */
1096 if( libuna_unicode_character_copy_from_utf8(
1097 &unicode_character,
1098 utf8_string,
1099 utf8_string_size,
1100 &utf8_string_index,
1101 error ) != 1 )
1102 {
1103 libcerror_error_set(
1104 error,
1105 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1106 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1107 "%s: unable to copy Unicode character from UTF-8.",
1108 function );
1109
1110 return( -1 );
1111 }
1112 /* Convert the Unicode character into UTF-32 character bytes
1113 */
1114 if( libuna_unicode_character_copy_to_utf32(
1115 unicode_character,
1116 utf32_string,
1117 utf32_string_size,
1118 utf32_string_index,
1119 error ) != 1 )
1120 {
1121 libcerror_error_set(
1122 error,
1123 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1124 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1125 "%s: unable to copy Unicode character to UTF-32.",
1126 function );
1127
1128 return( -1 );
1129 }
1130 }
1131 /* Check if the string is terminated with an end-of-string character
1132 */
1133 if( unicode_character != 0 )
1134 {
1135 if( *utf32_string_index >= utf32_string_size )
1136 {
1137 libcerror_error_set(
1138 error,
1139 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1140 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
1141 "%s: UTF-32 string too small.",
1142 function );
1143
1144 return( -1 );
1145 }
1146 utf32_string[ *utf32_string_index ] = 0;
1147
1148 *utf32_string_index += 1;
1149 }
1150 return( 1 );
1151 }
1152
1153 /* Determines the size of an UTF-32 string from an UTF-8 stream
1154 * Returns 1 if successful or -1 on error
1155 */
libuna_utf32_string_size_from_utf8_stream(const uint8_t * utf8_stream,size_t utf8_stream_size,size_t * utf32_string_size,libcerror_error_t ** error)1156 int libuna_utf32_string_size_from_utf8_stream(
1157 const uint8_t *utf8_stream,
1158 size_t utf8_stream_size,
1159 size_t *utf32_string_size,
1160 libcerror_error_t **error )
1161 {
1162 static char *function = "libuna_utf32_string_size_from_utf8_stream";
1163 size_t utf8_stream_index = 0;
1164 libuna_unicode_character_t unicode_character = 0;
1165
1166 if( utf8_stream == NULL )
1167 {
1168 libcerror_error_set(
1169 error,
1170 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1171 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1172 "%s: invalid UTF-8 stream.",
1173 function );
1174
1175 return( -1 );
1176 }
1177 if( utf8_stream_size > (size_t) SSIZE_MAX )
1178 {
1179 libcerror_error_set(
1180 error,
1181 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1182 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1183 "%s: invalid UTF-8 stream size value exceeds maximum.",
1184 function );
1185
1186 return( -1 );
1187 }
1188 if( utf32_string_size == NULL )
1189 {
1190 libcerror_error_set(
1191 error,
1192 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1193 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1194 "%s: invalid UTF-32 string size.",
1195 function );
1196
1197 return( -1 );
1198 }
1199 *utf32_string_size = 0;
1200
1201 if( utf8_stream_size == 0 )
1202 {
1203 return( 1 );
1204 }
1205 /* Check if UTF-8 stream starts with a byte order mark (BOM)
1206 */
1207 if( utf8_stream_size >= 3 )
1208 {
1209 if( ( utf8_stream[ 0 ] == 0x0ef )
1210 && ( utf8_stream[ 1 ] == 0x0bb )
1211 && ( utf8_stream[ 2 ] == 0x0bf ) )
1212 {
1213 utf8_stream_index += 3;
1214 }
1215 }
1216 while( utf8_stream_index < utf8_stream_size )
1217 {
1218 /* Convert the UTF-8 stream bytes into an Unicode character
1219 */
1220 if( libuna_unicode_character_copy_from_utf8(
1221 &unicode_character,
1222 utf8_stream,
1223 utf8_stream_size,
1224 &utf8_stream_index,
1225 error ) != 1 )
1226 {
1227 libcerror_error_set(
1228 error,
1229 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1230 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1231 "%s: unable to copy Unicode character from UTF-8 stream.",
1232 function );
1233
1234 return( -1 );
1235 }
1236 /* Determine how many UTF-32 character bytes are required
1237 */
1238 if( libuna_unicode_character_size_to_utf32(
1239 unicode_character,
1240 utf32_string_size,
1241 error ) != 1 )
1242 {
1243 libcerror_error_set(
1244 error,
1245 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1246 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1247 "%s: unable to unable to determine size of Unicode character in UTF-32.",
1248 function );
1249
1250 return( -1 );
1251 }
1252 }
1253 /* Check if the string is terminated with an end-of-string character
1254 */
1255 if( unicode_character != 0 )
1256 {
1257 *utf32_string_size += 1;
1258 }
1259 return( 1 );
1260 }
1261
1262 /* Copies an UTF-32 string from an UTF-8 stream
1263 * Returns 1 if successful or -1 on error
1264 */
libuna_utf32_string_copy_from_utf8_stream(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,const uint8_t * utf8_stream,size_t utf8_stream_size,libcerror_error_t ** error)1265 int libuna_utf32_string_copy_from_utf8_stream(
1266 libuna_utf32_character_t *utf32_string,
1267 size_t utf32_string_size,
1268 const uint8_t *utf8_stream,
1269 size_t utf8_stream_size,
1270 libcerror_error_t **error )
1271 {
1272 static char *function = "libuna_utf32_string_copy_from_utf8_stream";
1273 size_t utf32_string_index = 0;
1274
1275 if( libuna_utf32_string_with_index_copy_from_utf8_stream(
1276 utf32_string,
1277 utf32_string_size,
1278 &utf32_string_index,
1279 utf8_stream,
1280 utf8_stream_size,
1281 error ) != 1 )
1282 {
1283 libcerror_error_set(
1284 error,
1285 LIBCERROR_ERROR_DOMAIN_RUNTIME,
1286 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
1287 "%s: unable to UTF-8 stream to UTF-32 string.",
1288 function );
1289
1290 return( -1 );
1291 }
1292 return( 1 );
1293 }
1294
1295 /* Copies an UTF-32 string from an UTF-8 stream
1296 * Returns 1 if successful or -1 on error
1297 */
libuna_utf32_string_with_index_copy_from_utf8_stream(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,size_t * utf32_string_index,const uint8_t * utf8_stream,size_t utf8_stream_size,libcerror_error_t ** error)1298 int libuna_utf32_string_with_index_copy_from_utf8_stream(
1299 libuna_utf32_character_t *utf32_string,
1300 size_t utf32_string_size,
1301 size_t *utf32_string_index,
1302 const uint8_t *utf8_stream,
1303 size_t utf8_stream_size,
1304 libcerror_error_t **error )
1305 {
1306 static char *function = "libuna_utf32_string_with_index_copy_from_utf8_stream";
1307 size_t utf8_stream_index = 0;
1308 libuna_unicode_character_t unicode_character = 0;
1309
1310 if( utf32_string == NULL )
1311 {
1312 libcerror_error_set(
1313 error,
1314 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1315 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1316 "%s: invalid UTF-32 string.",
1317 function );
1318
1319 return( -1 );
1320 }
1321 if( utf32_string_size > (size_t) SSIZE_MAX )
1322 {
1323 libcerror_error_set(
1324 error,
1325 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1326 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1327 "%s: invalid UTF-32 string size value exceeds maximum.",
1328 function );
1329
1330 return( -1 );
1331 }
1332 if( utf32_string_index == NULL )
1333 {
1334 libcerror_error_set(
1335 error,
1336 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1337 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1338 "%s: invalid UTF-32 string index.",
1339 function );
1340
1341 return( -1 );
1342 }
1343 if( utf8_stream == NULL )
1344 {
1345 libcerror_error_set(
1346 error,
1347 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1348 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1349 "%s: invalid UTF-8 stream.",
1350 function );
1351
1352 return( -1 );
1353 }
1354 if( utf8_stream_size > (size_t) SSIZE_MAX )
1355 {
1356 libcerror_error_set(
1357 error,
1358 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1359 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1360 "%s: invalid UTF-8 stream size value exceeds maximum.",
1361 function );
1362
1363 return( -1 );
1364 }
1365 if( utf8_stream_size == 0 )
1366 {
1367 libcerror_error_set(
1368 error,
1369 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1370 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1371 "%s: missing UTF-8 stream value.",
1372 function );
1373
1374 return( -1 );
1375 }
1376 /* Check if UTF-8 stream starts with a byte order mark (BOM)
1377 */
1378 if( utf8_stream_size >= 3 )
1379 {
1380 if( ( utf8_stream[ 0 ] == 0x0ef )
1381 && ( utf8_stream[ 1 ] == 0x0bb )
1382 && ( utf8_stream[ 2 ] == 0x0bf ) )
1383 {
1384 utf8_stream_index += 3;
1385 }
1386 }
1387 while( utf8_stream_index < utf8_stream_size )
1388 {
1389 /* Convert the UTF-8 stream bytes into an Unicode character
1390 */
1391 if( libuna_unicode_character_copy_from_utf8(
1392 &unicode_character,
1393 utf8_stream,
1394 utf8_stream_size,
1395 &utf8_stream_index,
1396 error ) != 1 )
1397 {
1398 libcerror_error_set(
1399 error,
1400 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1401 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1402 "%s: unable to copy Unicode character from UTF-8 stream.",
1403 function );
1404
1405 return( -1 );
1406 }
1407 /* Convert the Unicode character into UTF-32 character bytes
1408 */
1409 if( libuna_unicode_character_copy_to_utf32(
1410 unicode_character,
1411 utf32_string,
1412 utf32_string_size,
1413 utf32_string_index,
1414 error ) != 1 )
1415 {
1416 libcerror_error_set(
1417 error,
1418 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1419 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1420 "%s: unable to copy Unicode character to UTF-32.",
1421 function );
1422
1423 return( -1 );
1424 }
1425 }
1426 /* Check if the string is terminated with an end-of-string character
1427 */
1428 if( unicode_character != 0 )
1429 {
1430 if( *utf32_string_index >= utf32_string_size )
1431 {
1432 libcerror_error_set(
1433 error,
1434 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1435 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
1436 "%s: UTF-32 string too small.",
1437 function );
1438
1439 return( -1 );
1440 }
1441 utf32_string[ *utf32_string_index ] = 0;
1442
1443 *utf32_string_index += 1;
1444 }
1445 return( 1 );
1446 }
1447
1448 /* Compares an UTF-32 string with an UTF-8 stream
1449 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
1450 */
libuna_utf32_string_compare_with_utf8_stream(const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,const uint8_t * utf8_stream,size_t utf8_stream_size,libcerror_error_t ** error)1451 int libuna_utf32_string_compare_with_utf8_stream(
1452 const libuna_utf32_character_t *utf32_string,
1453 size_t utf32_string_size,
1454 const uint8_t *utf8_stream,
1455 size_t utf8_stream_size,
1456 libcerror_error_t **error )
1457 {
1458 static char *function = "libuna_utf32_string_compare_with_utf8_stream";
1459 size_t utf32_string_index = 0;
1460 size_t utf8_stream_index = 0;
1461 libuna_unicode_character_t utf32_unicode_character = 0;
1462 libuna_unicode_character_t utf8_stream_unicode_character = 0;
1463
1464 if( utf32_string == NULL )
1465 {
1466 libcerror_error_set(
1467 error,
1468 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1469 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1470 "%s: invalid UTF-32 string.",
1471 function );
1472
1473 return( -1 );
1474 }
1475 if( utf32_string_size > (size_t) SSIZE_MAX )
1476 {
1477 libcerror_error_set(
1478 error,
1479 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1480 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1481 "%s: invalid UTF-32 string size value exceeds maximum.",
1482 function );
1483
1484 return( -1 );
1485 }
1486 if( utf8_stream == NULL )
1487 {
1488 libcerror_error_set(
1489 error,
1490 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1491 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1492 "%s: invalid UTF-8 stream.",
1493 function );
1494
1495 return( -1 );
1496 }
1497 if( utf8_stream_size > (size_t) SSIZE_MAX )
1498 {
1499 libcerror_error_set(
1500 error,
1501 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1502 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1503 "%s: invalid UTF-8 stream size value exceeds maximum.",
1504 function );
1505
1506 return( -1 );
1507 }
1508 if( utf8_stream_size == 0 )
1509 {
1510 libcerror_error_set(
1511 error,
1512 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1513 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1514 "%s: missing UTF-8 stream value.",
1515 function );
1516
1517 return( -1 );
1518 }
1519 /* Check if UTF-8 stream starts with a byte order mark (BOM)
1520 */
1521 if( utf8_stream_size >= 3 )
1522 {
1523 if( ( utf8_stream[ 0 ] == 0x0ef )
1524 && ( utf8_stream[ 1 ] == 0x0bb )
1525 && ( utf8_stream[ 2 ] == 0x0bf ) )
1526 {
1527 utf8_stream_index += 3;
1528 }
1529 }
1530 if( ( utf32_string_size >= 1 )
1531 && ( utf32_string[ utf32_string_size - 1 ] == 0 ) )
1532 {
1533 utf32_string_size -= 1;
1534 }
1535 /* Check if the UTF-8 stream is terminated with zero bytes
1536 */
1537 if( ( utf8_stream_size >= 1 )
1538 && ( utf8_stream[ utf8_stream_size - 1 ] == 0 ) )
1539 {
1540 utf8_stream_size -= 1;
1541 }
1542 while( ( utf32_string_index < utf32_string_size )
1543 && ( utf8_stream_index < utf8_stream_size ) )
1544 {
1545 /* Convert the UTF-32 character bytes into an Unicode character
1546 */
1547 if( libuna_unicode_character_copy_from_utf32(
1548 &utf32_unicode_character,
1549 utf32_string,
1550 utf32_string_size,
1551 &utf32_string_index,
1552 error ) != 1 )
1553 {
1554 libcerror_error_set(
1555 error,
1556 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1557 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1558 "%s: unable to copy Unicode character from UTF-32.",
1559 function );
1560
1561 return( -1 );
1562 }
1563 /* Convert the UTF-8 character bytes into an Unicode character
1564 */
1565 if( libuna_unicode_character_copy_from_utf8(
1566 &utf8_stream_unicode_character,
1567 utf8_stream,
1568 utf8_stream_size,
1569 &utf8_stream_index,
1570 error ) != 1 )
1571 {
1572 libcerror_error_set(
1573 error,
1574 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1575 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1576 "%s: unable to copy Unicode character from UTF-8 stream.",
1577 function );
1578
1579 return( -1 );
1580 }
1581 if( utf32_unicode_character < utf8_stream_unicode_character )
1582 {
1583 return( LIBUNA_COMPARE_LESS );
1584 }
1585 else if( utf32_unicode_character > utf8_stream_unicode_character )
1586 {
1587 return( LIBUNA_COMPARE_GREATER );
1588 }
1589 }
1590 /* Check if both strings were entirely processed
1591 */
1592 if( utf32_string_index < utf32_string_size )
1593 {
1594 return( LIBUNA_COMPARE_GREATER );
1595 }
1596 else if( utf8_stream_index < utf8_stream_size )
1597 {
1598 return( LIBUNA_COMPARE_LESS );
1599 }
1600 return( LIBUNA_COMPARE_EQUAL );
1601 }
1602
1603 /* Determines the size of an UTF-32 string from an UTF-16 string
1604 * Returns 1 if successful or -1 on error
1605 */
libuna_utf32_string_size_from_utf16(const libuna_utf16_character_t * utf16_string,size_t utf16_string_size,size_t * utf32_string_size,libcerror_error_t ** error)1606 int libuna_utf32_string_size_from_utf16(
1607 const libuna_utf16_character_t *utf16_string,
1608 size_t utf16_string_size,
1609 size_t *utf32_string_size,
1610 libcerror_error_t **error )
1611 {
1612 static char *function = "libuna_utf32_string_size_from_utf16";
1613 size_t utf16_string_index = 0;
1614 libuna_unicode_character_t unicode_character = 0;
1615
1616 if( utf16_string == NULL )
1617 {
1618 libcerror_error_set(
1619 error,
1620 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1621 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1622 "%s: invalid UTF-16 string.",
1623 function );
1624
1625 return( -1 );
1626 }
1627 if( utf16_string_size > (size_t) SSIZE_MAX )
1628 {
1629 libcerror_error_set(
1630 error,
1631 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1632 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1633 "%s: invalid UTF-16 string size value exceeds maximum.",
1634 function );
1635
1636 return( -1 );
1637 }
1638 if( utf32_string_size == NULL )
1639 {
1640 libcerror_error_set(
1641 error,
1642 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1643 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1644 "%s: invalid UTF-32 string size.",
1645 function );
1646
1647 return( -1 );
1648 }
1649 *utf32_string_size = 0;
1650
1651 if( utf16_string_size == 0 )
1652 {
1653 return( 1 );
1654 }
1655 while( utf16_string_index < utf16_string_size )
1656 {
1657 /* Convert the UTF-16 character bytes into an Unicode character
1658 */
1659 if( libuna_unicode_character_copy_from_utf16(
1660 &unicode_character,
1661 utf16_string,
1662 utf16_string_size,
1663 &utf16_string_index,
1664 error ) != 1 )
1665 {
1666 libcerror_error_set(
1667 error,
1668 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1669 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1670 "%s: unable to copy Unicode character from UTF-16.",
1671 function );
1672
1673 return( -1 );
1674 }
1675 /* Determine how many UTF-32 character bytes are required
1676 */
1677 if( libuna_unicode_character_size_to_utf32(
1678 unicode_character,
1679 utf32_string_size,
1680 error ) != 1 )
1681 {
1682 libcerror_error_set(
1683 error,
1684 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1685 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1686 "%s: unable to unable to determine size of Unicode character in UTF-32.",
1687 function );
1688
1689 return( -1 );
1690 }
1691 }
1692 /* Check if the string is terminated with an end-of-string character
1693 */
1694 if( unicode_character != 0 )
1695 {
1696 *utf32_string_size += 1;
1697 }
1698 return( 1 );
1699 }
1700
1701 /* Copies an UTF-32 string from an UTF-16 string
1702 * Returns 1 if successful or -1 on error
1703 */
libuna_utf32_string_copy_from_utf16(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,const uint16_t * utf16_string,size_t utf16_string_size,libcerror_error_t ** error)1704 int libuna_utf32_string_copy_from_utf16(
1705 libuna_utf32_character_t *utf32_string,
1706 size_t utf32_string_size,
1707 const uint16_t *utf16_string,
1708 size_t utf16_string_size,
1709 libcerror_error_t **error )
1710 {
1711 static char *function = "libuna_utf32_string_copy_from_utf16";
1712 size_t utf32_string_index = 0;
1713
1714 if( libuna_utf32_string_with_index_copy_from_utf16(
1715 utf32_string,
1716 utf32_string_size,
1717 &utf32_string_index,
1718 utf16_string,
1719 utf16_string_size,
1720 error ) != 1 )
1721 {
1722 libcerror_error_set(
1723 error,
1724 LIBCERROR_ERROR_DOMAIN_RUNTIME,
1725 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
1726 "%s: unable to copy UTF-16 string to UTF-32 string.",
1727 function );
1728
1729 return( -1 );
1730 }
1731 return( 1 );
1732 }
1733
1734 /* Copies an UTF-32 string from an UTF-16 string
1735 * Returns 1 if successful or -1 on error
1736 */
libuna_utf32_string_with_index_copy_from_utf16(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,size_t * utf32_string_index,const uint16_t * utf16_string,size_t utf16_string_size,libcerror_error_t ** error)1737 int libuna_utf32_string_with_index_copy_from_utf16(
1738 libuna_utf32_character_t *utf32_string,
1739 size_t utf32_string_size,
1740 size_t *utf32_string_index,
1741 const uint16_t *utf16_string,
1742 size_t utf16_string_size,
1743 libcerror_error_t **error )
1744 {
1745 static char *function = "libuna_utf32_string_with_index_copy_from_utf16";
1746 size_t utf16_string_index = 0;
1747 libuna_unicode_character_t unicode_character = 0;
1748
1749 if( utf32_string == NULL )
1750 {
1751 libcerror_error_set(
1752 error,
1753 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1754 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1755 "%s: invalid UTF-32 string.",
1756 function );
1757
1758 return( -1 );
1759 }
1760 if( utf32_string_size > (size_t) SSIZE_MAX )
1761 {
1762 libcerror_error_set(
1763 error,
1764 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1765 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1766 "%s: invalid UTF-32 string size value exceeds maximum.",
1767 function );
1768
1769 return( -1 );
1770 }
1771 if( utf32_string_index == NULL )
1772 {
1773 libcerror_error_set(
1774 error,
1775 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1776 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1777 "%s: invalid UTF-32 string index.",
1778 function );
1779
1780 return( -1 );
1781 }
1782 if( utf16_string == NULL )
1783 {
1784 libcerror_error_set(
1785 error,
1786 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1787 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1788 "%s: invalid UTF-16 string.",
1789 function );
1790
1791 return( -1 );
1792 }
1793 if( utf16_string_size > (size_t) SSIZE_MAX )
1794 {
1795 libcerror_error_set(
1796 error,
1797 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1798 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1799 "%s: invalid UTF-16 string size value exceeds maximum.",
1800 function );
1801
1802 return( -1 );
1803 }
1804 if( utf16_string_size == 0 )
1805 {
1806 libcerror_error_set(
1807 error,
1808 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1809 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1810 "%s: missing UTF-16 string value.",
1811 function );
1812
1813 return( -1 );
1814 }
1815 while( utf16_string_index < utf16_string_size )
1816 {
1817 /* Convert the UTF-16 character bytes into an Unicode character
1818 */
1819 if( libuna_unicode_character_copy_from_utf16(
1820 &unicode_character,
1821 utf16_string,
1822 utf16_string_size,
1823 &utf16_string_index,
1824 error ) != 1 )
1825 {
1826 libcerror_error_set(
1827 error,
1828 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1829 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1830 "%s: unable to copy Unicode character from UTF-16.",
1831 function );
1832
1833 return( -1 );
1834 }
1835 /* Convert the Unicode character into UTF-32 character bytes
1836 */
1837 if( libuna_unicode_character_copy_to_utf32(
1838 unicode_character,
1839 utf32_string,
1840 utf32_string_size,
1841 utf32_string_index,
1842 error ) != 1 )
1843 {
1844 libcerror_error_set(
1845 error,
1846 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1847 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1848 "%s: unable to copy Unicode character to UTF-32.",
1849 function );
1850
1851 return( -1 );
1852 }
1853 }
1854 /* Check if the string is terminated with an end-of-string character
1855 */
1856 if( unicode_character != 0 )
1857 {
1858 if( *utf32_string_index >= utf32_string_size )
1859 {
1860 libcerror_error_set(
1861 error,
1862 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1863 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
1864 "%s: UTF-32 string too small.",
1865 function );
1866
1867 return( -1 );
1868 }
1869 utf32_string[ *utf32_string_index ] = 0;
1870
1871 *utf32_string_index += 1;
1872 }
1873 return( 1 );
1874 }
1875
1876 /* Determines the size of an UTF-32 string from an UTF-16 stream
1877 * Returns 1 if successful or -1 on error
1878 */
libuna_utf32_string_size_from_utf16_stream(const uint8_t * utf16_stream,size_t utf16_stream_size,int byte_order,size_t * utf32_string_size,libcerror_error_t ** error)1879 int libuna_utf32_string_size_from_utf16_stream(
1880 const uint8_t *utf16_stream,
1881 size_t utf16_stream_size,
1882 int byte_order,
1883 size_t *utf32_string_size,
1884 libcerror_error_t **error )
1885 {
1886 static char *function = "libuna_utf32_string_size_from_utf16_stream";
1887 size_t utf16_stream_index = 0;
1888 libuna_unicode_character_t unicode_character = 0;
1889 int read_byte_order = 0;
1890
1891 if( utf16_stream == NULL )
1892 {
1893 libcerror_error_set(
1894 error,
1895 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1896 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1897 "%s: invalid UTF-16 stream.",
1898 function );
1899
1900 return( -1 );
1901 }
1902 if( utf16_stream_size > (size_t) SSIZE_MAX )
1903 {
1904 libcerror_error_set(
1905 error,
1906 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1907 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1908 "%s: invalid UTF-16 stream size value exceeds maximum.",
1909 function );
1910
1911 return( -1 );
1912 }
1913 if( ( utf16_stream_size % 2 ) != 0 )
1914 {
1915 libcerror_error_set(
1916 error,
1917 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1918 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
1919 "%s: missing UTF-16 stream value.",
1920 function );
1921
1922 return( -1 );
1923 }
1924 if( utf32_string_size == NULL )
1925 {
1926 libcerror_error_set(
1927 error,
1928 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1929 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1930 "%s: invalid UTF-32 string size.",
1931 function );
1932
1933 return( -1 );
1934 }
1935 *utf32_string_size = 0;
1936
1937 if( utf16_stream_size == 0 )
1938 {
1939 return( 1 );
1940 }
1941 /* Check if UTF-16 stream is in big or little endian
1942 */
1943 if( utf16_stream_size >= 2 )
1944 {
1945 if( ( utf16_stream[ 0 ] == 0x0ff )
1946 && ( utf16_stream[ 1 ] == 0x0fe ) )
1947 {
1948 read_byte_order = LIBUNA_ENDIAN_LITTLE;
1949 utf16_stream_index = 2;
1950 }
1951 else if( ( utf16_stream[ 0 ] == 0x0fe )
1952 && ( utf16_stream[ 1 ] == 0x0ff ) )
1953 {
1954 read_byte_order = LIBUNA_ENDIAN_BIG;
1955 utf16_stream_index = 2;
1956 }
1957 if( byte_order == 0 )
1958 {
1959 byte_order = read_byte_order;
1960 }
1961 }
1962 if( ( byte_order != LIBUNA_ENDIAN_BIG )
1963 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
1964 {
1965 libcerror_error_set(
1966 error,
1967 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1968 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
1969 "%s: unsupported byte order.",
1970 function );
1971
1972 return( -1 );
1973 }
1974 while( ( utf16_stream_index + 1 ) < utf16_stream_size )
1975 {
1976 /* Convert the UTF-16 stream bytes into an Unicode character
1977 */
1978 if( libuna_unicode_character_copy_from_utf16_stream(
1979 &unicode_character,
1980 utf16_stream,
1981 utf16_stream_size,
1982 &utf16_stream_index,
1983 byte_order,
1984 error ) != 1 )
1985 {
1986 libcerror_error_set(
1987 error,
1988 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1989 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1990 "%s: unable to copy Unicode character from UTF-16 stream.",
1991 function );
1992
1993 return( -1 );
1994 }
1995 /* Determine how many UTF-8 character bytes are required
1996 */
1997 if( libuna_unicode_character_size_to_utf32(
1998 unicode_character,
1999 utf32_string_size,
2000 error ) != 1 )
2001 {
2002 libcerror_error_set(
2003 error,
2004 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2005 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2006 "%s: unable to unable to determine size of Unicode character in UTF-32.",
2007 function );
2008
2009 return( -1 );
2010 }
2011 }
2012 /* Check if the string is terminated with an end-of-string character
2013 */
2014 if( unicode_character != 0 )
2015 {
2016 *utf32_string_size += 1;
2017 }
2018 return( 1 );
2019 }
2020
2021 /* Copies an UTF-32 string from an UTF-16 stream
2022 * Returns 1 if successful or -1 on error
2023 */
libuna_utf32_string_copy_from_utf16_stream(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,const uint8_t * utf16_stream,size_t utf16_stream_size,int byte_order,libcerror_error_t ** error)2024 int libuna_utf32_string_copy_from_utf16_stream(
2025 libuna_utf32_character_t *utf32_string,
2026 size_t utf32_string_size,
2027 const uint8_t *utf16_stream,
2028 size_t utf16_stream_size,
2029 int byte_order,
2030 libcerror_error_t **error )
2031 {
2032 static char *function = "libuna_utf32_string_copy_from_utf16_stream";
2033 size_t utf32_string_index = 0;
2034
2035 if( libuna_utf32_string_with_index_copy_from_utf16_stream(
2036 utf32_string,
2037 utf32_string_size,
2038 &utf32_string_index,
2039 utf16_stream,
2040 utf16_stream_size,
2041 byte_order,
2042 error ) != 1 )
2043 {
2044 libcerror_error_set(
2045 error,
2046 LIBCERROR_ERROR_DOMAIN_RUNTIME,
2047 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
2048 "%s: unable to copy UTF-16 stream to UTF-32 string.",
2049 function );
2050
2051 return( -1 );
2052 }
2053 return( 1 );
2054 }
2055
2056 /* Copies an UTF-32 string from an UTF-16 stream
2057 * Returns 1 if successful or -1 on error
2058 */
libuna_utf32_string_with_index_copy_from_utf16_stream(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,size_t * utf32_string_index,const uint8_t * utf16_stream,size_t utf16_stream_size,int byte_order,libcerror_error_t ** error)2059 int libuna_utf32_string_with_index_copy_from_utf16_stream(
2060 libuna_utf32_character_t *utf32_string,
2061 size_t utf32_string_size,
2062 size_t *utf32_string_index,
2063 const uint8_t *utf16_stream,
2064 size_t utf16_stream_size,
2065 int byte_order,
2066 libcerror_error_t **error )
2067 {
2068 static char *function = "libuna_utf32_string_with_index_copy_from_utf16_stream";
2069 size_t utf16_stream_index = 0;
2070 libuna_unicode_character_t unicode_character = 0;
2071 int read_byte_order = 0;
2072
2073 if( utf32_string == NULL )
2074 {
2075 libcerror_error_set(
2076 error,
2077 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2078 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2079 "%s: invalid UTF-32 string.",
2080 function );
2081
2082 return( -1 );
2083 }
2084 if( utf32_string_size > (size_t) SSIZE_MAX )
2085 {
2086 libcerror_error_set(
2087 error,
2088 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2089 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2090 "%s: invalid UTF-32 string size value exceeds maximum.",
2091 function );
2092
2093 return( -1 );
2094 }
2095 if( utf32_string_index == NULL )
2096 {
2097 libcerror_error_set(
2098 error,
2099 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2100 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2101 "%s: invalid UTF-32 string index.",
2102 function );
2103
2104 return( -1 );
2105 }
2106 if( utf16_stream == NULL )
2107 {
2108 libcerror_error_set(
2109 error,
2110 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2111 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2112 "%s: invalid UTF-16 stream.",
2113 function );
2114
2115 return( -1 );
2116 }
2117 if( utf16_stream_size > (size_t) SSIZE_MAX )
2118 {
2119 libcerror_error_set(
2120 error,
2121 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2122 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2123 "%s: invalid UTF-16 stream size value exceeds maximum.",
2124 function );
2125
2126 return( -1 );
2127 }
2128 if( ( utf16_stream_size == 0 )
2129 || ( ( utf16_stream_size % 2 ) != 0 ) )
2130 {
2131 libcerror_error_set(
2132 error,
2133 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2134 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2135 "%s: missing UTF-16 stream value.",
2136 function );
2137
2138 return( -1 );
2139 }
2140 /* Check if UTF-16 stream is in big or little endian
2141 */
2142 if( utf16_stream_size >= 2 )
2143 {
2144 if( ( utf16_stream[ 0 ] == 0x0ff )
2145 && ( utf16_stream[ 1 ] == 0x0fe ) )
2146 {
2147 read_byte_order = LIBUNA_ENDIAN_LITTLE;
2148 utf16_stream_index = 2;
2149 }
2150 else if( ( utf16_stream[ 0 ] == 0x0fe )
2151 && ( utf16_stream[ 1 ] == 0x0ff ) )
2152 {
2153 read_byte_order = LIBUNA_ENDIAN_BIG;
2154 utf16_stream_index = 2;
2155 }
2156 if( byte_order == 0 )
2157 {
2158 byte_order = read_byte_order;
2159 }
2160 }
2161 if( ( byte_order != LIBUNA_ENDIAN_BIG )
2162 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
2163 {
2164 libcerror_error_set(
2165 error,
2166 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2167 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2168 "%s: unsupported byte order.",
2169 function );
2170
2171 return( -1 );
2172 }
2173 while( ( utf16_stream_index + 1 ) < utf16_stream_size )
2174 {
2175 /* Convert the UTF-16 stream bytes into an Unicode character
2176 */
2177 if( libuna_unicode_character_copy_from_utf16_stream(
2178 &unicode_character,
2179 utf16_stream,
2180 utf16_stream_size,
2181 &utf16_stream_index,
2182 byte_order,
2183 error ) != 1 )
2184 {
2185 libcerror_error_set(
2186 error,
2187 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2188 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2189 "%s: unable to copy Unicode character from UTF-16 stream.",
2190 function );
2191
2192 return( -1 );
2193 }
2194 /* Convert the Unicode character into UTF-32 character bytes
2195 */
2196 if( libuna_unicode_character_copy_to_utf32(
2197 unicode_character,
2198 utf32_string,
2199 utf32_string_size,
2200 utf32_string_index,
2201 error ) != 1 )
2202 {
2203 libcerror_error_set(
2204 error,
2205 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2206 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2207 "%s: unable to copy Unicode character to UTF-32.",
2208 function );
2209
2210 return( -1 );
2211 }
2212 }
2213 /* Check if the string is terminated with an end-of-string character
2214 */
2215 if( unicode_character != 0 )
2216 {
2217 if( *utf32_string_index >= utf32_string_size )
2218 {
2219 libcerror_error_set(
2220 error,
2221 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2222 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2223 "%s: UTF-32 string too small.",
2224 function );
2225
2226 return( -1 );
2227 }
2228 utf32_string[ *utf32_string_index ] = 0;
2229
2230 *utf32_string_index += 1;
2231 }
2232 return( 1 );
2233 }
2234
2235 /* Compares an UTF-32 string with an UTF-16 stream
2236 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
2237 */
libuna_utf32_string_compare_with_utf16_stream(const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,const uint8_t * utf16_stream,size_t utf16_stream_size,int byte_order,libcerror_error_t ** error)2238 int libuna_utf32_string_compare_with_utf16_stream(
2239 const libuna_utf32_character_t *utf32_string,
2240 size_t utf32_string_size,
2241 const uint8_t *utf16_stream,
2242 size_t utf16_stream_size,
2243 int byte_order,
2244 libcerror_error_t **error )
2245 {
2246 static char *function = "libuna_utf32_string_compare_with_utf16_stream";
2247 size_t utf16_stream_index = 0;
2248 size_t utf32_string_index = 0;
2249 libuna_unicode_character_t utf32_unicode_character = 0;
2250 libuna_unicode_character_t utf16_stream_unicode_character = 0;
2251 int read_byte_order = 0;
2252
2253 if( utf32_string == NULL )
2254 {
2255 libcerror_error_set(
2256 error,
2257 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2258 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2259 "%s: invalid UTF-32 string.",
2260 function );
2261
2262 return( -1 );
2263 }
2264 if( utf32_string_size > (size_t) SSIZE_MAX )
2265 {
2266 libcerror_error_set(
2267 error,
2268 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2269 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2270 "%s: invalid UTF-32 string size value exceeds maximum.",
2271 function );
2272
2273 return( -1 );
2274 }
2275 if( utf16_stream == NULL )
2276 {
2277 libcerror_error_set(
2278 error,
2279 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2280 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2281 "%s: invalid UTF-16 stream.",
2282 function );
2283
2284 return( -1 );
2285 }
2286 if( utf16_stream_size > (size_t) SSIZE_MAX )
2287 {
2288 libcerror_error_set(
2289 error,
2290 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2291 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2292 "%s: invalid UTF-16 stream size value exceeds maximum.",
2293 function );
2294
2295 return( -1 );
2296 }
2297 if( ( utf16_stream_size == 0 )
2298 || ( ( utf16_stream_size % 2 ) != 0 ) )
2299 {
2300 libcerror_error_set(
2301 error,
2302 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2303 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2304 "%s: missing UTF-16 stream value.",
2305 function );
2306
2307 return( -1 );
2308 }
2309 /* Check if UTF-16 stream is in big or little endian
2310 */
2311 if( utf16_stream_size >= 2 )
2312 {
2313 if( ( utf16_stream[ 0 ] == 0xfe )
2314 && ( utf16_stream[ 1 ] == 0xff ) )
2315 {
2316 read_byte_order = LIBUNA_ENDIAN_BIG;
2317 utf16_stream_index = 2;
2318 }
2319 else if( ( utf16_stream[ 0 ] == 0xff )
2320 && ( utf16_stream[ 1 ] == 0xfe ) )
2321 {
2322 read_byte_order = LIBUNA_ENDIAN_LITTLE;
2323 utf16_stream_index = 2;
2324 }
2325 if( byte_order == 0 )
2326 {
2327 byte_order = read_byte_order;
2328 }
2329 }
2330 if( ( byte_order != LIBUNA_ENDIAN_BIG )
2331 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
2332 {
2333 libcerror_error_set(
2334 error,
2335 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2336 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2337 "%s: unsupported byte order.",
2338 function );
2339
2340 return( -1 );
2341 }
2342 if( ( utf32_string_size >= 1 )
2343 && ( utf32_string[ utf32_string_size - 1 ] == 0 ) )
2344 {
2345 utf32_string_size -= 1;
2346 }
2347 /* Check if the UTF-16 stream is terminated with zero bytes
2348 */
2349 if( ( utf16_stream_size >= 2 )
2350 && ( utf16_stream[ utf16_stream_size - 2 ] == 0 )
2351 && ( utf16_stream[ utf16_stream_size - 1 ] == 0 ) )
2352 {
2353 utf16_stream_size -= 2;
2354 }
2355 while( ( utf32_string_index < utf32_string_size )
2356 && ( utf16_stream_index < utf16_stream_size ) )
2357 {
2358 /* Convert the UTF-32 character bytes into an Unicode character
2359 */
2360 if( libuna_unicode_character_copy_from_utf32(
2361 &utf32_unicode_character,
2362 utf32_string,
2363 utf32_string_size,
2364 &utf32_string_index,
2365 error ) != 1 )
2366 {
2367 libcerror_error_set(
2368 error,
2369 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2370 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2371 "%s: unable to copy Unicode character from UTF-32.",
2372 function );
2373
2374 return( -1 );
2375 }
2376 /* Convert the UTF-16 stream bytes into an Unicode character
2377 */
2378 if( libuna_unicode_character_copy_from_utf16_stream(
2379 &utf16_stream_unicode_character,
2380 utf16_stream,
2381 utf16_stream_size,
2382 &utf16_stream_index,
2383 byte_order,
2384 error ) != 1 )
2385 {
2386 libcerror_error_set(
2387 error,
2388 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2389 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2390 "%s: unable to copy Unicode character from UTF-16 stream.",
2391 function );
2392
2393 return( -1 );
2394 }
2395 if( utf32_unicode_character < utf16_stream_unicode_character )
2396 {
2397 return( LIBUNA_COMPARE_LESS );
2398 }
2399 else if( utf32_unicode_character > utf16_stream_unicode_character )
2400 {
2401 return( LIBUNA_COMPARE_GREATER );
2402 }
2403 }
2404 /* Check if both strings were entirely processed
2405 */
2406 if( utf32_string_index < utf32_string_size )
2407 {
2408 return( LIBUNA_COMPARE_GREATER );
2409 }
2410 else if( utf16_stream_index < utf16_stream_size )
2411 {
2412 return( LIBUNA_COMPARE_LESS );
2413 }
2414 return( LIBUNA_COMPARE_EQUAL );
2415 }
2416
2417 /* Determines the size of an UTF-32 string from an UTF-32 stream
2418 * Returns 1 if successful or -1 on error
2419 */
libuna_utf32_string_size_from_utf32_stream(const uint8_t * utf32_stream,size_t utf32_stream_size,int byte_order,size_t * utf32_string_size,libcerror_error_t ** error)2420 int libuna_utf32_string_size_from_utf32_stream(
2421 const uint8_t *utf32_stream,
2422 size_t utf32_stream_size,
2423 int byte_order,
2424 size_t *utf32_string_size,
2425 libcerror_error_t **error )
2426 {
2427 static char *function = "libuna_utf32_string_size_from_utf32_stream";
2428 size_t utf32_stream_index = 0;
2429 libuna_unicode_character_t unicode_character = 0;
2430 int read_byte_order = 0;
2431
2432 if( utf32_stream == NULL )
2433 {
2434 libcerror_error_set(
2435 error,
2436 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2437 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2438 "%s: invalid UTF-32 stream.",
2439 function );
2440
2441 return( -1 );
2442 }
2443 if( utf32_stream_size > (size_t) SSIZE_MAX )
2444 {
2445 libcerror_error_set(
2446 error,
2447 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2448 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2449 "%s: invalid UTF-32 stream size value exceeds maximum.",
2450 function );
2451
2452 return( -1 );
2453 }
2454 if( ( utf32_stream_size % 4 ) != 0 )
2455 {
2456 libcerror_error_set(
2457 error,
2458 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2459 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2460 "%s: missing UTF-32 stream value.",
2461 function );
2462
2463 return( -1 );
2464 }
2465 if( utf32_string_size == NULL )
2466 {
2467 libcerror_error_set(
2468 error,
2469 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2470 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2471 "%s: invalid UTF-32 string size.",
2472 function );
2473
2474 return( -1 );
2475 }
2476 *utf32_string_size = 0;
2477
2478 if( utf32_stream_size == 0 )
2479 {
2480 return( 1 );
2481 }
2482 /* Check if UTF-32 stream is in big or little endian
2483 */
2484 if( utf32_stream_size >= 4 )
2485 {
2486 if( ( utf32_stream[ 0 ] == 0x00 )
2487 && ( utf32_stream[ 1 ] == 0x00 )
2488 && ( utf32_stream[ 2 ] == 0xfe )
2489 && ( utf32_stream[ 3 ] == 0xff ) )
2490 {
2491 read_byte_order = LIBUNA_ENDIAN_BIG;
2492 utf32_stream_index = 4;
2493 }
2494 else if( ( utf32_stream[ 0 ] == 0xff )
2495 && ( utf32_stream[ 1 ] == 0xfe )
2496 && ( utf32_stream[ 2 ] == 0x00 )
2497 && ( utf32_stream[ 3 ] == 0x00 ) )
2498 {
2499 read_byte_order = LIBUNA_ENDIAN_LITTLE;
2500 utf32_stream_index = 4;
2501 }
2502 if( byte_order == 0 )
2503 {
2504 byte_order = read_byte_order;
2505 }
2506 }
2507 if( ( byte_order != LIBUNA_ENDIAN_BIG )
2508 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
2509 {
2510 libcerror_error_set(
2511 error,
2512 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2513 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2514 "%s: unsupported byte order.",
2515 function );
2516
2517 return( -1 );
2518 }
2519 while( ( utf32_stream_index + 1 ) < utf32_stream_size )
2520 {
2521 /* Convert the UTF-32 stream bytes into an Unicode character
2522 */
2523 if( libuna_unicode_character_copy_from_utf32_stream(
2524 &unicode_character,
2525 utf32_stream,
2526 utf32_stream_size,
2527 &utf32_stream_index,
2528 byte_order,
2529 error ) != 1 )
2530 {
2531 libcerror_error_set(
2532 error,
2533 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2534 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2535 "%s: unable to copy Unicode character from UTF-32 stream.",
2536 function );
2537
2538 return( -1 );
2539 }
2540 /* Determine how many UTF-32 character bytes are required
2541 */
2542 if( libuna_unicode_character_size_to_utf32(
2543 unicode_character,
2544 utf32_string_size,
2545 error ) != 1 )
2546 {
2547 libcerror_error_set(
2548 error,
2549 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2550 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2551 "%s: unable to unable to determine size of Unicode character in UTF-32.",
2552 function );
2553
2554 return( -1 );
2555 }
2556 }
2557 /* Check if the string is terminated with an end-of-string character
2558 */
2559 if( unicode_character != 0 )
2560 {
2561 *utf32_string_size += 1;
2562 }
2563 return( 1 );
2564 }
2565
2566 /* Copies an UTF-32 string from an UTF-32 stream
2567 * Returns 1 if successful or -1 on error
2568 */
libuna_utf32_string_copy_from_utf32_stream(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,const uint8_t * utf32_stream,size_t utf32_stream_size,int byte_order,libcerror_error_t ** error)2569 int libuna_utf32_string_copy_from_utf32_stream(
2570 libuna_utf32_character_t *utf32_string,
2571 size_t utf32_string_size,
2572 const uint8_t *utf32_stream,
2573 size_t utf32_stream_size,
2574 int byte_order,
2575 libcerror_error_t **error )
2576 {
2577 static char *function = "libuna_utf32_string_copy_from_utf32_stream";
2578 size_t utf32_string_index = 0;
2579
2580 if( libuna_utf32_string_with_index_copy_from_utf32_stream(
2581 utf32_string,
2582 utf32_string_size,
2583 &utf32_string_index,
2584 utf32_stream,
2585 utf32_stream_size,
2586 byte_order,
2587 error ) != 1 )
2588 {
2589 libcerror_error_set(
2590 error,
2591 LIBCERROR_ERROR_DOMAIN_RUNTIME,
2592 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
2593 "%s: unable to copy UTF-32 stream to UTF-32 string.",
2594 function );
2595
2596 return( -1 );
2597 }
2598 return( 1 );
2599 }
2600
2601 /* Copies an UTF-32 string from an UTF-32 stream
2602 * Returns 1 if successful or -1 on error
2603 */
libuna_utf32_string_with_index_copy_from_utf32_stream(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,size_t * utf32_string_index,const uint8_t * utf32_stream,size_t utf32_stream_size,int byte_order,libcerror_error_t ** error)2604 int libuna_utf32_string_with_index_copy_from_utf32_stream(
2605 libuna_utf32_character_t *utf32_string,
2606 size_t utf32_string_size,
2607 size_t *utf32_string_index,
2608 const uint8_t *utf32_stream,
2609 size_t utf32_stream_size,
2610 int byte_order,
2611 libcerror_error_t **error )
2612 {
2613 static char *function = "libuna_utf32_string_with_index_copy_from_utf32_stream";
2614 size_t utf32_stream_index = 0;
2615 libuna_unicode_character_t unicode_character = 0;
2616 int read_byte_order = 0;
2617
2618 if( utf32_string == NULL )
2619 {
2620 libcerror_error_set(
2621 error,
2622 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2623 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2624 "%s: invalid UTF-32 string.",
2625 function );
2626
2627 return( -1 );
2628 }
2629 if( utf32_string_size > (size_t) SSIZE_MAX )
2630 {
2631 libcerror_error_set(
2632 error,
2633 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2634 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2635 "%s: invalid UTF-32 string size value exceeds maximum.",
2636 function );
2637
2638 return( -1 );
2639 }
2640 if( utf32_string_index == NULL )
2641 {
2642 libcerror_error_set(
2643 error,
2644 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2645 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2646 "%s: invalid UTF-32 string index.",
2647 function );
2648
2649 return( -1 );
2650 }
2651 if( utf32_stream == NULL )
2652 {
2653 libcerror_error_set(
2654 error,
2655 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2656 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2657 "%s: invalid UTF-32 stream.",
2658 function );
2659
2660 return( -1 );
2661 }
2662 if( utf32_stream_size > (size_t) SSIZE_MAX )
2663 {
2664 libcerror_error_set(
2665 error,
2666 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2667 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2668 "%s: invalid UTF-32 stream size value exceeds maximum.",
2669 function );
2670
2671 return( -1 );
2672 }
2673 if( ( utf32_stream_size == 0 )
2674 || ( ( utf32_stream_size % 4 ) != 0 ) )
2675 {
2676 libcerror_error_set(
2677 error,
2678 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2679 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2680 "%s: missing UTF-32 stream value.",
2681 function );
2682
2683 return( -1 );
2684 }
2685 /* Check if UTF-32 stream is in big or little endian
2686 */
2687 if( utf32_stream_size >= 4 )
2688 {
2689 if( ( utf32_stream[ 0 ] == 0x00 )
2690 && ( utf32_stream[ 1 ] == 0x00 )
2691 && ( utf32_stream[ 2 ] == 0xfe )
2692 && ( utf32_stream[ 3 ] == 0xff ) )
2693 {
2694 read_byte_order = LIBUNA_ENDIAN_BIG;
2695 utf32_stream_index = 4;
2696 }
2697 else if( ( utf32_stream[ 0 ] == 0xff )
2698 && ( utf32_stream[ 1 ] == 0xfe )
2699 && ( utf32_stream[ 2 ] == 0x00 )
2700 && ( utf32_stream[ 3 ] == 0x00 ) )
2701 {
2702 read_byte_order = LIBUNA_ENDIAN_LITTLE;
2703 utf32_stream_index = 4;
2704 }
2705 if( byte_order == 0 )
2706 {
2707 byte_order = read_byte_order;
2708 }
2709 }
2710 if( ( byte_order != LIBUNA_ENDIAN_BIG )
2711 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
2712 {
2713 libcerror_error_set(
2714 error,
2715 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2716 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2717 "%s: unsupported byte order.",
2718 function );
2719
2720 return( -1 );
2721 }
2722 while( ( utf32_stream_index + 1 ) < utf32_stream_size )
2723 {
2724 /* Convert the UTF-32 stream bytes into an Unicode character
2725 */
2726 if( libuna_unicode_character_copy_from_utf32_stream(
2727 &unicode_character,
2728 utf32_stream,
2729 utf32_stream_size,
2730 &utf32_stream_index,
2731 byte_order,
2732 error ) != 1 )
2733 {
2734 libcerror_error_set(
2735 error,
2736 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2737 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2738 "%s: unable to copy Unicode character from UTF-32 stream.",
2739 function );
2740
2741 return( -1 );
2742 }
2743 /* Convert the Unicode character into UTF-32 character bytes
2744 */
2745 if( libuna_unicode_character_copy_to_utf32(
2746 unicode_character,
2747 utf32_string,
2748 utf32_string_size,
2749 utf32_string_index,
2750 error ) != 1 )
2751 {
2752 libcerror_error_set(
2753 error,
2754 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2755 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2756 "%s: unable to copy Unicode character to UTF-32.",
2757 function );
2758
2759 return( -1 );
2760 }
2761 }
2762 /* Check if the string is terminated with an end-of-string character
2763 */
2764 if( unicode_character != 0 )
2765 {
2766 if( *utf32_string_index >= utf32_string_size )
2767 {
2768 libcerror_error_set(
2769 error,
2770 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2771 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2772 "%s: UTF-32 string too small.",
2773 function );
2774
2775 return( -1 );
2776 }
2777 utf32_string[ *utf32_string_index ] = 0;
2778
2779 *utf32_string_index += 1;
2780 }
2781 return( 1 );
2782 }
2783
2784 /* Compares an UTF-32 string with an UTF-32 stream
2785 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
2786 */
libuna_utf32_string_compare_with_utf32_stream(const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,const uint8_t * utf32_stream,size_t utf32_stream_size,int byte_order,libcerror_error_t ** error)2787 int libuna_utf32_string_compare_with_utf32_stream(
2788 const libuna_utf32_character_t *utf32_string,
2789 size_t utf32_string_size,
2790 const uint8_t *utf32_stream,
2791 size_t utf32_stream_size,
2792 int byte_order,
2793 libcerror_error_t **error )
2794 {
2795 static char *function = "libuna_utf32_string_compare_with_utf32_stream";
2796 size_t utf32_stream_index = 0;
2797 size_t utf32_string_index = 0;
2798 libuna_unicode_character_t utf32_unicode_character = 0;
2799 libuna_unicode_character_t utf32_stream_unicode_character = 0;
2800 int read_byte_order = 0;
2801
2802 if( utf32_string == NULL )
2803 {
2804 libcerror_error_set(
2805 error,
2806 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2807 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2808 "%s: invalid UTF-32 string.",
2809 function );
2810
2811 return( -1 );
2812 }
2813 if( utf32_string_size > (size_t) SSIZE_MAX )
2814 {
2815 libcerror_error_set(
2816 error,
2817 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2818 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2819 "%s: invalid UTF-32 string size value exceeds maximum.",
2820 function );
2821
2822 return( -1 );
2823 }
2824 if( utf32_stream == NULL )
2825 {
2826 libcerror_error_set(
2827 error,
2828 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2829 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2830 "%s: invalid UTF-32 stream.",
2831 function );
2832
2833 return( -1 );
2834 }
2835 if( utf32_stream_size > (size_t) SSIZE_MAX )
2836 {
2837 libcerror_error_set(
2838 error,
2839 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2840 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2841 "%s: invalid UTF-32 stream size value exceeds maximum.",
2842 function );
2843
2844 return( -1 );
2845 }
2846 if( ( utf32_stream_size == 0 )
2847 || ( ( utf32_stream_size % 4 ) != 0 ) )
2848 {
2849 libcerror_error_set(
2850 error,
2851 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2852 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2853 "%s: missing UTF-32 stream value.",
2854 function );
2855
2856 return( -1 );
2857 }
2858 /* Check if UTF-32 stream is in big or little endian
2859 */
2860 if( utf32_stream_size >= 4 )
2861 {
2862 if( ( utf32_stream[ 0 ] == 0x00 )
2863 && ( utf32_stream[ 1 ] == 0x00 )
2864 && ( utf32_stream[ 2 ] == 0xfe )
2865 && ( utf32_stream[ 3 ] == 0xff ) )
2866 {
2867 read_byte_order = LIBUNA_ENDIAN_BIG;
2868 utf32_stream_index = 4;
2869 }
2870 else if( ( utf32_stream[ 0 ] == 0xff )
2871 && ( utf32_stream[ 1 ] == 0xfe )
2872 && ( utf32_stream[ 2 ] == 0x00 )
2873 && ( utf32_stream[ 3 ] == 0x00 ) )
2874 {
2875 read_byte_order = LIBUNA_ENDIAN_LITTLE;
2876 utf32_stream_index = 4;
2877 }
2878 if( byte_order == 0 )
2879 {
2880 byte_order = read_byte_order;
2881 }
2882 }
2883 if( ( byte_order != LIBUNA_ENDIAN_BIG )
2884 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
2885 {
2886 libcerror_error_set(
2887 error,
2888 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2889 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2890 "%s: unsupported byte order.",
2891 function );
2892
2893 return( -1 );
2894 }
2895 if( ( utf32_string_size >= 1 )
2896 && ( utf32_string[ utf32_string_size - 1 ] == 0 ) )
2897 {
2898 utf32_string_size -= 1;
2899 }
2900 /* Check if the UTF-32 stream is terminated with zero bytes
2901 */
2902 if( ( utf32_stream_size >= 4 )
2903 && ( utf32_stream[ utf32_stream_size - 4 ] == 0 )
2904 && ( utf32_stream[ utf32_stream_size - 3 ] == 0 )
2905 && ( utf32_stream[ utf32_stream_size - 2 ] == 0 )
2906 && ( utf32_stream[ utf32_stream_size - 1 ] == 0 ) )
2907 {
2908 utf32_stream_size -= 1;
2909 }
2910 while( ( utf32_string_index < utf32_string_size )
2911 && ( utf32_stream_index < utf32_stream_size ) )
2912 {
2913 /* Convert the UTF-32 character bytes into an Unicode character
2914 */
2915 if( libuna_unicode_character_copy_from_utf32(
2916 &utf32_unicode_character,
2917 utf32_string,
2918 utf32_string_size,
2919 &utf32_string_index,
2920 error ) != 1 )
2921 {
2922 libcerror_error_set(
2923 error,
2924 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2925 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2926 "%s: unable to copy Unicode character from UTF-32.",
2927 function );
2928
2929 return( -1 );
2930 }
2931 /* Convert the UTF-32 stream bytes into an Unicode character
2932 */
2933 if( libuna_unicode_character_copy_from_utf32_stream(
2934 &utf32_stream_unicode_character,
2935 utf32_stream,
2936 utf32_stream_size,
2937 &utf32_stream_index,
2938 byte_order,
2939 error ) != 1 )
2940 {
2941 libcerror_error_set(
2942 error,
2943 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2944 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2945 "%s: unable to copy Unicode character from UTF-32 stream.",
2946 function );
2947
2948 return( -1 );
2949 }
2950 if( utf32_unicode_character < utf32_stream_unicode_character )
2951 {
2952 return( LIBUNA_COMPARE_LESS );
2953 }
2954 else if( utf32_unicode_character > utf32_stream_unicode_character )
2955 {
2956 return( LIBUNA_COMPARE_GREATER );
2957 }
2958 }
2959 /* Check if both strings were entirely processed
2960 */
2961 if( utf32_string_index < utf32_string_size )
2962 {
2963 return( LIBUNA_COMPARE_GREATER );
2964 }
2965 else if( utf32_stream_index < utf32_stream_size )
2966 {
2967 return( LIBUNA_COMPARE_LESS );
2968 }
2969 return( LIBUNA_COMPARE_EQUAL );
2970 }
2971
2972 /* Determines the size of an UTF-32 string from a Standard Compression Scheme for Unicode (SCSU) stream
2973 * Returns 1 if successful or -1 on error
2974 */
libuna_utf32_string_size_from_scsu_stream(const uint8_t * scsu_stream,size_t scsu_stream_size,size_t * utf32_string_size,libcerror_error_t ** error)2975 int libuna_utf32_string_size_from_scsu_stream(
2976 const uint8_t *scsu_stream,
2977 size_t scsu_stream_size,
2978 size_t *utf32_string_size,
2979 libcerror_error_t **error )
2980 {
2981 uint32_t scsu_dynamic_window_positions[ 8 ] = {
2982 0x0080, 0x00c0, 0x0400, 0x0600, 0x0900, 0x3040, 0x30a0, 0xff00 };
2983
2984 static char *function = "libuna_utf8_string_size_from_scsu_stream";
2985 libuna_unicode_character_t unicode_character = 0;
2986 size_t scsu_stream_index = 0;
2987 uint32_t scsu_window_position = 0;
2988 uint8_t byte_value1 = 0;
2989 uint8_t byte_value2 = 0;
2990 uint8_t byte_value3 = 0;
2991 uint8_t dynamic_window_position_index = 0;
2992 uint8_t in_unicode_mode = 0;
2993 uint8_t unicode_character_set = 0;
2994
2995 if( scsu_stream == NULL )
2996 {
2997 libcerror_error_set(
2998 error,
2999 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3000 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3001 "%s: invalid SCSU stream.",
3002 function );
3003
3004 return( -1 );
3005 }
3006 if( scsu_stream_size > (size_t) SSIZE_MAX )
3007 {
3008 libcerror_error_set(
3009 error,
3010 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3011 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3012 "%s: invalid SCSU stream size value exceeds maximum.",
3013 function );
3014
3015 return( -1 );
3016 }
3017 if( utf32_string_size == NULL )
3018 {
3019 libcerror_error_set(
3020 error,
3021 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3022 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3023 "%s: invalid UTF-32 string size.",
3024 function );
3025
3026 return( -1 );
3027 }
3028 *utf32_string_size = 0;
3029
3030 if( scsu_stream_size == 0 )
3031 {
3032 return( 1 );
3033 }
3034 while( scsu_stream_index < scsu_stream_size )
3035 {
3036 unicode_character_set = 0;
3037
3038 if( scsu_stream_index >= scsu_stream_size )
3039 {
3040 libcerror_error_set(
3041 error,
3042 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3043 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3044 "%s: SCSU stream too small.",
3045 function );
3046
3047 return( -1 );
3048 }
3049 byte_value1 = scsu_stream[ scsu_stream_index++ ];
3050
3051 if( in_unicode_mode != 0 )
3052 {
3053 if( ( byte_value1 <= 0xdf )
3054 || ( byte_value1 >= 0xf3 ) )
3055 {
3056 if( scsu_stream_index >= scsu_stream_size )
3057 {
3058 libcerror_error_set(
3059 error,
3060 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3061 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3062 "%s: SCSU stream too small.",
3063 function );
3064
3065 return( -1 );
3066 }
3067 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3068
3069 unicode_character = byte_value1;
3070 unicode_character <<= 8;
3071 unicode_character |= byte_value2;
3072
3073 unicode_character_set = 1;
3074 }
3075 /* UCn tags
3076 */
3077 else if( ( byte_value1 >= 0xe0 )
3078 && ( byte_value1 <= 0xe7 ) )
3079 {
3080 dynamic_window_position_index = byte_value1 - 0xe0;
3081
3082 in_unicode_mode = 0;
3083 }
3084 /* UDn tags
3085 */
3086 else if( ( byte_value1 >= 0xe8 )
3087 && ( byte_value1 <= 0xef ) )
3088 {
3089 if( scsu_stream_index >= scsu_stream_size )
3090 {
3091 libcerror_error_set(
3092 error,
3093 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3094 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3095 "%s: SCSU stream too small.",
3096 function );
3097
3098 return( -1 );
3099 }
3100 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3101
3102 dynamic_window_position_index = byte_value1 - 0xe8;
3103 scsu_window_position = libuna_scsu_window_offset_table[ byte_value2 ];
3104
3105 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3106
3107 in_unicode_mode = 0;
3108 }
3109 /* UQU tag
3110 */
3111 else if( byte_value1 == 0xf0 )
3112 {
3113 if( ( scsu_stream_size < 2 )
3114 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3115 {
3116 libcerror_error_set(
3117 error,
3118 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3119 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3120 "%s: SCSU stream too small.",
3121 function );
3122
3123 return( -1 );
3124 }
3125 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3126 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3127
3128 unicode_character = byte_value2;
3129 unicode_character <<= 8;
3130 unicode_character |= byte_value3;
3131
3132 unicode_character_set = 1;
3133 }
3134 /* UDX tag
3135 */
3136 else if( byte_value1 == 0xf1 )
3137 {
3138 if( ( scsu_stream_size < 2 )
3139 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3140 {
3141 libcerror_error_set(
3142 error,
3143 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3144 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3145 "%s: SCSU stream too small.",
3146 function );
3147
3148 return( -1 );
3149 }
3150 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3151 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3152
3153 dynamic_window_position_index = byte_value2 >> 5;
3154 scsu_window_position = byte_value2 & 0x1f;
3155 scsu_window_position <<= 8;
3156 scsu_window_position |= byte_value3;
3157 scsu_window_position <<= 7;
3158 scsu_window_position += 0x00010000UL;
3159
3160 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3161
3162 in_unicode_mode = 0;
3163 }
3164 }
3165 else
3166 {
3167 if( ( byte_value1 == 0x00 )
3168 || ( byte_value1 == 0x09 )
3169 || ( byte_value1 == 0x0a )
3170 || ( byte_value1 == 0x0c )
3171 || ( byte_value1 == 0x0d )
3172 || ( ( byte_value1 >= 0x20 )
3173 && ( byte_value1 <= 0x7f ) ) )
3174 {
3175 unicode_character = byte_value1;
3176
3177 unicode_character_set = 1;
3178 }
3179 /* SQn tags
3180 */
3181 else if( ( byte_value1 >= 0x01 )
3182 && ( byte_value1 <= 0x08 ) )
3183 {
3184 if( scsu_stream_index >= scsu_stream_size )
3185 {
3186 libcerror_error_set(
3187 error,
3188 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3189 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3190 "%s: SCSU stream too small.",
3191 function );
3192
3193 return( -1 );
3194 }
3195 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3196
3197 unicode_character = byte_value2;
3198
3199 if( byte_value2 < 0x80 )
3200 {
3201 unicode_character += libuna_scsu_static_window_positions[ byte_value1 - 0x01 ];
3202 }
3203 else
3204 {
3205 unicode_character -= 0x80;
3206 unicode_character += scsu_dynamic_window_positions[ byte_value1 - 0x01 ];
3207 }
3208 unicode_character_set = 1;
3209 }
3210 /* SDX tag
3211 */
3212 else if( byte_value1 == 0x0b )
3213 {
3214 if( ( scsu_stream_size < 2 )
3215 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3216 {
3217 libcerror_error_set(
3218 error,
3219 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3220 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3221 "%s: SCSU stream too small.",
3222 function );
3223
3224 return( -1 );
3225 }
3226 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3227 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3228
3229 dynamic_window_position_index = byte_value2 >> 5;
3230 scsu_window_position = byte_value2 & 0x1f;
3231 scsu_window_position <<= 8;
3232 scsu_window_position |= byte_value3;
3233 scsu_window_position <<= 7;
3234 scsu_window_position += 0x00010000UL;
3235
3236 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3237 }
3238 /* SQU tag
3239 */
3240 else if( byte_value1 == 0x0e )
3241 {
3242 if( ( scsu_stream_size < 2 )
3243 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3244 {
3245 libcerror_error_set(
3246 error,
3247 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3248 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3249 "%s: SCSU stream too small.",
3250 function );
3251
3252 return( -1 );
3253 }
3254 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3255 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3256
3257 unicode_character = byte_value2;
3258 unicode_character <<= 8;
3259 unicode_character |= byte_value3;
3260
3261 unicode_character_set = 1;
3262 }
3263 /* SCU tag
3264 */
3265 else if( byte_value1 == 0x0f )
3266 {
3267 in_unicode_mode = 1;
3268 }
3269 /* SCn tags
3270 */
3271 else if( ( byte_value1 >= 0x10 )
3272 && ( byte_value1 <= 0x17 ) )
3273 {
3274 dynamic_window_position_index = byte_value1 - 0x10;
3275 }
3276 /* SDn tags
3277 */
3278 else if( ( byte_value1 >= 0x18 )
3279 && ( byte_value1 <= 0x1f ) )
3280 {
3281 if( scsu_stream_index >= scsu_stream_size )
3282 {
3283 libcerror_error_set(
3284 error,
3285 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3286 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3287 "%s: SCSU stream too small.",
3288 function );
3289
3290 return( -1 );
3291 }
3292 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3293
3294 dynamic_window_position_index = byte_value1 - 0x18;
3295 scsu_window_position = libuna_scsu_window_offset_table[ byte_value2 ];
3296
3297 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3298 }
3299 else if( byte_value1 >= 0x80 )
3300 {
3301 unicode_character = byte_value1 - 0x80;
3302 unicode_character += scsu_dynamic_window_positions[ dynamic_window_position_index ];
3303
3304 unicode_character_set = 1;
3305 }
3306 }
3307 if( unicode_character_set != 0 )
3308 {
3309 /* Determine how many UTF-32 character bytes are required
3310 */
3311 if( libuna_unicode_character_size_to_utf32(
3312 unicode_character,
3313 utf32_string_size,
3314 error ) != 1 )
3315 {
3316 libcerror_error_set(
3317 error,
3318 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3319 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
3320 "%s: unable to unable to determine size of Unicode character in UTF-32.",
3321 function );
3322
3323 return( -1 );
3324 }
3325 if( unicode_character == 0 )
3326 {
3327 break;
3328 }
3329 }
3330 }
3331 /* Check if the string is terminated with an end-of-string character
3332 */
3333 if( unicode_character != 0 )
3334 {
3335 *utf32_string_size += 1;
3336 }
3337 return( 1 );
3338 }
3339
3340 /* Copies an UTF-32 string from a Standard Compression Scheme for Unicode (SCSU) stream
3341 * Returns 1 if successful or -1 on error
3342 */
libuna_utf32_string_copy_from_scsu_stream(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,const uint8_t * scsu_stream,size_t scsu_stream_size,libcerror_error_t ** error)3343 int libuna_utf32_string_copy_from_scsu_stream(
3344 libuna_utf32_character_t *utf32_string,
3345 size_t utf32_string_size,
3346 const uint8_t *scsu_stream,
3347 size_t scsu_stream_size,
3348 libcerror_error_t **error )
3349 {
3350 static char *function = "libuna_utf32_string_copy_from_scsu_stream";
3351 size_t utf32_string_index = 0;
3352
3353 if( libuna_utf32_string_with_index_copy_from_scsu_stream(
3354 utf32_string,
3355 utf32_string_size,
3356 &utf32_string_index,
3357 scsu_stream,
3358 scsu_stream_size,
3359 error ) != 1 )
3360 {
3361 libcerror_error_set(
3362 error,
3363 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3364 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
3365 "%s: unable to SCSU stream to UTF-32 string.",
3366 function );
3367
3368 return( -1 );
3369 }
3370 return( 1 );
3371 }
3372
3373 /* Copies an UTF-32 string from a Standard Compression Scheme for Unicode (SCSU) stream
3374 * Returns 1 if successful or -1 on error
3375 */
libuna_utf32_string_with_index_copy_from_scsu_stream(libuna_utf32_character_t * utf32_string,size_t utf32_string_size,size_t * utf32_string_index,const uint8_t * scsu_stream,size_t scsu_stream_size,libcerror_error_t ** error)3376 int libuna_utf32_string_with_index_copy_from_scsu_stream(
3377 libuna_utf32_character_t *utf32_string,
3378 size_t utf32_string_size,
3379 size_t *utf32_string_index,
3380 const uint8_t *scsu_stream,
3381 size_t scsu_stream_size,
3382 libcerror_error_t **error )
3383 {
3384 uint32_t scsu_dynamic_window_positions[ 8 ] = {
3385 0x0080, 0x00c0, 0x0400, 0x0600, 0x0900, 0x3040, 0x30a0, 0xff00 };
3386
3387 static char *function = "libuna_utf32_string_with_index_copy_from_scsu_stream";
3388 libuna_unicode_character_t unicode_character = 0;
3389 size_t scsu_stream_index = 0;
3390 uint32_t scsu_window_position = 0;
3391 uint8_t byte_value1 = 0;
3392 uint8_t byte_value2 = 0;
3393 uint8_t byte_value3 = 0;
3394 uint8_t dynamic_window_position_index = 0;
3395 uint8_t in_unicode_mode = 0;
3396 uint8_t unicode_character_set = 0;
3397
3398 if( utf32_string == NULL )
3399 {
3400 libcerror_error_set(
3401 error,
3402 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3403 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3404 "%s: invalid UTF-32 string.",
3405 function );
3406
3407 return( -1 );
3408 }
3409 if( utf32_string_size > (size_t) SSIZE_MAX )
3410 {
3411 libcerror_error_set(
3412 error,
3413 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3414 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3415 "%s: invalid UTF-32 string size value exceeds maximum.",
3416 function );
3417
3418 return( -1 );
3419 }
3420 if( utf32_string_index == NULL )
3421 {
3422 libcerror_error_set(
3423 error,
3424 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3425 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3426 "%s: invalid UTF-32 string index.",
3427 function );
3428
3429 return( -1 );
3430 }
3431 if( scsu_stream == NULL )
3432 {
3433 libcerror_error_set(
3434 error,
3435 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3436 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3437 "%s: invalid SCSU stream.",
3438 function );
3439
3440 return( -1 );
3441 }
3442 if( scsu_stream_size > (size_t) SSIZE_MAX )
3443 {
3444 libcerror_error_set(
3445 error,
3446 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3447 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3448 "%s: invalid SCSU stream size value exceeds maximum.",
3449 function );
3450
3451 return( -1 );
3452 }
3453 if( scsu_stream_size == 0 )
3454 {
3455 libcerror_error_set(
3456 error,
3457 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3458 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
3459 "%s: missing SCSU stream value.",
3460 function );
3461
3462 return( -1 );
3463 }
3464 while( scsu_stream_index < scsu_stream_size )
3465 {
3466 unicode_character_set = 0;
3467
3468 if( scsu_stream_index >= scsu_stream_size )
3469 {
3470 libcerror_error_set(
3471 error,
3472 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3473 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3474 "%s: SCSU stream too small.",
3475 function );
3476
3477 return( -1 );
3478 }
3479 byte_value1 = scsu_stream[ scsu_stream_index++ ];
3480
3481 if( in_unicode_mode != 0 )
3482 {
3483 if( ( byte_value1 <= 0xdf )
3484 || ( byte_value1 >= 0xf3 ) )
3485 {
3486 if( scsu_stream_index >= scsu_stream_size )
3487 {
3488 libcerror_error_set(
3489 error,
3490 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3491 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3492 "%s: SCSU stream too small.",
3493 function );
3494
3495 return( -1 );
3496 }
3497 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3498
3499 unicode_character = byte_value1;
3500 unicode_character <<= 8;
3501 unicode_character |= byte_value2;
3502
3503 unicode_character_set = 1;
3504 }
3505 /* UCn tags
3506 */
3507 else if( ( byte_value1 >= 0xe0 )
3508 && ( byte_value1 <= 0xe7 ) )
3509 {
3510 dynamic_window_position_index = byte_value1 - 0xe0;
3511
3512 in_unicode_mode = 0;
3513 }
3514 /* UDn tags
3515 */
3516 else if( ( byte_value1 >= 0xe8 )
3517 && ( byte_value1 <= 0xef ) )
3518 {
3519 if( scsu_stream_index >= scsu_stream_size )
3520 {
3521 libcerror_error_set(
3522 error,
3523 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3524 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3525 "%s: SCSU stream too small.",
3526 function );
3527
3528 return( -1 );
3529 }
3530 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3531
3532 dynamic_window_position_index = byte_value1 - 0xe8;
3533 scsu_window_position = libuna_scsu_window_offset_table[ byte_value2 ];
3534
3535 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3536
3537 in_unicode_mode = 0;
3538 }
3539 /* UQU tag
3540 */
3541 else if( byte_value1 == 0xf0 )
3542 {
3543 if( ( scsu_stream_size < 2 )
3544 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3545 {
3546 libcerror_error_set(
3547 error,
3548 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3549 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3550 "%s: SCSU stream too small.",
3551 function );
3552
3553 return( -1 );
3554 }
3555 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3556 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3557
3558 unicode_character = byte_value2;
3559 unicode_character <<= 8;
3560 unicode_character |= byte_value3;
3561
3562 unicode_character_set = 1;
3563 }
3564 /* UDX tag
3565 */
3566 else if( byte_value1 == 0xf1 )
3567 {
3568 if( ( scsu_stream_size < 2 )
3569 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3570 {
3571 libcerror_error_set(
3572 error,
3573 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3574 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3575 "%s: SCSU stream too small.",
3576 function );
3577
3578 return( -1 );
3579 }
3580 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3581 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3582
3583 dynamic_window_position_index = byte_value2 >> 5;
3584 scsu_window_position = byte_value2 & 0x1f;
3585 scsu_window_position <<= 8;
3586 scsu_window_position |= byte_value3;
3587 scsu_window_position <<= 7;
3588 scsu_window_position += 0x00010000UL;
3589
3590 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3591
3592 in_unicode_mode = 0;
3593 }
3594 }
3595 else
3596 {
3597 if( ( byte_value1 == 0x00 )
3598 || ( byte_value1 == 0x09 )
3599 || ( byte_value1 == 0x0a )
3600 || ( byte_value1 == 0x0c )
3601 || ( byte_value1 == 0x0d )
3602 || ( ( byte_value1 >= 0x20 )
3603 && ( byte_value1 <= 0x7f ) ) )
3604 {
3605 unicode_character = byte_value1;
3606
3607 unicode_character_set = 1;
3608 }
3609 /* SQn tags
3610 */
3611 else if( ( byte_value1 >= 0x01 )
3612 && ( byte_value1 <= 0x08 ) )
3613 {
3614 if( scsu_stream_index >= scsu_stream_size )
3615 {
3616 libcerror_error_set(
3617 error,
3618 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3619 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3620 "%s: SCSU stream too small.",
3621 function );
3622
3623 return( -1 );
3624 }
3625 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3626
3627 unicode_character = byte_value2;
3628
3629 if( byte_value2 < 0x80 )
3630 {
3631 unicode_character += libuna_scsu_static_window_positions[ byte_value1 - 0x01 ];
3632 }
3633 else
3634 {
3635 unicode_character -= 0x80;
3636 unicode_character += scsu_dynamic_window_positions[ byte_value1 - 0x01 ];
3637 }
3638 unicode_character_set = 1;
3639 }
3640 /* SDX tag
3641 */
3642 else if( byte_value1 == 0x0b )
3643 {
3644 if( ( scsu_stream_size < 2 )
3645 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3646 {
3647 libcerror_error_set(
3648 error,
3649 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3650 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3651 "%s: SCSU stream too small.",
3652 function );
3653
3654 return( -1 );
3655 }
3656 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3657 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3658
3659 dynamic_window_position_index = byte_value2 >> 5;
3660 scsu_window_position = byte_value2 & 0x1f;
3661 scsu_window_position <<= 8;
3662 scsu_window_position |= byte_value3;
3663 scsu_window_position <<= 7;
3664 scsu_window_position += 0x00010000UL;
3665
3666 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3667 }
3668 /* SQU tag
3669 */
3670 else if( byte_value1 == 0x0e )
3671 {
3672 if( ( scsu_stream_size < 2 )
3673 || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3674 {
3675 libcerror_error_set(
3676 error,
3677 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3678 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3679 "%s: SCSU stream too small.",
3680 function );
3681
3682 return( -1 );
3683 }
3684 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3685 byte_value3 = scsu_stream[ scsu_stream_index++ ];
3686
3687 unicode_character = byte_value2;
3688 unicode_character <<= 8;
3689 unicode_character |= byte_value3;
3690
3691 unicode_character_set = 1;
3692 }
3693 /* SCU tag
3694 */
3695 else if( byte_value1 == 0x0f )
3696 {
3697 in_unicode_mode = 1;
3698 }
3699 /* SCn tags
3700 */
3701 else if( ( byte_value1 >= 0x10 )
3702 && ( byte_value1 <= 0x17 ) )
3703 {
3704 dynamic_window_position_index = byte_value1 - 0x10;
3705 }
3706 /* SDn tags
3707 */
3708 else if( ( byte_value1 >= 0x18 )
3709 && ( byte_value1 <= 0x1f ) )
3710 {
3711 if( scsu_stream_index >= scsu_stream_size )
3712 {
3713 libcerror_error_set(
3714 error,
3715 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3716 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3717 "%s: SCSU stream too small.",
3718 function );
3719
3720 return( -1 );
3721 }
3722 byte_value2 = scsu_stream[ scsu_stream_index++ ];
3723
3724 dynamic_window_position_index = byte_value1 - 0x18;
3725 scsu_window_position = libuna_scsu_window_offset_table[ byte_value2 ];
3726
3727 scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3728 }
3729 else if( byte_value1 >= 0x80 )
3730 {
3731 unicode_character = byte_value1 - 0x80;
3732 unicode_character += scsu_dynamic_window_positions[ dynamic_window_position_index ];
3733
3734 unicode_character_set = 1;
3735 }
3736 }
3737 if( unicode_character_set != 0 )
3738 {
3739 /* Convert the Unicode character into UTF-32 character bytes
3740 */
3741 if( libuna_unicode_character_copy_to_utf32(
3742 unicode_character,
3743 utf32_string,
3744 utf32_string_size,
3745 utf32_string_index,
3746 error ) != 1 )
3747 {
3748 libcerror_error_set(
3749 error,
3750 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3751 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3752 "%s: unable to copy Unicode character to UTF-32.",
3753 function );
3754
3755 return( -1 );
3756 }
3757 if( unicode_character == 0 )
3758 {
3759 break;
3760 }
3761 }
3762 }
3763 /* Check if the string is terminated with an end-of-string character
3764 */
3765 if( unicode_character != 0 )
3766 {
3767 if( *utf32_string_index >= utf32_string_size )
3768 {
3769 libcerror_error_set(
3770 error,
3771 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3772 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3773 "%s: UTF-32 string too small.",
3774 function );
3775
3776 return( -1 );
3777 }
3778 utf32_string[ *utf32_string_index ] = 0;
3779
3780 *utf32_string_index += 1;
3781 }
3782 return( 1 );
3783 }
3784
3785