1 /*
2 * UTF-8 string functions
3 *
4 * Copyright (C) 2008-2019, Joachim Metz <joachim.metz@gmail.com>
5 *
6 * Refer to AUTHORS for acknowledgements.
7 *
8 * This software is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU Lesser General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * This software is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this software. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #include <common.h>
23 #include <types.h>
24
25 #include "libuna_definitions.h"
26 #include "libuna_libcerror.h"
27 #include "libuna_types.h"
28 #include "libuna_unicode_character.h"
29 #include "libuna_utf8_string.h"
30
31 /* Determines the size of an UTF-8 string from a byte stream
32 * Returns 1 if successful or -1 on error
33 */
libuna_utf8_string_size_from_byte_stream(const uint8_t * byte_stream,size_t byte_stream_size,int codepage,size_t * utf8_string_size,libcerror_error_t ** error)34 int libuna_utf8_string_size_from_byte_stream(
35 const uint8_t *byte_stream,
36 size_t byte_stream_size,
37 int codepage,
38 size_t *utf8_string_size,
39 libcerror_error_t **error )
40 {
41 static char *function = "libuna_utf8_string_size_from_byte_stream";
42 size_t byte_stream_index = 0;
43 libuna_unicode_character_t unicode_character = 0;
44
45 if( byte_stream == NULL )
46 {
47 libcerror_error_set(
48 error,
49 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
50 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
51 "%s: invalid byte stream.",
52 function );
53
54 return( -1 );
55 }
56 if( byte_stream_size > (size_t) SSIZE_MAX )
57 {
58 libcerror_error_set(
59 error,
60 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
61 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
62 "%s: invalid byte stream size value exceeds maximum.",
63 function );
64
65 return( -1 );
66 }
67 if( utf8_string_size == NULL )
68 {
69 libcerror_error_set(
70 error,
71 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
72 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
73 "%s: invalid UTF-8 string size.",
74 function );
75
76 return( -1 );
77 }
78 *utf8_string_size = 0;
79
80 if( byte_stream_size == 0 )
81 {
82 return( 1 );
83 }
84 while( byte_stream_index < byte_stream_size )
85 {
86 /* Convert the byte stream bytes into an Unicode character
87 */
88 if( libuna_unicode_character_copy_from_byte_stream(
89 &unicode_character,
90 byte_stream,
91 byte_stream_size,
92 &byte_stream_index,
93 codepage,
94 error ) != 1 )
95 {
96 libcerror_error_set(
97 error,
98 LIBCERROR_ERROR_DOMAIN_CONVERSION,
99 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
100 "%s: unable to copy Unicode character from byte stream.",
101 function );
102
103 return( -1 );
104 }
105 /* Determine how many UTF-8 character bytes are required
106 */
107 if( libuna_unicode_character_size_to_utf8(
108 unicode_character,
109 utf8_string_size,
110 error ) != 1 )
111 {
112 libcerror_error_set(
113 error,
114 LIBCERROR_ERROR_DOMAIN_CONVERSION,
115 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
116 "%s: unable to unable to determine size of Unicode character in UTF-8.",
117 function );
118
119 return( -1 );
120 }
121 if( unicode_character == 0 )
122 {
123 break;
124 }
125 }
126 /* Check if the string is terminated with an end-of-string character
127 */
128 if( unicode_character != 0 )
129 {
130 *utf8_string_size += 1;
131 }
132 return( 1 );
133 }
134
135 /* Copies an UTF-8 string from a byte stream
136 * Returns 1 if successful or -1 on error
137 */
libuna_utf8_string_copy_from_byte_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * byte_stream,size_t byte_stream_size,int codepage,libcerror_error_t ** error)138 int libuna_utf8_string_copy_from_byte_stream(
139 libuna_utf8_character_t *utf8_string,
140 size_t utf8_string_size,
141 const uint8_t *byte_stream,
142 size_t byte_stream_size,
143 int codepage,
144 libcerror_error_t **error )
145 {
146 static char *function = "libuna_utf8_string_copy_from_byte_stream";
147 size_t utf8_string_index = 0;
148
149 if( libuna_utf8_string_with_index_copy_from_byte_stream(
150 utf8_string,
151 utf8_string_size,
152 &utf8_string_index,
153 byte_stream,
154 byte_stream_size,
155 codepage,
156 error ) != 1 )
157 {
158 libcerror_error_set(
159 error,
160 LIBCERROR_ERROR_DOMAIN_RUNTIME,
161 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
162 "%s: unable to copy byte stream to UTF-8 string.",
163 function );
164
165 return( -1 );
166 }
167 return( 1 );
168 }
169
170 /* Copies an UTF-8 string from a byte stream
171 * Returns 1 if successful or -1 on error
172 */
libuna_utf8_string_with_index_copy_from_byte_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const uint8_t * byte_stream,size_t byte_stream_size,int codepage,libcerror_error_t ** error)173 int libuna_utf8_string_with_index_copy_from_byte_stream(
174 libuna_utf8_character_t *utf8_string,
175 size_t utf8_string_size,
176 size_t *utf8_string_index,
177 const uint8_t *byte_stream,
178 size_t byte_stream_size,
179 int codepage,
180 libcerror_error_t **error )
181 {
182 static char *function = "libuna_utf8_string_with_index_copy_from_byte_stream";
183 size_t byte_stream_index = 0;
184 libuna_unicode_character_t unicode_character = 0;
185
186 if( utf8_string == NULL )
187 {
188 libcerror_error_set(
189 error,
190 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
191 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
192 "%s: invalid UTF-8 string.",
193 function );
194
195 return( -1 );
196 }
197 if( utf8_string_size > (size_t) SSIZE_MAX )
198 {
199 libcerror_error_set(
200 error,
201 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
202 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
203 "%s: invalid UTF-8 string size value exceeds maximum.",
204 function );
205
206 return( -1 );
207 }
208 if( utf8_string_index == NULL )
209 {
210 libcerror_error_set(
211 error,
212 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
213 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
214 "%s: invalid UTF-8 string index.",
215 function );
216
217 return( -1 );
218 }
219 if( byte_stream == NULL )
220 {
221 libcerror_error_set(
222 error,
223 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
224 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
225 "%s: invalid byte stream.",
226 function );
227
228 return( -1 );
229 }
230 if( byte_stream_size > (size_t) SSIZE_MAX )
231 {
232 libcerror_error_set(
233 error,
234 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
235 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
236 "%s: invalid byte stream size value exceeds maximum.",
237 function );
238
239 return( -1 );
240 }
241 if( byte_stream_size == 0 )
242 {
243 libcerror_error_set(
244 error,
245 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
246 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
247 "%s: missing byte stream value.",
248 function );
249
250 return( -1 );
251 }
252 while( byte_stream_index < byte_stream_size )
253 {
254 /* Convert the byte stream bytes into an Unicode character
255 */
256 if( libuna_unicode_character_copy_from_byte_stream(
257 &unicode_character,
258 byte_stream,
259 byte_stream_size,
260 &byte_stream_index,
261 codepage,
262 error ) != 1 )
263 {
264 libcerror_error_set(
265 error,
266 LIBCERROR_ERROR_DOMAIN_CONVERSION,
267 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
268 "%s: unable to copy Unicode character from byte stream.",
269 function );
270
271 return( -1 );
272 }
273 /* Convert the Unicode character into UTF-8 character bytes
274 */
275 if( libuna_unicode_character_copy_to_utf8(
276 unicode_character,
277 utf8_string,
278 utf8_string_size,
279 utf8_string_index,
280 error ) != 1 )
281 {
282 libcerror_error_set(
283 error,
284 LIBCERROR_ERROR_DOMAIN_CONVERSION,
285 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
286 "%s: unable to copy Unicode character to UTF-8.",
287 function );
288
289 return( -1 );
290 }
291 if( unicode_character == 0 )
292 {
293 break;
294 }
295 }
296 /* Check if the string is terminated with an end-of-string character
297 */
298 if( unicode_character != 0 )
299 {
300 if( *utf8_string_index >= utf8_string_size )
301 {
302 libcerror_error_set(
303 error,
304 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
305 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
306 "%s: UTF-8 string too small.",
307 function );
308
309 return( -1 );
310 }
311 utf8_string[ *utf8_string_index ] = 0;
312
313 *utf8_string_index += 1;
314 }
315 return( 1 );
316 }
317
318 /* Compares an UTF-8 string with a byte stream
319 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
320 */
libuna_utf8_string_compare_with_byte_stream(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * byte_stream,size_t byte_stream_size,int codepage,libcerror_error_t ** error)321 int libuna_utf8_string_compare_with_byte_stream(
322 const libuna_utf8_character_t *utf8_string,
323 size_t utf8_string_size,
324 const uint8_t *byte_stream,
325 size_t byte_stream_size,
326 int codepage,
327 libcerror_error_t **error )
328 {
329 static char *function = "libuna_utf8_string_compare_with_byte_stream";
330 size_t byte_stream_index = 0;
331 size_t utf8_string_index = 0;
332 libuna_unicode_character_t utf8_unicode_character = 0;
333 libuna_unicode_character_t byte_stream_unicode_character = 0;
334
335 if( utf8_string == NULL )
336 {
337 libcerror_error_set(
338 error,
339 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
340 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
341 "%s: invalid UTF-8 string.",
342 function );
343
344 return( -1 );
345 }
346 if( utf8_string_size > (size_t) SSIZE_MAX )
347 {
348 libcerror_error_set(
349 error,
350 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
351 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
352 "%s: invalid UTF-8 string size value exceeds maximum.",
353 function );
354
355 return( -1 );
356 }
357 if( byte_stream == NULL )
358 {
359 libcerror_error_set(
360 error,
361 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
362 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
363 "%s: invalid byte stream.",
364 function );
365
366 return( -1 );
367 }
368 if( byte_stream_size > (size_t) SSIZE_MAX )
369 {
370 libcerror_error_set(
371 error,
372 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
373 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
374 "%s: invalid byte stream size value exceeds maximum.",
375 function );
376
377 return( -1 );
378 }
379 if( byte_stream_size == 0 )
380 {
381 libcerror_error_set(
382 error,
383 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
384 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
385 "%s: missing byte stream value.",
386 function );
387
388 return( -1 );
389 }
390 if( ( utf8_string_size >= 1 )
391 && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
392 {
393 utf8_string_size -= 1;
394 }
395 /* Check if the byte stream is terminated with zero bytes
396 */
397 if( ( byte_stream_size >= 1 )
398 && ( byte_stream[ byte_stream_size - 1 ] == 0 ) )
399 {
400 byte_stream_size -= 1;
401 }
402 while( ( utf8_string_index < utf8_string_size )
403 && ( byte_stream_index < byte_stream_size ) )
404 {
405 /* Convert the UTF-8 character bytes into an Unicode character
406 */
407 if( libuna_unicode_character_copy_from_utf8(
408 &utf8_unicode_character,
409 utf8_string,
410 utf8_string_size,
411 &utf8_string_index,
412 error ) != 1 )
413 {
414 libcerror_error_set(
415 error,
416 LIBCERROR_ERROR_DOMAIN_CONVERSION,
417 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
418 "%s: unable to copy Unicode character from UTF-8.",
419 function );
420
421 return( -1 );
422 }
423 /* Convert the byte stream bytes into an Unicode character
424 */
425 if( libuna_unicode_character_copy_from_byte_stream(
426 &byte_stream_unicode_character,
427 byte_stream,
428 byte_stream_size,
429 &byte_stream_index,
430 codepage,
431 error ) != 1 )
432 {
433 libcerror_error_set(
434 error,
435 LIBCERROR_ERROR_DOMAIN_CONVERSION,
436 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
437 "%s: unable to copy Unicode character from byte stream.",
438 function );
439
440 return( -1 );
441 }
442 if( utf8_unicode_character < byte_stream_unicode_character )
443 {
444 return( LIBUNA_COMPARE_LESS );
445 }
446 else if( utf8_unicode_character > byte_stream_unicode_character )
447 {
448 return( LIBUNA_COMPARE_GREATER );
449 }
450 }
451 /* Check if both strings were entirely processed
452 */
453 if( utf8_string_index < utf8_string_size )
454 {
455 return( LIBUNA_COMPARE_GREATER );
456 }
457 else if( byte_stream_index < byte_stream_size )
458 {
459 return( LIBUNA_COMPARE_LESS );
460 }
461 return( LIBUNA_COMPARE_EQUAL );
462 }
463
464 /* Determines the size of an UTF-8 string from an UTF-7 stream
465 * Returns 1 if successful or -1 on error
466 */
libuna_utf8_string_size_from_utf7_stream(const uint8_t * utf7_stream,size_t utf7_stream_size,size_t * utf8_string_size,libcerror_error_t ** error)467 int libuna_utf8_string_size_from_utf7_stream(
468 const uint8_t *utf7_stream,
469 size_t utf7_stream_size,
470 size_t *utf8_string_size,
471 libcerror_error_t **error )
472 {
473 static char *function = "libuna_utf8_string_size_from_utf7_stream";
474 size_t utf7_stream_index = 0;
475 libuna_unicode_character_t unicode_character = 0;
476 uint32_t utf7_stream_base64_data = 0;
477
478 if( utf7_stream == NULL )
479 {
480 libcerror_error_set(
481 error,
482 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
483 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
484 "%s: invalid UTF-7 stream.",
485 function );
486
487 return( -1 );
488 }
489 if( utf7_stream_size > (size_t) SSIZE_MAX )
490 {
491 libcerror_error_set(
492 error,
493 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
494 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
495 "%s: invalid UTF-7 stream size value exceeds maximum.",
496 function );
497
498 return( -1 );
499 }
500 if( utf8_string_size == NULL )
501 {
502 libcerror_error_set(
503 error,
504 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
505 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
506 "%s: invalid UTF-8 string size.",
507 function );
508
509 return( -1 );
510 }
511 *utf8_string_size = 0;
512
513 if( utf7_stream_size == 0 )
514 {
515 return( 1 );
516 }
517 while( utf7_stream_index < utf7_stream_size )
518 {
519 /* Convert the UTF-7 stream bytes into an Unicode character
520 */
521 if( libuna_unicode_character_copy_from_utf7_stream(
522 &unicode_character,
523 utf7_stream,
524 utf7_stream_size,
525 &utf7_stream_index,
526 &utf7_stream_base64_data,
527 error ) != 1 )
528 {
529 libcerror_error_set(
530 error,
531 LIBCERROR_ERROR_DOMAIN_CONVERSION,
532 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
533 "%s: unable to copy Unicode character from UTF-7 stream.",
534 function );
535
536 return( -1 );
537 }
538 /* Determine how many UTF-8 character bytes are required
539 */
540 if( libuna_unicode_character_size_to_utf8(
541 unicode_character,
542 utf8_string_size,
543 error ) != 1 )
544 {
545 libcerror_error_set(
546 error,
547 LIBCERROR_ERROR_DOMAIN_CONVERSION,
548 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
549 "%s: unable to unable to determine size of Unicode character in UTF-8.",
550 function );
551
552 return( -1 );
553 }
554 if( unicode_character == 0 )
555 {
556 break;
557 }
558 }
559 /* Check if the string is terminated with an end-of-string character
560 */
561 if( unicode_character != 0 )
562 {
563 *utf8_string_size += 1;
564 }
565 return( 1 );
566 }
567
568 /* Copies an UTF-8 string from an UTF-7 stream
569 * Returns 1 if successful or -1 on error
570 */
libuna_utf8_string_copy_from_utf7_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf7_stream,size_t utf7_stream_size,libcerror_error_t ** error)571 int libuna_utf8_string_copy_from_utf7_stream(
572 libuna_utf8_character_t *utf8_string,
573 size_t utf8_string_size,
574 const uint8_t *utf7_stream,
575 size_t utf7_stream_size,
576 libcerror_error_t **error )
577 {
578 static char *function = "libuna_utf8_string_copy_from_utf7_stream";
579 size_t utf8_string_index = 0;
580
581 if( libuna_utf8_string_with_index_copy_from_utf7_stream(
582 utf8_string,
583 utf8_string_size,
584 &utf8_string_index,
585 utf7_stream,
586 utf7_stream_size,
587 error ) != 1 )
588 {
589 libcerror_error_set(
590 error,
591 LIBCERROR_ERROR_DOMAIN_RUNTIME,
592 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
593 "%s: unable to UTF-7 stream to UTF-8 string.",
594 function );
595
596 return( -1 );
597 }
598 return( 1 );
599 }
600
601 /* Copies an UTF-8 string from an UTF-7 stream
602 * Returns 1 if successful or -1 on error
603 */
libuna_utf8_string_with_index_copy_from_utf7_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const uint8_t * utf7_stream,size_t utf7_stream_size,libcerror_error_t ** error)604 int libuna_utf8_string_with_index_copy_from_utf7_stream(
605 libuna_utf8_character_t *utf8_string,
606 size_t utf8_string_size,
607 size_t *utf8_string_index,
608 const uint8_t *utf7_stream,
609 size_t utf7_stream_size,
610 libcerror_error_t **error )
611 {
612 static char *function = "libuna_utf8_string_with_index_copy_from_utf7_stream";
613 size_t utf7_stream_index = 0;
614 libuna_unicode_character_t unicode_character = 0;
615 uint32_t utf7_stream_base64_data = 0;
616
617 if( utf8_string == NULL )
618 {
619 libcerror_error_set(
620 error,
621 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
622 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
623 "%s: invalid UTF-8 string.",
624 function );
625
626 return( -1 );
627 }
628 if( utf8_string_size > (size_t) SSIZE_MAX )
629 {
630 libcerror_error_set(
631 error,
632 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
633 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
634 "%s: invalid UTF-8 string size value exceeds maximum.",
635 function );
636
637 return( -1 );
638 }
639 if( utf8_string_index == NULL )
640 {
641 libcerror_error_set(
642 error,
643 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
644 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
645 "%s: invalid UTF-8 string index.",
646 function );
647
648 return( -1 );
649 }
650 if( utf7_stream == NULL )
651 {
652 libcerror_error_set(
653 error,
654 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
655 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
656 "%s: invalid UTF-7 stream.",
657 function );
658
659 return( -1 );
660 }
661 if( utf7_stream_size > (size_t) SSIZE_MAX )
662 {
663 libcerror_error_set(
664 error,
665 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
666 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
667 "%s: invalid UTF-7 stream size value exceeds maximum.",
668 function );
669
670 return( -1 );
671 }
672 if( utf7_stream_size == 0 )
673 {
674 libcerror_error_set(
675 error,
676 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
677 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
678 "%s: missing UTF-7 stream value.",
679 function );
680
681 return( -1 );
682 }
683 while( utf7_stream_index < utf7_stream_size )
684 {
685 /* Convert the UTF-7 stream bytes into an Unicode character
686 */
687 if( libuna_unicode_character_copy_from_utf7_stream(
688 &unicode_character,
689 utf7_stream,
690 utf7_stream_size,
691 &utf7_stream_index,
692 &utf7_stream_base64_data,
693 error ) != 1 )
694 {
695 libcerror_error_set(
696 error,
697 LIBCERROR_ERROR_DOMAIN_CONVERSION,
698 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
699 "%s: unable to copy Unicode character from UTF-7 stream.",
700 function );
701
702 return( -1 );
703 }
704 /* Convert the Unicode character into UTF-8 character bytes
705 */
706 if( libuna_unicode_character_copy_to_utf8(
707 unicode_character,
708 utf8_string,
709 utf8_string_size,
710 utf8_string_index,
711 error ) != 1 )
712 {
713 libcerror_error_set(
714 error,
715 LIBCERROR_ERROR_DOMAIN_CONVERSION,
716 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
717 "%s: unable to copy Unicode character to UTF-8.",
718 function );
719
720 return( -1 );
721 }
722 if( unicode_character == 0 )
723 {
724 break;
725 }
726 }
727 /* Check if the string is terminated with an end-of-string character
728 */
729 if( unicode_character != 0 )
730 {
731 if( *utf8_string_index >= utf8_string_size )
732 {
733 libcerror_error_set(
734 error,
735 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
736 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
737 "%s: UTF-8 string too small.",
738 function );
739
740 return( -1 );
741 }
742 utf8_string[ *utf8_string_index ] = 0;
743
744 *utf8_string_index += 1;
745 }
746 return( 1 );
747 }
748
749 /* Compares an UTF-8 string with an UTF-7 stream
750 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
751 */
libuna_utf8_string_compare_with_utf7_stream(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf7_stream,size_t utf7_stream_size,libcerror_error_t ** error)752 int libuna_utf8_string_compare_with_utf7_stream(
753 const libuna_utf8_character_t *utf8_string,
754 size_t utf8_string_size,
755 const uint8_t *utf7_stream,
756 size_t utf7_stream_size,
757 libcerror_error_t **error )
758 {
759 static char *function = "libuna_utf8_string_compare_with_utf7_stream";
760 size_t utf7_stream_index = 0;
761 size_t utf8_string_index = 0;
762 libuna_unicode_character_t utf8_unicode_character = 0;
763 libuna_unicode_character_t utf7_stream_unicode_character = 0;
764 uint32_t utf7_stream_base64_data = 0;
765
766 if( utf8_string == NULL )
767 {
768 libcerror_error_set(
769 error,
770 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
771 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
772 "%s: invalid UTF-8 string.",
773 function );
774
775 return( -1 );
776 }
777 if( utf8_string_size > (size_t) SSIZE_MAX )
778 {
779 libcerror_error_set(
780 error,
781 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
782 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
783 "%s: invalid UTF-8 string size value exceeds maximum.",
784 function );
785
786 return( -1 );
787 }
788 if( utf7_stream == NULL )
789 {
790 libcerror_error_set(
791 error,
792 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
793 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
794 "%s: invalid UTF-7 stream.",
795 function );
796
797 return( -1 );
798 }
799 if( utf7_stream_size > (size_t) SSIZE_MAX )
800 {
801 libcerror_error_set(
802 error,
803 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
804 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
805 "%s: invalid UTF-7 stream size value exceeds maximum.",
806 function );
807
808 return( -1 );
809 }
810 if( utf7_stream_size == 0 )
811 {
812 libcerror_error_set(
813 error,
814 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
815 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
816 "%s: missing UTF-7 stream value.",
817 function );
818
819 return( -1 );
820 }
821 if( ( utf8_string_size >= 1 )
822 && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
823 {
824 utf8_string_size -= 1;
825 }
826 /* Check if the UTF-7 stream is terminated with zero bytes
827 */
828 if( ( utf7_stream_size >= 1 )
829 && ( utf7_stream[ utf7_stream_size - 1 ] == 0 ) )
830 {
831 utf7_stream_size -= 1;
832 }
833 while( ( utf8_string_index < utf8_string_size )
834 && ( utf7_stream_index < utf7_stream_size ) )
835 {
836 /* Convert the UTF-8 character bytes into an Unicode character
837 */
838 if( libuna_unicode_character_copy_from_utf8(
839 &utf8_unicode_character,
840 utf8_string,
841 utf8_string_size,
842 &utf8_string_index,
843 error ) != 1 )
844 {
845 libcerror_error_set(
846 error,
847 LIBCERROR_ERROR_DOMAIN_CONVERSION,
848 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
849 "%s: unable to copy Unicode character from UTF-8.",
850 function );
851
852 return( -1 );
853 }
854 /* Convert the UTF-7 character bytes into an Unicode character
855 */
856 if( libuna_unicode_character_copy_from_utf7_stream(
857 &utf7_stream_unicode_character,
858 utf7_stream,
859 utf7_stream_size,
860 &utf7_stream_index,
861 &utf7_stream_base64_data,
862 error ) != 1 )
863 {
864 libcerror_error_set(
865 error,
866 LIBCERROR_ERROR_DOMAIN_CONVERSION,
867 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
868 "%s: unable to copy Unicode character from UTF-7 stream.",
869 function );
870
871 return( -1 );
872 }
873 if( utf8_unicode_character < utf7_stream_unicode_character )
874 {
875 return( LIBUNA_COMPARE_LESS );
876 }
877 else if( utf8_unicode_character > utf7_stream_unicode_character )
878 {
879 return( LIBUNA_COMPARE_GREATER );
880 }
881 }
882 /* Check if both strings were entirely processed
883 */
884 if( utf8_string_index < utf8_string_size )
885 {
886 return( LIBUNA_COMPARE_GREATER );
887 }
888 else if( utf7_stream_index < utf7_stream_size )
889 {
890 return( LIBUNA_COMPARE_LESS );
891 }
892 return( LIBUNA_COMPARE_EQUAL );
893 }
894
895 /* Determines the size of an UTF-8 string from an UTF-8 stream
896 * Returns 1 if successful or -1 on error
897 */
libuna_utf8_string_size_from_utf8_stream(const uint8_t * utf8_stream,size_t utf8_stream_size,size_t * utf8_string_size,libcerror_error_t ** error)898 int libuna_utf8_string_size_from_utf8_stream(
899 const uint8_t *utf8_stream,
900 size_t utf8_stream_size,
901 size_t *utf8_string_size,
902 libcerror_error_t **error )
903 {
904 static char *function = "libuna_utf8_string_size_from_utf8_stream";
905 size_t utf8_stream_index = 0;
906 libuna_unicode_character_t unicode_character = 0;
907
908 if( utf8_stream == NULL )
909 {
910 libcerror_error_set(
911 error,
912 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
913 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
914 "%s: invalid UTF-8 stream.",
915 function );
916
917 return( -1 );
918 }
919 if( utf8_stream_size > (size_t) SSIZE_MAX )
920 {
921 libcerror_error_set(
922 error,
923 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
924 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
925 "%s: invalid UTF-8 stream size value exceeds maximum.",
926 function );
927
928 return( -1 );
929 }
930 if( utf8_string_size == NULL )
931 {
932 libcerror_error_set(
933 error,
934 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
935 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
936 "%s: invalid UTF-8 string size.",
937 function );
938
939 return( -1 );
940 }
941 *utf8_string_size = 0;
942
943 if( utf8_stream_size == 0 )
944 {
945 return( 1 );
946 }
947 /* Check if UTF-8 stream starts with a byte order mark (BOM)
948 */
949 if( utf8_stream_size >= 3 )
950 {
951 if( ( utf8_stream[ 0 ] == 0x0ef )
952 && ( utf8_stream[ 1 ] == 0x0bb )
953 && ( utf8_stream[ 2 ] == 0x0bf ) )
954 {
955 utf8_stream_index += 3;
956 }
957 }
958 while( utf8_stream_index < utf8_stream_size )
959 {
960 /* Convert the UTF-8 stream bytes into an Unicode character
961 */
962 if( libuna_unicode_character_copy_from_utf8(
963 &unicode_character,
964 utf8_stream,
965 utf8_stream_size,
966 &utf8_stream_index,
967 error ) != 1 )
968 {
969 libcerror_error_set(
970 error,
971 LIBCERROR_ERROR_DOMAIN_CONVERSION,
972 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
973 "%s: unable to copy Unicode character from UTF-8 stream.",
974 function );
975
976 return( -1 );
977 }
978 /* Determine how many UTF-8 character bytes are required
979 */
980 if( libuna_unicode_character_size_to_utf8(
981 unicode_character,
982 utf8_string_size,
983 error ) != 1 )
984 {
985 libcerror_error_set(
986 error,
987 LIBCERROR_ERROR_DOMAIN_CONVERSION,
988 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
989 "%s: unable to unable to determine size of Unicode character in UTF-8.",
990 function );
991
992 return( -1 );
993 }
994 if( unicode_character == 0 )
995 {
996 break;
997 }
998 }
999 /* Check if the string is terminated with an end-of-string character
1000 */
1001 if( unicode_character != 0 )
1002 {
1003 *utf8_string_size += 1;
1004 }
1005 return( 1 );
1006 }
1007
1008 /* Copies an UTF-8 string from an UTF-8 stream
1009 * Returns 1 if successful or -1 on error
1010 */
libuna_utf8_string_copy_from_utf8_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf8_stream,size_t utf8_stream_size,libcerror_error_t ** error)1011 int libuna_utf8_string_copy_from_utf8_stream(
1012 libuna_utf8_character_t *utf8_string,
1013 size_t utf8_string_size,
1014 const uint8_t *utf8_stream,
1015 size_t utf8_stream_size,
1016 libcerror_error_t **error )
1017 {
1018 static char *function = "libuna_utf8_string_copy_from_utf8_stream";
1019 size_t utf8_string_index = 0;
1020
1021 if( libuna_utf8_string_with_index_copy_from_utf8_stream(
1022 utf8_string,
1023 utf8_string_size,
1024 &utf8_string_index,
1025 utf8_stream,
1026 utf8_stream_size,
1027 error ) != 1 )
1028 {
1029 libcerror_error_set(
1030 error,
1031 LIBCERROR_ERROR_DOMAIN_RUNTIME,
1032 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
1033 "%s: unable to UTF-8 stream to UTF-8 string.",
1034 function );
1035
1036 return( -1 );
1037 }
1038 return( 1 );
1039 }
1040
1041 /* Copies an UTF-8 string from an UTF-8 stream
1042 * Returns 1 if successful or -1 on error
1043 */
libuna_utf8_string_with_index_copy_from_utf8_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const uint8_t * utf8_stream,size_t utf8_stream_size,libcerror_error_t ** error)1044 int libuna_utf8_string_with_index_copy_from_utf8_stream(
1045 libuna_utf8_character_t *utf8_string,
1046 size_t utf8_string_size,
1047 size_t *utf8_string_index,
1048 const uint8_t *utf8_stream,
1049 size_t utf8_stream_size,
1050 libcerror_error_t **error )
1051 {
1052 static char *function = "libuna_utf8_string_with_index_copy_from_utf8_stream";
1053 size_t utf8_stream_index = 0;
1054 libuna_unicode_character_t unicode_character = 0;
1055
1056 if( utf8_string == NULL )
1057 {
1058 libcerror_error_set(
1059 error,
1060 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1061 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1062 "%s: invalid UTF-8 string.",
1063 function );
1064
1065 return( -1 );
1066 }
1067 if( utf8_string_size > (size_t) SSIZE_MAX )
1068 {
1069 libcerror_error_set(
1070 error,
1071 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1072 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1073 "%s: invalid UTF-8 string size value exceeds maximum.",
1074 function );
1075
1076 return( -1 );
1077 }
1078 if( utf8_string_index == NULL )
1079 {
1080 libcerror_error_set(
1081 error,
1082 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1083 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1084 "%s: invalid UTF-8 string index.",
1085 function );
1086
1087 return( -1 );
1088 }
1089 if( utf8_stream == NULL )
1090 {
1091 libcerror_error_set(
1092 error,
1093 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1094 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1095 "%s: invalid UTF-8 stream.",
1096 function );
1097
1098 return( -1 );
1099 }
1100 if( utf8_stream_size > (size_t) SSIZE_MAX )
1101 {
1102 libcerror_error_set(
1103 error,
1104 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1105 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1106 "%s: invalid UTF-8 stream size value exceeds maximum.",
1107 function );
1108
1109 return( -1 );
1110 }
1111 if( utf8_stream_size == 0 )
1112 {
1113 libcerror_error_set(
1114 error,
1115 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1116 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1117 "%s: missing UTF-8 stream value.",
1118 function );
1119
1120 return( -1 );
1121 }
1122 /* Check if UTF-8 stream starts with a byte order mark (BOM)
1123 */
1124 if( utf8_stream_size >= 3 )
1125 {
1126 if( ( utf8_stream[ 0 ] == 0x0ef )
1127 && ( utf8_stream[ 1 ] == 0x0bb )
1128 && ( utf8_stream[ 2 ] == 0x0bf ) )
1129 {
1130 utf8_stream_index += 3;
1131 }
1132 }
1133 while( utf8_stream_index < utf8_stream_size )
1134 {
1135 /* Convert the UTF-8 stream bytes into an Unicode character
1136 */
1137 if( libuna_unicode_character_copy_from_utf8(
1138 &unicode_character,
1139 utf8_stream,
1140 utf8_stream_size,
1141 &utf8_stream_index,
1142 error ) != 1 )
1143 {
1144 libcerror_error_set(
1145 error,
1146 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1147 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1148 "%s: unable to copy Unicode character from UTF-8 stream.",
1149 function );
1150
1151 return( -1 );
1152 }
1153 /* Convert the Unicode character into UTF-8 character bytes
1154 */
1155 if( libuna_unicode_character_copy_to_utf8(
1156 unicode_character,
1157 utf8_string,
1158 utf8_string_size,
1159 utf8_string_index,
1160 error ) != 1 )
1161 {
1162 libcerror_error_set(
1163 error,
1164 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1165 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1166 "%s: unable to copy Unicode character to UTF-8.",
1167 function );
1168
1169 return( -1 );
1170 }
1171 if( unicode_character == 0 )
1172 {
1173 break;
1174 }
1175 }
1176 /* Check if the string is terminated with an end-of-string character
1177 */
1178 if( unicode_character != 0 )
1179 {
1180 if( *utf8_string_index >= utf8_string_size )
1181 {
1182 libcerror_error_set(
1183 error,
1184 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1185 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
1186 "%s: UTF-8 string too small.",
1187 function );
1188
1189 return( -1 );
1190 }
1191 utf8_string[ *utf8_string_index ] = 0;
1192
1193 *utf8_string_index += 1;
1194 }
1195 return( 1 );
1196 }
1197
1198 /* Compares an UTF-8 string with an UTF-8 stream
1199 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
1200 */
libuna_utf8_string_compare_with_utf8_stream(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf8_stream,size_t utf8_stream_size,libcerror_error_t ** error)1201 int libuna_utf8_string_compare_with_utf8_stream(
1202 const libuna_utf8_character_t *utf8_string,
1203 size_t utf8_string_size,
1204 const uint8_t *utf8_stream,
1205 size_t utf8_stream_size,
1206 libcerror_error_t **error )
1207 {
1208 static char *function = "libuna_utf8_string_compare_with_utf8_stream";
1209 size_t utf8_stream_index = 0;
1210 size_t utf8_string_index = 0;
1211 libuna_unicode_character_t utf8_unicode_character = 0;
1212 libuna_unicode_character_t utf8_stream_unicode_character = 0;
1213
1214 if( utf8_string == NULL )
1215 {
1216 libcerror_error_set(
1217 error,
1218 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1219 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1220 "%s: invalid UTF-8 string.",
1221 function );
1222
1223 return( -1 );
1224 }
1225 if( utf8_string_size > (size_t) SSIZE_MAX )
1226 {
1227 libcerror_error_set(
1228 error,
1229 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1230 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1231 "%s: invalid UTF-8 string size value exceeds maximum.",
1232 function );
1233
1234 return( -1 );
1235 }
1236 if( utf8_stream == NULL )
1237 {
1238 libcerror_error_set(
1239 error,
1240 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1241 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1242 "%s: invalid UTF-8 stream.",
1243 function );
1244
1245 return( -1 );
1246 }
1247 if( utf8_stream_size > (size_t) SSIZE_MAX )
1248 {
1249 libcerror_error_set(
1250 error,
1251 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1252 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1253 "%s: invalid UTF-8 stream size value exceeds maximum.",
1254 function );
1255
1256 return( -1 );
1257 }
1258 if( utf8_stream_size == 0 )
1259 {
1260 libcerror_error_set(
1261 error,
1262 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1263 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1264 "%s: missing UTF-8 stream value.",
1265 function );
1266
1267 return( -1 );
1268 }
1269 /* Check if UTF-8 stream starts with a byte order mark (BOM)
1270 */
1271 if( utf8_stream_size >= 3 )
1272 {
1273 if( ( utf8_stream[ 0 ] == 0x0ef )
1274 && ( utf8_stream[ 1 ] == 0x0bb )
1275 && ( utf8_stream[ 2 ] == 0x0bf ) )
1276 {
1277 utf8_stream_index += 3;
1278 }
1279 }
1280 if( ( utf8_string_size >= 1 )
1281 && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
1282 {
1283 utf8_string_size -= 1;
1284 }
1285 /* Check if the UTF-8 stream is terminated with zero bytes
1286 */
1287 if( ( utf8_stream_size >= 1 )
1288 && ( utf8_stream[ utf8_stream_size - 1 ] == 0 ) )
1289 {
1290 utf8_stream_size -= 1;
1291 }
1292 while( ( utf8_string_index < utf8_string_size )
1293 && ( utf8_stream_index < utf8_stream_size ) )
1294 {
1295 /* Convert the UTF-8 character bytes into an Unicode character
1296 */
1297 if( libuna_unicode_character_copy_from_utf8(
1298 &utf8_unicode_character,
1299 utf8_string,
1300 utf8_string_size,
1301 &utf8_string_index,
1302 error ) != 1 )
1303 {
1304 libcerror_error_set(
1305 error,
1306 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1307 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1308 "%s: unable to copy Unicode character from UTF-8.",
1309 function );
1310
1311 return( -1 );
1312 }
1313 /* Convert the UTF-8 character bytes into an Unicode character
1314 */
1315 if( libuna_unicode_character_copy_from_utf8(
1316 &utf8_stream_unicode_character,
1317 utf8_stream,
1318 utf8_stream_size,
1319 &utf8_stream_index,
1320 error ) != 1 )
1321 {
1322 libcerror_error_set(
1323 error,
1324 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1325 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1326 "%s: unable to copy Unicode character from UTF-8 stream.",
1327 function );
1328
1329 return( -1 );
1330 }
1331 if( utf8_unicode_character < utf8_stream_unicode_character )
1332 {
1333 return( LIBUNA_COMPARE_LESS );
1334 }
1335 else if( utf8_unicode_character > utf8_stream_unicode_character )
1336 {
1337 return( LIBUNA_COMPARE_GREATER );
1338 }
1339 }
1340 /* Check if both strings were entirely processed
1341 */
1342 if( utf8_string_index < utf8_string_size )
1343 {
1344 return( LIBUNA_COMPARE_GREATER );
1345 }
1346 else if( utf8_stream_index < utf8_stream_size )
1347 {
1348 return( LIBUNA_COMPARE_LESS );
1349 }
1350 return( LIBUNA_COMPARE_EQUAL );
1351 }
1352
1353 /* Determines the size of an UTF-8 string from an UTF-16 string
1354 * Returns 1 if successful or -1 on error
1355 */
libuna_utf8_string_size_from_utf16(const libuna_utf16_character_t * utf16_string,size_t utf16_string_size,size_t * utf8_string_size,libcerror_error_t ** error)1356 int libuna_utf8_string_size_from_utf16(
1357 const libuna_utf16_character_t *utf16_string,
1358 size_t utf16_string_size,
1359 size_t *utf8_string_size,
1360 libcerror_error_t **error )
1361 {
1362 static char *function = "libuna_utf8_string_size_from_utf16";
1363 size_t utf16_string_index = 0;
1364 libuna_unicode_character_t unicode_character = 0;
1365
1366 if( utf16_string == NULL )
1367 {
1368 libcerror_error_set(
1369 error,
1370 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1371 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1372 "%s: invalid UTF-16 string.",
1373 function );
1374
1375 return( -1 );
1376 }
1377 if( utf16_string_size > (size_t) SSIZE_MAX )
1378 {
1379 libcerror_error_set(
1380 error,
1381 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1382 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1383 "%s: invalid UTF-16 string size value exceeds maximum.",
1384 function );
1385
1386 return( -1 );
1387 }
1388 if( utf8_string_size == NULL )
1389 {
1390 libcerror_error_set(
1391 error,
1392 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1393 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1394 "%s: invalid UTF-8 string size.",
1395 function );
1396
1397 return( -1 );
1398 }
1399 *utf8_string_size = 0;
1400
1401 if( utf16_string_size == 0 )
1402 {
1403 return( 1 );
1404 }
1405 while( utf16_string_index < utf16_string_size )
1406 {
1407 /* Convert the UTF-16 character bytes into an Unicode character
1408 */
1409 if( libuna_unicode_character_copy_from_utf16(
1410 &unicode_character,
1411 utf16_string,
1412 utf16_string_size,
1413 &utf16_string_index,
1414 error ) != 1 )
1415 {
1416 libcerror_error_set(
1417 error,
1418 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1419 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1420 "%s: unable to copy Unicode character from UTF-16.",
1421 function );
1422
1423 return( -1 );
1424 }
1425 /* Determine how many UTF-8 character bytes are required
1426 */
1427 if( libuna_unicode_character_size_to_utf8(
1428 unicode_character,
1429 utf8_string_size,
1430 error ) != 1 )
1431 {
1432 libcerror_error_set(
1433 error,
1434 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1435 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1436 "%s: unable to unable to determine size of Unicode character in UTF-8.",
1437 function );
1438
1439 return( -1 );
1440 }
1441 }
1442 /* Check if the string is terminated with an end-of-string character
1443 */
1444 if( unicode_character != 0 )
1445 {
1446 *utf8_string_size += 1;
1447 }
1448 return( 1 );
1449 }
1450
1451 /* Copies an UTF-8 string from an UTF-16 string
1452 * Returns 1 if successful or -1 on error
1453 */
libuna_utf8_string_copy_from_utf16(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const libuna_utf16_character_t * utf16_string,size_t utf16_string_size,libcerror_error_t ** error)1454 int libuna_utf8_string_copy_from_utf16(
1455 libuna_utf8_character_t *utf8_string,
1456 size_t utf8_string_size,
1457 const libuna_utf16_character_t *utf16_string,
1458 size_t utf16_string_size,
1459 libcerror_error_t **error )
1460 {
1461 static char *function = "libuna_utf8_string_copy_from_utf16";
1462 size_t utf8_string_index = 0;
1463
1464 if( libuna_utf8_string_with_index_copy_from_utf16(
1465 utf8_string,
1466 utf8_string_size,
1467 &utf8_string_index,
1468 utf16_string,
1469 utf16_string_size,
1470 error ) != 1 )
1471 {
1472 libcerror_error_set(
1473 error,
1474 LIBCERROR_ERROR_DOMAIN_RUNTIME,
1475 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
1476 "%s: unable to copy UTF-16 string to UTF-8 string.",
1477 function );
1478
1479 return( -1 );
1480 }
1481 return( 1 );
1482 }
1483
1484 /* Copies an UTF-8 string from an UTF-16 string
1485 * Returns 1 if successful or -1 on error
1486 */
libuna_utf8_string_with_index_copy_from_utf16(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const libuna_utf16_character_t * utf16_string,size_t utf16_string_size,libcerror_error_t ** error)1487 int libuna_utf8_string_with_index_copy_from_utf16(
1488 libuna_utf8_character_t *utf8_string,
1489 size_t utf8_string_size,
1490 size_t *utf8_string_index,
1491 const libuna_utf16_character_t *utf16_string,
1492 size_t utf16_string_size,
1493 libcerror_error_t **error )
1494 {
1495 static char *function = "libuna_utf8_string_with_index_copy_from_utf16";
1496 size_t utf16_string_index = 0;
1497 libuna_unicode_character_t unicode_character = 0;
1498
1499 if( utf8_string == NULL )
1500 {
1501 libcerror_error_set(
1502 error,
1503 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1504 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1505 "%s: invalid UTF-8 string.",
1506 function );
1507
1508 return( -1 );
1509 }
1510 if( utf8_string_size > (size_t) SSIZE_MAX )
1511 {
1512 libcerror_error_set(
1513 error,
1514 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1515 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1516 "%s: invalid UTF-8 string size value exceeds maximum.",
1517 function );
1518
1519 return( -1 );
1520 }
1521 if( utf8_string_index == NULL )
1522 {
1523 libcerror_error_set(
1524 error,
1525 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1526 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1527 "%s: invalid UTF-8 string index.",
1528 function );
1529
1530 return( -1 );
1531 }
1532 if( utf16_string == NULL )
1533 {
1534 libcerror_error_set(
1535 error,
1536 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1537 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1538 "%s: invalid UTF-16 string.",
1539 function );
1540
1541 return( -1 );
1542 }
1543 if( utf16_string_size > (size_t) SSIZE_MAX )
1544 {
1545 libcerror_error_set(
1546 error,
1547 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1548 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1549 "%s: invalid UTF-16 string size value exceeds maximum.",
1550 function );
1551
1552 return( -1 );
1553 }
1554 if( utf16_string_size == 0 )
1555 {
1556 libcerror_error_set(
1557 error,
1558 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1559 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1560 "%s: missing UTF-16 string value.",
1561 function );
1562
1563 return( -1 );
1564 }
1565 while( utf16_string_index < utf16_string_size )
1566 {
1567 /* Convert the UTF-16 character bytes into an Unicode character
1568 */
1569 if( libuna_unicode_character_copy_from_utf16(
1570 &unicode_character,
1571 utf16_string,
1572 utf16_string_size,
1573 &utf16_string_index,
1574 error ) != 1 )
1575 {
1576 libcerror_error_set(
1577 error,
1578 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1579 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1580 "%s: unable to copy Unicode character from UTF-16.",
1581 function );
1582
1583 return( -1 );
1584 }
1585 /* Convert the Unicode character into UTF-8 character bytes
1586 */
1587 if( libuna_unicode_character_copy_to_utf8(
1588 unicode_character,
1589 utf8_string,
1590 utf8_string_size,
1591 utf8_string_index,
1592 error ) != 1 )
1593 {
1594 libcerror_error_set(
1595 error,
1596 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1597 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1598 "%s: unable to copy Unicode character to UTF-8.",
1599 function );
1600
1601 return( -1 );
1602 }
1603 }
1604 /* Check if the string is terminated with an end-of-string character
1605 */
1606 if( unicode_character != 0 )
1607 {
1608 if( *utf8_string_index >= utf8_string_size )
1609 {
1610 libcerror_error_set(
1611 error,
1612 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1613 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
1614 "%s: UTF-8 string too small.",
1615 function );
1616
1617 return( -1 );
1618 }
1619 utf8_string[ *utf8_string_index ] = 0;
1620
1621 *utf8_string_index += 1;
1622 }
1623 return( 1 );
1624 }
1625
1626 /* Compares an UTF-8 string with an UTF-16 string
1627 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
1628 */
libuna_utf8_string_compare_with_utf16(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const libuna_utf16_character_t * utf16_string,size_t utf16_string_size,libcerror_error_t ** error)1629 int libuna_utf8_string_compare_with_utf16(
1630 const libuna_utf8_character_t *utf8_string,
1631 size_t utf8_string_size,
1632 const libuna_utf16_character_t *utf16_string,
1633 size_t utf16_string_size,
1634 libcerror_error_t **error )
1635 {
1636 static char *function = "libuna_utf8_string_compare_with_utf16";
1637 size_t utf16_string_index = 0;
1638 size_t utf8_string_index = 0;
1639 libuna_unicode_character_t utf8_unicode_character = 0;
1640 libuna_unicode_character_t utf16_unicode_character = 0;
1641
1642 if( utf8_string == NULL )
1643 {
1644 libcerror_error_set(
1645 error,
1646 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1647 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1648 "%s: invalid UTF-8 string.",
1649 function );
1650
1651 return( -1 );
1652 }
1653 if( utf8_string_size > (size_t) SSIZE_MAX )
1654 {
1655 libcerror_error_set(
1656 error,
1657 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1658 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1659 "%s: invalid UTF-8 string size value exceeds maximum.",
1660 function );
1661
1662 return( -1 );
1663 }
1664 if( utf16_string == NULL )
1665 {
1666 libcerror_error_set(
1667 error,
1668 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1669 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1670 "%s: invalid UTF-16 string.",
1671 function );
1672
1673 return( -1 );
1674 }
1675 if( utf16_string_size > (size_t) SSIZE_MAX )
1676 {
1677 libcerror_error_set(
1678 error,
1679 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1680 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1681 "%s: invalid UTF-16 string size value exceeds maximum.",
1682 function );
1683
1684 return( -1 );
1685 }
1686 if( utf16_string_size == 0 )
1687 {
1688 libcerror_error_set(
1689 error,
1690 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1691 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1692 "%s: missing UTF-16 string value.",
1693 function );
1694
1695 return( -1 );
1696 }
1697 if( ( utf8_string_size >= 1 )
1698 && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
1699 {
1700 utf8_string_size -= 1;
1701 }
1702 if( ( utf16_string_size >= 1 )
1703 && ( utf16_string[ utf16_string_size - 1 ] == 0 ) )
1704 {
1705 utf16_string_size -= 1;
1706 }
1707 while( ( utf8_string_index < utf8_string_size )
1708 && ( utf16_string_index < utf16_string_size ) )
1709 {
1710 /* Convert the UTF-8 character bytes into an Unicode character
1711 */
1712 if( libuna_unicode_character_copy_from_utf8(
1713 &utf8_unicode_character,
1714 utf8_string,
1715 utf8_string_size,
1716 &utf8_string_index,
1717 error ) != 1 )
1718 {
1719 libcerror_error_set(
1720 error,
1721 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1722 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1723 "%s: unable to copy Unicode character from UTF-8.",
1724 function );
1725
1726 return( -1 );
1727 }
1728 /* Convert the UTF-16 character bytes into an Unicode character
1729 */
1730 if( libuna_unicode_character_copy_from_utf16(
1731 &utf16_unicode_character,
1732 utf16_string,
1733 utf16_string_size,
1734 &utf16_string_index,
1735 error ) != 1 )
1736 {
1737 libcerror_error_set(
1738 error,
1739 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1740 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1741 "%s: unable to copy Unicode character from UTF-16.",
1742 function );
1743
1744 return( -1 );
1745 }
1746 if( utf8_unicode_character < utf16_unicode_character )
1747 {
1748 return( LIBUNA_COMPARE_LESS );
1749 }
1750 else if( utf8_unicode_character > utf16_unicode_character )
1751 {
1752 return( LIBUNA_COMPARE_GREATER );
1753 }
1754 }
1755 /* Check if both strings were entirely processed
1756 */
1757 if( utf8_string_index < utf8_string_size )
1758 {
1759 return( LIBUNA_COMPARE_GREATER );
1760 }
1761 else if( utf16_string_index < utf16_string_size )
1762 {
1763 return( LIBUNA_COMPARE_LESS );
1764 }
1765 return( LIBUNA_COMPARE_EQUAL );
1766 }
1767
1768 /* Determines the size of an UTF-8 string from an UTF-16 stream
1769 * Returns 1 if successful or -1 on error
1770 */
libuna_utf8_string_size_from_utf16_stream(const uint8_t * utf16_stream,size_t utf16_stream_size,int byte_order,size_t * utf8_string_size,libcerror_error_t ** error)1771 int libuna_utf8_string_size_from_utf16_stream(
1772 const uint8_t *utf16_stream,
1773 size_t utf16_stream_size,
1774 int byte_order,
1775 size_t *utf8_string_size,
1776 libcerror_error_t **error )
1777 {
1778 static char *function = "libuna_utf8_string_size_from_utf16_stream";
1779 size_t utf16_stream_index = 0;
1780 libuna_unicode_character_t unicode_character = 0;
1781 int read_byte_order = 0;
1782
1783 if( utf16_stream == NULL )
1784 {
1785 libcerror_error_set(
1786 error,
1787 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1788 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1789 "%s: invalid UTF-16 stream.",
1790 function );
1791
1792 return( -1 );
1793 }
1794 if( utf16_stream_size > (size_t) SSIZE_MAX )
1795 {
1796 libcerror_error_set(
1797 error,
1798 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1799 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1800 "%s: invalid UTF-16 stream size value exceeds maximum.",
1801 function );
1802
1803 return( -1 );
1804 }
1805 if( ( utf16_stream_size % 2 ) != 0 )
1806 {
1807 libcerror_error_set(
1808 error,
1809 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1810 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
1811 "%s: missing UTF-16 stream value.",
1812 function );
1813
1814 return( -1 );
1815 }
1816 if( utf8_string_size == NULL )
1817 {
1818 libcerror_error_set(
1819 error,
1820 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1821 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1822 "%s: invalid UTF-8 string size.",
1823 function );
1824
1825 return( -1 );
1826 }
1827 *utf8_string_size = 0;
1828
1829 if( utf16_stream_size == 0 )
1830 {
1831 return( 1 );
1832 }
1833 /* Check if UTF-16 stream is in big or little endian
1834 */
1835 if( utf16_stream_size >= 2 )
1836 {
1837 if( ( utf16_stream[ 0 ] == 0xfe )
1838 && ( utf16_stream[ 1 ] == 0xff ) )
1839 {
1840 read_byte_order = LIBUNA_ENDIAN_BIG;
1841 utf16_stream_index = 2;
1842 }
1843 else if( ( utf16_stream[ 0 ] == 0xff )
1844 && ( utf16_stream[ 1 ] == 0xfe ) )
1845 {
1846 read_byte_order = LIBUNA_ENDIAN_LITTLE;
1847 utf16_stream_index = 2;
1848 }
1849 if( byte_order == 0 )
1850 {
1851 byte_order = read_byte_order;
1852 }
1853 }
1854 if( ( byte_order != LIBUNA_ENDIAN_BIG )
1855 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
1856 {
1857 libcerror_error_set(
1858 error,
1859 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1860 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
1861 "%s: unsupported byte order.",
1862 function );
1863
1864 return( -1 );
1865 }
1866 while( ( utf16_stream_index + 1 ) < utf16_stream_size )
1867 {
1868 /* Convert the UTF-16 stream bytes into an Unicode character
1869 */
1870 if( libuna_unicode_character_copy_from_utf16_stream(
1871 &unicode_character,
1872 utf16_stream,
1873 utf16_stream_size,
1874 &utf16_stream_index,
1875 byte_order,
1876 error ) != 1 )
1877 {
1878 libcerror_error_set(
1879 error,
1880 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1881 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1882 "%s: unable to copy Unicode character from UTF-16 stream.",
1883 function );
1884
1885 return( -1 );
1886 }
1887 /* Determine how many UTF-8 character bytes are required
1888 */
1889 if( libuna_unicode_character_size_to_utf8(
1890 unicode_character,
1891 utf8_string_size,
1892 error ) != 1 )
1893 {
1894 libcerror_error_set(
1895 error,
1896 LIBCERROR_ERROR_DOMAIN_CONVERSION,
1897 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1898 "%s: unable to unable to determine size of Unicode character in UTF-8.",
1899 function );
1900
1901 return( -1 );
1902 }
1903 if( unicode_character == 0 )
1904 {
1905 break;
1906 }
1907 }
1908 /* Check if the string is terminated with an end-of-string character
1909 */
1910 if( unicode_character != 0 )
1911 {
1912 *utf8_string_size += 1;
1913 }
1914 return( 1 );
1915 }
1916
1917 /* Copies an UTF-8 string from an UTF-16 stream
1918 * Returns 1 if successful or -1 on error
1919 */
libuna_utf8_string_copy_from_utf16_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf16_stream,size_t utf16_stream_size,int byte_order,libcerror_error_t ** error)1920 int libuna_utf8_string_copy_from_utf16_stream(
1921 libuna_utf8_character_t *utf8_string,
1922 size_t utf8_string_size,
1923 const uint8_t *utf16_stream,
1924 size_t utf16_stream_size,
1925 int byte_order,
1926 libcerror_error_t **error )
1927 {
1928 static char *function = "libuna_utf8_string_copy_from_utf16_stream";
1929 size_t utf8_string_index = 0;
1930
1931 if( libuna_utf8_string_with_index_copy_from_utf16_stream(
1932 utf8_string,
1933 utf8_string_size,
1934 &utf8_string_index,
1935 utf16_stream,
1936 utf16_stream_size,
1937 byte_order,
1938 error ) != 1 )
1939 {
1940 libcerror_error_set(
1941 error,
1942 LIBCERROR_ERROR_DOMAIN_RUNTIME,
1943 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
1944 "%s: unable to copy UTF-16 stream to UTF-8 string.",
1945 function );
1946
1947 return( -1 );
1948 }
1949 return( 1 );
1950 }
1951
1952 /* Copies an UTF-8 string from an UTF-16 stream
1953 * Returns 1 if successful or -1 on error
1954 */
libuna_utf8_string_with_index_copy_from_utf16_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const uint8_t * utf16_stream,size_t utf16_stream_size,int byte_order,libcerror_error_t ** error)1955 int libuna_utf8_string_with_index_copy_from_utf16_stream(
1956 libuna_utf8_character_t *utf8_string,
1957 size_t utf8_string_size,
1958 size_t *utf8_string_index,
1959 const uint8_t *utf16_stream,
1960 size_t utf16_stream_size,
1961 int byte_order,
1962 libcerror_error_t **error )
1963 {
1964 static char *function = "libuna_utf8_string_with_index_copy_from_utf16_stream";
1965 size_t utf16_stream_index = 0;
1966 libuna_unicode_character_t unicode_character = 0;
1967 int read_byte_order = 0;
1968
1969 if( utf8_string == NULL )
1970 {
1971 libcerror_error_set(
1972 error,
1973 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1974 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1975 "%s: invalid UTF-8 string.",
1976 function );
1977
1978 return( -1 );
1979 }
1980 if( utf8_string_size > (size_t) SSIZE_MAX )
1981 {
1982 libcerror_error_set(
1983 error,
1984 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1985 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1986 "%s: invalid UTF-8 string size value exceeds maximum.",
1987 function );
1988
1989 return( -1 );
1990 }
1991 if( utf8_string_index == NULL )
1992 {
1993 libcerror_error_set(
1994 error,
1995 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1996 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1997 "%s: invalid UTF-8 string index.",
1998 function );
1999
2000 return( -1 );
2001 }
2002 if( utf16_stream == NULL )
2003 {
2004 libcerror_error_set(
2005 error,
2006 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2007 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2008 "%s: invalid UTF-16 stream.",
2009 function );
2010
2011 return( -1 );
2012 }
2013 if( utf16_stream_size > (size_t) SSIZE_MAX )
2014 {
2015 libcerror_error_set(
2016 error,
2017 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2018 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2019 "%s: invalid UTF-16 stream size value exceeds maximum.",
2020 function );
2021
2022 return( -1 );
2023 }
2024 if( ( utf16_stream_size == 0 )
2025 || ( ( utf16_stream_size % 2 ) != 0 ) )
2026 {
2027 libcerror_error_set(
2028 error,
2029 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2030 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2031 "%s: missing UTF-16 stream value.",
2032 function );
2033
2034 return( -1 );
2035 }
2036 /* Check if UTF-16 stream is in big or little endian
2037 */
2038 if( utf16_stream_size >= 2 )
2039 {
2040 if( ( utf16_stream[ 0 ] == 0xfe )
2041 && ( utf16_stream[ 1 ] == 0xff ) )
2042 {
2043 read_byte_order = LIBUNA_ENDIAN_BIG;
2044 utf16_stream_index = 2;
2045 }
2046 else if( ( utf16_stream[ 0 ] == 0xff )
2047 && ( utf16_stream[ 1 ] == 0xfe ) )
2048 {
2049 read_byte_order = LIBUNA_ENDIAN_LITTLE;
2050 utf16_stream_index = 2;
2051 }
2052 if( byte_order == 0 )
2053 {
2054 byte_order = read_byte_order;
2055 }
2056 }
2057 if( ( byte_order != LIBUNA_ENDIAN_BIG )
2058 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
2059 {
2060 libcerror_error_set(
2061 error,
2062 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2063 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2064 "%s: unsupported byte order.",
2065 function );
2066
2067 return( -1 );
2068 }
2069 while( ( utf16_stream_index + 1 ) < utf16_stream_size )
2070 {
2071 /* Convert the UTF-16 stream bytes into an Unicode character
2072 */
2073 if( libuna_unicode_character_copy_from_utf16_stream(
2074 &unicode_character,
2075 utf16_stream,
2076 utf16_stream_size,
2077 &utf16_stream_index,
2078 byte_order,
2079 error ) != 1 )
2080 {
2081 libcerror_error_set(
2082 error,
2083 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2084 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2085 "%s: unable to copy Unicode character from UTF-16 stream.",
2086 function );
2087
2088 return( -1 );
2089 }
2090 /* Convert the Unicode character into UTF-8 character bytes
2091 */
2092 if( libuna_unicode_character_copy_to_utf8(
2093 unicode_character,
2094 utf8_string,
2095 utf8_string_size,
2096 utf8_string_index,
2097 error ) != 1 )
2098 {
2099 libcerror_error_set(
2100 error,
2101 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2102 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2103 "%s: unable to copy Unicode character to UTF-8.",
2104 function );
2105
2106 return( -1 );
2107 }
2108 if( unicode_character == 0 )
2109 {
2110 break;
2111 }
2112 }
2113 /* Check if the string is terminated with an end-of-string character
2114 */
2115 if( unicode_character != 0 )
2116 {
2117 if( *utf8_string_index >= utf8_string_size )
2118 {
2119 libcerror_error_set(
2120 error,
2121 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2122 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2123 "%s: UTF-8 string too small.",
2124 function );
2125
2126 return( -1 );
2127 }
2128 utf8_string[ *utf8_string_index ] = 0;
2129
2130 *utf8_string_index += 1;
2131 }
2132 return( 1 );
2133 }
2134
2135 /* Compares an UTF-8 string with an UTF-16 stream
2136 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
2137 */
libuna_utf8_string_compare_with_utf16_stream(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf16_stream,size_t utf16_stream_size,int byte_order,libcerror_error_t ** error)2138 int libuna_utf8_string_compare_with_utf16_stream(
2139 const libuna_utf8_character_t *utf8_string,
2140 size_t utf8_string_size,
2141 const uint8_t *utf16_stream,
2142 size_t utf16_stream_size,
2143 int byte_order,
2144 libcerror_error_t **error )
2145 {
2146 static char *function = "libuna_utf8_string_compare_with_utf16_stream";
2147 size_t utf16_stream_index = 0;
2148 size_t utf8_string_index = 0;
2149 libuna_unicode_character_t utf8_unicode_character = 0;
2150 libuna_unicode_character_t utf16_stream_unicode_character = 0;
2151 int read_byte_order = 0;
2152
2153 if( utf8_string == NULL )
2154 {
2155 libcerror_error_set(
2156 error,
2157 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2158 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2159 "%s: invalid UTF-8 string.",
2160 function );
2161
2162 return( -1 );
2163 }
2164 if( utf8_string_size > (size_t) SSIZE_MAX )
2165 {
2166 libcerror_error_set(
2167 error,
2168 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2169 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2170 "%s: invalid UTF-8 string size value exceeds maximum.",
2171 function );
2172
2173 return( -1 );
2174 }
2175 if( utf16_stream == NULL )
2176 {
2177 libcerror_error_set(
2178 error,
2179 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2180 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2181 "%s: invalid UTF-16 stream.",
2182 function );
2183
2184 return( -1 );
2185 }
2186 if( utf16_stream_size > (size_t) SSIZE_MAX )
2187 {
2188 libcerror_error_set(
2189 error,
2190 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2191 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2192 "%s: invalid UTF-16 stream size value exceeds maximum.",
2193 function );
2194
2195 return( -1 );
2196 }
2197 if( ( utf16_stream_size == 0 )
2198 || ( ( utf16_stream_size % 2 ) != 0 ) )
2199 {
2200 libcerror_error_set(
2201 error,
2202 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2203 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2204 "%s: missing UTF-16 stream value.",
2205 function );
2206
2207 return( -1 );
2208 }
2209 /* Check if UTF-16 stream is in big or little endian
2210 */
2211 if( utf16_stream_size >= 2 )
2212 {
2213 if( ( utf16_stream[ 0 ] == 0xfe )
2214 && ( utf16_stream[ 1 ] == 0xff ) )
2215 {
2216 read_byte_order = LIBUNA_ENDIAN_BIG;
2217 utf16_stream_index = 2;
2218 }
2219 else if( ( utf16_stream[ 0 ] == 0xff )
2220 && ( utf16_stream[ 1 ] == 0xfe ) )
2221 {
2222 read_byte_order = LIBUNA_ENDIAN_LITTLE;
2223 utf16_stream_index = 2;
2224 }
2225 if( byte_order == 0 )
2226 {
2227 byte_order = read_byte_order;
2228 }
2229 }
2230 if( ( byte_order != LIBUNA_ENDIAN_BIG )
2231 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
2232 {
2233 libcerror_error_set(
2234 error,
2235 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2236 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2237 "%s: unsupported byte order.",
2238 function );
2239
2240 return( -1 );
2241 }
2242 if( ( utf8_string_size >= 1 )
2243 && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
2244 {
2245 utf8_string_size -= 1;
2246 }
2247 /* Check if the UTF-16 stream is terminated with zero bytes
2248 */
2249 if( ( utf16_stream_size >= 2 )
2250 && ( utf16_stream[ utf16_stream_size - 2 ] == 0 )
2251 && ( utf16_stream[ utf16_stream_size - 1 ] == 0 ) )
2252 {
2253 utf16_stream_size -= 2;
2254 }
2255 while( ( utf8_string_index < utf8_string_size )
2256 && ( utf16_stream_index < utf16_stream_size ) )
2257 {
2258 /* Convert the UTF-8 character bytes into an Unicode character
2259 */
2260 if( libuna_unicode_character_copy_from_utf8(
2261 &utf8_unicode_character,
2262 utf8_string,
2263 utf8_string_size,
2264 &utf8_string_index,
2265 error ) != 1 )
2266 {
2267 libcerror_error_set(
2268 error,
2269 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2270 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2271 "%s: unable to copy Unicode character from UTF-8.",
2272 function );
2273
2274 return( -1 );
2275 }
2276 /* Convert the UTF-16 stream bytes into an Unicode character
2277 */
2278 if( libuna_unicode_character_copy_from_utf16_stream(
2279 &utf16_stream_unicode_character,
2280 utf16_stream,
2281 utf16_stream_size,
2282 &utf16_stream_index,
2283 byte_order,
2284 error ) != 1 )
2285 {
2286 libcerror_error_set(
2287 error,
2288 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2289 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2290 "%s: unable to copy Unicode character from UTF-16 stream.",
2291 function );
2292
2293 return( -1 );
2294 }
2295 if( utf8_unicode_character < utf16_stream_unicode_character )
2296 {
2297 return( LIBUNA_COMPARE_LESS );
2298 }
2299 else if( utf8_unicode_character > utf16_stream_unicode_character )
2300 {
2301 return( LIBUNA_COMPARE_GREATER );
2302 }
2303 }
2304 /* Check if both strings were entirely processed
2305 */
2306 if( utf8_string_index < utf8_string_size )
2307 {
2308 return( LIBUNA_COMPARE_GREATER );
2309 }
2310 else if( utf16_stream_index < utf16_stream_size )
2311 {
2312 return( LIBUNA_COMPARE_LESS );
2313 }
2314 return( LIBUNA_COMPARE_EQUAL );
2315 }
2316
2317 /* Determines the size of an UTF-8 string from an UTF-32 string
2318 * Returns 1 if successful or -1 on error
2319 */
libuna_utf8_string_size_from_utf32(const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,size_t * utf8_string_size,libcerror_error_t ** error)2320 int libuna_utf8_string_size_from_utf32(
2321 const libuna_utf32_character_t *utf32_string,
2322 size_t utf32_string_size,
2323 size_t *utf8_string_size,
2324 libcerror_error_t **error )
2325 {
2326 static char *function = "libuna_utf8_string_size_from_utf32";
2327 size_t utf32_string_index = 0;
2328 libuna_unicode_character_t unicode_character = 0;
2329
2330 if( utf32_string == NULL )
2331 {
2332 libcerror_error_set(
2333 error,
2334 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2335 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2336 "%s: invalid UTF-32 string.",
2337 function );
2338
2339 return( -1 );
2340 }
2341 if( utf32_string_size > (size_t) SSIZE_MAX )
2342 {
2343 libcerror_error_set(
2344 error,
2345 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2346 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2347 "%s: invalid UTF-32 string size value exceeds maximum.",
2348 function );
2349
2350 return( -1 );
2351 }
2352 if( utf8_string_size == NULL )
2353 {
2354 libcerror_error_set(
2355 error,
2356 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2357 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2358 "%s: invalid UTF-8 string size.",
2359 function );
2360
2361 return( -1 );
2362 }
2363 *utf8_string_size = 0;
2364
2365 if( utf32_string_size == 0 )
2366 {
2367 return( 1 );
2368 }
2369 while( utf32_string_index < utf32_string_size )
2370 {
2371 /* Convert the UTF-32 character bytes into an Unicode character
2372 */
2373 if( libuna_unicode_character_copy_from_utf32(
2374 &unicode_character,
2375 utf32_string,
2376 utf32_string_size,
2377 &utf32_string_index,
2378 error ) != 1 )
2379 {
2380 libcerror_error_set(
2381 error,
2382 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2383 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2384 "%s: unable to copy Unicode character from UTF-32.",
2385 function );
2386
2387 return( -1 );
2388 }
2389 /* Determine how many UTF-8 character bytes are required
2390 */
2391 if( libuna_unicode_character_size_to_utf8(
2392 unicode_character,
2393 utf8_string_size,
2394 error ) != 1 )
2395 {
2396 libcerror_error_set(
2397 error,
2398 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2399 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2400 "%s: unable to unable to determine size of Unicode character in UTF-8.",
2401 function );
2402
2403 return( -1 );
2404 }
2405 }
2406 /* Check if the string is terminated with an end-of-string character
2407 */
2408 if( unicode_character != 0 )
2409 {
2410 *utf8_string_size += 1;
2411 }
2412 return( 1 );
2413 }
2414
2415 /* Copies an UTF-8 string from an UTF-32 string
2416 * Returns 1 if successful or -1 on error
2417 */
libuna_utf8_string_copy_from_utf32(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,libcerror_error_t ** error)2418 int libuna_utf8_string_copy_from_utf32(
2419 libuna_utf8_character_t *utf8_string,
2420 size_t utf8_string_size,
2421 const libuna_utf32_character_t *utf32_string,
2422 size_t utf32_string_size,
2423 libcerror_error_t **error )
2424 {
2425 static char *function = "libuna_utf8_string_copy_from_utf32";
2426 size_t utf8_string_index = 0;
2427
2428 if( libuna_utf8_string_with_index_copy_from_utf32(
2429 utf8_string,
2430 utf8_string_size,
2431 &utf8_string_index,
2432 utf32_string,
2433 utf32_string_size,
2434 error ) != 1 )
2435 {
2436 libcerror_error_set(
2437 error,
2438 LIBCERROR_ERROR_DOMAIN_RUNTIME,
2439 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
2440 "%s: unable to copy UTF-32 string to UTF-8 string.",
2441 function );
2442
2443 return( -1 );
2444 }
2445 return( 1 );
2446 }
2447
2448 /* Copies an UTF-8 string from an UTF-32 string
2449 * Returns 1 if successful or -1 on error
2450 */
libuna_utf8_string_with_index_copy_from_utf32(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,libcerror_error_t ** error)2451 int libuna_utf8_string_with_index_copy_from_utf32(
2452 libuna_utf8_character_t *utf8_string,
2453 size_t utf8_string_size,
2454 size_t *utf8_string_index,
2455 const libuna_utf32_character_t *utf32_string,
2456 size_t utf32_string_size,
2457 libcerror_error_t **error )
2458 {
2459 static char *function = "libuna_utf8_string_with_index_copy_from_utf32";
2460 size_t utf32_string_index = 0;
2461 libuna_unicode_character_t unicode_character = 0;
2462
2463 if( utf8_string == NULL )
2464 {
2465 libcerror_error_set(
2466 error,
2467 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2468 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2469 "%s: invalid UTF-8 string.",
2470 function );
2471
2472 return( -1 );
2473 }
2474 if( utf8_string_size > (size_t) SSIZE_MAX )
2475 {
2476 libcerror_error_set(
2477 error,
2478 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2479 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2480 "%s: invalid UTF-8 string size value exceeds maximum.",
2481 function );
2482
2483 return( -1 );
2484 }
2485 if( utf8_string_index == NULL )
2486 {
2487 libcerror_error_set(
2488 error,
2489 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2490 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2491 "%s: invalid UTF-8 string index.",
2492 function );
2493
2494 return( -1 );
2495 }
2496 if( utf32_string == NULL )
2497 {
2498 libcerror_error_set(
2499 error,
2500 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2501 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2502 "%s: invalid UTF-32 string.",
2503 function );
2504
2505 return( -1 );
2506 }
2507 if( utf32_string_size > (size_t) SSIZE_MAX )
2508 {
2509 libcerror_error_set(
2510 error,
2511 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2512 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2513 "%s: invalid UTF-32 string size value exceeds maximum.",
2514 function );
2515
2516 return( -1 );
2517 }
2518 if( utf32_string_size == 0 )
2519 {
2520 libcerror_error_set(
2521 error,
2522 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2523 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
2524 "%s: missing UTF-32 string value.",
2525 function );
2526
2527 return( -1 );
2528 }
2529 while( utf32_string_index < utf32_string_size )
2530 {
2531 /* Convert the UTF-32 character bytes into an Unicode character
2532 */
2533 if( libuna_unicode_character_copy_from_utf32(
2534 &unicode_character,
2535 utf32_string,
2536 utf32_string_size,
2537 &utf32_string_index,
2538 error ) != 1 )
2539 {
2540 libcerror_error_set(
2541 error,
2542 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2543 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2544 "%s: unable to copy Unicode character from UTF-32.",
2545 function );
2546
2547 return( -1 );
2548 }
2549 /* Convert the Unicode character into UTF-8 character bytes
2550 */
2551 if( libuna_unicode_character_copy_to_utf8(
2552 unicode_character,
2553 utf8_string,
2554 utf8_string_size,
2555 utf8_string_index,
2556 error ) != 1 )
2557 {
2558 libcerror_error_set(
2559 error,
2560 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2561 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2562 "%s: unable to copy Unicode character to UTF-8.",
2563 function );
2564
2565 return( -1 );
2566 }
2567 }
2568 /* Check if the string is terminated with an end-of-string character
2569 */
2570 if( unicode_character != 0 )
2571 {
2572 if( *utf8_string_index >= utf8_string_size )
2573 {
2574 libcerror_error_set(
2575 error,
2576 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2577 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2578 "%s: UTF-8 string too small.",
2579 function );
2580
2581 return( -1 );
2582 }
2583 utf8_string[ *utf8_string_index ] = 0;
2584
2585 *utf8_string_index += 1;
2586 }
2587 return( 1 );
2588 }
2589
2590 /* Compares an UTF-8 string with an UTF-32 string
2591 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
2592 */
libuna_utf8_string_compare_with_utf32(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,libcerror_error_t ** error)2593 int libuna_utf8_string_compare_with_utf32(
2594 const libuna_utf8_character_t *utf8_string,
2595 size_t utf8_string_size,
2596 const libuna_utf32_character_t *utf32_string,
2597 size_t utf32_string_size,
2598 libcerror_error_t **error )
2599 {
2600 static char *function = "libuna_utf8_string_compare_with_utf32";
2601 size_t utf32_string_index = 0;
2602 size_t utf8_string_index = 0;
2603 libuna_unicode_character_t utf8_unicode_character = 0;
2604 libuna_unicode_character_t utf32_unicode_character = 0;
2605
2606 if( utf8_string == NULL )
2607 {
2608 libcerror_error_set(
2609 error,
2610 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2611 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2612 "%s: invalid UTF-8 string.",
2613 function );
2614
2615 return( -1 );
2616 }
2617 if( utf8_string_size > (size_t) SSIZE_MAX )
2618 {
2619 libcerror_error_set(
2620 error,
2621 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2622 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2623 "%s: invalid UTF-8 string size value exceeds maximum.",
2624 function );
2625
2626 return( -1 );
2627 }
2628 if( utf32_string == NULL )
2629 {
2630 libcerror_error_set(
2631 error,
2632 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2633 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2634 "%s: invalid UTF-32 string.",
2635 function );
2636
2637 return( -1 );
2638 }
2639 if( utf32_string_size > (size_t) SSIZE_MAX )
2640 {
2641 libcerror_error_set(
2642 error,
2643 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2644 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2645 "%s: invalid UTF-32 string size value exceeds maximum.",
2646 function );
2647
2648 return( -1 );
2649 }
2650 if( utf32_string_size == 0 )
2651 {
2652 libcerror_error_set(
2653 error,
2654 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2655 LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
2656 "%s: missing UTF-32 string value.",
2657 function );
2658
2659 return( -1 );
2660 }
2661 if( ( utf8_string_size >= 1 )
2662 && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
2663 {
2664 utf8_string_size -= 1;
2665 }
2666 if( ( utf32_string_size >= 1 )
2667 && ( utf32_string[ utf32_string_size - 1 ] == 0 ) )
2668 {
2669 utf32_string_size -= 1;
2670 }
2671 while( ( utf8_string_index < utf8_string_size )
2672 && ( utf32_string_index < utf32_string_size ) )
2673 {
2674 /* Convert the UTF-8 character bytes into an Unicode character
2675 */
2676 if( libuna_unicode_character_copy_from_utf8(
2677 &utf8_unicode_character,
2678 utf8_string,
2679 utf8_string_size,
2680 &utf8_string_index,
2681 error ) != 1 )
2682 {
2683 libcerror_error_set(
2684 error,
2685 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2686 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2687 "%s: unable to copy Unicode character from UTF-8.",
2688 function );
2689
2690 return( -1 );
2691 }
2692 /* Convert the UTF-32 character bytes into an Unicode character
2693 */
2694 if( libuna_unicode_character_copy_from_utf32(
2695 &utf32_unicode_character,
2696 utf32_string,
2697 utf32_string_size,
2698 &utf32_string_index,
2699 error ) != 1 )
2700 {
2701 libcerror_error_set(
2702 error,
2703 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2704 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2705 "%s: unable to copy Unicode character from UTF-32.",
2706 function );
2707
2708 return( -1 );
2709 }
2710 if( utf8_unicode_character < utf32_unicode_character )
2711 {
2712 return( LIBUNA_COMPARE_LESS );
2713 }
2714 else if( utf8_unicode_character > utf32_unicode_character )
2715 {
2716 return( LIBUNA_COMPARE_GREATER );
2717 }
2718 }
2719 /* Check if both strings were entirely processed
2720 */
2721 if( utf8_string_index < utf8_string_size )
2722 {
2723 return( LIBUNA_COMPARE_GREATER );
2724 }
2725 else if( utf32_string_index < utf32_string_size )
2726 {
2727 return( LIBUNA_COMPARE_LESS );
2728 }
2729 return( LIBUNA_COMPARE_EQUAL );
2730 }
2731
2732 /* Determines the size of an UTF-8 string from an UTF-32 stream
2733 * Returns 1 if successful or -1 on error
2734 */
libuna_utf8_string_size_from_utf32_stream(const uint8_t * utf32_stream,size_t utf32_stream_size,int byte_order,size_t * utf8_string_size,libcerror_error_t ** error)2735 int libuna_utf8_string_size_from_utf32_stream(
2736 const uint8_t *utf32_stream,
2737 size_t utf32_stream_size,
2738 int byte_order,
2739 size_t *utf8_string_size,
2740 libcerror_error_t **error )
2741 {
2742 static char *function = "libuna_utf8_string_size_from_utf32_stream";
2743 size_t utf32_stream_index = 0;
2744 libuna_unicode_character_t unicode_character = 0;
2745 int read_byte_order = 0;
2746
2747 if( utf32_stream == NULL )
2748 {
2749 libcerror_error_set(
2750 error,
2751 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2752 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2753 "%s: invalid UTF-32 stream.",
2754 function );
2755
2756 return( -1 );
2757 }
2758 if( utf32_stream_size > (size_t) SSIZE_MAX )
2759 {
2760 libcerror_error_set(
2761 error,
2762 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2763 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2764 "%s: invalid UTF-32 stream size value exceeds maximum.",
2765 function );
2766
2767 return( -1 );
2768 }
2769 if( ( utf32_stream_size % 4 ) != 0 )
2770 {
2771 libcerror_error_set(
2772 error,
2773 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2774 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2775 "%s: missing UTF-32 stream value.",
2776 function );
2777
2778 return( -1 );
2779 }
2780 if( utf8_string_size == NULL )
2781 {
2782 libcerror_error_set(
2783 error,
2784 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2785 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2786 "%s: invalid UTF-8 string size.",
2787 function );
2788
2789 return( -1 );
2790 }
2791 *utf8_string_size = 0;
2792
2793 if( utf32_stream_size == 0 )
2794 {
2795 return( 1 );
2796 }
2797 /* Check if UTF-32 stream is in big or little endian
2798 */
2799 if( utf32_stream_size >= 4 )
2800 {
2801 if( ( utf32_stream[ 0 ] == 0x00 )
2802 && ( utf32_stream[ 1 ] == 0x00 )
2803 && ( utf32_stream[ 2 ] == 0xfe )
2804 && ( utf32_stream[ 3 ] == 0xff ) )
2805 {
2806 read_byte_order = LIBUNA_ENDIAN_BIG;
2807 utf32_stream_index = 4;
2808 }
2809 else if( ( utf32_stream[ 0 ] == 0xff )
2810 && ( utf32_stream[ 1 ] == 0xfe )
2811 && ( utf32_stream[ 2 ] == 0x00 )
2812 && ( utf32_stream[ 3 ] == 0x00 ) )
2813 {
2814 read_byte_order = LIBUNA_ENDIAN_LITTLE;
2815 utf32_stream_index = 4;
2816 }
2817 if( byte_order == 0 )
2818 {
2819 byte_order = read_byte_order;
2820 }
2821 }
2822 if( ( byte_order != LIBUNA_ENDIAN_BIG )
2823 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
2824 {
2825 libcerror_error_set(
2826 error,
2827 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2828 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2829 "%s: unsupported byte order.",
2830 function );
2831
2832 return( -1 );
2833 }
2834 while( ( utf32_stream_index + 3 ) < utf32_stream_size )
2835 {
2836 /* Convert the UTF-32 stream bytes into an Unicode character
2837 */
2838 if( libuna_unicode_character_copy_from_utf32_stream(
2839 &unicode_character,
2840 utf32_stream,
2841 utf32_stream_size,
2842 &utf32_stream_index,
2843 byte_order,
2844 error ) != 1 )
2845 {
2846 libcerror_error_set(
2847 error,
2848 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2849 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2850 "%s: unable to copy Unicode character from UTF-32 stream.",
2851 function );
2852
2853 return( -1 );
2854 }
2855 /* Determine how many UTF-8 character bytes are required
2856 */
2857 if( libuna_unicode_character_size_to_utf8(
2858 unicode_character,
2859 utf8_string_size,
2860 error ) != 1 )
2861 {
2862 libcerror_error_set(
2863 error,
2864 LIBCERROR_ERROR_DOMAIN_CONVERSION,
2865 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2866 "%s: unable to unable to determine size of Unicode character in UTF-8.",
2867 function );
2868
2869 return( -1 );
2870 }
2871 if( unicode_character == 0 )
2872 {
2873 break;
2874 }
2875 }
2876 /* Check if the string is terminated with an end-of-string character
2877 */
2878 if( unicode_character != 0 )
2879 {
2880 *utf8_string_size += 1;
2881 }
2882 return( 1 );
2883 }
2884
2885 /* Copies an UTF-8 string from an UTF-32 stream
2886 * Returns 1 if successful or -1 on error
2887 */
libuna_utf8_string_copy_from_utf32_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf32_stream,size_t utf32_stream_size,int byte_order,libcerror_error_t ** error)2888 int libuna_utf8_string_copy_from_utf32_stream(
2889 libuna_utf8_character_t *utf8_string,
2890 size_t utf8_string_size,
2891 const uint8_t *utf32_stream,
2892 size_t utf32_stream_size,
2893 int byte_order,
2894 libcerror_error_t **error )
2895 {
2896 static char *function = "libuna_utf8_string_copy_from_utf32_stream";
2897 size_t utf8_string_index = 0;
2898
2899 if( libuna_utf8_string_with_index_copy_from_utf32_stream(
2900 utf8_string,
2901 utf8_string_size,
2902 &utf8_string_index,
2903 utf32_stream,
2904 utf32_stream_size,
2905 byte_order,
2906 error ) != 1 )
2907 {
2908 libcerror_error_set(
2909 error,
2910 LIBCERROR_ERROR_DOMAIN_RUNTIME,
2911 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
2912 "%s: unable to copy UTF-32 stream to UTF-8 string.",
2913 function );
2914
2915 return( -1 );
2916 }
2917 return( 1 );
2918 }
2919
2920 /* Copies an UTF-8 string from an UTF-32 stream
2921 * Returns 1 if successful or -1 on error
2922 */
libuna_utf8_string_with_index_copy_from_utf32_stream(libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,const uint8_t * utf32_stream,size_t utf32_stream_size,int byte_order,libcerror_error_t ** error)2923 int libuna_utf8_string_with_index_copy_from_utf32_stream(
2924 libuna_utf8_character_t *utf8_string,
2925 size_t utf8_string_size,
2926 size_t *utf8_string_index,
2927 const uint8_t *utf32_stream,
2928 size_t utf32_stream_size,
2929 int byte_order,
2930 libcerror_error_t **error )
2931 {
2932 static char *function = "libuna_utf8_string_with_index_copy_from_utf32_stream";
2933 size_t utf32_stream_index = 0;
2934 libuna_unicode_character_t unicode_character = 0;
2935 int read_byte_order = 0;
2936
2937 if( utf8_string == NULL )
2938 {
2939 libcerror_error_set(
2940 error,
2941 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2942 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2943 "%s: invalid UTF-8 string.",
2944 function );
2945
2946 return( -1 );
2947 }
2948 if( utf8_string_size > (size_t) SSIZE_MAX )
2949 {
2950 libcerror_error_set(
2951 error,
2952 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2953 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2954 "%s: invalid UTF-8 string size value exceeds maximum.",
2955 function );
2956
2957 return( -1 );
2958 }
2959 if( utf8_string_index == NULL )
2960 {
2961 libcerror_error_set(
2962 error,
2963 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2964 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2965 "%s: invalid UTF-8 string index.",
2966 function );
2967
2968 return( -1 );
2969 }
2970 if( utf32_stream == NULL )
2971 {
2972 libcerror_error_set(
2973 error,
2974 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2975 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2976 "%s: invalid UTF-32 stream.",
2977 function );
2978
2979 return( -1 );
2980 }
2981 if( utf32_stream_size > (size_t) SSIZE_MAX )
2982 {
2983 libcerror_error_set(
2984 error,
2985 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2986 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2987 "%s: invalid UTF-32 stream size value exceeds maximum.",
2988 function );
2989
2990 return( -1 );
2991 }
2992 if( ( utf32_stream_size == 0 )
2993 || ( ( utf32_stream_size % 4 ) != 0 ) )
2994 {
2995 libcerror_error_set(
2996 error,
2997 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2998 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2999 "%s: missing UTF-32 stream value.",
3000 function );
3001
3002 return( -1 );
3003 }
3004 /* Check if UTF-32 stream is in big or little endian
3005 */
3006 if( utf32_stream_size >= 4 )
3007 {
3008 if( ( utf32_stream[ 0 ] == 0x00 )
3009 && ( utf32_stream[ 1 ] == 0x00 )
3010 && ( utf32_stream[ 2 ] == 0xfe )
3011 && ( utf32_stream[ 3 ] == 0xff ) )
3012 {
3013 read_byte_order = LIBUNA_ENDIAN_BIG;
3014 utf32_stream_index = 4;
3015 }
3016 else if( ( utf32_stream[ 0 ] == 0xff )
3017 && ( utf32_stream[ 1 ] == 0xfe )
3018 && ( utf32_stream[ 2 ] == 0x00 )
3019 && ( utf32_stream[ 3 ] == 0x00 ) )
3020 {
3021 read_byte_order = LIBUNA_ENDIAN_LITTLE;
3022 utf32_stream_index = 4;
3023 }
3024 if( byte_order == 0 )
3025 {
3026 byte_order = read_byte_order;
3027 }
3028 }
3029 if( ( byte_order != LIBUNA_ENDIAN_BIG )
3030 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
3031 {
3032 libcerror_error_set(
3033 error,
3034 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3035 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3036 "%s: unsupported byte order.",
3037 function );
3038
3039 return( -1 );
3040 }
3041 while( ( utf32_stream_index + 3 ) < utf32_stream_size )
3042 {
3043 /* Convert the UTF-32 stream bytes into an Unicode character
3044 */
3045 if( libuna_unicode_character_copy_from_utf32_stream(
3046 &unicode_character,
3047 utf32_stream,
3048 utf32_stream_size,
3049 &utf32_stream_index,
3050 byte_order,
3051 error ) != 1 )
3052 {
3053 libcerror_error_set(
3054 error,
3055 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3056 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
3057 "%s: unable to copy Unicode character from UTF-32 stream.",
3058 function );
3059
3060 return( -1 );
3061 }
3062 /* Convert the Unicode character into UTF-8 character bytes
3063 */
3064 if( libuna_unicode_character_copy_to_utf8(
3065 unicode_character,
3066 utf8_string,
3067 utf8_string_size,
3068 utf8_string_index,
3069 error ) != 1 )
3070 {
3071 libcerror_error_set(
3072 error,
3073 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3074 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3075 "%s: unable to copy Unicode character to UTF-8.",
3076 function );
3077
3078 return( -1 );
3079 }
3080 if( unicode_character == 0 )
3081 {
3082 break;
3083 }
3084 }
3085 /* Check if the string is terminated with an end-of-string character
3086 */
3087 if( unicode_character != 0 )
3088 {
3089 if( *utf8_string_index >= utf8_string_size )
3090 {
3091 libcerror_error_set(
3092 error,
3093 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3094 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3095 "%s: UTF-8 string too small.",
3096 function );
3097
3098 return( -1 );
3099 }
3100 utf8_string[ *utf8_string_index ] = 0;
3101
3102 *utf8_string_index += 1;
3103 }
3104 return( 1 );
3105 }
3106
3107 /* Compares an UTF-8 string with an UTF-32 stream
3108 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
3109 */
libuna_utf8_string_compare_with_utf32_stream(const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,const uint8_t * utf32_stream,size_t utf32_stream_size,int byte_order,libcerror_error_t ** error)3110 int libuna_utf8_string_compare_with_utf32_stream(
3111 const libuna_utf8_character_t *utf8_string,
3112 size_t utf8_string_size,
3113 const uint8_t *utf32_stream,
3114 size_t utf32_stream_size,
3115 int byte_order,
3116 libcerror_error_t **error )
3117 {
3118 static char *function = "libuna_utf8_string_compare_with_utf32_stream";
3119 size_t utf32_stream_index = 0;
3120 size_t utf8_string_index = 0;
3121 libuna_unicode_character_t utf8_unicode_character = 0;
3122 libuna_unicode_character_t utf32_stream_unicode_character = 0;
3123 int read_byte_order = 0;
3124
3125 if( utf8_string == NULL )
3126 {
3127 libcerror_error_set(
3128 error,
3129 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3130 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3131 "%s: invalid UTF-8 string.",
3132 function );
3133
3134 return( -1 );
3135 }
3136 if( utf8_string_size > (size_t) SSIZE_MAX )
3137 {
3138 libcerror_error_set(
3139 error,
3140 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3141 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3142 "%s: invalid UTF-8 string size value exceeds maximum.",
3143 function );
3144
3145 return( -1 );
3146 }
3147 if( utf32_stream == NULL )
3148 {
3149 libcerror_error_set(
3150 error,
3151 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3152 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3153 "%s: invalid UTF-32 stream.",
3154 function );
3155
3156 return( -1 );
3157 }
3158 if( utf32_stream_size > (size_t) SSIZE_MAX )
3159 {
3160 libcerror_error_set(
3161 error,
3162 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3163 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3164 "%s: invalid UTF-32 stream size value exceeds maximum.",
3165 function );
3166
3167 return( -1 );
3168 }
3169 if( ( utf32_stream_size == 0 )
3170 || ( ( utf32_stream_size % 4 ) != 0 ) )
3171 {
3172 libcerror_error_set(
3173 error,
3174 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3175 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3176 "%s: missing UTF-32 stream value.",
3177 function );
3178
3179 return( -1 );
3180 }
3181 /* Check if UTF-32 stream is in big or little endian
3182 */
3183 if( utf32_stream_size >= 4 )
3184 {
3185 if( ( utf32_stream[ 0 ] == 0x00 )
3186 && ( utf32_stream[ 1 ] == 0x00 )
3187 && ( utf32_stream[ 2 ] == 0xfe )
3188 && ( utf32_stream[ 3 ] == 0xff ) )
3189 {
3190 read_byte_order = LIBUNA_ENDIAN_BIG;
3191 utf32_stream_index = 4;
3192 }
3193 else if( ( utf32_stream[ 0 ] == 0xff )
3194 && ( utf32_stream[ 1 ] == 0xfe )
3195 && ( utf32_stream[ 2 ] == 0x00 )
3196 && ( utf32_stream[ 3 ] == 0x00 ) )
3197 {
3198 read_byte_order = LIBUNA_ENDIAN_LITTLE;
3199 utf32_stream_index = 4;
3200 }
3201 if( byte_order == 0 )
3202 {
3203 byte_order = read_byte_order;
3204 }
3205 }
3206 if( ( byte_order != LIBUNA_ENDIAN_BIG )
3207 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
3208 {
3209 libcerror_error_set(
3210 error,
3211 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3212 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3213 "%s: unsupported byte order.",
3214 function );
3215
3216 return( -1 );
3217 }
3218 if( ( utf8_string_size >= 1 )
3219 && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
3220 {
3221 utf8_string_size -= 1;
3222 }
3223 /* Check if the UTF-32 stream is terminated with zero bytes
3224 */
3225 if( ( utf32_stream_size >= 4 )
3226 && ( utf32_stream[ utf32_stream_size - 4 ] == 0 )
3227 && ( utf32_stream[ utf32_stream_size - 3 ] == 0 )
3228 && ( utf32_stream[ utf32_stream_size - 2 ] == 0 )
3229 && ( utf32_stream[ utf32_stream_size - 1 ] == 0 ) )
3230 {
3231 utf32_stream_size -= 1;
3232 }
3233 while( ( utf8_string_index < utf8_string_size )
3234 && ( utf32_stream_index < utf32_stream_size ) )
3235 {
3236 /* Convert the UTF-8 character bytes into an Unicode character
3237 */
3238 if( libuna_unicode_character_copy_from_utf8(
3239 &utf8_unicode_character,
3240 utf8_string,
3241 utf8_string_size,
3242 &utf8_string_index,
3243 error ) != 1 )
3244 {
3245 libcerror_error_set(
3246 error,
3247 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3248 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3249 "%s: unable to copy Unicode character from UTF-8.",
3250 function );
3251
3252 return( -1 );
3253 }
3254 /* Convert the UTF-32 stream bytes into an Unicode character
3255 */
3256 if( libuna_unicode_character_copy_from_utf32_stream(
3257 &utf32_stream_unicode_character,
3258 utf32_stream,
3259 utf32_stream_size,
3260 &utf32_stream_index,
3261 byte_order,
3262 error ) != 1 )
3263 {
3264 libcerror_error_set(
3265 error,
3266 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3267 LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
3268 "%s: unable to copy Unicode character from UTF-32 stream.",
3269 function );
3270
3271 return( -1 );
3272 }
3273 if( utf8_unicode_character < utf32_stream_unicode_character )
3274 {
3275 return( LIBUNA_COMPARE_LESS );
3276 }
3277 else if( utf8_unicode_character > utf32_stream_unicode_character )
3278 {
3279 return( LIBUNA_COMPARE_GREATER );
3280 }
3281 }
3282 /* Check if both strings were entirely processed
3283 */
3284 if( utf8_string_index < utf8_string_size )
3285 {
3286 return( LIBUNA_COMPARE_GREATER );
3287 }
3288 else if( utf32_stream_index < utf32_stream_size )
3289 {
3290 return( LIBUNA_COMPARE_LESS );
3291 }
3292 return( LIBUNA_COMPARE_EQUAL );
3293 }
3294
3295