1 /*
2 * Unicode character functions
3 *
4 * Copyright (C) 2008-2019, Joachim Metz <joachim.metz@gmail.com>
5 *
6 * Refer to AUTHORS for acknowledgements.
7 *
8 * This software is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU Lesser General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * This software is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this software. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #include <common.h>
23 #include <types.h>
24
25 #include "libuna_base64_stream.h"
26 #include "libuna_codepage_iso_8859_2.h"
27 #include "libuna_codepage_iso_8859_3.h"
28 #include "libuna_codepage_iso_8859_4.h"
29 #include "libuna_codepage_iso_8859_5.h"
30 #include "libuna_codepage_iso_8859_6.h"
31 #include "libuna_codepage_iso_8859_7.h"
32 #include "libuna_codepage_iso_8859_8.h"
33 #include "libuna_codepage_iso_8859_9.h"
34 #include "libuna_codepage_iso_8859_10.h"
35 #include "libuna_codepage_iso_8859_13.h"
36 #include "libuna_codepage_iso_8859_14.h"
37 #include "libuna_codepage_iso_8859_15.h"
38 #include "libuna_codepage_iso_8859_16.h"
39 #include "libuna_codepage_koi8_r.h"
40 #include "libuna_codepage_koi8_u.h"
41 #include "libuna_codepage_windows_874.h"
42 #include "libuna_codepage_windows_932.h"
43 #include "libuna_codepage_windows_936.h"
44 #include "libuna_codepage_windows_949.h"
45 #include "libuna_codepage_windows_950.h"
46 #include "libuna_codepage_windows_1250.h"
47 #include "libuna_codepage_windows_1251.h"
48 #include "libuna_codepage_windows_1252.h"
49 #include "libuna_codepage_windows_1253.h"
50 #include "libuna_codepage_windows_1254.h"
51 #include "libuna_codepage_windows_1255.h"
52 #include "libuna_codepage_windows_1256.h"
53 #include "libuna_codepage_windows_1257.h"
54 #include "libuna_codepage_windows_1258.h"
55 #include "libuna_definitions.h"
56 #include "libuna_libcerror.h"
57 #include "libuna_types.h"
58 #include "libuna_unicode_character.h"
59 #include "libuna_unused.h"
60
/* Lookup table, indexed by byte value, of the characters that can be directly encoded in UTF-7
 * A non-zero entry means the byte value is a valid directly encoded character
 *
 * Valid directly encoded characters: A-Z, a-z, 0-9, '\'', '(', ')', ',', '-', '.', '/', ':', '?'
 * Valid directly encoded whitespace: '\t', '\n', '\r', ' '
 * Valid optional directly encoded characters: '!', '"', '#', '$', '%', '&', '*', ';', '<', '=', '>', '@', '[', ']', '^', '_', '`', '{', '|', '}'
 *
 * '+' (0x2b), '~' (0x7e) and all byte values of 0x7f and higher are not directly encodable.
 *
 * NOTE(review): the backslash (0x5c) is flagged as valid although RFC 2152 leaves it out
 * of both character sets. It is kept as is so that input that was accepted before stays
 * accepted - confirm whether this leniency is intended.
 */
uint8_t libuna_unicode_character_utf7_valid_directly_encoded_character[ 256 ] = {
	/* 0x00 - 0x0f: only '\t', '\n' and '\r' */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
	/* 0x10 - 0x1f */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2f: everything except '+', the apostrophe (0x27) is part of the directly encoded set */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
	/* 0x30 - 0x3f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 - 0x4f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 - 0x5f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 - 0x6f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 - 0x7f: everything except '~' and DEL */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
	/* 0x80 - 0xff: never directly encodable */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
82
/* Lookup table, indexed by byte value, of the characters of the UTF-7 base64 alphabet
 * A non-zero entry means the byte value is a valid UTF-7 base64 character
 *
 * Valid UTF-7 base64 characters: A-Z, a-z, 0-9, '+' and '/'
 */
uint8_t libuna_unicode_character_utf7_valid_base64_character[ 256 ] = {
	/* 0x00 - 0x1f: control characters */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2f: '+' and '/' */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
	/* 0x30 - 0x3f: 0-9 */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
	/* 0x40 - 0x5f: A-Z */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x60 - 0x7f: a-z */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 - 0xff: never part of the alphabet */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
102
103 /* Determines the size of a byte stream character from an Unicode character
104 * Adds the size to the byte stream character size value
105 * Returns 1 if successful or -1 on error
106 */
libuna_unicode_character_size_to_byte_stream(libuna_unicode_character_t unicode_character,int codepage,size_t * byte_stream_character_size,libcerror_error_t ** error)107 int libuna_unicode_character_size_to_byte_stream(
108 libuna_unicode_character_t unicode_character,
109 int codepage,
110 size_t *byte_stream_character_size,
111 libcerror_error_t **error )
112 {
113 static char *function = "libuna_unicode_character_size_to_byte_stream";
114 int result = 1;
115
116 if( byte_stream_character_size == NULL )
117 {
118 libcerror_error_set(
119 error,
120 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
121 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
122 "%s: invalid byte stream character size.",
123 function );
124
125 return( -1 );
126 }
127 switch( codepage )
128 {
129 case LIBUNA_CODEPAGE_ASCII:
130 case LIBUNA_CODEPAGE_ISO_8859_1:
131 case LIBUNA_CODEPAGE_ISO_8859_2:
132 case LIBUNA_CODEPAGE_ISO_8859_3:
133 case LIBUNA_CODEPAGE_ISO_8859_4:
134 case LIBUNA_CODEPAGE_ISO_8859_5:
135 case LIBUNA_CODEPAGE_ISO_8859_6:
136 case LIBUNA_CODEPAGE_ISO_8859_7:
137 case LIBUNA_CODEPAGE_ISO_8859_8:
138 case LIBUNA_CODEPAGE_ISO_8859_9:
139 case LIBUNA_CODEPAGE_ISO_8859_10:
140 case LIBUNA_CODEPAGE_ISO_8859_11:
141 case LIBUNA_CODEPAGE_ISO_8859_13:
142 case LIBUNA_CODEPAGE_ISO_8859_14:
143 case LIBUNA_CODEPAGE_ISO_8859_15:
144 case LIBUNA_CODEPAGE_ISO_8859_16:
145 case LIBUNA_CODEPAGE_KOI8_R:
146 case LIBUNA_CODEPAGE_KOI8_U:
147 case LIBUNA_CODEPAGE_WINDOWS_874:
148 case LIBUNA_CODEPAGE_WINDOWS_1250:
149 case LIBUNA_CODEPAGE_WINDOWS_1251:
150 case LIBUNA_CODEPAGE_WINDOWS_1252:
151 case LIBUNA_CODEPAGE_WINDOWS_1253:
152 case LIBUNA_CODEPAGE_WINDOWS_1254:
153 case LIBUNA_CODEPAGE_WINDOWS_1255:
154 case LIBUNA_CODEPAGE_WINDOWS_1256:
155 case LIBUNA_CODEPAGE_WINDOWS_1257:
156 case LIBUNA_CODEPAGE_WINDOWS_1258:
157 *byte_stream_character_size += 1;
158 break;
159
160 case LIBUNA_CODEPAGE_WINDOWS_932:
161 result = libuna_codepage_windows_932_unicode_character_size_to_byte_stream(
162 unicode_character,
163 byte_stream_character_size,
164 error );
165 break;
166
167 case LIBUNA_CODEPAGE_WINDOWS_936:
168 result = libuna_codepage_windows_936_unicode_character_size_to_byte_stream(
169 unicode_character,
170 byte_stream_character_size,
171 error );
172 break;
173
174 case LIBUNA_CODEPAGE_WINDOWS_949:
175 result = libuna_codepage_windows_949_unicode_character_size_to_byte_stream(
176 unicode_character,
177 byte_stream_character_size,
178 error );
179 break;
180
181 case LIBUNA_CODEPAGE_WINDOWS_950:
182 result = libuna_codepage_windows_950_unicode_character_size_to_byte_stream(
183 unicode_character,
184 byte_stream_character_size,
185 error );
186 break;
187
188 default:
189 libcerror_error_set(
190 error,
191 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
192 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
193 "%s: unsupported codepage: %d.",
194 function,
195 codepage );
196
197 return( -1 );
198 }
199 if( result != 1 )
200 {
201 libcerror_error_set(
202 error,
203 LIBCERROR_ERROR_DOMAIN_RUNTIME,
204 LIBCERROR_RUNTIME_ERROR_GET_FAILED,
205 "%s: unable to determine byte stream character size.",
206 function );
207
208 return( -1 );
209 }
210 return( result );
211 }
212
213 /* Copies an Unicode character from a byte stream
214 * Returns 1 if successful or -1 on error
215 */
libuna_unicode_character_copy_from_byte_stream(libuna_unicode_character_t * unicode_character,const uint8_t * byte_stream,size_t byte_stream_size,size_t * byte_stream_index,int codepage,libcerror_error_t ** error)216 int libuna_unicode_character_copy_from_byte_stream(
217 libuna_unicode_character_t *unicode_character,
218 const uint8_t *byte_stream,
219 size_t byte_stream_size,
220 size_t *byte_stream_index,
221 int codepage,
222 libcerror_error_t **error )
223 {
224 static char *function = "libuna_unicode_character_copy_from_byte_stream";
225 uint8_t byte_stream_character = 0;
226 int result = 1;
227
228 if( unicode_character == NULL )
229 {
230 libcerror_error_set(
231 error,
232 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
233 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
234 "%s: invalid Unicode character.",
235 function );
236
237 return( -1 );
238 }
239 if( byte_stream == NULL )
240 {
241 libcerror_error_set(
242 error,
243 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
244 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
245 "%s: invalid byte stream.",
246 function );
247
248 return( -1 );
249 }
250 if( byte_stream_size > (size_t) SSIZE_MAX )
251 {
252 libcerror_error_set(
253 error,
254 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
255 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
256 "%s: invalid byte stream size value exceeds maximum.",
257 function );
258
259 return( -1 );
260 }
261 if( byte_stream_index == NULL )
262 {
263 libcerror_error_set(
264 error,
265 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
266 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
267 "%s: invalid byte stream index.",
268 function );
269
270 return( -1 );
271 }
272 if( *byte_stream_index >= byte_stream_size )
273 {
274 libcerror_error_set(
275 error,
276 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
277 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
278 "%s: byte stream too small.",
279 function );
280
281 return( -1 );
282 }
283 byte_stream_character = byte_stream[ *byte_stream_index ];
284
285 switch( codepage )
286 {
287 case LIBUNA_CODEPAGE_ASCII:
288 if( byte_stream_character < 0x80 )
289 {
290 *unicode_character = byte_stream_character;
291 }
292 else
293 {
294 *unicode_character = 0xfffd;
295 }
296 *byte_stream_index += 1;
297
298 break;
299
300 case LIBUNA_CODEPAGE_ISO_8859_1:
301 *unicode_character = byte_stream_character;
302
303 *byte_stream_index += 1;
304
305 break;
306
307 case LIBUNA_CODEPAGE_ISO_8859_2:
308 if( byte_stream_character < 0xa0 )
309 {
310 *unicode_character = byte_stream_character;
311 }
312 else
313 {
314 byte_stream_character -= 0xa0;
315
316 *unicode_character = libuna_codepage_iso_8859_2_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
317 }
318 *byte_stream_index += 1;
319
320 break;
321
322 case LIBUNA_CODEPAGE_ISO_8859_3:
323 if( byte_stream_character < 0xa0 )
324 {
325 *unicode_character = byte_stream_character;
326 }
327 else
328 {
329 byte_stream_character -= 0xa0;
330
331 *unicode_character = libuna_codepage_iso_8859_3_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
332 }
333 *byte_stream_index += 1;
334
335 break;
336
337 case LIBUNA_CODEPAGE_ISO_8859_4:
338 if( byte_stream_character < 0xa0 )
339 {
340 *unicode_character = byte_stream_character;
341 }
342 else
343 {
344 byte_stream_character -= 0xa0;
345
346 *unicode_character = libuna_codepage_iso_8859_4_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
347 }
348 *byte_stream_index += 1;
349
350 break;
351
352 case LIBUNA_CODEPAGE_ISO_8859_5:
353 if( byte_stream_character < 0xa0 )
354 {
355 *unicode_character = byte_stream_character;
356 }
357 else
358 {
359 byte_stream_character -= 0xa0;
360
361 *unicode_character = libuna_codepage_iso_8859_5_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
362 }
363 *byte_stream_index += 1;
364
365 break;
366
367 case LIBUNA_CODEPAGE_ISO_8859_6:
368 if( byte_stream_character < 0xa0 )
369 {
370 *unicode_character = byte_stream_character;
371 }
372 else
373 {
374 byte_stream_character -= 0xa0;
375
376 *unicode_character = libuna_codepage_iso_8859_6_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
377 }
378 *byte_stream_index += 1;
379
380 break;
381
382 case LIBUNA_CODEPAGE_ISO_8859_7:
383 if( byte_stream_character < 0xa0 )
384 {
385 *unicode_character = byte_stream_character;
386 }
387 else
388 {
389 byte_stream_character -= 0xa0;
390
391 *unicode_character = libuna_codepage_iso_8859_7_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
392 }
393 *byte_stream_index += 1;
394
395 break;
396
397 case LIBUNA_CODEPAGE_ISO_8859_8:
398 if( byte_stream_character < 0xa0 )
399 {
400 *unicode_character = byte_stream_character;
401 }
402 else
403 {
404 byte_stream_character -= 0xa0;
405
406 *unicode_character = libuna_codepage_iso_8859_8_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
407 }
408 *byte_stream_index += 1;
409
410 break;
411
412 case LIBUNA_CODEPAGE_ISO_8859_9:
413 if( byte_stream_character < 0xd0 )
414 {
415 *unicode_character = byte_stream_character;
416 }
417 else
418 {
419 byte_stream_character -= 0xd0;
420
421 *unicode_character = libuna_codepage_iso_8859_9_byte_stream_to_unicode_base_0xd0[ byte_stream_character ];
422 }
423 *byte_stream_index += 1;
424
425 break;
426
427 case LIBUNA_CODEPAGE_ISO_8859_10:
428 if( byte_stream_character < 0xa0 )
429 {
430 *unicode_character = byte_stream_character;
431 }
432 else
433 {
434 byte_stream_character -= 0xa0;
435
436 *unicode_character = libuna_codepage_iso_8859_10_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
437 }
438 *byte_stream_index += 1;
439
440 break;
441
442 case LIBUNA_CODEPAGE_ISO_8859_11:
443 if( byte_stream_character < 0xa1 )
444 {
445 *unicode_character = byte_stream_character;
446 }
447 else if( byte_stream_character < 0xdb )
448 {
449 *unicode_character = byte_stream_character + 0x0d60;
450 }
451 else if( byte_stream_character < 0xdf )
452 {
453 *unicode_character = 0xfffd;
454 }
455 else if( byte_stream_character < 0xfc )
456 {
457 *unicode_character = byte_stream_character + 0x0d60;
458 }
459 else
460 {
461 *unicode_character = 0xfffd;
462 }
463 *byte_stream_index += 1;
464
465 break;
466
467 case LIBUNA_CODEPAGE_ISO_8859_13:
468 if( byte_stream_character < 0xa0 )
469 {
470 *unicode_character = byte_stream_character;
471 }
472 else
473 {
474 byte_stream_character -= 0xa0;
475
476 *unicode_character = libuna_codepage_iso_8859_13_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
477 }
478 *byte_stream_index += 1;
479
480 break;
481
482 case LIBUNA_CODEPAGE_ISO_8859_14:
483 if( byte_stream_character < 0xa0 )
484 {
485 *unicode_character = byte_stream_character;
486 }
487 else
488 {
489 byte_stream_character -= 0xa0;
490
491 *unicode_character = libuna_codepage_iso_8859_14_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
492 }
493 *byte_stream_index += 1;
494
495 break;
496
497 case LIBUNA_CODEPAGE_ISO_8859_15:
498 if( ( byte_stream_character >= 0xa0 )
499 && ( byte_stream_character < 0xc0 ) )
500 {
501 byte_stream_character -= 0xa0;
502
503 *unicode_character = libuna_codepage_iso_8859_15_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
504 }
505 else
506 {
507 *unicode_character = byte_stream_character;
508 }
509 *byte_stream_index += 1;
510
511 break;
512
513 case LIBUNA_CODEPAGE_ISO_8859_16:
514 if( byte_stream_character < 0xa0 )
515 {
516 *unicode_character = byte_stream_character;
517 }
518 else
519 {
520 byte_stream_character -= 0xa0;
521
522 *unicode_character = libuna_codepage_iso_8859_16_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
523 }
524 *byte_stream_index += 1;
525
526 break;
527
528 case LIBUNA_CODEPAGE_KOI8_R:
529 if( byte_stream_character < 0x80 )
530 {
531 *unicode_character = byte_stream_character;
532 }
533 else
534 {
535 byte_stream_character -= 0x80;
536
537 *unicode_character = libuna_codepage_koi8_r_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
538 }
539 *byte_stream_index += 1;
540
541 break;
542
543 case LIBUNA_CODEPAGE_KOI8_U:
544 if( byte_stream_character < 0x80 )
545 {
546 *unicode_character = byte_stream_character;
547 }
548 else
549 {
550 byte_stream_character -= 0x80;
551
552 *unicode_character = libuna_codepage_koi8_u_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
553 }
554 *byte_stream_index += 1;
555
556 break;
557
558 case LIBUNA_CODEPAGE_WINDOWS_874:
559 if( byte_stream_character < 0x80 )
560 {
561 *unicode_character = byte_stream_character;
562 }
563 else
564 {
565 byte_stream_character -= 0x80;
566
567 *unicode_character = libuna_codepage_windows_874_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
568 }
569 *byte_stream_index += 1;
570
571 break;
572
573 case LIBUNA_CODEPAGE_WINDOWS_932:
574 result = libuna_codepage_windows_932_copy_from_byte_stream(
575 unicode_character,
576 byte_stream,
577 byte_stream_size,
578 byte_stream_index,
579 error );
580 break;
581
582 case LIBUNA_CODEPAGE_WINDOWS_936:
583 result = libuna_codepage_windows_936_copy_from_byte_stream(
584 unicode_character,
585 byte_stream,
586 byte_stream_size,
587 byte_stream_index,
588 error );
589 break;
590
591 case LIBUNA_CODEPAGE_WINDOWS_949:
592 result = libuna_codepage_windows_949_copy_from_byte_stream(
593 unicode_character,
594 byte_stream,
595 byte_stream_size,
596 byte_stream_index,
597 error );
598 break;
599
600 case LIBUNA_CODEPAGE_WINDOWS_950:
601 result = libuna_codepage_windows_950_copy_from_byte_stream(
602 unicode_character,
603 byte_stream,
604 byte_stream_size,
605 byte_stream_index,
606 error );
607 break;
608
609 case LIBUNA_CODEPAGE_WINDOWS_1250:
610 if( byte_stream_character < 0x80 )
611 {
612 *unicode_character = byte_stream_character;
613 }
614 else
615 {
616 byte_stream_character -= 0x80;
617
618 *unicode_character = libuna_codepage_windows_1250_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
619 }
620 *byte_stream_index += 1;
621
622 break;
623
624 case LIBUNA_CODEPAGE_WINDOWS_1251:
625 if( byte_stream_character < 0x80 )
626 {
627 *unicode_character = byte_stream_character;
628 }
629 else
630 {
631 byte_stream_character -= 0x80;
632
633 *unicode_character = libuna_codepage_windows_1251_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
634 }
635 *byte_stream_index += 1;
636
637 break;
638
639 case LIBUNA_CODEPAGE_WINDOWS_1252:
640 if( ( byte_stream_character < 0x80 )
641 || ( byte_stream_character >= 0xa0 ) )
642 {
643 *unicode_character = byte_stream_character;
644 }
645 else
646 {
647 byte_stream_character -= 0x80;
648
649 *unicode_character = libuna_codepage_windows_1252_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
650 }
651 *byte_stream_index += 1;
652
653 break;
654
655 case LIBUNA_CODEPAGE_WINDOWS_1253:
656 if( byte_stream_character < 0x80 )
657 {
658 *unicode_character = byte_stream_character;
659 }
660 else
661 {
662 byte_stream_character -= 0x80;
663
664 *unicode_character = libuna_codepage_windows_1253_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
665 }
666 *byte_stream_index += 1;
667
668 break;
669
670 case LIBUNA_CODEPAGE_WINDOWS_1254:
671 if( byte_stream_character < 0x80 )
672 {
673 *unicode_character = byte_stream_character;
674 }
675 else if( byte_stream_character < 0xa0 )
676 {
677 byte_stream_character -= 0x80;
678
679 *unicode_character = libuna_codepage_windows_1254_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
680 }
681 else if( byte_stream_character < 0xd0 )
682 {
683 *unicode_character = byte_stream_character;
684 }
685 else if( byte_stream_character < 0xe0 )
686 {
687 byte_stream_character -= 0xd0;
688
689 *unicode_character = libuna_codepage_windows_1254_byte_stream_to_unicode_base_0xd0[ byte_stream_character ];
690 }
691 else if( byte_stream_character < 0xf0 )
692 {
693 *unicode_character = byte_stream_character;
694 }
695 else
696 {
697 byte_stream_character -= 0xf0;
698
699 *unicode_character = libuna_codepage_windows_1254_byte_stream_to_unicode_base_0xf0[ byte_stream_character ];
700 }
701 *byte_stream_index += 1;
702
703 break;
704
705 case LIBUNA_CODEPAGE_WINDOWS_1255:
706 if( byte_stream_character < 0x80 )
707 {
708 *unicode_character = byte_stream_character;
709 }
710 else
711 {
712 byte_stream_character -= 0x80;
713
714 *unicode_character = libuna_codepage_windows_1255_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
715 }
716 *byte_stream_index += 1;
717
718 break;
719
720 case LIBUNA_CODEPAGE_WINDOWS_1256:
721 if( byte_stream_character < 0x80 )
722 {
723 *unicode_character = byte_stream_character;
724 }
725 else
726 {
727 byte_stream_character -= 0x80;
728
729 *unicode_character = libuna_codepage_windows_1256_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
730 }
731 *byte_stream_index += 1;
732
733 break;
734
735 case LIBUNA_CODEPAGE_WINDOWS_1257:
736 if( byte_stream_character < 0x80 )
737 {
738 *unicode_character = byte_stream_character;
739 }
740 else
741 {
742 byte_stream_character -= 0x80;
743
744 *unicode_character = libuna_codepage_windows_1257_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
745 }
746 *byte_stream_index += 1;
747
748 break;
749
750 case LIBUNA_CODEPAGE_WINDOWS_1258:
751 if( byte_stream_character < 0x80 )
752 {
753 *unicode_character = byte_stream_character;
754 }
755 else
756 {
757 byte_stream_character -= 0x80;
758
759 *unicode_character = libuna_codepage_windows_1258_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
760 }
761 *byte_stream_index += 1;
762
763 break;
764
765 default:
766 libcerror_error_set(
767 error,
768 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
769 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
770 "%s: unsupported codepage: %d.",
771 function,
772 codepage );
773
774 return( -1 );
775 }
776 if( result != 1 )
777 {
778 libcerror_error_set(
779 error,
780 LIBCERROR_ERROR_DOMAIN_RUNTIME,
781 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
782 "%s: unable to copy Unicode character from byte stream.",
783 function );
784
785 return( -1 );
786 }
787 return( result );
788 }
789
790 /* Copies an Unicode character to a byte stream
791 * Returns 1 if successful or -1 on error
792 */
libuna_unicode_character_copy_to_byte_stream(libuna_unicode_character_t unicode_character,uint8_t * byte_stream,size_t byte_stream_size,size_t * byte_stream_index,int codepage,libcerror_error_t ** error)793 int libuna_unicode_character_copy_to_byte_stream(
794 libuna_unicode_character_t unicode_character,
795 uint8_t *byte_stream,
796 size_t byte_stream_size,
797 size_t *byte_stream_index,
798 int codepage,
799 libcerror_error_t **error )
800 {
801 static char *function = "libuna_unicode_character_copy_to_byte_stream";
802 int result = 1;
803
804 if( byte_stream == NULL )
805 {
806 libcerror_error_set(
807 error,
808 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
809 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
810 "%s: invalid byte stream.",
811 function );
812
813 return( -1 );
814 }
815 if( byte_stream_size > (size_t) SSIZE_MAX )
816 {
817 libcerror_error_set(
818 error,
819 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
820 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
821 "%s: invalid byte stream size value exceeds maximum.",
822 function );
823
824 return( -1 );
825 }
826 if( byte_stream_index == NULL )
827 {
828 libcerror_error_set(
829 error,
830 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
831 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
832 "%s: invalid byte stream index.",
833 function );
834
835 return( -1 );
836 }
837 if( *byte_stream_index >= byte_stream_size )
838 {
839 libcerror_error_set(
840 error,
841 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
842 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
843 "%s: byte stream too small.",
844 function );
845
846 return( -1 );
847 }
848 switch( codepage )
849 {
850 case LIBUNA_CODEPAGE_ASCII:
851 if( unicode_character < 0x0080 )
852 {
853 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
854 }
855 else
856 {
857 byte_stream[ *byte_stream_index ] = 0x1a;
858 }
859 *byte_stream_index += 1;
860
861 break;
862
863 case LIBUNA_CODEPAGE_ISO_8859_1:
864 if( unicode_character < 0x0100 )
865 {
866 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
867 }
868 else
869 {
870 byte_stream[ *byte_stream_index ] = 0x1a;
871 }
872 *byte_stream_index += 1;
873
874 break;
875
876 case LIBUNA_CODEPAGE_ISO_8859_2:
877 if( unicode_character < 0x00a0 )
878 {
879 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
880 }
881 else if( ( unicode_character >= 0x00a0 )
882 && ( unicode_character < 0x0120 ) )
883 {
884 unicode_character -= 0x00a0;
885
886 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
887 }
888 else if( ( unicode_character >= 0x0138 )
889 && ( unicode_character < 0x0180 ) )
890 {
891 unicode_character -= 0x0138;
892
893 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x0138[ unicode_character ];
894 }
895 else if( ( unicode_character >= 0x02d8 )
896 && ( unicode_character < 0x02e0 ) )
897 {
898 unicode_character -= 0x02d8;
899
900 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x02d8[ unicode_character ];
901 }
902 else if( unicode_character == 0x02c7 )
903 {
904 byte_stream[ *byte_stream_index ] = 0xb7;
905 }
906 else
907 {
908 byte_stream[ *byte_stream_index ] = 0x1a;
909 }
910 *byte_stream_index += 1;
911
912 break;
913
914 case LIBUNA_CODEPAGE_ISO_8859_3:
915 if( unicode_character < 0x00a0 )
916 {
917 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
918 }
919 else if( ( unicode_character >= 0x00a0 )
920 && ( unicode_character < 0x0100 ) )
921 {
922 unicode_character -= 0x00a0;
923
924 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
925 }
926 else if( ( unicode_character >= 0x0108 )
927 && ( unicode_character < 0x0110 ) )
928 {
929 unicode_character -= 0x0108;
930
931 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0108[ unicode_character ];
932 }
933 else if( ( unicode_character >= 0x0118 )
934 && ( unicode_character < 0x0128 ) )
935 {
936 unicode_character -= 0x0118;
937
938 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0118[ unicode_character ];
939 }
940 else if( ( unicode_character >= 0x0130 )
941 && ( unicode_character < 0x0138 ) )
942 {
943 unicode_character -= 0x0130;
944
945 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0130[ unicode_character ];
946 }
947 else if( ( unicode_character >= 0x0158 )
948 && ( unicode_character < 0x0160 ) )
949 {
950 unicode_character -= 0x0158;
951
952 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0158[ unicode_character ];
953 }
954 else switch( unicode_character )
955 {
956 case 0x016c:
957 byte_stream[ *byte_stream_index ] = 0xdd;
958 break;
959
960 case 0x016d:
961 byte_stream[ *byte_stream_index ] = 0xfd;
962 break;
963
964 case 0x017b:
965 byte_stream[ *byte_stream_index ] = 0xaf;
966 break;
967
968 case 0x017c:
969 byte_stream[ *byte_stream_index ] = 0xbf;
970 break;
971
972 case 0x02d8:
973 byte_stream[ *byte_stream_index ] = 0xa2;
974 break;
975
976 case 0x02d9:
977 byte_stream[ *byte_stream_index ] = 0xff;
978 break;
979
980 default:
981 byte_stream[ *byte_stream_index ] = 0x1a;
982 break;
983 }
984 *byte_stream_index += 1;
985
986 break;
987
988 case LIBUNA_CODEPAGE_ISO_8859_4:
989 if( unicode_character < 0x00a0 )
990 {
991 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
992 }
993 else if( ( unicode_character >= 0x00a0 )
994 && ( unicode_character < 0x0158 ) )
995 {
996 unicode_character -= 0x00a0;
997
998 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_4_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
999 }
1000 else if( ( unicode_character >= 0x0160 )
1001 && ( unicode_character < 0x0180 ) )
1002 {
1003 unicode_character -= 0x0160;
1004
1005 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_4_unicode_to_byte_stream_base_0x0160[ unicode_character ];
1006 }
1007 else switch( unicode_character )
1008 {
1009 case 0x02c7:
1010 byte_stream[ *byte_stream_index ] = 0xb7;
1011 break;
1012
1013 case 0x02d9:
1014 byte_stream[ *byte_stream_index ] = 0xff;
1015 break;
1016
1017 case 0x02db:
1018 byte_stream[ *byte_stream_index ] = 0xb2;
1019 break;
1020
1021 default:
1022 byte_stream[ *byte_stream_index ] = 0x1a;
1023 break;
1024 }
1025 *byte_stream_index += 1;
1026
1027 break;
1028
1029 case LIBUNA_CODEPAGE_ISO_8859_5:
1030 if( unicode_character < 0x00a1 )
1031 {
1032 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
1033 }
1034 else if( ( unicode_character >= 0x0400 )
1035 && ( unicode_character < 0x0460 ) )
1036 {
1037 unicode_character -= 0x0400;
1038
1039 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_5_unicode_to_byte_stream_base_0x0400[ unicode_character ];
1040 }
1041 else switch( unicode_character )
1042 {
1043 case 0x00a7:
1044 byte_stream[ *byte_stream_index ] = 0xfd;
1045 break;
1046
1047 case 0x00ad:
1048 byte_stream[ *byte_stream_index ] = 0xad;
1049 break;
1050
1051 case 0x2116:
1052 byte_stream[ *byte_stream_index ] = 0xf0;
1053 break;
1054
1055 default:
1056 byte_stream[ *byte_stream_index ] = 0x1a;
1057 break;
1058 }
1059 *byte_stream_index += 1;
1060
1061 break;
1062
1063 case LIBUNA_CODEPAGE_ISO_8859_6:
1064 if( unicode_character < 0x00a1 )
1065 {
1066 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
1067 }
1068 else if( ( unicode_character >= 0x0618 )
1069 && ( unicode_character < 0x658 ) )
1070 {
1071 unicode_character -= 0x0618;
1072
1073 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_6_unicode_to_byte_stream_base_0x0618[ unicode_character ];
1074 }
1075 else switch( unicode_character )
1076 {
1077 case 0x00a4:
1078 byte_stream[ *byte_stream_index ] = 0xa4;
1079 break;
1080
1081 case 0x00ad:
1082 byte_stream[ *byte_stream_index ] = 0xad;
1083 break;
1084
1085 case 0x060c:
1086 byte_stream[ *byte_stream_index ] = 0xac;
1087 break;
1088
1089 default:
1090 byte_stream[ *byte_stream_index ] = 0x1a;
1091 break;
1092 }
1093 *byte_stream_index += 1;
1094
1095 break;
1096
1097 case LIBUNA_CODEPAGE_ISO_8859_7:
1098 if( unicode_character < 0x00a0 )
1099 {
1100 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
1101 }
1102 else if( ( unicode_character >= 0x00a0 )
1103 && ( unicode_character < 0x00b8 ) )
1104 {
1105 unicode_character -= 0x00a0;
1106
1107 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_7_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1108 }
1109 else if( ( unicode_character >= 0x0380 )
1110 && ( unicode_character < 0x03d0 ) )
1111 {
1112 unicode_character -= 0x0380;
1113
1114 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_7_unicode_to_byte_stream_base_0x0380[ unicode_character ];
1115 }
1116 else switch( unicode_character )
1117 {
1118 case 0x00bb:
1119 byte_stream[ *byte_stream_index ] = 0xbb;
1120 break;
1121
1122 case 0x00bd:
1123 byte_stream[ *byte_stream_index ] = 0xbd;
1124 break;
1125
1126 case 0x037a:
1127 byte_stream[ *byte_stream_index ] = 0xaa;
1128 break;
1129
1130 case 0x2015:
1131 byte_stream[ *byte_stream_index ] = 0xaf;
1132 break;
1133
1134 case 0x2018:
1135 byte_stream[ *byte_stream_index ] = 0xa1;
1136 break;
1137
1138 case 0x2019:
1139 byte_stream[ *byte_stream_index ] = 0xa2;
1140 break;
1141
1142 case 0x20ac:
1143 byte_stream[ *byte_stream_index ] = 0xa4;
1144 break;
1145
1146 case 0x20af:
1147 byte_stream[ *byte_stream_index ] = 0xa5;
1148 break;
1149
1150 default:
1151 byte_stream[ *byte_stream_index ] = 0x1a;
1152 break;
1153 }
1154 *byte_stream_index += 1;
1155
1156 break;
1157
1158 case LIBUNA_CODEPAGE_ISO_8859_8:
1159 if( unicode_character < 0x00a0 )
1160 {
1161 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
1162 }
1163 else if( ( unicode_character >= 0x00a0 )
1164 && ( unicode_character < 0x00c0 ) )
1165 {
1166 unicode_character -= 0x00a0;
1167
1168 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_8_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1169 }
1170 else if( ( unicode_character >= 0x05d0 )
1171 && ( unicode_character < 0x05f0 ) )
1172 {
1173 unicode_character -= 0x05d0;
1174
1175 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_8_unicode_to_byte_stream_base_0x05d0[ unicode_character ];
1176 }
1177 else switch( unicode_character )
1178 {
1179 case 0x00d7:
1180 byte_stream[ *byte_stream_index ] = 0xaa;
1181 break;
1182
1183 case 0x00f7:
1184 byte_stream[ *byte_stream_index ] = 0xba;
1185 break;
1186
1187 case 0x200e:
1188 byte_stream[ *byte_stream_index ] = 0xfd;
1189 break;
1190
1191 case 0x200f:
1192 byte_stream[ *byte_stream_index ] = 0xfe;
1193 break;
1194
1195 case 0x2017:
1196 byte_stream[ *byte_stream_index ] = 0xdf;
1197 break;
1198
1199 default:
1200 byte_stream[ *byte_stream_index ] = 0x1a;
1201 break;
1202 }
1203 *byte_stream_index += 1;
1204
1205 break;
1206
1207 case LIBUNA_CODEPAGE_ISO_8859_9:
1208 if( unicode_character < 0x00d0 )
1209 {
1210 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
1211 }
1212 else if( ( unicode_character >= 0x00d0 )
1213 && ( unicode_character < 0x0100 ) )
1214 {
1215 unicode_character -= 0x00d0;
1216
1217 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_9_unicode_to_byte_stream_base_0x00d0[ unicode_character ];
1218 }
1219 else switch( unicode_character )
1220 {
1221 case 0x011e:
1222 byte_stream[ *byte_stream_index ] = 0xd0;
1223 break;
1224
1225 case 0x011f:
1226 byte_stream[ *byte_stream_index ] = 0xf0;
1227 break;
1228
1229 case 0x0130:
1230 byte_stream[ *byte_stream_index ] = 0xdd;
1231 break;
1232
1233 case 0x0131:
1234 byte_stream[ *byte_stream_index ] = 0xfd;
1235 break;
1236
1237 case 0x015e:
1238 byte_stream[ *byte_stream_index ] = 0xde;
1239 break;
1240
1241 case 0x015f:
1242 byte_stream[ *byte_stream_index ] = 0xfe;
1243 break;
1244
1245 default:
1246 byte_stream[ *byte_stream_index ] = 0x1a;
1247 break;
1248 }
1249 *byte_stream_index += 1;
1250
1251 break;
1252
1253 case LIBUNA_CODEPAGE_ISO_8859_10:
1254 if( unicode_character < 0x00a1 )
1255 {
1256 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
1257 }
1258 else if( ( unicode_character >= 0x00c0 )
1259 && ( unicode_character < 0x0150 ) )
1260 {
1261 unicode_character -= 0x00c0;
1262
1263 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_10_unicode_to_byte_stream_base_0x00c0[ unicode_character ];
1264 }
1265 else if( ( unicode_character >= 0x0160 )
1266 && ( unicode_character < 0x0170 ) )
1267 {
1268 unicode_character -= 0x0160;
1269
1270 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_10_unicode_to_byte_stream_base_0x0160[ unicode_character ];
1271 }
1272 else switch( unicode_character )
1273 {
1274 case 0x00a7:
1275 byte_stream[ *byte_stream_index ] = 0xa7;
1276 break;
1277
1278 case 0x00ad:
1279 byte_stream[ *byte_stream_index ] = 0xad;
1280 break;
1281
1282 case 0x00b0:
1283 byte_stream[ *byte_stream_index ] = 0xb0;
1284 break;
1285
1286 case 0x00b7:
1287 byte_stream[ *byte_stream_index ] = 0xb7;
1288 break;
1289
1290 case 0x0172:
1291 byte_stream[ *byte_stream_index ] = 0xd9;
1292 break;
1293
1294 case 0x0173:
1295 byte_stream[ *byte_stream_index ] = 0xf9;
1296 break;
1297
1298 case 0x017d:
1299 byte_stream[ *byte_stream_index ] = 0xac;
1300 break;
1301
1302 case 0x017e:
1303 byte_stream[ *byte_stream_index ] = 0xbc;
1304 break;
1305
1306 case 0x2015:
1307 byte_stream[ *byte_stream_index ] = 0xbd;
1308 break;
1309
1310 default:
1311 byte_stream[ *byte_stream_index ] = 0x1a;
1312 break;
1313 }
1314 *byte_stream_index += 1;
1315
1316 break;
1317
1318 case LIBUNA_CODEPAGE_ISO_8859_11:
1319 if( unicode_character < 0x00a1 )
1320 {
1321 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
1322 }
1323 else if( ( unicode_character >= 0x0e01 )
1324 && ( unicode_character < 0x0e3b ) )
1325 {
1326 byte_stream[ *byte_stream_index ] = (uint8_t) ( unicode_character - 0x0d60 );
1327 }
1328 else if( ( unicode_character >= 0x0e3f )
1329 && ( unicode_character < 0x0e5c ) )
1330 {
1331 byte_stream[ *byte_stream_index ] = (uint8_t) ( unicode_character - 0x0d60 );
1332 }
1333 else
1334 {
1335 byte_stream[ *byte_stream_index ] = 0x1a;
1336 }
1337 *byte_stream_index += 1;
1338
1339 break;
1340
1341 case LIBUNA_CODEPAGE_ISO_8859_13:
1342 if( unicode_character < 0x00a0 )
1343 {
1344 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
1345 }
1346 else if( ( unicode_character >= 0x00a0 )
1347 && ( unicode_character < 0x0180 ) )
1348 {
1349 unicode_character -= 0x00a0;
1350
1351 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_13_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1352 }
1353 else if( ( unicode_character >= 0x2018 )
1354 && ( unicode_character < 0x2020 ) )
1355 {
1356 unicode_character -= 0x2018;
1357
1358 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_13_unicode_to_byte_stream_base_0x2018[ unicode_character ];
1359 }
1360 else
1361 {
1362 byte_stream[ *byte_stream_index ] = 0x1a;
1363 }
1364 *byte_stream_index += 1;
1365
1366 break;
1367
1368 case LIBUNA_CODEPAGE_ISO_8859_14:
1369 if( unicode_character < 0x00a1 )
1370 {
1371 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
1372 }
1373 else if( ( unicode_character >= 0x00c0 )
1374 && ( unicode_character < 0x0100 ) )
1375 {
1376 unicode_character -= 0x00c0;
1377
1378 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x00c0[ unicode_character ];
1379 }
1380 else if( ( unicode_character >= 0x0170 )
1381 && ( unicode_character < 0x0178 ) )
1382 {
1383 unicode_character -= 0x0170;
1384
1385 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x0170[ unicode_character ];
1386 }
1387 else if( ( unicode_character >= 0x1e80 )
1388 && ( unicode_character < 0x1e88 ) )
1389 {
1390 unicode_character -= 0x1e80;
1391
1392 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x1e80[ unicode_character ];
1393 }
1394 else switch( unicode_character )
1395 {
1396 case 0x00a3:
1397 byte_stream[ *byte_stream_index ] = 0xa3;
1398 break;
1399
1400 case 0x00a7:
1401 byte_stream[ *byte_stream_index ] = 0xa7;
1402 break;
1403
1404 case 0x00a9:
1405 byte_stream[ *byte_stream_index ] = 0xa9;
1406 break;
1407
1408 case 0x00ad:
1409 byte_stream[ *byte_stream_index ] = 0xad;
1410 break;
1411
1412 case 0x00ae:
1413 byte_stream[ *byte_stream_index ] = 0xae;
1414 break;
1415
1416 case 0x00b6:
1417 byte_stream[ *byte_stream_index ] = 0xb6;
1418 break;
1419
1420 case 0x010a:
1421 byte_stream[ *byte_stream_index ] = 0xa4;
1422 break;
1423
1424 case 0x010b:
1425 byte_stream[ *byte_stream_index ] = 0xa5;
1426 break;
1427
1428 case 0x0120:
1429 byte_stream[ *byte_stream_index ] = 0xb2;
1430 break;
1431
1432 case 0x0121:
1433 byte_stream[ *byte_stream_index ] = 0xb3;
1434 break;
1435
1436 case 0x0178:
1437 byte_stream[ *byte_stream_index ] = 0xaf;
1438 break;
1439
1440 case 0x1e02:
1441 byte_stream[ *byte_stream_index ] = 0xa1;
1442 break;
1443
1444 case 0x1e03:
1445 byte_stream[ *byte_stream_index ] = 0xa2;
1446 break;
1447
1448 case 0x1e0a:
1449 byte_stream[ *byte_stream_index ] = 0xa6;
1450 break;
1451
1452 case 0x1e0b:
1453 byte_stream[ *byte_stream_index ] = 0xab;
1454 break;
1455
1456 case 0x1e1e:
1457 byte_stream[ *byte_stream_index ] = 0xb0;
1458 break;
1459
1460 case 0x1e1f:
1461 byte_stream[ *byte_stream_index ] = 0xb1;
1462 break;
1463
1464 case 0x1e40:
1465 byte_stream[ *byte_stream_index ] = 0xb4;
1466 break;
1467
1468 case 0x1e41:
1469 byte_stream[ *byte_stream_index ] = 0xb5;
1470 break;
1471
1472 case 0x1e56:
1473 byte_stream[ *byte_stream_index ] = 0xb7;
1474 break;
1475
1476 case 0x1e57:
1477 byte_stream[ *byte_stream_index ] = 0xb9;
1478 break;
1479
1480 case 0x1e60:
1481 byte_stream[ *byte_stream_index ] = 0xbb;
1482 break;
1483
1484 case 0x1e61:
1485 byte_stream[ *byte_stream_index ] = 0xbf;
1486 break;
1487
1488 case 0x1e6a:
1489 byte_stream[ *byte_stream_index ] = 0xd7;
1490 break;
1491
1492 case 0x1e6b:
1493 byte_stream[ *byte_stream_index ] = 0xf7;
1494 break;
1495
1496 case 0x1ef2:
1497 byte_stream[ *byte_stream_index ] = 0xac;
1498 break;
1499
1500 case 0x1ef3:
1501 byte_stream[ *byte_stream_index ] = 0xbc;
1502 break;
1503
1504 default:
1505 byte_stream[ *byte_stream_index ] = 0x1a;
1506 break;
1507 }
1508 *byte_stream_index += 1;
1509
1510 break;
1511
1512 case LIBUNA_CODEPAGE_ISO_8859_15:
1513 if( unicode_character < 0x00a0 )
1514 {
1515 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
1516 }
1517 else if( ( unicode_character >= 0x00a0 )
1518 && ( unicode_character < 0x00c0 ) )
1519 {
1520 unicode_character -= 0x00a0;
1521
1522 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_15_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1523 }
1524 else if( unicode_character < 0x0100 )
1525 {
1526 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
1527 }
1528 else switch( unicode_character )
1529 {
1530 case 0x0152:
1531 byte_stream[ *byte_stream_index ] = 0xbc;
1532 break;
1533
1534 case 0x0153:
1535 byte_stream[ *byte_stream_index ] = 0xbd;
1536 break;
1537
1538 case 0x0160:
1539 byte_stream[ *byte_stream_index ] = 0xa6;
1540 break;
1541
1542 case 0x0161:
1543 byte_stream[ *byte_stream_index ] = 0xa8;
1544 break;
1545
1546 case 0x0178:
1547 byte_stream[ *byte_stream_index ] = 0xbe;
1548 break;
1549
1550 case 0x017d:
1551 byte_stream[ *byte_stream_index ] = 0xb4;
1552 break;
1553
1554 case 0x017e:
1555 byte_stream[ *byte_stream_index ] = 0xb8;
1556 break;
1557
1558 case 0x20ac:
1559 byte_stream[ *byte_stream_index ] = 0xa4;
1560 break;
1561
1562 default:
1563 byte_stream[ *byte_stream_index ] = 0x1a;
1564 break;
1565 }
1566 *byte_stream_index += 1;
1567
1568 break;
1569
1570 case LIBUNA_CODEPAGE_ISO_8859_16:
1571 if( unicode_character < 0x00a1 )
1572 {
1573 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
1574 }
1575 else if( ( unicode_character >= 0x00a8 )
1576 && ( unicode_character < 0x0108 ) )
1577 {
1578 unicode_character -= 0x00a8;
1579
1580 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x00a8[ unicode_character ];
1581 }
1582 else if( ( unicode_character >= 0x0140 )
1583 && ( unicode_character < 0x0148 ) )
1584 {
1585 unicode_character -= 0x0140;
1586
1587 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0140[ unicode_character ];
1588 }
1589 else if( ( unicode_character >= 0x0150 )
1590 && ( unicode_character < 0x0158 ) )
1591 {
1592 unicode_character -= 0x0150;
1593
1594 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0150[ unicode_character ];
1595 }
1596 else if( ( unicode_character >= 0x0178 )
1597 && ( unicode_character < 0x0180 ) )
1598 {
1599 unicode_character -= 0x0178;
1600
1601 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0178[ unicode_character ];
1602 }
1603 else if( ( unicode_character >= 0x0218 )
1604 && ( unicode_character < 0x0220 ) )
1605 {
1606 unicode_character -= 0x0218;
1607
1608 byte_stream[ *byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0218[ unicode_character ];
1609 }
1610 else switch( unicode_character )
1611 {
1612 case 0x00a7:
1613 byte_stream[ *byte_stream_index ] = 0xa7;
1614 break;
1615
1616 case 0x010c:
1617 byte_stream[ *byte_stream_index ] = 0xb2;
1618 break;
1619
1620 case 0x010d:
1621 byte_stream[ *byte_stream_index ] = 0xb9;
1622 break;
1623
1624 case 0x0110:
1625 byte_stream[ *byte_stream_index ] = 0xd0;
1626 break;
1627
1628 case 0x0111:
1629 byte_stream[ *byte_stream_index ] = 0xf0;
1630 break;
1631
1632 case 0x0118:
1633 byte_stream[ *byte_stream_index ] = 0xdd;
1634 break;
1635
1636 case 0x0119:
1637 byte_stream[ *byte_stream_index ] = 0xfd;
1638 break;
1639
1640 case 0x015a:
1641 byte_stream[ *byte_stream_index ] = 0xd7;
1642 break;
1643
1644 case 0x015b:
1645 byte_stream[ *byte_stream_index ] = 0xf7;
1646 break;
1647
1648 case 0x0160:
1649 byte_stream[ *byte_stream_index ] = 0xa6;
1650 break;
1651
1652 case 0x0161:
1653 byte_stream[ *byte_stream_index ] = 0xa8;
1654 break;
1655
1656 case 0x0170:
1657 byte_stream[ *byte_stream_index ] = 0xd8;
1658 break;
1659
1660 case 0x0171:
1661 byte_stream[ *byte_stream_index ] = 0xf8;
1662 break;
1663
1664 case 0x201d:
1665 byte_stream[ *byte_stream_index ] = 0xb5;
1666 break;
1667
1668 case 0x201e:
1669 byte_stream[ *byte_stream_index ] = 0xa5;
1670 break;
1671
1672 case 0x20ac:
1673 byte_stream[ *byte_stream_index ] = 0xa4;
1674 break;
1675
1676 default:
1677 byte_stream[ *byte_stream_index ] = 0x1a;
1678 break;
1679 }
1680 *byte_stream_index += 1;
1681
1682 break;
1683
1684 case LIBUNA_CODEPAGE_KOI8_R:
1685 if( unicode_character < 0x0080 )
1686 {
1687 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
1688 }
1689 else if( ( unicode_character >= 0x0410 )
1690 && ( unicode_character < 0x0450 ) )
1691 {
1692 unicode_character -= 0x0410;
1693
1694 byte_stream[ *byte_stream_index ] = libuna_codepage_koi8_r_unicode_to_byte_stream_base_0x0410[ unicode_character ];
1695 }
1696 else if( ( unicode_character >= 0x2550 )
1697 && ( unicode_character < 0x2570 ) )
1698 {
1699 unicode_character -= 0x2550;
1700
1701 byte_stream[ *byte_stream_index ] = libuna_codepage_koi8_r_unicode_to_byte_stream_base_0x2550[ unicode_character ];
1702 }
1703 else switch( unicode_character )
1704 {
1705 case 0x00a0:
1706 byte_stream[ *byte_stream_index ] = 0x9a;
1707 break;
1708
1709 case 0x00a9:
1710 byte_stream[ *byte_stream_index ] = 0xbf;
1711 break;
1712
1713 case 0x00b0:
1714 byte_stream[ *byte_stream_index ] = 0x9c;
1715 break;
1716
1717 case 0x00b2:
1718 byte_stream[ *byte_stream_index ] = 0x9d;
1719 break;
1720
1721 case 0x00b7:
1722 byte_stream[ *byte_stream_index ] = 0x9e;
1723 break;
1724
1725 case 0x00f7:
1726 byte_stream[ *byte_stream_index ] = 0x9f;
1727 break;
1728
1729 case 0x0401:
1730 byte_stream[ *byte_stream_index ] = 0xb3;
1731 break;
1732
1733 case 0x0451:
1734 byte_stream[ *byte_stream_index ] = 0xa3;
1735 break;
1736
1737 case 0x2219:
1738 byte_stream[ *byte_stream_index ] = 0x95;
1739 break;
1740
1741 case 0x221a:
1742 byte_stream[ *byte_stream_index ] = 0x96;
1743 break;
1744
1745 case 0x2248:
1746 byte_stream[ *byte_stream_index ] = 0x97;
1747 break;
1748
1749 case 0x2264:
1750 byte_stream[ *byte_stream_index ] = 0x98;
1751 break;
1752
1753 case 0x2265:
1754 byte_stream[ *byte_stream_index ] = 0x99;
1755 break;
1756
1757 case 0x2320:
1758 byte_stream[ *byte_stream_index ] = 0x93;
1759 break;
1760
1761 case 0x2321:
1762 byte_stream[ *byte_stream_index ] = 0x9b;
1763 break;
1764
1765 case 0x2500:
1766 byte_stream[ *byte_stream_index ] = 0x80;
1767 break;
1768
1769 case 0x2502:
1770 byte_stream[ *byte_stream_index ] = 0x81;
1771 break;
1772
1773 case 0x250c:
1774 byte_stream[ *byte_stream_index ] = 0x82;
1775 break;
1776
1777 case 0x2510:
1778 byte_stream[ *byte_stream_index ] = 0x83;
1779 break;
1780
1781 case 0x2514:
1782 byte_stream[ *byte_stream_index ] = 0x84;
1783 break;
1784
1785 case 0x2518:
1786 byte_stream[ *byte_stream_index ] = 0x85;
1787 break;
1788
1789 case 0x251c:
1790 byte_stream[ *byte_stream_index ] = 0x86;
1791 break;
1792
1793 case 0x2524:
1794 byte_stream[ *byte_stream_index ] = 0x87;
1795 break;
1796
1797 case 0x252c:
1798 byte_stream[ *byte_stream_index ] = 0x88;
1799 break;
1800
1801 case 0x2534:
1802 byte_stream[ *byte_stream_index ] = 0x89;
1803 break;
1804
1805 case 0x253c:
1806 byte_stream[ *byte_stream_index ] = 0x8a;
1807 break;
1808
1809 case 0x2580:
1810 byte_stream[ *byte_stream_index ] = 0x8b;
1811 break;
1812
1813 case 0x2584:
1814 byte_stream[ *byte_stream_index ] = 0x8c;
1815 break;
1816
1817 case 0x2588:
1818 byte_stream[ *byte_stream_index ] = 0x8d;
1819 break;
1820
1821 case 0x258c:
1822 byte_stream[ *byte_stream_index ] = 0x8e;
1823 break;
1824
1825 case 0x2590:
1826 byte_stream[ *byte_stream_index ] = 0x8f;
1827 break;
1828
1829 case 0x2591:
1830 byte_stream[ *byte_stream_index ] = 0x90;
1831 break;
1832
1833 case 0x2592:
1834 byte_stream[ *byte_stream_index ] = 0x91;
1835 break;
1836
1837 case 0x2593:
1838 byte_stream[ *byte_stream_index ] = 0x92;
1839 break;
1840
1841 case 0x25a0:
1842 byte_stream[ *byte_stream_index ] = 0x94;
1843 break;
1844
1845 default:
1846 byte_stream[ *byte_stream_index ] = 0x1a;
1847 break;
1848 }
1849 *byte_stream_index += 1;
1850
1851 break;
1852
1853 case LIBUNA_CODEPAGE_KOI8_U:
1854 if( unicode_character < 0x0080 )
1855 {
1856 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
1857 }
1858 else if( ( unicode_character >= 0x0410 )
1859 && ( unicode_character < 0x0450 ) )
1860 {
1861 unicode_character -= 0x0410;
1862
1863 byte_stream[ *byte_stream_index ] = libuna_codepage_koi8_u_unicode_to_byte_stream_base_0x0410[ unicode_character ];
1864 }
1865 else if( ( unicode_character >= 0x2550 )
1866 && ( unicode_character < 0x2570 ) )
1867 {
1868 unicode_character -= 0x2550;
1869
1870 byte_stream[ *byte_stream_index ] = libuna_codepage_koi8_u_unicode_to_byte_stream_base_0x2550[ unicode_character ];
1871 }
1872 else switch( unicode_character )
1873 {
1874 case 0x00a0:
1875 byte_stream[ *byte_stream_index ] = 0x9a;
1876 break;
1877
1878 case 0x00a9:
1879 byte_stream[ *byte_stream_index ] = 0xbf;
1880 break;
1881
1882 case 0x00b0:
1883 byte_stream[ *byte_stream_index ] = 0x9c;
1884 break;
1885
1886 case 0x00b2:
1887 byte_stream[ *byte_stream_index ] = 0x9d;
1888 break;
1889
1890 case 0x00b7:
1891 byte_stream[ *byte_stream_index ] = 0x9e;
1892 break;
1893
1894 case 0x00f7:
1895 byte_stream[ *byte_stream_index ] = 0x9f;
1896 break;
1897
1898 case 0x0401:
1899 byte_stream[ *byte_stream_index ] = 0xb3;
1900 break;
1901
1902 case 0x0404:
1903 byte_stream[ *byte_stream_index ] = 0xb4;
1904 break;
1905
1906 case 0x0406:
1907 byte_stream[ *byte_stream_index ] = 0xb6;
1908 break;
1909
1910 case 0x0407:
1911 byte_stream[ *byte_stream_index ] = 0xb7;
1912 break;
1913
1914 case 0x0451:
1915 byte_stream[ *byte_stream_index ] = 0xa3;
1916 break;
1917
1918 case 0x0454:
1919 byte_stream[ *byte_stream_index ] = 0xa4;
1920 break;
1921
1922 case 0x0456:
1923 byte_stream[ *byte_stream_index ] = 0xa6;
1924 break;
1925
1926 case 0x0457:
1927 byte_stream[ *byte_stream_index ] = 0xa7;
1928 break;
1929
1930 case 0x0490:
1931 byte_stream[ *byte_stream_index ] = 0xbd;
1932 break;
1933
1934 case 0x0491:
1935 byte_stream[ *byte_stream_index ] = 0xad;
1936 break;
1937
1938 case 0x2219:
1939 byte_stream[ *byte_stream_index ] = 0x95;
1940 break;
1941
1942 case 0x221a:
1943 byte_stream[ *byte_stream_index ] = 0x96;
1944 break;
1945
1946 case 0x2248:
1947 byte_stream[ *byte_stream_index ] = 0x97;
1948 break;
1949
1950 case 0x2264:
1951 byte_stream[ *byte_stream_index ] = 0x98;
1952 break;
1953
1954 case 0x2265:
1955 byte_stream[ *byte_stream_index ] = 0x99;
1956 break;
1957
1958 case 0x2320:
1959 byte_stream[ *byte_stream_index ] = 0x93;
1960 break;
1961
1962 case 0x2321:
1963 byte_stream[ *byte_stream_index ] = 0x9b;
1964 break;
1965
1966 case 0x2500:
1967 byte_stream[ *byte_stream_index ] = 0x80;
1968 break;
1969
1970 case 0x2502:
1971 byte_stream[ *byte_stream_index ] = 0x81;
1972 break;
1973
1974 case 0x250c:
1975 byte_stream[ *byte_stream_index ] = 0x82;
1976 break;
1977
1978 case 0x2510:
1979 byte_stream[ *byte_stream_index ] = 0x83;
1980 break;
1981
1982 case 0x2514:
1983 byte_stream[ *byte_stream_index ] = 0x84;
1984 break;
1985
1986 case 0x2518:
1987 byte_stream[ *byte_stream_index ] = 0x85;
1988 break;
1989
1990 case 0x251c:
1991 byte_stream[ *byte_stream_index ] = 0x86;
1992 break;
1993
1994 case 0x2524:
1995 byte_stream[ *byte_stream_index ] = 0x87;
1996 break;
1997
1998 case 0x252c:
1999 byte_stream[ *byte_stream_index ] = 0x88;
2000 break;
2001
2002 case 0x2534:
2003 byte_stream[ *byte_stream_index ] = 0x89;
2004 break;
2005
2006 case 0x253c:
2007 byte_stream[ *byte_stream_index ] = 0x8a;
2008 break;
2009
2010 case 0x2580:
2011 byte_stream[ *byte_stream_index ] = 0x8b;
2012 break;
2013
2014 case 0x2584:
2015 byte_stream[ *byte_stream_index ] = 0x8c;
2016 break;
2017
2018 case 0x2588:
2019 byte_stream[ *byte_stream_index ] = 0x8d;
2020 break;
2021
2022 case 0x258c:
2023 byte_stream[ *byte_stream_index ] = 0x8e;
2024 break;
2025
2026 case 0x2590:
2027 byte_stream[ *byte_stream_index ] = 0x8f;
2028 break;
2029
2030 case 0x2591:
2031 byte_stream[ *byte_stream_index ] = 0x90;
2032 break;
2033
2034 case 0x2592:
2035 byte_stream[ *byte_stream_index ] = 0x91;
2036 break;
2037
2038 case 0x2593:
2039 byte_stream[ *byte_stream_index ] = 0x92;
2040 break;
2041
2042 case 0x25a0:
2043 byte_stream[ *byte_stream_index ] = 0x94;
2044 break;
2045
2046 default:
2047 byte_stream[ *byte_stream_index ] = 0x1a;
2048 break;
2049 }
2050 *byte_stream_index += 1;
2051
2052 break;
2053
2054 case LIBUNA_CODEPAGE_WINDOWS_874:
2055 if( ( unicode_character < 0x0080 )
2056 || ( unicode_character == 0x00a0 ) )
2057 {
2058 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
2059 }
2060 else if( ( unicode_character >= 0x0e00 )
2061 && ( unicode_character < 0x0e60 ) )
2062 {
2063 unicode_character -= 0x0e00;
2064
2065 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_874_unicode_to_byte_stream_base_0x0e00[ unicode_character ];
2066 }
2067 else if( ( unicode_character >= 0x2018 )
2068 && ( unicode_character < 0x2020 ) )
2069 {
2070 unicode_character -= 0x2018;
2071
2072 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_874_unicode_to_byte_stream_base_0x2018[ unicode_character ];
2073 }
2074 else switch( unicode_character )
2075 {
2076 case 0x2013:
2077 byte_stream[ *byte_stream_index ] = 0x96;
2078 break;
2079
2080 case 0x2014:
2081 byte_stream[ *byte_stream_index ] = 0x97;
2082 break;
2083
2084 case 0x2022:
2085 byte_stream[ *byte_stream_index ] = 0x95;
2086 break;
2087
2088 case 0x2026:
2089 byte_stream[ *byte_stream_index ] = 0x85;
2090 break;
2091
2092 case 0x20ac:
2093 byte_stream[ *byte_stream_index ] = 0x80;
2094 break;
2095
2096 default:
2097 byte_stream[ *byte_stream_index ] = 0x1a;
2098 break;
2099 }
2100 *byte_stream_index += 1;
2101
2102 break;
2103
2104 case LIBUNA_CODEPAGE_WINDOWS_932:
2105 result = libuna_codepage_windows_932_copy_to_byte_stream(
2106 unicode_character,
2107 byte_stream,
2108 byte_stream_size,
2109 byte_stream_index,
2110 error );
2111 break;
2112
2113 case LIBUNA_CODEPAGE_WINDOWS_936:
2114 result = libuna_codepage_windows_936_copy_to_byte_stream(
2115 unicode_character,
2116 byte_stream,
2117 byte_stream_size,
2118 byte_stream_index,
2119 error );
2120 break;
2121
2122 case LIBUNA_CODEPAGE_WINDOWS_949:
2123 result = libuna_codepage_windows_949_copy_to_byte_stream(
2124 unicode_character,
2125 byte_stream,
2126 byte_stream_size,
2127 byte_stream_index,
2128 error );
2129 break;
2130
2131 case LIBUNA_CODEPAGE_WINDOWS_950:
2132 result = libuna_codepage_windows_950_copy_to_byte_stream(
2133 unicode_character,
2134 byte_stream,
2135 byte_stream_size,
2136 byte_stream_index,
2137 error );
2138 break;
2139
2140 case LIBUNA_CODEPAGE_WINDOWS_1250:
2141 if( unicode_character < 0x0080 )
2142 {
2143 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
2144 }
2145 else if( ( unicode_character >= 0x00a0 )
2146 && ( unicode_character < 0x0120 ) )
2147 {
2148 unicode_character -= 0x00a0;
2149
2150 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1250_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
2151 }
2152 else if( ( unicode_character >= 0x0138 )
2153 && ( unicode_character < 0x0180 ) )
2154 {
2155 unicode_character -= 0x0138;
2156
2157 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1250_unicode_to_byte_stream_base_0x0138[ unicode_character ];
2158 }
2159 else if( ( unicode_character >= 0x02d8 )
2160 && ( unicode_character < 0x02e0 ) )
2161 {
2162 unicode_character -= 0x02d8;
2163
2164 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1250_unicode_to_byte_stream_base_0x02d8[ unicode_character ];
2165 }
2166 else if( ( unicode_character >= 0x2010 )
2167 && ( unicode_character < 0x2028 ) )
2168 {
2169 unicode_character -= 0x2010;
2170
2171 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1250_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2172 }
2173 else if( ( unicode_character >= 0x2030 )
2174 && ( unicode_character < 0x2040 ) )
2175 {
2176 unicode_character -= 0x2030;
2177
2178 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1250_unicode_to_byte_stream_base_0x2030[ unicode_character ];
2179 }
2180 else switch( unicode_character )
2181 {
2182 case 0x02c7:
2183 byte_stream[ *byte_stream_index ] = 0xa1;
2184 break;
2185
2186 case 0x20ac:
2187 byte_stream[ *byte_stream_index ] = 0x80;
2188 break;
2189
2190 case 0x2122:
2191 byte_stream[ *byte_stream_index ] = 0x99;
2192 break;
2193
2194 default:
2195 byte_stream[ *byte_stream_index ] = 0x1a;
2196 break;
2197 }
2198 *byte_stream_index += 1;
2199
2200 break;
2201
2202 case LIBUNA_CODEPAGE_WINDOWS_1251:
2203 if( unicode_character < 0x0080 )
2204 {
2205 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
2206 }
2207 else if( ( unicode_character >= 0x00a0 )
2208 && ( unicode_character < 0x00c0 ) )
2209 {
2210 unicode_character -= 0x00a0;
2211
2212 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1251_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
2213 }
2214 else if( ( unicode_character >= 0x0400 )
2215 && ( unicode_character < 0x0460 ) )
2216 {
2217 unicode_character -= 0x0400;
2218
2219 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1251_unicode_to_byte_stream_base_0x0400[ unicode_character ];
2220 }
2221 else if( ( unicode_character >= 0x2010 )
2222 && ( unicode_character < 0x2028 ) )
2223 {
2224 unicode_character -= 0x2010;
2225
2226 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1251_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2227 }
2228 else switch( unicode_character )
2229 {
2230 case 0x0490:
2231 byte_stream[ *byte_stream_index ] = 0xa5;
2232 break;
2233
2234 case 0x0491:
2235 byte_stream[ *byte_stream_index ] = 0xb4;
2236 break;
2237
2238 case 0x2030:
2239 byte_stream[ *byte_stream_index ] = 0x89;
2240 break;
2241
2242 case 0x2039:
2243 byte_stream[ *byte_stream_index ] = 0x8b;
2244 break;
2245
2246 case 0x203a:
2247 byte_stream[ *byte_stream_index ] = 0x9b;
2248 break;
2249
2250 case 0x20ac:
2251 byte_stream[ *byte_stream_index ] = 0x88;
2252 break;
2253
2254 case 0x2116:
2255 byte_stream[ *byte_stream_index ] = 0xb9;
2256 break;
2257
2258 case 0x2122:
2259 byte_stream[ *byte_stream_index ] = 0x99;
2260 break;
2261
2262 default:
2263 byte_stream[ *byte_stream_index ] = 0x1a;
2264 break;
2265 }
2266 *byte_stream_index += 1;
2267
2268 break;
2269
2270 case LIBUNA_CODEPAGE_WINDOWS_1252:
2271 if( unicode_character < 0x0080 )
2272 {
2273 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
2274 }
2275 else if( ( unicode_character >= 0x00a0 )
2276 && ( unicode_character < 0x0100 ) )
2277 {
2278 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
2279 }
2280 else if( ( unicode_character >= 0x2010 )
2281 && ( unicode_character < 0x2028 ) )
2282 {
2283 unicode_character -= 0x2010;
2284
2285 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1252_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2286 }
2287 else switch( unicode_character )
2288 {
2289 case 0x0152:
2290 byte_stream[ *byte_stream_index ] = 0x8c;
2291 break;
2292
2293 case 0x0153:
2294 byte_stream[ *byte_stream_index ] = 0x9c;
2295 break;
2296
2297 case 0x0160:
2298 byte_stream[ *byte_stream_index ] = 0x8a;
2299 break;
2300
2301 case 0x0161:
2302 byte_stream[ *byte_stream_index ] = 0x9a;
2303 break;
2304
2305 case 0x0178:
2306 byte_stream[ *byte_stream_index ] = 0x9f;
2307 break;
2308
2309 case 0x017d:
2310 byte_stream[ *byte_stream_index ] = 0x8e;
2311 break;
2312
2313 case 0x017e:
2314 byte_stream[ *byte_stream_index ] = 0x9e;
2315 break;
2316
2317 case 0x0192:
2318 byte_stream[ *byte_stream_index ] = 0x83;
2319 break;
2320
2321 case 0x02c6:
2322 byte_stream[ *byte_stream_index ] = 0x88;
2323 break;
2324
2325 case 0x02dc:
2326 byte_stream[ *byte_stream_index ] = 0x98;
2327 break;
2328
2329 case 0x2030:
2330 byte_stream[ *byte_stream_index ] = 0x89;
2331 break;
2332
2333 case 0x2039:
2334 byte_stream[ *byte_stream_index ] = 0x8b;
2335 break;
2336
2337 case 0x203a:
2338 byte_stream[ *byte_stream_index ] = 0x9b;
2339 break;
2340
2341 case 0x20ac:
2342 byte_stream[ *byte_stream_index ] = 0x80;
2343 break;
2344
2345 case 0x2122:
2346 byte_stream[ *byte_stream_index ] = 0x99;
2347 break;
2348
2349 default:
2350 byte_stream[ *byte_stream_index ] = 0x1a;
2351 break;
2352 }
2353 *byte_stream_index += 1;
2354
2355 break;
2356
2357 case LIBUNA_CODEPAGE_WINDOWS_1253:
2358 if( unicode_character < 0x0080 )
2359 {
2360 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
2361 }
2362 else if( ( unicode_character >= 0x00a0 )
2363 && ( unicode_character < 0x00c0 ) )
2364 {
2365 unicode_character -= 0x00a0;
2366
2367 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1253_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
2368 }
2369 else if( ( unicode_character >= 0x0380 )
2370 && ( unicode_character < 0x03d0 ) )
2371 {
2372 unicode_character -= 0x0380;
2373
2374 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1253_unicode_to_byte_stream_base_0x0380[ unicode_character ];
2375 }
2376 else if( ( unicode_character >= 0x2010 )
2377 && ( unicode_character < 0x2028 ) )
2378 {
2379 unicode_character -= 0x2010;
2380
2381 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1253_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2382 }
2383 else switch( unicode_character )
2384 {
2385 case 0x0192:
2386 byte_stream[ *byte_stream_index ] = 0x83;
2387 break;
2388
2389 case 0x2030:
2390 byte_stream[ *byte_stream_index ] = 0x89;
2391 break;
2392
2393 case 0x2039:
2394 byte_stream[ *byte_stream_index ] = 0x8b;
2395 break;
2396
2397 case 0x203a:
2398 byte_stream[ *byte_stream_index ] = 0x9b;
2399 break;
2400
2401 case 0x20ac:
2402 byte_stream[ *byte_stream_index ] = 0x80;
2403 break;
2404
2405 case 0x2122:
2406 byte_stream[ *byte_stream_index ] = 0x99;
2407 break;
2408
2409 default:
2410 byte_stream[ *byte_stream_index ] = 0x1a;
2411 break;
2412 }
2413 *byte_stream_index += 1;
2414
2415 break;
2416
2417 case LIBUNA_CODEPAGE_WINDOWS_1254:
2418 if( unicode_character < 0x0080 )
2419 {
2420 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
2421 }
2422 else if( ( unicode_character >= 0x00a0 )
2423 && ( unicode_character < 0x00d0 ) )
2424 {
2425 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
2426 }
2427 else if( ( unicode_character >= 0x00d0 )
2428 && ( unicode_character < 0x0100 ) )
2429 {
2430 unicode_character -= 0x00d0;
2431
2432 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1254_unicode_to_byte_stream_base_0x00d0[ unicode_character ];
2433 }
2434 else if( ( unicode_character >= 0x2010 )
2435 && ( unicode_character < 0x2028 ) )
2436 {
2437 unicode_character -= 0x2010;
2438
2439 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1254_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2440 }
2441 else switch( unicode_character )
2442 {
2443 case 0x011e:
2444 byte_stream[ *byte_stream_index ] = 0xd0;
2445 break;
2446
2447 case 0x011f:
2448 byte_stream[ *byte_stream_index ] = 0xf0;
2449 break;
2450
2451 case 0x0130:
2452 byte_stream[ *byte_stream_index ] = 0xdd;
2453 break;
2454
2455 case 0x0131:
2456 byte_stream[ *byte_stream_index ] = 0xfd;
2457 break;
2458
2459 case 0x0152:
2460 byte_stream[ *byte_stream_index ] = 0x8c;
2461 break;
2462
2463 case 0x0153:
2464 byte_stream[ *byte_stream_index ] = 0x9c;
2465 break;
2466
2467 case 0x015e:
2468 byte_stream[ *byte_stream_index ] = 0xde;
2469 break;
2470
2471 case 0x015f:
2472 byte_stream[ *byte_stream_index ] = 0xfe;
2473 break;
2474
2475 case 0x0160:
2476 byte_stream[ *byte_stream_index ] = 0x8a;
2477 break;
2478
2479 case 0x0161:
2480 byte_stream[ *byte_stream_index ] = 0x9a;
2481 break;
2482
2483 case 0x0178:
2484 byte_stream[ *byte_stream_index ] = 0x9f;
2485 break;
2486
2487 case 0x0192:
2488 byte_stream[ *byte_stream_index ] = 0x83;
2489 break;
2490
2491 case 0x02c6:
2492 byte_stream[ *byte_stream_index ] = 0x88;
2493 break;
2494
2495 case 0x02dc:
2496 byte_stream[ *byte_stream_index ] = 0x98;
2497 break;
2498
2499 case 0x2030:
2500 byte_stream[ *byte_stream_index ] = 0x89;
2501 break;
2502
2503 case 0x2039:
2504 byte_stream[ *byte_stream_index ] = 0x8b;
2505 break;
2506
2507 case 0x203a:
2508 byte_stream[ *byte_stream_index ] = 0x9b;
2509 break;
2510
2511 case 0x20ac:
2512 byte_stream[ *byte_stream_index ] = 0x80;
2513 break;
2514
2515 case 0x2122:
2516 byte_stream[ *byte_stream_index ] = 0x99;
2517 break;
2518
2519 default:
2520 byte_stream[ *byte_stream_index ] = 0x1a;
2521 break;
2522 }
2523 *byte_stream_index += 1;
2524
2525 break;
2526
2527 case LIBUNA_CODEPAGE_WINDOWS_1255:
2528 if( unicode_character < 0x0080 )
2529 {
2530 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
2531 }
2532 else if( ( unicode_character >= 0x00a0 )
2533 && ( unicode_character < 0x00c0 ) )
2534 {
2535 unicode_character -= 0x00a0;
2536
2537 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1255_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
2538 }
2539 else if( ( unicode_character >= 0x05b0 )
2540 && ( unicode_character < 0x05c8 ) )
2541 {
2542 unicode_character -= 0x05b0;
2543
2544 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1255_unicode_to_byte_stream_base_0x05b0[ unicode_character ];
2545 }
2546 else if( ( unicode_character >= 0x05d0 )
2547 && ( unicode_character < 0x05f8 ) )
2548 {
2549 unicode_character -= 0x05d0;
2550
2551 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1255_unicode_to_byte_stream_base_0x05d0[ unicode_character ];
2552 }
2553 else if( ( unicode_character >= 0x2010 )
2554 && ( unicode_character < 0x2028 ) )
2555 {
2556 unicode_character -= 0x2010;
2557
2558 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1255_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2559 }
2560 else switch( unicode_character )
2561 {
2562 case 0x00d7:
2563 byte_stream[ *byte_stream_index ] = 0xaa;
2564 break;
2565
2566 case 0x00f7:
2567 byte_stream[ *byte_stream_index ] = 0xba;
2568 break;
2569
2570 case 0x0192:
2571 byte_stream[ *byte_stream_index ] = 0x83;
2572 break;
2573
2574 case 0x02c6:
2575 byte_stream[ *byte_stream_index ] = 0x88;
2576 break;
2577
2578 case 0x02dc:
2579 byte_stream[ *byte_stream_index ] = 0x98;
2580 break;
2581
2582 case 0x200e:
2583 byte_stream[ *byte_stream_index ] = 0xfd;
2584 break;
2585
2586 case 0x200f:
2587 byte_stream[ *byte_stream_index ] = 0xfe;
2588 break;
2589
2590 case 0x2030:
2591 byte_stream[ *byte_stream_index ] = 0x89;
2592 break;
2593
2594 case 0x2039:
2595 byte_stream[ *byte_stream_index ] = 0x8b;
2596 break;
2597
2598 case 0x203a:
2599 byte_stream[ *byte_stream_index ] = 0x9b;
2600 break;
2601
2602 case 0x20aa:
2603 byte_stream[ *byte_stream_index ] = 0xa4;
2604 break;
2605
2606 case 0x20ac:
2607 byte_stream[ *byte_stream_index ] = 0x80;
2608 break;
2609
2610 case 0x2122:
2611 byte_stream[ *byte_stream_index ] = 0x99;
2612 break;
2613
2614 default:
2615 byte_stream[ *byte_stream_index ] = 0x1a;
2616 break;
2617 }
2618 *byte_stream_index += 1;
2619
2620 break;
2621
2622 case LIBUNA_CODEPAGE_WINDOWS_1256:
2623 if( unicode_character < 0x0080 )
2624 {
2625 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
2626 }
2627 else if( ( unicode_character >= 0x00a0 )
2628 && ( unicode_character < 0x00c0 ) )
2629 {
2630 unicode_character -= 0x00a0;
2631
2632 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1256_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
2633 }
2634 else if( ( unicode_character >= 0x00e0 )
2635 && ( unicode_character < 0x0100 ) )
2636 {
2637 unicode_character -= 0x00e0;
2638
2639 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1256_unicode_to_byte_stream_base_0x00e0[ unicode_character ];
2640 }
2641 else if( ( unicode_character >= 0x0618 )
2642 && ( unicode_character < 0x0658 ) )
2643 {
2644 unicode_character -= 0x0618;
2645
2646 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1256_unicode_to_byte_stream_base_0x0618[ unicode_character ];
2647 }
2648 else if( ( unicode_character >= 0x2008 )
2649 && ( unicode_character < 0x2028 ) )
2650 {
2651 unicode_character -= 0x2008;
2652
2653 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1256_unicode_to_byte_stream_base_0x2008[ unicode_character ];
2654 }
2655 else switch( unicode_character )
2656 {
2657 case 0x00d7:
2658 byte_stream[ *byte_stream_index ] = 0xd7;
2659 break;
2660
2661 case 0x0152:
2662 byte_stream[ *byte_stream_index ] = 0x8c;
2663 break;
2664
2665 case 0x0153:
2666 byte_stream[ *byte_stream_index ] = 0x9c;
2667 break;
2668
2669 case 0x0192:
2670 byte_stream[ *byte_stream_index ] = 0x83;
2671 break;
2672
2673 case 0x02c6:
2674 byte_stream[ *byte_stream_index ] = 0x88;
2675 break;
2676
2677 case 0x060c:
2678 byte_stream[ *byte_stream_index ] = 0xa1;
2679 break;
2680
2681 case 0x0679:
2682 byte_stream[ *byte_stream_index ] = 0x8a;
2683 break;
2684
2685 case 0x067e:
2686 byte_stream[ *byte_stream_index ] = 0x81;
2687 break;
2688
2689 case 0x0686:
2690 byte_stream[ *byte_stream_index ] = 0x8d;
2691 break;
2692
2693 case 0x0688:
2694 byte_stream[ *byte_stream_index ] = 0x8f;
2695 break;
2696
2697 case 0x0691:
2698 byte_stream[ *byte_stream_index ] = 0x9a;
2699 break;
2700
2701 case 0x0698:
2702 byte_stream[ *byte_stream_index ] = 0x8e;
2703 break;
2704
2705 case 0x06a9:
2706 byte_stream[ *byte_stream_index ] = 0x98;
2707 break;
2708
2709 case 0x06af:
2710 byte_stream[ *byte_stream_index ] = 0x90;
2711 break;
2712
2713 case 0x06ba:
2714 byte_stream[ *byte_stream_index ] = 0x9f;
2715 break;
2716
2717 case 0x06be:
2718 byte_stream[ *byte_stream_index ] = 0xaa;
2719 break;
2720
2721 case 0x06c1:
2722 byte_stream[ *byte_stream_index ] = 0xc0;
2723 break;
2724
2725 case 0x06d2:
2726 byte_stream[ *byte_stream_index ] = 0xff;
2727 break;
2728
2729 case 0x2030:
2730 byte_stream[ *byte_stream_index ] = 0x89;
2731 break;
2732
2733 case 0x2039:
2734 byte_stream[ *byte_stream_index ] = 0x8b;
2735 break;
2736
2737 case 0x203a:
2738 byte_stream[ *byte_stream_index ] = 0x9b;
2739 break;
2740
2741 case 0x20ac:
2742 byte_stream[ *byte_stream_index ] = 0x80;
2743 break;
2744
2745 case 0x2122:
2746 byte_stream[ *byte_stream_index ] = 0x99;
2747 break;
2748
2749 default:
2750 byte_stream[ *byte_stream_index ] = 0x1a;
2751 break;
2752 }
2753 *byte_stream_index += 1;
2754
2755 break;
2756
2757 case LIBUNA_CODEPAGE_WINDOWS_1257:
2758 if( unicode_character < 0x0080 )
2759 {
2760 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
2761 }
2762 else if( ( unicode_character >= 0x00a0 )
2763 && ( unicode_character < 0x0180 ) )
2764 {
2765 unicode_character -= 0x00a0;
2766
2767 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1257_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
2768 }
2769 else if( ( unicode_character >= 0x2010 )
2770 && ( unicode_character < 0x2028 ) )
2771 {
2772 unicode_character -= 0x2010;
2773
2774 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1257_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2775 }
2776 else switch( unicode_character )
2777 {
2778 case 0x02c7:
2779 byte_stream[ *byte_stream_index ] = 0x8e;
2780 break;
2781
2782 case 0x02d9:
2783 byte_stream[ *byte_stream_index ] = 0xff;
2784 break;
2785
2786 case 0x02db:
2787 byte_stream[ *byte_stream_index ] = 0x9e;
2788 break;
2789
2790 case 0x2030:
2791 byte_stream[ *byte_stream_index ] = 0x89;
2792 break;
2793
2794 case 0x2039:
2795 byte_stream[ *byte_stream_index ] = 0x8b;
2796 break;
2797
2798 case 0x203a:
2799 byte_stream[ *byte_stream_index ] = 0x9b;
2800 break;
2801
2802 case 0x20ac:
2803 byte_stream[ *byte_stream_index ] = 0x80;
2804 break;
2805
2806 case 0x2122:
2807 byte_stream[ *byte_stream_index ] = 0x99;
2808 break;
2809
2810 default:
2811 byte_stream[ *byte_stream_index ] = 0x1a;
2812 break;
2813 }
2814 *byte_stream_index += 1;
2815
2816 break;
2817
2818 case LIBUNA_CODEPAGE_WINDOWS_1258:
2819 if( unicode_character < 0x0080 )
2820 {
2821 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
2822 }
2823 else if( ( unicode_character >= 0x00a0 )
2824 && ( unicode_character < 0x00c0 ) )
2825 {
2826 byte_stream[ *byte_stream_index ] = (uint8_t) unicode_character;
2827 }
2828 else if( ( unicode_character >= 0x00c0 )
2829 && ( unicode_character < 0x0108 ) )
2830 {
2831 unicode_character -= 0x00c0;
2832
2833 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1258_unicode_to_byte_stream_base_0x00c0[ unicode_character ];
2834 }
2835 else if( ( unicode_character >= 0x2010 )
2836 && ( unicode_character < 0x2028 ) )
2837 {
2838 unicode_character -= 0x2010;
2839
2840 byte_stream[ *byte_stream_index ] = libuna_codepage_windows_1258_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2841 }
2842 else switch( unicode_character )
2843 {
2844 case 0x0110:
2845 byte_stream[ *byte_stream_index ] = 0xd0;
2846 break;
2847
2848 case 0x0111:
2849 byte_stream[ *byte_stream_index ] = 0xf0;
2850 break;
2851
2852 case 0x0152:
2853 byte_stream[ *byte_stream_index ] = 0x8c;
2854 break;
2855
2856 case 0x0153:
2857 byte_stream[ *byte_stream_index ] = 0x9c;
2858 break;
2859
2860 case 0x0178:
2861 byte_stream[ *byte_stream_index ] = 0x9f;
2862 break;
2863
2864 case 0x0192:
2865 byte_stream[ *byte_stream_index ] = 0x83;
2866 break;
2867
2868 case 0x01a0:
2869 byte_stream[ *byte_stream_index ] = 0xd5;
2870 break;
2871
2872 case 0x01a1:
2873 byte_stream[ *byte_stream_index ] = 0xf5;
2874 break;
2875
2876 case 0x01af:
2877 byte_stream[ *byte_stream_index ] = 0xdd;
2878 break;
2879
2880 case 0x01b0:
2881 byte_stream[ *byte_stream_index ] = 0xfd;
2882 break;
2883
2884 case 0x02c6:
2885 byte_stream[ *byte_stream_index ] = 0x88;
2886 break;
2887
2888 case 0x02dc:
2889 byte_stream[ *byte_stream_index ] = 0x98;
2890 break;
2891
2892 case 0x0300:
2893 byte_stream[ *byte_stream_index ] = 0xcc;
2894 break;
2895
2896 case 0x0301:
2897 byte_stream[ *byte_stream_index ] = 0xec;
2898 break;
2899
2900 case 0x0303:
2901 byte_stream[ *byte_stream_index ] = 0xde;
2902 break;
2903
2904 case 0x0309:
2905 byte_stream[ *byte_stream_index ] = 0xd2;
2906 break;
2907
2908 case 0x0323:
2909 byte_stream[ *byte_stream_index ] = 0xf2;
2910 break;
2911
2912 case 0x2030:
2913 byte_stream[ *byte_stream_index ] = 0x89;
2914 break;
2915
2916 case 0x2039:
2917 byte_stream[ *byte_stream_index ] = 0x8b;
2918 break;
2919
2920 case 0x203a:
2921 byte_stream[ *byte_stream_index ] = 0x9b;
2922 break;
2923
2924 case 0x20ab:
2925 byte_stream[ *byte_stream_index ] = 0xfe;
2926 break;
2927
2928 case 0x20ac:
2929 byte_stream[ *byte_stream_index ] = 0x80;
2930 break;
2931
2932 case 0x2122:
2933 byte_stream[ *byte_stream_index ] = 0x99;
2934 break;
2935
2936 default:
2937 byte_stream[ *byte_stream_index ] = 0x1a;
2938 break;
2939 }
2940 *byte_stream_index += 1;
2941
2942 break;
2943
2944 default:
2945 libcerror_error_set(
2946 error,
2947 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2948 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2949 "%s: unsupported codepage: %d.",
2950 function,
2951 codepage );
2952
2953 return( -1 );
2954 }
2955 if( result != 1 )
2956 {
2957 libcerror_error_set(
2958 error,
2959 LIBCERROR_ERROR_DOMAIN_RUNTIME,
2960 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
2961 "%s: unable to copy Unicode character to byte stream.",
2962 function );
2963
2964 return( -1 );
2965 }
2966 return( 1 );
2967 }
2968
2969 /* Determines the size of an UTF-7 stream character from an Unicode character
2970 * Adds the size to the UTF-7 stream character size value
2971 * Returns 1 if successful or -1 on error
2972 */
libuna_unicode_character_size_to_utf7_stream(libuna_unicode_character_t unicode_character,size_t * utf7_stream_character_size,uint32_t * utf7_stream_base64_data,libcerror_error_t ** error)2973 int libuna_unicode_character_size_to_utf7_stream(
2974 libuna_unicode_character_t unicode_character,
2975 size_t *utf7_stream_character_size,
2976 uint32_t *utf7_stream_base64_data,
2977 libcerror_error_t **error )
2978 {
2979 static char *function = "libuna_unicode_character_size_to_utf7_stream";
2980 libuna_utf16_character_t utf16_surrogate = 0;
2981 size_t safe_utf7_stream_character_size = 0;
2982 uint32_t base64_triplet = 0;
2983 uint32_t safe_utf7_stream_base64_data = 0;
2984 uint8_t base64_encode_character = 0;
2985 uint8_t byte_bit_shift = 0;
2986 uint8_t current_byte = 0;
2987 uint8_t number_of_bytes = 0;
2988
2989 if( utf7_stream_character_size == NULL )
2990 {
2991 libcerror_error_set(
2992 error,
2993 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2994 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2995 "%s: invalid UTF-7 stream character size.",
2996 function );
2997
2998 return( -1 );
2999 }
3000 if( utf7_stream_base64_data == NULL )
3001 {
3002 libcerror_error_set(
3003 error,
3004 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3005 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3006 "%s: invalid UTF-7 stream base64 data.",
3007 function );
3008
3009 return( -1 );
3010 }
3011 safe_utf7_stream_character_size = *utf7_stream_character_size;
3012 safe_utf7_stream_base64_data = *utf7_stream_base64_data;
3013
3014 /* Determine if the Unicode character is valid
3015 */
3016 if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
3017 {
3018 unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
3019 }
3020 /* The + character must be escaped
3021 */
3022 if( unicode_character == (libuna_unicode_character_t) '+' )
3023 {
3024 }
3025 /* Allow for the end of string character
3026 */
3027 else if( unicode_character == 0 )
3028 {
3029 }
3030 else if( ( unicode_character >= 256 )
3031 || ( libuna_unicode_character_utf7_valid_directly_encoded_character[ (uint8_t) unicode_character ] == 0 ) )
3032 {
3033 base64_encode_character = 1;
3034 }
3035 if( base64_encode_character == 0 )
3036 {
3037 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3038 {
3039 safe_utf7_stream_base64_data = 0;
3040 }
3041 safe_utf7_stream_character_size += 1;
3042
3043 /* The + character must be escaped
3044 */
3045 if( unicode_character == (libuna_unicode_character_t) '+' )
3046 {
3047 safe_utf7_stream_character_size += 1;
3048 }
3049 }
3050 else
3051 {
3052 /* Escape the base64 encoded characters with a +
3053 */
3054 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
3055 {
3056 safe_utf7_stream_character_size += 1;
3057 }
3058 /* Otherwise continue the previously base64 encoded characters
3059 */
3060 else
3061 {
3062 base64_triplet = safe_utf7_stream_base64_data & 0x00ffffff;
3063 number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
3064 current_byte = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
3065
3066 if( number_of_bytes > 0 )
3067 {
3068 if( safe_utf7_stream_character_size < (size_t) ( number_of_bytes + 1 ) )
3069 {
3070 libcerror_error_set(
3071 error,
3072 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3073 LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
3074 "%s: invalid UTF-7 stream character size value out of bounds.",
3075 function );
3076
3077 return( -1 );
3078 }
3079 /* Correct the size for the last partial base64 stream
3080 */
3081 safe_utf7_stream_character_size -= number_of_bytes + 1;
3082 }
3083 if( safe_utf7_stream_character_size < 1 )
3084 {
3085 libcerror_error_set(
3086 error,
3087 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3088 LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
3089 "%s: invalid UTF-7 stream character size value out of bounds.",
3090 function );
3091
3092 return( -1 );
3093 }
3094 /* Correct the size for the base64 stream termination character
3095 */
3096 safe_utf7_stream_character_size -= 1;
3097 }
3098 safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3099
3100 if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
3101 {
3102 unicode_character -= 0x010000;
3103
3104 utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
3105
3106 byte_bit_shift = 16 - ( current_byte * 8 );
3107 base64_triplet += (uint32_t) ( ( utf16_surrogate >> 8 ) & 0xff ) << byte_bit_shift;
3108 current_byte += 1;
3109 number_of_bytes += 1;
3110
3111 if( number_of_bytes == 3 )
3112 {
3113 safe_utf7_stream_character_size += 4;
3114 number_of_bytes = 0;
3115 current_byte = 0;
3116 base64_triplet = 0;
3117 }
3118 byte_bit_shift = 16 - ( current_byte * 8 );
3119 base64_triplet += (uint32_t) ( utf16_surrogate & 0xff ) << byte_bit_shift;
3120 current_byte += 1;
3121 number_of_bytes += 1;
3122
3123 if( number_of_bytes == 3 )
3124 {
3125 safe_utf7_stream_character_size += 4;
3126 number_of_bytes = 0;
3127 current_byte = 0;
3128 base64_triplet = 0;
3129 }
3130 unicode_character = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
3131 }
3132 byte_bit_shift = 16 - ( current_byte * 8 );
3133 base64_triplet += (uint32_t) ( ( unicode_character >> 8 ) & 0xff ) << byte_bit_shift;
3134 current_byte += 1;
3135 number_of_bytes += 1;
3136
3137 if( number_of_bytes == 3 )
3138 {
3139 safe_utf7_stream_character_size += 4;
3140 number_of_bytes = 0;
3141 current_byte = 0;
3142 base64_triplet = 0;
3143 }
3144 byte_bit_shift = 16 - ( current_byte * 8 );
3145 base64_triplet += (uint32_t) ( unicode_character & 0xff ) << byte_bit_shift;
3146 current_byte += 1;
3147 number_of_bytes += 1;
3148
3149 if( number_of_bytes == 3 )
3150 {
3151 safe_utf7_stream_character_size += 4;
3152 number_of_bytes = 0;
3153 current_byte = 0;
3154 base64_triplet = 0;
3155 }
3156 /* Terminate the base64 encoded characters
3157 */
3158 if( number_of_bytes > 0 )
3159 {
3160 safe_utf7_stream_character_size += number_of_bytes + 1;
3161 }
3162 safe_utf7_stream_character_size += 1;
3163 }
3164 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3165 {
3166 safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3167 safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
3168 safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
3169 safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
3170 }
3171 *utf7_stream_character_size = safe_utf7_stream_character_size;
3172 *utf7_stream_base64_data = safe_utf7_stream_base64_data;
3173
3174 return( 1 );
3175 }
3176
3177 /* Copies an Unicode character from an UTF-7 stream
3178 * The bits of the base64 data contain:
3179 * 0 - 23 the base64 triplet
3180 * 24 - 25 the number of bytes in the triplet
3181 * 26 - 27 unused
3182 * 28 - 29 the current byte
3183 * 30 unused
3184 * 31 flag to indicate the current UTF-7 characters are (modified) base64 encoded
3185 *
3186 * Returns 1 if successful or -1 on error
3187 */
libuna_unicode_character_copy_from_utf7_stream(libuna_unicode_character_t * unicode_character,const uint8_t * utf7_stream,size_t utf7_stream_size,size_t * utf7_stream_index,uint32_t * utf7_stream_base64_data,libcerror_error_t ** error)3188 int libuna_unicode_character_copy_from_utf7_stream(
3189 libuna_unicode_character_t *unicode_character,
3190 const uint8_t *utf7_stream,
3191 size_t utf7_stream_size,
3192 size_t *utf7_stream_index,
3193 uint32_t *utf7_stream_base64_data,
3194 libcerror_error_t **error )
3195 {
3196 static char *function = "libuna_unicode_character_copy_from_utf7_stream";
3197 libuna_utf16_character_t utf16_surrogate = 0;
3198 size_t safe_utf7_stream_index = 0;
3199 uint32_t base64_triplet = 0;
3200 uint32_t safe_utf7_stream_base64_data = 0;
3201 uint8_t byte_bit_shift = 0;
3202 uint8_t current_byte = 0;
3203 uint8_t number_of_bytes = 0;
3204 uint8_t padding_size = 0;
3205 uint8_t utf7_character_value = 0;
3206
3207 if( unicode_character == NULL )
3208 {
3209 libcerror_error_set(
3210 error,
3211 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3212 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3213 "%s: invalid Unicode character.",
3214 function );
3215
3216 return( -1 );
3217 }
3218 if( utf7_stream == NULL )
3219 {
3220 libcerror_error_set(
3221 error,
3222 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3223 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3224 "%s: invalid UTF-7 stream.",
3225 function );
3226
3227 return( -1 );
3228 }
3229 if( utf7_stream_size > (size_t) SSIZE_MAX )
3230 {
3231 libcerror_error_set(
3232 error,
3233 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3234 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3235 "%s: invalid UTF-7 stream size value exceeds maximum.",
3236 function );
3237
3238 return( -1 );
3239 }
3240 if( utf7_stream_index == NULL )
3241 {
3242 libcerror_error_set(
3243 error,
3244 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3245 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3246 "%s: invalid UTF-7 stream index.",
3247 function );
3248
3249 return( -1 );
3250 }
3251 if( *utf7_stream_index >= utf7_stream_size )
3252 {
3253 libcerror_error_set(
3254 error,
3255 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3256 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3257 "%s: UTF-7 stream too small.",
3258 function );
3259
3260 return( -1 );
3261 }
3262 if( utf7_stream_base64_data == NULL )
3263 {
3264 libcerror_error_set(
3265 error,
3266 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3267 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3268 "%s: invalid UTF-7 base64 data.",
3269 function );
3270
3271 return( -1 );
3272 }
3273 safe_utf7_stream_index = *utf7_stream_index;
3274 safe_utf7_stream_base64_data = *utf7_stream_base64_data;
3275
3276 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3277 {
3278 base64_triplet = safe_utf7_stream_base64_data & 0x00ffffff;
3279 number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
3280 current_byte = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
3281
3282 if( current_byte >= number_of_bytes )
3283 {
3284 if( safe_utf7_stream_index >= utf7_stream_size )
3285 {
3286 libcerror_error_set(
3287 error,
3288 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3289 LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
3290 "%s: invalid UTF-7 stream character size value out of bounds.",
3291 function );
3292
3293 return( -1 );
3294 }
3295 utf7_character_value = utf7_stream[ safe_utf7_stream_index ];
3296
3297 /* Any character not in the modified base64 alphabet terminates the base64 encoded sequence
3298 */
3299 if( libuna_unicode_character_utf7_valid_base64_character[ utf7_character_value ] == 0 )
3300 {
3301 safe_utf7_stream_base64_data = 0;
3302 }
3303 }
3304 }
3305 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
3306 {
3307 if( safe_utf7_stream_index >= utf7_stream_size )
3308 {
3309 libcerror_error_set(
3310 error,
3311 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3312 LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
3313 "%s: invalid UTF-7 stream character size value out of bounds.",
3314 function );
3315
3316 return( -1 );
3317 }
3318 utf7_character_value = utf7_stream[ safe_utf7_stream_index ];
3319
3320 /* Determine if the character is modified base64 encoded
3321 * or a + character
3322 */
3323 if( utf7_character_value == (uint8_t) '+' )
3324 {
3325 if( ( safe_utf7_stream_index + 1 ) >= utf7_stream_size )
3326 {
3327 libcerror_error_set(
3328 error,
3329 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3330 LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
3331 "%s: invalid UTF-7 stream character size value out of bounds.",
3332 function );
3333
3334 return( -1 );
3335 }
3336 if( utf7_stream[ safe_utf7_stream_index + 1 ] != (uint8_t) '-' )
3337 {
3338 safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3339
3340 safe_utf7_stream_index++;
3341 }
3342 }
3343 /* Allow for the end of string character
3344 */
3345 else if( utf7_character_value == 0 )
3346 {
3347 }
3348 else if( libuna_unicode_character_utf7_valid_directly_encoded_character[ utf7_character_value ] == 0 )
3349 {
3350 libcerror_error_set(
3351 error,
3352 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3353 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3354 "%s: invalid directly encoded UTF-7 character byte: 0x%02" PRIx8 ".",
3355 function,
3356 utf7_character_value );
3357
3358 return( -1 );
3359 }
3360 }
3361 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
3362 {
3363 *unicode_character = utf7_stream[ safe_utf7_stream_index++ ];
3364
3365 if( ( *unicode_character == (libuna_unicode_character_t) '+' )
3366 && ( utf7_stream[ safe_utf7_stream_index ] == (uint8_t) '-' ) )
3367 {
3368 safe_utf7_stream_index++;
3369 }
3370 }
3371 else if( ( number_of_bytes == 0 )
3372 || ( current_byte >= number_of_bytes ) )
3373 {
3374 if( libuna_base64_triplet_copy_from_base64_stream(
3375 &base64_triplet,
3376 utf7_stream,
3377 utf7_stream_size,
3378 &safe_utf7_stream_index,
3379 &padding_size,
3380 LIBUNA_BASE64_VARIANT_UTF7,
3381 error ) != 1 )
3382 {
3383 libcerror_error_set(
3384 error,
3385 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3386 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3387 "%s: unable to copy base64 encoded UTF-7 characters.",
3388 function );
3389
3390 return( -1 );
3391 }
3392 if( padding_size > 2 )
3393 {
3394 libcerror_error_set(
3395 error,
3396 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3397 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3398 "%s: unsupported padding in base64 encoded UTF-7 characters.",
3399 function );
3400
3401 return( -1 );
3402 }
3403 number_of_bytes = 3 - padding_size;
3404 current_byte = 0;
3405 }
3406 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3407 {
3408 byte_bit_shift = 16 - ( current_byte * 8 );
3409 *unicode_character = ( ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL ) << 8;
3410 current_byte += 1;
3411
3412 if( current_byte >= number_of_bytes )
3413 {
3414 if( libuna_base64_triplet_copy_from_base64_stream(
3415 &base64_triplet,
3416 utf7_stream,
3417 utf7_stream_size,
3418 &safe_utf7_stream_index,
3419 &padding_size,
3420 LIBUNA_BASE64_VARIANT_UTF7,
3421 error ) != 1 )
3422 {
3423 libcerror_error_set(
3424 error,
3425 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3426 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3427 "%s: unable to copy base64 encoded UTF-7 characters.",
3428 function );
3429
3430 return( -1 );
3431 }
3432 if( padding_size > 2 )
3433 {
3434 libcerror_error_set(
3435 error,
3436 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3437 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3438 "%s: unsupported padding in base64 encoded UTF-7 characters.",
3439 function );
3440
3441 return( -1 );
3442 }
3443 number_of_bytes = 3 - padding_size;
3444 current_byte = 0;
3445 }
3446 byte_bit_shift = 16 - ( current_byte * 8 );
3447 *unicode_character += ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL;
3448 current_byte += 1;
3449
3450 if( ( *unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
3451 && ( *unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
3452 {
3453 if( current_byte >= number_of_bytes )
3454 {
3455 if( libuna_base64_triplet_copy_from_base64_stream(
3456 &base64_triplet,
3457 utf7_stream,
3458 utf7_stream_size,
3459 &safe_utf7_stream_index,
3460 &padding_size,
3461 LIBUNA_BASE64_VARIANT_UTF7,
3462 error ) != 1 )
3463 {
3464 libcerror_error_set(
3465 error,
3466 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3467 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3468 "%s: unable to copy base64 encoded UTF-7 characters.",
3469 function );
3470
3471 return( -1 );
3472 }
3473 if( padding_size > 2 )
3474 {
3475 libcerror_error_set(
3476 error,
3477 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3478 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3479 "%s: unsupported padding in base64 encoded UTF-7 characters.",
3480 function );
3481
3482 return( -1 );
3483 }
3484 number_of_bytes = 3 - padding_size;
3485 current_byte = 0;
3486 }
3487 byte_bit_shift = 16 - ( current_byte * 8 );
3488 utf16_surrogate = ( ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL ) << 8;
3489 current_byte += 1;
3490
3491 if( current_byte >= number_of_bytes )
3492 {
3493 if( libuna_base64_triplet_copy_from_base64_stream(
3494 &base64_triplet,
3495 utf7_stream,
3496 utf7_stream_size,
3497 &safe_utf7_stream_index,
3498 &padding_size,
3499 LIBUNA_BASE64_VARIANT_UTF7,
3500 error ) != 1 )
3501 {
3502 libcerror_error_set(
3503 error,
3504 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3505 LIBCERROR_RUNTIME_ERROR_GET_FAILED,
3506 "%s: unable to retrieve base64 encoded UTF-7 characters.",
3507 function );
3508
3509 return( -1 );
3510 }
3511 if( padding_size > 2 )
3512 {
3513 libcerror_error_set(
3514 error,
3515 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3516 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3517 "%s: unsupported padding in base64 encoded UTF-7 characters.",
3518 function );
3519
3520 return( -1 );
3521 }
3522 number_of_bytes = 3 - padding_size;
3523 current_byte = 0;
3524 }
3525 byte_bit_shift = 16 - ( current_byte * 8 );
3526 utf16_surrogate += ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL;
3527 current_byte += 1;
3528
3529 /* Determine if the UTF-16 character is within the low surrogate range
3530 */
3531 if( ( utf16_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
3532 && ( utf16_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
3533 {
3534 *unicode_character -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
3535 *unicode_character <<= 10;
3536 *unicode_character += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END;
3537 *unicode_character += 0x010000;
3538 }
3539 else
3540 {
3541 *unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
3542 }
3543 }
3544 if( safe_utf7_stream_index >= utf7_stream_size )
3545 {
3546 libcerror_error_set(
3547 error,
3548 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3549 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3550 "%s: UTF-7 stream too small.",
3551 function );
3552
3553 return( -1 );
3554 }
3555 if( ( current_byte >= number_of_bytes )
3556 && ( utf7_stream[ safe_utf7_stream_index ] == (uint8_t) '-' ) )
3557 {
3558 safe_utf7_stream_base64_data = 0;
3559
3560 safe_utf7_stream_index++;
3561 }
3562 }
3563 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3564 {
3565 safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3566 safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
3567 safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
3568 safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
3569 }
3570 *utf7_stream_index = safe_utf7_stream_index;
3571 *utf7_stream_base64_data = safe_utf7_stream_base64_data;
3572
3573 return( 1 );
3574 }
3575
3576 /* Copies an Unicode character into a UTF-7 stream
3577 * The bits of the base64 data contain:
3578 * 0 - 23 the base64 triplet
3579 * 24 - 25 the number of bytes in the triplet
3580 * 26 - 27 unused
3581 * 28 - 29 the current byte
3582 * 30 unused
3583 * 31 flag to indicate the current UTF-7 characters are (modified) base64 encoded
3584 *
3585 * Returns 1 if successful or -1 on error
3586 */
libuna_unicode_character_copy_to_utf7_stream(libuna_unicode_character_t unicode_character,uint8_t * utf7_stream,size_t utf7_stream_size,size_t * utf7_stream_index,uint32_t * utf7_stream_base64_data,libcerror_error_t ** error)3587 int libuna_unicode_character_copy_to_utf7_stream(
3588 libuna_unicode_character_t unicode_character,
3589 uint8_t *utf7_stream,
3590 size_t utf7_stream_size,
3591 size_t *utf7_stream_index,
3592 uint32_t *utf7_stream_base64_data,
3593 libcerror_error_t **error )
3594 {
3595 static char *function = "libuna_unicode_character_copy_to_utf7_stream";
3596 libuna_utf16_character_t utf16_surrogate = 0;
3597 size_t safe_utf7_stream_index = 0;
3598 uint32_t base64_triplet = 0;
3599 uint32_t safe_utf7_stream_base64_data = 0;
3600 uint8_t base64_encode_character = 0;
3601 uint8_t byte_bit_shift = 0;
3602 uint8_t current_byte = 0;
3603 uint8_t number_of_bytes = 0;
3604
3605 if( utf7_stream == NULL )
3606 {
3607 libcerror_error_set(
3608 error,
3609 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3610 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3611 "%s: invalid UTF-7 stream.",
3612 function );
3613
3614 return( -1 );
3615 }
3616 if( utf7_stream_size > (size_t) SSIZE_MAX )
3617 {
3618 libcerror_error_set(
3619 error,
3620 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3621 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3622 "%s: invalid UTF-7 stream size value exceeds maximum.",
3623 function );
3624
3625 return( -1 );
3626 }
3627 if( utf7_stream_index == NULL )
3628 {
3629 libcerror_error_set(
3630 error,
3631 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3632 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3633 "%s: invalid UTF-7 stream index.",
3634 function );
3635
3636 return( -1 );
3637 }
3638 if( utf7_stream_base64_data == NULL )
3639 {
3640 libcerror_error_set(
3641 error,
3642 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3643 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3644 "%s: invalid UTF-7 stream base64 data.",
3645 function );
3646
3647 return( -1 );
3648 }
3649 safe_utf7_stream_index = *utf7_stream_index;
3650 safe_utf7_stream_base64_data = *utf7_stream_base64_data;
3651
3652 /* Determine if the Unicode character is valid
3653 */
3654 if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
3655 {
3656 unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
3657 }
3658 /* A-Z is not a continous range on an EBCDIC based system
3659 * it consists of the ranges: A-I, J-R, S-Z
3660 */
3661 if( ( unicode_character >= 0x41 )
3662 && ( unicode_character <= 0x49 ) )
3663 {
3664 unicode_character = ( unicode_character - 0x41 ) + (libuna_unicode_character_t) 'A';
3665 }
3666 else if( ( unicode_character >= 0x4a )
3667 && ( unicode_character <= 0x52 ) )
3668 {
3669 unicode_character = ( unicode_character - 0x4a ) + (libuna_unicode_character_t) 'J';
3670 }
3671 else if( ( unicode_character >= 0x53 )
3672 && ( unicode_character <= 0x5a ) )
3673 {
3674 unicode_character = ( unicode_character - 0x53 ) + (libuna_unicode_character_t) 'S';
3675 }
3676 /* a-z is not a continous range on an EBCDIC based system
3677 * it consists of the ranges: a-i, j-r, s-z
3678 */
3679 else if( ( unicode_character >= 0x61 )
3680 && ( unicode_character <= 0x69 ) )
3681 {
3682 unicode_character = ( unicode_character - 0x61 ) + (libuna_unicode_character_t) 'a';
3683 }
3684 else if( ( unicode_character >= 0x6a )
3685 && ( unicode_character <= 0x72 ) )
3686 {
3687 unicode_character = ( unicode_character - 0x6a ) + (libuna_unicode_character_t) 'j';
3688 }
3689 else if( ( unicode_character >= 0x73 )
3690 && ( unicode_character <= 0x7a ) )
3691 {
3692 unicode_character = ( unicode_character - 0x73 ) + (libuna_unicode_character_t) 's';
3693 }
3694 /* 0-9
3695 */
3696 else if( ( unicode_character >= 0x30 )
3697 && ( unicode_character <= 0x39 ) )
3698 {
3699 unicode_character = ( unicode_character - 0x30 ) + (libuna_unicode_character_t) '0';
3700 }
3701 /* The + character must be escaped
3702 */
3703 else if( unicode_character == (libuna_unicode_character_t) '+' )
3704 {
3705 }
3706 /* Allow for the end of string character
3707 */
3708 else if( unicode_character == 0 )
3709 {
3710 }
3711 else if( ( unicode_character >= 256 )
3712 || ( libuna_unicode_character_utf7_valid_directly_encoded_character[ (uint8_t) unicode_character ] == 0 ) )
3713 {
3714 base64_encode_character = 1;
3715 }
3716 if( base64_encode_character == 0 )
3717 {
3718 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3719 {
3720 safe_utf7_stream_base64_data = 0;
3721 }
3722 if( safe_utf7_stream_index >= utf7_stream_size )
3723 {
3724 libcerror_error_set(
3725 error,
3726 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3727 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3728 "%s: UTF-7 stream too small.",
3729 function );
3730
3731 return( -1 );
3732 }
3733 utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) unicode_character;
3734
3735 /* The + character must be escaped
3736 */
3737 if( unicode_character == (libuna_unicode_character_t) '+' )
3738 {
3739 if( safe_utf7_stream_index >= utf7_stream_size )
3740 {
3741 libcerror_error_set(
3742 error,
3743 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3744 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3745 "%s: UTF-7 stream too small.",
3746 function );
3747
3748 return( -1 );
3749 }
3750 utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '-';
3751 }
3752 }
3753 else
3754 {
3755 /* Escape the base64 encoded chracters with a +
3756 */
3757 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
3758 {
3759 if( safe_utf7_stream_index >= utf7_stream_size )
3760 {
3761 libcerror_error_set(
3762 error,
3763 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3764 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3765 "%s: UTF-7 stream too small.",
3766 function );
3767
3768 return( -1 );
3769 }
3770 utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '+';
3771 }
3772 /* Otherwise continue the previously base64 encoded characters
3773 */
3774 else
3775 {
3776 base64_triplet = safe_utf7_stream_base64_data & 0x00ffffff;
3777 number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
3778 current_byte = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
3779
3780 if( number_of_bytes > 0 )
3781 {
3782 /* Correct the index for the last partial base64 stream
3783 */
3784 safe_utf7_stream_index -= number_of_bytes + 1;
3785 }
3786 /* Correct the index for the base64 stream termination character
3787 */
3788 safe_utf7_stream_index -= 1;
3789 }
3790 safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3791
3792 if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
3793 {
3794 unicode_character -= 0x010000;
3795
3796 utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 )
3797 + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
3798
3799 byte_bit_shift = 16 - ( current_byte * 8 );
3800 base64_triplet += (uint32_t) ( ( utf16_surrogate >> 8 ) & 0xff ) << byte_bit_shift;
3801 current_byte += 1;
3802 number_of_bytes += 1;
3803
3804 if( number_of_bytes == 3 )
3805 {
3806 if( libuna_base64_triplet_copy_to_base64_stream(
3807 base64_triplet,
3808 utf7_stream,
3809 utf7_stream_size,
3810 &safe_utf7_stream_index,
3811 0,
3812 LIBUNA_BASE64_VARIANT_UTF7,
3813 error ) != 1 )
3814 {
3815 libcerror_error_set(
3816 error,
3817 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3818 LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3819 "%s: unable to set base64 encoded UTF-7 characters.",
3820 function );
3821
3822 return( -1 );
3823 }
3824 number_of_bytes = 0;
3825 current_byte = 0;
3826 base64_triplet = 0;
3827 }
3828 byte_bit_shift = 16 - ( current_byte * 8 );
3829 base64_triplet += (uint32_t) ( utf16_surrogate & 0xff ) << byte_bit_shift;
3830 current_byte += 1;
3831 number_of_bytes += 1;
3832
3833 if( number_of_bytes == 3 )
3834 {
3835 if( libuna_base64_triplet_copy_to_base64_stream(
3836 base64_triplet,
3837 utf7_stream,
3838 utf7_stream_size,
3839 &safe_utf7_stream_index,
3840 0,
3841 LIBUNA_BASE64_VARIANT_UTF7,
3842 error ) != 1 )
3843 {
3844 libcerror_error_set(
3845 error,
3846 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3847 LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3848 "%s: unable to set base64 encoded UTF-7 characters.",
3849 function );
3850
3851 return( -1 );
3852 }
3853 number_of_bytes = 0;
3854 current_byte = 0;
3855 base64_triplet = 0;
3856 }
3857 unicode_character = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff )
3858 + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
3859 }
3860 byte_bit_shift = 16 - ( current_byte * 8 );
3861 base64_triplet += (uint32_t) ( ( unicode_character >> 8 ) & 0xff ) << byte_bit_shift;
3862 current_byte += 1;
3863 number_of_bytes += 1;
3864
3865 if( number_of_bytes == 3 )
3866 {
3867 if( libuna_base64_triplet_copy_to_base64_stream(
3868 base64_triplet,
3869 utf7_stream,
3870 utf7_stream_size,
3871 &safe_utf7_stream_index,
3872 0,
3873 LIBUNA_BASE64_VARIANT_UTF7,
3874 error ) != 1 )
3875 {
3876 libcerror_error_set(
3877 error,
3878 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3879 LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3880 "%s: unable to set base64 encoded UTF-7 characters.",
3881 function );
3882
3883 return( -1 );
3884 }
3885 number_of_bytes = 0;
3886 current_byte = 0;
3887 base64_triplet = 0;
3888 }
3889 byte_bit_shift = 16 - ( current_byte * 8 );
3890 base64_triplet += (uint32_t) ( unicode_character & 0xff ) << byte_bit_shift;
3891 current_byte += 1;
3892 number_of_bytes += 1;
3893
3894 if( number_of_bytes == 3 )
3895 {
3896 if( libuna_base64_triplet_copy_to_base64_stream(
3897 base64_triplet,
3898 utf7_stream,
3899 utf7_stream_size,
3900 &safe_utf7_stream_index,
3901 0,
3902 LIBUNA_BASE64_VARIANT_UTF7,
3903 error ) != 1 )
3904 {
3905 libcerror_error_set(
3906 error,
3907 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3908 LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3909 "%s: unable to set base64 encoded UTF-7 characters.",
3910 function );
3911
3912 return( -1 );
3913 }
3914 number_of_bytes = 0;
3915 current_byte = 0;
3916 base64_triplet = 0;
3917 }
3918 /* Terminate the base64 encoded characters
3919 */
3920 if( number_of_bytes > 0 )
3921 {
3922 if( libuna_base64_triplet_copy_to_base64_stream(
3923 base64_triplet,
3924 utf7_stream,
3925 utf7_stream_size,
3926 &safe_utf7_stream_index,
3927 3 - number_of_bytes,
3928 LIBUNA_BASE64_VARIANT_UTF7,
3929 error ) != 1 )
3930 {
3931 libcerror_error_set(
3932 error,
3933 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3934 LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3935 "%s: unable to set base64 encoded UTF-7 characters.",
3936 function );
3937
3938 return( -1 );
3939 }
3940 }
3941 if( safe_utf7_stream_index >= utf7_stream_size )
3942 {
3943 libcerror_error_set(
3944 error,
3945 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3946 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3947 "%s: UTF-7 stream too small.",
3948 function );
3949
3950 return( -1 );
3951 }
3952 utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '-';
3953 }
3954 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3955 {
3956 safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3957 safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
3958 safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
3959 safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
3960 }
3961 *utf7_stream_index = safe_utf7_stream_index;
3962 *utf7_stream_base64_data = safe_utf7_stream_base64_data;
3963
3964 return( 1 );
3965 }
3966
3967 /* Determines the size of an UTF-8 character from an Unicode character
3968 * Adds the size to the UTF-8 character size value
3969 * Returns 1 if successful or -1 on error
3970 */
libuna_unicode_character_size_to_utf8(libuna_unicode_character_t unicode_character,size_t * utf8_character_size,libcerror_error_t ** error)3971 int libuna_unicode_character_size_to_utf8(
3972 libuna_unicode_character_t unicode_character,
3973 size_t *utf8_character_size,
3974 libcerror_error_t **error )
3975 {
3976 static char *function = "libuna_unicode_character_size_to_utf8";
3977
3978 if( utf8_character_size == NULL )
3979 {
3980 libcerror_error_set(
3981 error,
3982 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3983 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3984 "%s: invalid UTF-8 character size.",
3985 function );
3986
3987 return( -1 );
3988 }
3989 if( unicode_character < 0x00000080UL )
3990 {
3991 *utf8_character_size += 1;
3992 }
3993 else if( unicode_character < 0x00000800UL )
3994 {
3995 *utf8_character_size += 2;
3996 }
3997 else if( unicode_character < 0x00010000UL )
3998 {
3999 *utf8_character_size += 3;
4000 }
4001 else if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
4002 {
4003 *utf8_character_size += 3;
4004 }
4005 else
4006 {
4007 *utf8_character_size += 4;
4008 }
4009
4010 /* If UTF-8 USC support is needed it should be implemented in
4011 * utf8_usc or something, but for now leave this here as a reminder
4012
4013 else if( unicode_character < 0x010000 )
4014 {
4015 *utf8_character_size += 3;
4016 }
4017 else if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
4018 {
4019 *utf8_character_size += 2;
4020 }
4021 else if( unicode_character < 0x0200000 )
4022 {
4023 *utf8_character_size += 4;
4024 }
4025 else if( unicode_character < 0x0400000 )
4026 {
4027 *utf8_character_size += 5;
4028 }
4029 else
4030 {
4031 *utf8_character_size += 6;
4032 }
4033 */
4034 return( 1 );
4035 }
4036
4037 /* Copies an Unicode character from an UTF-8 string
4038 * Returns 1 if successful or -1 on error
4039 */
libuna_unicode_character_copy_from_utf8(libuna_unicode_character_t * unicode_character,const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,libcerror_error_t ** error)4040 int libuna_unicode_character_copy_from_utf8(
4041 libuna_unicode_character_t *unicode_character,
4042 const libuna_utf8_character_t *utf8_string,
4043 size_t utf8_string_size,
4044 size_t *utf8_string_index,
4045 libcerror_error_t **error )
4046 {
4047 static char *function = "libuna_unicode_character_copy_from_utf8";
4048 uint8_t utf8_character_additional_bytes = 0;
4049
4050 if( unicode_character == NULL )
4051 {
4052 libcerror_error_set(
4053 error,
4054 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4055 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4056 "%s: invalid Unicode character.",
4057 function );
4058
4059 return( -1 );
4060 }
4061 if( utf8_string == NULL )
4062 {
4063 libcerror_error_set(
4064 error,
4065 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4066 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4067 "%s: invalid UTF-8 string.",
4068 function );
4069
4070 return( -1 );
4071 }
4072 if( utf8_string_size > (size_t) SSIZE_MAX )
4073 {
4074 libcerror_error_set(
4075 error,
4076 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4077 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4078 "%s: invalid UTF-8 string size value exceeds maximum.",
4079 function );
4080
4081 return( -1 );
4082 }
4083 if( utf8_string_index == NULL )
4084 {
4085 libcerror_error_set(
4086 error,
4087 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4088 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4089 "%s: invalid UTF-8 string index.",
4090 function );
4091
4092 return( -1 );
4093 }
4094 if( *utf8_string_index >= utf8_string_size )
4095 {
4096 libcerror_error_set(
4097 error,
4098 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4099 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4100 "%s: UTF-8 string too small.",
4101 function );
4102
4103 return( -1 );
4104 }
4105 /* Determine the number of additional bytes of the UTF-8 character
4106 */
4107 if( utf8_string[ *utf8_string_index ] < 0xc0 )
4108 {
4109 utf8_character_additional_bytes = 0;
4110 }
4111 else if( utf8_string[ *utf8_string_index ] < 0xe0 )
4112 {
4113 utf8_character_additional_bytes = 1;
4114 }
4115 else if( utf8_string[ *utf8_string_index ] < 0xf0 )
4116 {
4117 utf8_character_additional_bytes = 2;
4118 }
4119 else if( utf8_string[ *utf8_string_index ] < 0xf8 )
4120 {
4121 utf8_character_additional_bytes = 3;
4122 }
4123 else if( utf8_string[ *utf8_string_index ] < 0xfc )
4124 {
4125 utf8_character_additional_bytes = 4;
4126 }
4127 else
4128 {
4129 utf8_character_additional_bytes = 5;
4130 }
4131 if( ( *utf8_string_index + utf8_character_additional_bytes + 1 ) > utf8_string_size )
4132 {
4133 libcerror_error_set(
4134 error,
4135 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4136 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4137 "%s: missing UTF-8 character bytes.",
4138 function );
4139
4140 return( -1 );
4141 }
4142 /* Determine the UTF-8 character and make sure it is valid
4143 * Unicode limits the UTF-8 character to consist of a maximum of 4 bytes
4144 * while ISO 10646 Universal Character Set (UCS) allows up to 6 bytes
4145 */
4146 if( utf8_string[ *utf8_string_index ] > 0xf4 )
4147 {
4148 libcerror_error_set(
4149 error,
4150 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4151 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4152 "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
4153 function,
4154 utf8_string[ *utf8_string_index ] );
4155
4156 return( -1 );
4157 }
4158 *unicode_character = utf8_string[ *utf8_string_index ];
4159
4160 if( utf8_character_additional_bytes == 0 )
4161 {
4162 if( ( utf8_string[ *utf8_string_index ] >= 0x80 )
4163 && ( utf8_string[ *utf8_string_index ] < 0xc2 ) )
4164 {
4165 libcerror_error_set(
4166 error,
4167 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4168 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4169 "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
4170 function,
4171 utf8_string[ *utf8_string_index ] );
4172
4173 return( -1 );
4174 }
4175 }
4176 if( utf8_character_additional_bytes >= 1 )
4177 {
4178 if( utf8_string[ *utf8_string_index + 1 ] > 0xbf )
4179 {
4180 libcerror_error_set(
4181 error,
4182 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4183 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4184 "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4185 function,
4186 utf8_string[ *utf8_string_index + 1 ] );
4187
4188 return( -1 );
4189 }
4190 if( ( utf8_string[ *utf8_string_index ] == 0xe0 )
4191 && ( utf8_string[ *utf8_string_index + 1 ] < 0xa0 ) )
4192 {
4193 libcerror_error_set(
4194 error,
4195 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4196 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4197 "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4198 function,
4199 utf8_string[ *utf8_string_index + 1 ] );
4200
4201 return( -1 );
4202 }
4203 else if( ( utf8_string[ *utf8_string_index ] == 0xed )
4204 && ( utf8_string[ *utf8_string_index + 1 ] > 0x9f ) )
4205 {
4206 libcerror_error_set(
4207 error,
4208 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4209 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4210 "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4211 function,
4212 utf8_string[ *utf8_string_index + 1 ] );
4213
4214 return( -1 );
4215 }
4216 else if( ( utf8_string[ *utf8_string_index ] == 0xf0 )
4217 && ( utf8_string[ *utf8_string_index + 1 ] < 0x90 ) )
4218 {
4219 libcerror_error_set(
4220 error,
4221 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4222 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4223 "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4224 function,
4225 utf8_string[ *utf8_string_index + 1 ] );
4226
4227 return( -1 );
4228 }
4229 else if( ( utf8_string[ *utf8_string_index ] == 0xf4 )
4230 && ( utf8_string[ *utf8_string_index + 1 ] > 0x8f ) )
4231 {
4232 libcerror_error_set(
4233 error,
4234 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4235 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4236 "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4237 function,
4238 utf8_string[ *utf8_string_index + 1 ] );
4239
4240 return( -1 );
4241 }
4242 else if( utf8_string[ *utf8_string_index + 1 ] < 0x80 )
4243 {
4244 libcerror_error_set(
4245 error,
4246 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4247 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4248 "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4249 function,
4250 utf8_string[ *utf8_string_index + 1 ] );
4251
4252 return( -1 );
4253 }
4254 *unicode_character <<= 6;
4255 *unicode_character += utf8_string[ *utf8_string_index + 1 ];
4256
4257 if( utf8_character_additional_bytes == 1 )
4258 {
4259 *unicode_character -= 0x03080;
4260 }
4261 }
4262 if( utf8_character_additional_bytes >= 2 )
4263 {
4264 if( ( utf8_string[ *utf8_string_index + 2 ] < 0x80 )
4265 || ( utf8_string[ *utf8_string_index + 2 ] > 0xbf ) )
4266 {
4267 libcerror_error_set(
4268 error,
4269 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4270 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4271 "%s: invalid 3rd UTF-8 character byte: 0x%02" PRIx8 ".",
4272 function,
4273 utf8_string[ *utf8_string_index + 2 ] );
4274
4275 return( -1 );
4276 }
4277 *unicode_character <<= 6;
4278 *unicode_character += utf8_string[ *utf8_string_index + 2 ];
4279
4280 if( utf8_character_additional_bytes == 2 )
4281 {
4282 *unicode_character -= 0x0e2080;
4283 }
4284 }
4285 if( utf8_character_additional_bytes >= 3 )
4286 {
4287 if( ( utf8_string[ *utf8_string_index + 3 ] < 0x80 )
4288 || ( utf8_string[ *utf8_string_index + 3 ] > 0xbf ) )
4289 {
4290 libcerror_error_set(
4291 error,
4292 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4293 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4294 "%s: invalid 4th UTF-8 character byte: 0x%02" PRIx8 ".",
4295 function,
4296 utf8_string[ *utf8_string_index + 3 ] );
4297
4298 return( -1 );
4299 }
4300 *unicode_character <<= 6;
4301 *unicode_character += utf8_string[ *utf8_string_index + 3 ];
4302
4303 if( utf8_character_additional_bytes == 3 )
4304 {
4305 *unicode_character -= 0x03c82080;
4306 }
4307 }
4308 if( utf8_character_additional_bytes >= 4 )
4309 {
4310 if( ( utf8_string[ *utf8_string_index + 4 ] < 0x80 )
4311 || ( utf8_string[ *utf8_string_index + 4 ] > 0xbf ) )
4312 {
4313 libcerror_error_set(
4314 error,
4315 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4316 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4317 "%s: invalid 5th UTF-8 character byte: 0x%02" PRIx8 ".",
4318 function,
4319 utf8_string[ *utf8_string_index + 4 ] );
4320
4321 return( -1 );
4322 }
4323 *unicode_character <<= 6;
4324 *unicode_character += utf8_string[ *utf8_string_index + 4 ];
4325
4326 if( utf8_character_additional_bytes == 4 )
4327 {
4328 *unicode_character -= 0x0fa082080;
4329 }
4330 }
4331 if( utf8_character_additional_bytes == 5 )
4332 {
4333 if( ( utf8_string[ *utf8_string_index + 5 ] < 0x80 )
4334 || ( utf8_string[ *utf8_string_index + 5 ] > 0xbf ) )
4335 {
4336 libcerror_error_set(
4337 error,
4338 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4339 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4340 "%s: invalid 6th UTF-8 character byte: 0x%02" PRIx8 ".",
4341 function,
4342 utf8_string[ *utf8_string_index + 5 ] );
4343
4344 return( -1 );
4345 }
4346 *unicode_character <<= 6;
4347 *unicode_character += utf8_string[ *utf8_string_index + 5 ];
4348 *unicode_character -= 0x082082080;
4349 }
4350 /* Determine if the Unicode character is valid
4351 */
4352 if( *unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
4353 {
4354 *unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
4355 }
4356 *utf8_string_index += 1 + utf8_character_additional_bytes;
4357
4358 return( 1 );
4359 }
4360
4361 /* Copies an Unicode character into a UTF-8 string
4362 * Returns 1 if successful or -1 on error
4363 */
libuna_unicode_character_copy_to_utf8(libuna_unicode_character_t unicode_character,libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,libcerror_error_t ** error)4364 int libuna_unicode_character_copy_to_utf8(
4365 libuna_unicode_character_t unicode_character,
4366 libuna_utf8_character_t *utf8_string,
4367 size_t utf8_string_size,
4368 size_t *utf8_string_index,
4369 libcerror_error_t **error )
4370 {
4371 static char *function = "libuna_unicode_character_copy_to_utf8";
4372 uint8_t utf8_character_additional_bytes = 0;
4373 uint8_t utf8_character_iterator = 0;
4374 uint8_t utf8_first_character_mark = 0;
4375
4376 if( utf8_string == NULL )
4377 {
4378 libcerror_error_set(
4379 error,
4380 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4381 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4382 "%s: invalid UTF-8 string.",
4383 function );
4384
4385 return( -1 );
4386 }
4387 if( utf8_string_size > (size_t) SSIZE_MAX )
4388 {
4389 libcerror_error_set(
4390 error,
4391 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4392 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4393 "%s: invalid UTF-8 string size value exceeds maximum.",
4394 function );
4395
4396 return( -1 );
4397 }
4398 if( utf8_string_index == NULL )
4399 {
4400 libcerror_error_set(
4401 error,
4402 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4403 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4404 "%s: invalid UTF-8 string index.",
4405 function );
4406
4407 return( -1 );
4408 }
4409 if( *utf8_string_index >= utf8_string_size )
4410 {
4411 libcerror_error_set(
4412 error,
4413 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4414 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4415 "%s: UTF-8 string too small.",
4416 function );
4417
4418 return( -1 );
4419 }
4420 /* Determine if the Unicode character is valid
4421 */
4422 if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
4423 {
4424 unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
4425 }
4426 /* Determine how many UTF-8 character bytes are required
4427 */
4428 if( unicode_character < 0x080 )
4429 {
4430 utf8_character_additional_bytes = 0;
4431 utf8_first_character_mark = 0;
4432 }
4433 else if( unicode_character < 0x0800 )
4434 {
4435 utf8_character_additional_bytes = 1;
4436 utf8_first_character_mark = 0x0c0;
4437 }
4438 else if( unicode_character < 0x010000 )
4439 {
4440 utf8_character_additional_bytes = 2;
4441 utf8_first_character_mark = 0x0e0;
4442 }
4443 else if( unicode_character < 0x0200000 )
4444 {
4445 utf8_character_additional_bytes = 3;
4446 utf8_first_character_mark = 0x0f0;
4447 }
4448 else if( unicode_character < 0x0400000 )
4449 {
4450 utf8_character_additional_bytes = 4;
4451 utf8_first_character_mark = 0x0f8;
4452 }
4453 else
4454 {
4455 utf8_character_additional_bytes = 5;
4456 utf8_first_character_mark = 0x0fc;
4457 }
4458 /* Convert Unicode character into UTF-8 character bytes
4459 */
4460 if( ( *utf8_string_index + utf8_character_additional_bytes ) >= utf8_string_size )
4461 {
4462 libcerror_error_set(
4463 error,
4464 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4465 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4466 "%s: UTF-8 string too small.",
4467 function );
4468
4469 return( -1 );
4470 }
4471 for( utf8_character_iterator = utf8_character_additional_bytes;
4472 utf8_character_iterator > 0;
4473 utf8_character_iterator-- )
4474 {
4475 utf8_string[ *utf8_string_index + utf8_character_iterator ] = (libuna_utf8_character_t) ( ( unicode_character & 0x0bf ) | 0x080 );
4476
4477 unicode_character >>= 6;
4478 }
4479 utf8_string[ *utf8_string_index ] = (libuna_utf8_character_t) ( unicode_character | utf8_first_character_mark );
4480
4481 *utf8_string_index += 1 + utf8_character_additional_bytes;
4482
4483 return( 1 );
4484 }
4485
4486 /* Determines the size of an UTF-16 character from an Unicode character
4487 * Adds the size to the UTF-16 character size value
4488 * Returns 1 if successful or -1 on error
4489 */
libuna_unicode_character_size_to_utf16(libuna_unicode_character_t unicode_character,size_t * utf16_character_size,libcerror_error_t ** error)4490 int libuna_unicode_character_size_to_utf16(
4491 libuna_unicode_character_t unicode_character,
4492 size_t *utf16_character_size,
4493 libcerror_error_t **error )
4494 {
4495 static char *function = "libuna_unicode_character_size_to_utf16";
4496
4497 if( utf16_character_size == NULL )
4498 {
4499 libcerror_error_set(
4500 error,
4501 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4502 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4503 "%s: invalid UTF-16 character size.",
4504 function );
4505
4506 return( -1 );
4507 }
4508 if( ( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
4509 && ( unicode_character <= LIBUNA_UTF16_CHARACTER_MAX ) )
4510 {
4511 *utf16_character_size += 2;
4512 }
4513 else
4514 {
4515 *utf16_character_size += 1;
4516 }
4517 return( 1 );
4518 }
4519
4520 /* Copies an Unicode character from an UTF-16 string
4521 * Returns 1 if successful or -1 on error
4522 */
libuna_unicode_character_copy_from_utf16(libuna_unicode_character_t * unicode_character,const libuna_utf16_character_t * utf16_string,size_t utf16_string_size,size_t * utf16_string_index,libcerror_error_t ** error)4523 int libuna_unicode_character_copy_from_utf16(
4524 libuna_unicode_character_t *unicode_character,
4525 const libuna_utf16_character_t *utf16_string,
4526 size_t utf16_string_size,
4527 size_t *utf16_string_index,
4528 libcerror_error_t **error )
4529 {
4530 static char *function = "libuna_unicode_character_copy_from_utf16";
4531 libuna_utf16_character_t utf16_surrogate = 0;
4532
4533 if( unicode_character == NULL )
4534 {
4535 libcerror_error_set(
4536 error,
4537 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4538 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4539 "%s: invalid Unicode character.",
4540 function );
4541
4542 return( -1 );
4543 }
4544 if( utf16_string == NULL )
4545 {
4546 libcerror_error_set(
4547 error,
4548 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4549 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4550 "%s: invalid UTF-16 string.",
4551 function );
4552
4553 return( -1 );
4554 }
4555 if( utf16_string_size > (size_t) SSIZE_MAX )
4556 {
4557 libcerror_error_set(
4558 error,
4559 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4560 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4561 "%s: invalid UTF-16 string size value exceeds maximum.",
4562 function );
4563
4564 return( -1 );
4565 }
4566 if( utf16_string_index == NULL )
4567 {
4568 libcerror_error_set(
4569 error,
4570 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4571 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4572 "%s: invalid UTF-16 string index.",
4573 function );
4574
4575 return( -1 );
4576 }
4577 if( *utf16_string_index >= utf16_string_size )
4578 {
4579 libcerror_error_set(
4580 error,
4581 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4582 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4583 "%s: UTF-16 string too small.",
4584 function );
4585
4586 return( -1 );
4587 }
4588 *unicode_character = utf16_string[ *utf16_string_index ];
4589 *utf16_string_index += 1;
4590
4591 /* Determine if the UTF-16 character is within the high surrogate range
4592 */
4593 if( ( *unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4594 && ( *unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
4595 {
4596 if( *utf16_string_index >= utf16_string_size )
4597 {
4598 libcerror_error_set(
4599 error,
4600 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4601 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4602 "%s: missing surrogate UTF-16 character bytes.",
4603 function );
4604
4605 return( -1 );
4606 }
4607 utf16_surrogate = utf16_string[ *utf16_string_index ];
4608 *utf16_string_index += 1;
4609
4610 /* Determine if the UTF-16 character is within the low surrogate range
4611 */
4612 if( ( utf16_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
4613 && ( utf16_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4614 {
4615 *unicode_character -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
4616 *unicode_character <<= 10;
4617 *unicode_character += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END;
4618 *unicode_character += 0x010000;
4619 }
4620 else
4621 {
4622 *unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
4623 }
4624 }
4625 /* Determine if the Unicode character is valid
4626 */
4627 else if( ( *unicode_character >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
4628 && ( *unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4629 {
4630 *unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
4631 }
4632 return( 1 );
4633 }
4634
4635 /* Copies an Unicode character into a UTF-16 string
4636 * Returns 1 if successful or -1 on error
4637 */
libuna_unicode_character_copy_to_utf16(libuna_unicode_character_t unicode_character,libuna_utf16_character_t * utf16_string,size_t utf16_string_size,size_t * utf16_string_index,libcerror_error_t ** error)4638 int libuna_unicode_character_copy_to_utf16(
4639 libuna_unicode_character_t unicode_character,
4640 libuna_utf16_character_t *utf16_string,
4641 size_t utf16_string_size,
4642 size_t *utf16_string_index,
4643 libcerror_error_t **error )
4644 {
4645 static char *function = "libuna_unicode_character_copy_to_utf16";
4646
4647 if( utf16_string == NULL )
4648 {
4649 libcerror_error_set(
4650 error,
4651 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4652 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4653 "%s: invalid UTF-16 string.",
4654 function );
4655
4656 return( -1 );
4657 }
4658 if( utf16_string_size > (size_t) SSIZE_MAX )
4659 {
4660 libcerror_error_set(
4661 error,
4662 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4663 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4664 "%s: invalid UTF-16 string size value exceeds maximum.",
4665 function );
4666
4667 return( -1 );
4668 }
4669 if( utf16_string_index == NULL )
4670 {
4671 libcerror_error_set(
4672 error,
4673 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4674 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4675 "%s: invalid UTF-16 string index.",
4676 function );
4677
4678 return( -1 );
4679 }
4680 if( *utf16_string_index >= utf16_string_size )
4681 {
4682 libcerror_error_set(
4683 error,
4684 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4685 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4686 "%s: UTF-16 string too small.",
4687 function );
4688
4689 return( -1 );
4690 }
4691 /* Determine if the Unicode character is valid
4692 */
4693 if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4694 && ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4695 || ( unicode_character > LIBUNA_UTF16_CHARACTER_MAX ) )
4696 {
4697 unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
4698 }
4699 if( unicode_character <= LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
4700 {
4701 utf16_string[ *utf16_string_index ] = (libuna_utf16_character_t) unicode_character;
4702
4703 *utf16_string_index += 1;
4704 }
4705 else
4706 {
4707 if( ( *utf16_string_index + 1 ) >= utf16_string_size )
4708 {
4709 libcerror_error_set(
4710 error,
4711 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4712 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4713 "%s: UTF-16 string too small.",
4714 function );
4715
4716 return( -1 );
4717 }
4718 unicode_character -= 0x010000;
4719 utf16_string[ *utf16_string_index ] = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
4720 utf16_string[ *utf16_string_index + 1 ] = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
4721
4722 *utf16_string_index += 2;
4723 }
4724 return( 1 );
4725 }
4726
4727 /* Copies an Unicode character from an UTF-16 stream
4728 * Returns 1 if successful or -1 on error
4729 */
libuna_unicode_character_copy_from_utf16_stream(libuna_unicode_character_t * unicode_character,const uint8_t * utf16_stream,size_t utf16_stream_size,size_t * utf16_stream_index,int byte_order,libcerror_error_t ** error)4730 int libuna_unicode_character_copy_from_utf16_stream(
4731 libuna_unicode_character_t *unicode_character,
4732 const uint8_t *utf16_stream,
4733 size_t utf16_stream_size,
4734 size_t *utf16_stream_index,
4735 int byte_order,
4736 libcerror_error_t **error )
4737 {
4738 static char *function = "libuna_unicode_character_copy_from_utf16_stream";
4739 libuna_utf16_character_t utf16_surrogate = 0;
4740
4741 if( unicode_character == NULL )
4742 {
4743 libcerror_error_set(
4744 error,
4745 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4746 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4747 "%s: invalid Unicode character.",
4748 function );
4749
4750 return( -1 );
4751 }
4752 if( utf16_stream == NULL )
4753 {
4754 libcerror_error_set(
4755 error,
4756 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4757 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4758 "%s: invalid UTF-16 stream.",
4759 function );
4760
4761 return( -1 );
4762 }
4763 if( utf16_stream_size > (size_t) SSIZE_MAX )
4764 {
4765 libcerror_error_set(
4766 error,
4767 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4768 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4769 "%s: invalid UTF-16 stream size value exceeds maximum.",
4770 function );
4771
4772 return( -1 );
4773 }
4774 if( utf16_stream_index == NULL )
4775 {
4776 libcerror_error_set(
4777 error,
4778 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4779 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4780 "%s: invalid UTF-16 stream index.",
4781 function );
4782
4783 return( -1 );
4784 }
4785 if( ( *utf16_stream_index + 1 ) >= utf16_stream_size )
4786 {
4787 libcerror_error_set(
4788 error,
4789 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4790 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4791 "%s: UTF-16 stream too small.",
4792 function );
4793
4794 return( -1 );
4795 }
4796 if( ( byte_order != LIBUNA_ENDIAN_BIG )
4797 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
4798 {
4799 libcerror_error_set(
4800 error,
4801 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4802 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4803 "%s: unsupported byte order.",
4804 function );
4805
4806 return( -1 );
4807 }
4808 if( byte_order == LIBUNA_ENDIAN_BIG )
4809 {
4810 *unicode_character = utf16_stream[ *utf16_stream_index ];
4811 *unicode_character <<= 8;
4812 *unicode_character += utf16_stream[ *utf16_stream_index + 1 ];
4813 }
4814 else if( byte_order == LIBUNA_ENDIAN_LITTLE )
4815 {
4816 *unicode_character = utf16_stream[ *utf16_stream_index + 1 ];
4817 *unicode_character <<= 8;
4818 *unicode_character += utf16_stream[ *utf16_stream_index ];
4819 }
4820 *utf16_stream_index += 2;
4821
4822 /* Determine if the UTF-16 character is within the high surrogate range
4823 */
4824 if( ( *unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4825 && ( *unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
4826 {
4827 if( ( *utf16_stream_index + 1 ) >= utf16_stream_size )
4828 {
4829 libcerror_error_set(
4830 error,
4831 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4832 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4833 "%s: missing surrogate UTF-16 character bytes.",
4834 function );
4835
4836 return( -1 );
4837 }
4838 if( byte_order == LIBUNA_ENDIAN_BIG )
4839 {
4840 utf16_surrogate = utf16_stream[ *utf16_stream_index ];
4841 utf16_surrogate <<= 8;
4842 utf16_surrogate += utf16_stream[ *utf16_stream_index + 1 ];
4843 }
4844 else if( byte_order == LIBUNA_ENDIAN_LITTLE )
4845 {
4846 utf16_surrogate = utf16_stream[ *utf16_stream_index + 1 ];
4847 utf16_surrogate <<= 8;
4848 utf16_surrogate += utf16_stream[ *utf16_stream_index ];
4849 }
4850 *utf16_stream_index += 2;
4851
4852 /* Determine if the UTF-16 character is within the low surrogate range
4853 */
4854 if( ( utf16_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
4855 && ( utf16_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4856 {
4857 *unicode_character -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
4858 *unicode_character <<= 10;
4859 *unicode_character += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END;
4860 *unicode_character += 0x010000;
4861 }
4862 else
4863 {
4864 *unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
4865 }
4866 }
4867 /* Determine if the Unicode character is valid
4868 */
4869 else if( ( *unicode_character >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
4870 && ( *unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4871 {
4872 *unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
4873 }
4874 return( 1 );
4875 }
4876
4877 /* Copies an Unicode character to an UTF-16 stream
4878 * Returns 1 if successful or -1 on error
4879 */
libuna_unicode_character_copy_to_utf16_stream(libuna_unicode_character_t unicode_character,uint8_t * utf16_stream,size_t utf16_stream_size,size_t * utf16_stream_index,int byte_order,libcerror_error_t ** error)4880 int libuna_unicode_character_copy_to_utf16_stream(
4881 libuna_unicode_character_t unicode_character,
4882 uint8_t *utf16_stream,
4883 size_t utf16_stream_size,
4884 size_t *utf16_stream_index,
4885 int byte_order,
4886 libcerror_error_t **error )
4887 {
4888 static char *function = "libuna_unicode_character_copy_to_utf16_stream";
4889 libuna_utf16_character_t utf16_surrogate = 0;
4890
4891 if( utf16_stream == NULL )
4892 {
4893 libcerror_error_set(
4894 error,
4895 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4896 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4897 "%s: invalid UTF-16 stream.",
4898 function );
4899
4900 return( -1 );
4901 }
4902 if( utf16_stream_size > (size_t) SSIZE_MAX )
4903 {
4904 libcerror_error_set(
4905 error,
4906 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4907 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4908 "%s: invalid UTF-16 stream size value exceeds maximum.",
4909 function );
4910
4911 return( -1 );
4912 }
4913 if( utf16_stream_index == NULL )
4914 {
4915 libcerror_error_set(
4916 error,
4917 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4918 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4919 "%s: invalid UTF-16 stream index.",
4920 function );
4921
4922 return( -1 );
4923 }
4924 if( ( *utf16_stream_index + 1 ) >= utf16_stream_size )
4925 {
4926 libcerror_error_set(
4927 error,
4928 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4929 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4930 "%s: UTF-16 stream too small.",
4931 function );
4932
4933 return( -1 );
4934 }
4935 if( ( byte_order != LIBUNA_ENDIAN_BIG )
4936 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
4937 {
4938 libcerror_error_set(
4939 error,
4940 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4941 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4942 "%s: unsupported byte order.",
4943 function );
4944
4945 return( -1 );
4946 }
4947 /* Determine if the Unicode character is valid
4948 */
4949 if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4950 && ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4951 || ( unicode_character > LIBUNA_UTF16_CHARACTER_MAX ) )
4952 {
4953 unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
4954 }
4955 if( unicode_character <= LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
4956 {
4957 if( byte_order == LIBUNA_ENDIAN_BIG )
4958 {
4959 utf16_stream[ *utf16_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
4960 unicode_character >>= 8;
4961 utf16_stream[ *utf16_stream_index ] = (uint8_t) ( unicode_character & 0xff );
4962 }
4963 else if( byte_order == LIBUNA_ENDIAN_LITTLE )
4964 {
4965 utf16_stream[ *utf16_stream_index ] = (uint8_t) ( unicode_character & 0xff );
4966 unicode_character >>= 8;
4967 utf16_stream[ *utf16_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
4968 }
4969 *utf16_stream_index += 2;
4970 }
4971 else
4972 {
4973 if( ( *utf16_stream_index + 3 ) >= utf16_stream_size )
4974 {
4975 libcerror_error_set(
4976 error,
4977 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4978 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4979 "%s: UTF-16 stream too small.",
4980 function );
4981
4982 return( -1 );
4983 }
4984 unicode_character -= 0x010000;
4985
4986 utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
4987
4988 if( byte_order == LIBUNA_ENDIAN_BIG )
4989 {
4990 utf16_stream[ *utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
4991 utf16_surrogate >>= 8;
4992 utf16_stream[ *utf16_stream_index ] = (uint8_t) ( utf16_surrogate & 0xff );
4993 }
4994 else if( byte_order == LIBUNA_ENDIAN_LITTLE )
4995 {
4996 utf16_stream[ *utf16_stream_index ] = (uint8_t) ( utf16_surrogate & 0xff );
4997 utf16_surrogate >>= 8;
4998 utf16_stream[ *utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
4999 }
5000 *utf16_stream_index += 2;
5001
5002 utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
5003
5004 if( byte_order == LIBUNA_ENDIAN_BIG )
5005 {
5006 utf16_stream[ *utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5007 utf16_surrogate >>= 8;
5008 utf16_stream[ *utf16_stream_index ] = (uint8_t) ( utf16_surrogate & 0xff );
5009 }
5010 else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5011 {
5012 utf16_stream[ *utf16_stream_index ] = (uint8_t) ( utf16_surrogate & 0xff );
5013 utf16_surrogate >>= 8;
5014 utf16_stream[ *utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5015 }
5016 *utf16_stream_index += 2;
5017 }
5018 return( 1 );
5019 }
5020
5021 /* Determines the size of an UTF-32 character from an Unicode character
5022 * Adds the size to the UTF-32 character size value
5023 * Returns 1 if successful or -1 on error
5024 */
libuna_unicode_character_size_to_utf32(libuna_unicode_character_t unicode_character LIBUNA_ATTRIBUTE_UNUSED,size_t * utf32_character_size,libcerror_error_t ** error)5025 int libuna_unicode_character_size_to_utf32(
5026 libuna_unicode_character_t unicode_character LIBUNA_ATTRIBUTE_UNUSED,
5027 size_t *utf32_character_size,
5028 libcerror_error_t **error )
5029 {
5030 static char *function = "libuna_unicode_character_size_to_utf32";
5031
5032 LIBUNA_UNREFERENCED_PARAMETER( unicode_character )
5033
5034 if( utf32_character_size == NULL )
5035 {
5036 libcerror_error_set(
5037 error,
5038 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5039 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5040 "%s: invalid UTF-32 character size.",
5041 function );
5042
5043 return( -1 );
5044 }
5045 *utf32_character_size += 1;
5046
5047 return( 1 );
5048 }
5049
5050 /* Copies an Unicode character from an UTF-32 string
5051 * Returns 1 if successful or -1 on error
5052 */
libuna_unicode_character_copy_from_utf32(libuna_unicode_character_t * unicode_character,const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,size_t * utf32_string_index,libcerror_error_t ** error)5053 int libuna_unicode_character_copy_from_utf32(
5054 libuna_unicode_character_t *unicode_character,
5055 const libuna_utf32_character_t *utf32_string,
5056 size_t utf32_string_size,
5057 size_t *utf32_string_index,
5058 libcerror_error_t **error )
5059 {
5060 static char *function = "libuna_unicode_character_copy_from_utf32";
5061
5062 if( unicode_character == NULL )
5063 {
5064 libcerror_error_set(
5065 error,
5066 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5067 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5068 "%s: invalid Unicode character.",
5069 function );
5070
5071 return( -1 );
5072 }
5073 if( utf32_string == NULL )
5074 {
5075 libcerror_error_set(
5076 error,
5077 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5078 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5079 "%s: invalid UTF-32 string.",
5080 function );
5081
5082 return( -1 );
5083 }
5084 if( utf32_string_size > (size_t) SSIZE_MAX )
5085 {
5086 libcerror_error_set(
5087 error,
5088 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5089 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5090 "%s: invalid UTF-32 string size value exceeds maximum.",
5091 function );
5092
5093 return( -1 );
5094 }
5095 if( utf32_string_index == NULL )
5096 {
5097 libcerror_error_set(
5098 error,
5099 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5100 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5101 "%s: invalid UTF-32 string index.",
5102 function );
5103
5104 return( -1 );
5105 }
5106 if( *utf32_string_index >= utf32_string_size )
5107 {
5108 libcerror_error_set(
5109 error,
5110 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5111 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5112 "%s: UTF-32 string too small.",
5113 function );
5114
5115 return( -1 );
5116 }
5117 /* Determine if the Unicode character is valid
5118 */
5119 if( ( utf32_string[ *utf32_string_index ] >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
5120 && ( utf32_string[ *utf32_string_index ] <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5121 {
5122 *unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
5123 }
5124 else
5125 {
5126 *unicode_character = utf32_string[ *utf32_string_index ];
5127 }
5128 *utf32_string_index += 1;
5129
5130 return( 1 );
5131 }
5132
5133 /* Copies an Unicode character into a UTF-32 string
5134 * Returns 1 if successful or -1 on error
5135 */
libuna_unicode_character_copy_to_utf32(libuna_unicode_character_t unicode_character,libuna_utf32_character_t * utf32_string,size_t utf32_string_size,size_t * utf32_string_index,libcerror_error_t ** error)5136 int libuna_unicode_character_copy_to_utf32(
5137 libuna_unicode_character_t unicode_character,
5138 libuna_utf32_character_t *utf32_string,
5139 size_t utf32_string_size,
5140 size_t *utf32_string_index,
5141 libcerror_error_t **error )
5142 {
5143 static char *function = "libuna_unicode_character_copy_to_utf32";
5144
5145 if( utf32_string == NULL )
5146 {
5147 libcerror_error_set(
5148 error,
5149 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5150 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5151 "%s: invalid UTF-32 string.",
5152 function );
5153
5154 return( -1 );
5155 }
5156 if( utf32_string_size > (size_t) SSIZE_MAX )
5157 {
5158 libcerror_error_set(
5159 error,
5160 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5161 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5162 "%s: invalid UTF-32 string size value exceeds maximum.",
5163 function );
5164
5165 return( -1 );
5166 }
5167 if( utf32_string_index == NULL )
5168 {
5169 libcerror_error_set(
5170 error,
5171 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5172 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5173 "%s: invalid UTF-32 string index.",
5174 function );
5175
5176 return( -1 );
5177 }
5178 if( *utf32_string_index >= utf32_string_size )
5179 {
5180 libcerror_error_set(
5181 error,
5182 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5183 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5184 "%s: UTF-32 string too small.",
5185 function );
5186
5187 return( -1 );
5188 }
5189 /* Determine if the Unicode character is valid
5190 */
5191 if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5192 && ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5193 || ( unicode_character > LIBUNA_UTF32_CHARACTER_MAX ) )
5194 {
5195 utf32_string[ *utf32_string_index ] = (libuna_utf32_character_t) LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
5196 }
5197 else
5198 {
5199 utf32_string[ *utf32_string_index ] = (libuna_utf32_character_t) unicode_character;
5200 }
5201 *utf32_string_index += 1;
5202
5203 return( 1 );
5204 }
5205
5206 /* Copies an Unicode character from an UTF-32 stream
5207 * Returns 1 if successful or -1 on error
5208 */
libuna_unicode_character_copy_from_utf32_stream(libuna_unicode_character_t * unicode_character,const uint8_t * utf32_stream,size_t utf32_stream_size,size_t * utf32_stream_index,int byte_order,libcerror_error_t ** error)5209 int libuna_unicode_character_copy_from_utf32_stream(
5210 libuna_unicode_character_t *unicode_character,
5211 const uint8_t *utf32_stream,
5212 size_t utf32_stream_size,
5213 size_t *utf32_stream_index,
5214 int byte_order,
5215 libcerror_error_t **error )
5216 {
5217 static char *function = "libuna_unicode_character_copy_from_utf32_stream";
5218
5219 if( unicode_character == NULL )
5220 {
5221 libcerror_error_set(
5222 error,
5223 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5224 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5225 "%s: invalid Unicode character.",
5226 function );
5227
5228 return( -1 );
5229 }
5230 if( utf32_stream == NULL )
5231 {
5232 libcerror_error_set(
5233 error,
5234 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5235 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5236 "%s: invalid UTF-32 stream.",
5237 function );
5238
5239 return( -1 );
5240 }
5241 if( utf32_stream_size > (size_t) SSIZE_MAX )
5242 {
5243 libcerror_error_set(
5244 error,
5245 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5246 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5247 "%s: invalid UTF-32 stream size value exceeds maximum.",
5248 function );
5249
5250 return( -1 );
5251 }
5252 if( utf32_stream_index == NULL )
5253 {
5254 libcerror_error_set(
5255 error,
5256 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5257 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5258 "%s: invalid UTF-32 stream index.",
5259 function );
5260
5261 return( -1 );
5262 }
5263 if( ( *utf32_stream_index + 3 ) >= utf32_stream_size )
5264 {
5265 libcerror_error_set(
5266 error,
5267 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5268 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5269 "%s: UTF-32 stream too small.",
5270 function );
5271
5272 return( -1 );
5273 }
5274 if( ( byte_order != LIBUNA_ENDIAN_BIG )
5275 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
5276 {
5277 libcerror_error_set(
5278 error,
5279 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5280 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5281 "%s: unsupported byte order.",
5282 function );
5283
5284 return( -1 );
5285 }
5286 if( byte_order == LIBUNA_ENDIAN_BIG )
5287 {
5288 *unicode_character = utf32_stream[ *utf32_stream_index ];
5289 *unicode_character <<= 8;
5290 *unicode_character += utf32_stream[ *utf32_stream_index + 1 ];
5291 *unicode_character <<= 8;
5292 *unicode_character += utf32_stream[ *utf32_stream_index + 2 ];
5293 *unicode_character <<= 8;
5294 *unicode_character += utf32_stream[ *utf32_stream_index + 3 ];
5295 }
5296 else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5297 {
5298 *unicode_character = utf32_stream[ *utf32_stream_index + 3 ];
5299 *unicode_character <<= 8;
5300 *unicode_character += utf32_stream[ *utf32_stream_index + 2 ];
5301 *unicode_character <<= 8;
5302 *unicode_character += utf32_stream[ *utf32_stream_index + 1 ];
5303 *unicode_character <<= 8;
5304 *unicode_character += utf32_stream[ *utf32_stream_index ];
5305 }
5306 /* Determine if the Unicode character is valid
5307 */
5308 if( ( *unicode_character >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
5309 && ( *unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5310 {
5311 *unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
5312 }
5313 *utf32_stream_index += 4;
5314
5315 return( 1 );
5316 }
5317
5318 /* Copies an Unicode character to an UTF-32 stream
5319 * Returns 1 if successful or -1 on error
5320 */
libuna_unicode_character_copy_to_utf32_stream(libuna_unicode_character_t unicode_character,uint8_t * utf32_stream,size_t utf32_stream_size,size_t * utf32_stream_index,int byte_order,libcerror_error_t ** error)5321 int libuna_unicode_character_copy_to_utf32_stream(
5322 libuna_unicode_character_t unicode_character,
5323 uint8_t *utf32_stream,
5324 size_t utf32_stream_size,
5325 size_t *utf32_stream_index,
5326 int byte_order,
5327 libcerror_error_t **error )
5328 {
5329 static char *function = "libuna_unicode_character_copy_to_utf32_stream";
5330
5331 if( utf32_stream == NULL )
5332 {
5333 libcerror_error_set(
5334 error,
5335 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5336 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5337 "%s: invalid UTF-32 stream.",
5338 function );
5339
5340 return( -1 );
5341 }
5342 if( utf32_stream_size > (size_t) SSIZE_MAX )
5343 {
5344 libcerror_error_set(
5345 error,
5346 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5347 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5348 "%s: invalid UTF-32 stream size value exceeds maximum.",
5349 function );
5350
5351 return( -1 );
5352 }
5353 if( utf32_stream_index == NULL )
5354 {
5355 libcerror_error_set(
5356 error,
5357 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5358 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5359 "%s: invalid UTF-32 stream index.",
5360 function );
5361
5362 return( -1 );
5363 }
5364 if( ( *utf32_stream_index + 3 ) >= utf32_stream_size )
5365 {
5366 libcerror_error_set(
5367 error,
5368 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5369 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5370 "%s: UTF-32 stream too small.",
5371 function );
5372
5373 return( -1 );
5374 }
5375 if( ( byte_order != LIBUNA_ENDIAN_BIG )
5376 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
5377 {
5378 libcerror_error_set(
5379 error,
5380 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5381 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5382 "%s: unsupported byte order.",
5383 function );
5384
5385 return( -1 );
5386 }
5387 /* Determine if the Unicode character is valid
5388 */
5389 if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5390 && ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5391 || ( unicode_character > LIBUNA_UTF32_CHARACTER_MAX ) )
5392 {
5393 unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
5394 }
5395 if( byte_order == LIBUNA_ENDIAN_BIG )
5396 {
5397 utf32_stream[ *utf32_stream_index + 3 ] = (uint8_t) ( unicode_character & 0xff );
5398 unicode_character >>= 8;
5399 utf32_stream[ *utf32_stream_index + 2 ] = (uint8_t) ( unicode_character & 0xff );
5400 unicode_character >>= 8;
5401 utf32_stream[ *utf32_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5402 unicode_character >>= 8;
5403 utf32_stream[ *utf32_stream_index ] = (uint8_t) ( unicode_character & 0xff );
5404 }
5405 else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5406 {
5407 utf32_stream[ *utf32_stream_index ] = (uint8_t) ( unicode_character & 0xff );
5408 unicode_character >>= 8;
5409 utf32_stream[ *utf32_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5410 unicode_character >>= 8;
5411 utf32_stream[ *utf32_stream_index + 2 ] = (uint8_t) ( unicode_character & 0xff );
5412 unicode_character >>= 8;
5413 utf32_stream[ *utf32_stream_index + 3 ] = (uint8_t) ( unicode_character & 0xff );
5414 }
5415 *utf32_stream_index += 4;
5416
5417 return( 1 );
5418 }
5419
5420