1 /*
2 * Unicode character functions
3 *
4 * Copyright (C) 2008-2020, Joachim Metz <joachim.metz@gmail.com>
5 *
6 * Refer to AUTHORS for acknowledgements.
7 *
8 * This program is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU Lesser General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this program. If not, see <https://www.gnu.org/licenses/>.
20 */
21
22 #include <common.h>
23 #include <types.h>
24
25 #include "libuna_base64_stream.h"
26 #include "libuna_codepage_iso_8859_2.h"
27 #include "libuna_codepage_iso_8859_3.h"
28 #include "libuna_codepage_iso_8859_4.h"
29 #include "libuna_codepage_iso_8859_5.h"
30 #include "libuna_codepage_iso_8859_6.h"
31 #include "libuna_codepage_iso_8859_7.h"
32 #include "libuna_codepage_iso_8859_8.h"
33 #include "libuna_codepage_iso_8859_9.h"
34 #include "libuna_codepage_iso_8859_10.h"
35 #include "libuna_codepage_iso_8859_13.h"
36 #include "libuna_codepage_iso_8859_14.h"
37 #include "libuna_codepage_iso_8859_15.h"
38 #include "libuna_codepage_iso_8859_16.h"
39 #include "libuna_codepage_koi8_r.h"
40 #include "libuna_codepage_koi8_u.h"
41 #include "libuna_codepage_windows_874.h"
42 #include "libuna_codepage_windows_932.h"
43 #include "libuna_codepage_windows_936.h"
44 #include "libuna_codepage_windows_949.h"
45 #include "libuna_codepage_windows_950.h"
46 #include "libuna_codepage_windows_1250.h"
47 #include "libuna_codepage_windows_1251.h"
48 #include "libuna_codepage_windows_1252.h"
49 #include "libuna_codepage_windows_1253.h"
50 #include "libuna_codepage_windows_1254.h"
51 #include "libuna_codepage_windows_1255.h"
52 #include "libuna_codepage_windows_1256.h"
53 #include "libuna_codepage_windows_1257.h"
54 #include "libuna_codepage_windows_1258.h"
55 #include "libuna_definitions.h"
56 #include "libuna_libcerror.h"
57 #include "libuna_types.h"
58 #include "libuna_unicode_character.h"
59 #include "libuna_unused.h"
60
/* Lookup table, indexed by byte value, that flags the characters that are
 * accepted as directly encoded characters in an UTF-7 stream: 1 if valid, 0 if not
 *
 * Valid directly encoded characters (RFC 2152 Set D): A-Z, a-z, 0-9, '\'', '(', ')', ',', '-', '.', '/', ':', '?'
 * Valid directly encoded whitespace: '\t', '\n', '\r', ' '
 * Valid optional directly encoded characters (RFC 2152 Set O): '!', '"', '#', '$', '%', '&', '*', ';', '<', '=', '>', '@', '[', ']', '^', '_', '`', '{', '|', '}'
 *
 * Note that '\\' (0x5c) is flagged as valid as well, although RFC 2152 omits it
 * from Set O; it remains accepted so that this table is not stricter than before.
 * '+' (0x2b) and '~' (0x7e) are not flagged as valid.
 */
uint8_t libuna_unicode_character_utf7_valid_directly_encoded_character[ 256 ] = {
	/* 0x00 - 0x0f: '\t', '\n' and '\r' */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
	/* 0x10 - 0x1f */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2f: everything except '+' */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
	/* 0x30 - 0x3f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 - 0x4f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 - 0x5f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 - 0x6f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 - 0x7f: everything except '~' and 0x7f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
	/* 0x80 - 0xff: no byte outside the 7-bit range is valid */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
82
/* Lookup table, indexed by byte value, that flags the characters of the
 * UTF-7 base64 alphabet: 1 if the byte is part of the alphabet, 0 if not
 *
 * Flagged characters: A-Z, a-z, 0-9, '+' and '/'
 */
uint8_t libuna_unicode_character_utf7_valid_base64_character[ 256 ] = {
	/* 0x00 - 0x0f */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 - 0x1f */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2f: '+' and '/' */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
	/* 0x30 - 0x3f: '0' - '9' */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
	/* 0x40 - 0x4f: 'A' - 'O' */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 - 0x5f: 'P' - 'Z' */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x60 - 0x6f: 'a' - 'o' */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 - 0x7f: 'p' - 'z' */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 - 0xff: no byte outside the 7-bit range is flagged */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
102
103 /* Determines the size of a byte stream character from an Unicode character
104 * Adds the size to the byte stream character size value
105 * Returns 1 if successful or -1 on error
106 */
libuna_unicode_character_size_to_byte_stream(libuna_unicode_character_t unicode_character,int codepage,size_t * byte_stream_character_size,libcerror_error_t ** error)107 int libuna_unicode_character_size_to_byte_stream(
108 libuna_unicode_character_t unicode_character,
109 int codepage,
110 size_t *byte_stream_character_size,
111 libcerror_error_t **error )
112 {
113 static char *function = "libuna_unicode_character_size_to_byte_stream";
114 size_t safe_byte_stream_character_size = 0;
115 int result = 1;
116
117 if( byte_stream_character_size == NULL )
118 {
119 libcerror_error_set(
120 error,
121 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
122 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
123 "%s: invalid byte stream character size.",
124 function );
125
126 return( -1 );
127 }
128 safe_byte_stream_character_size = *byte_stream_character_size;
129
130 switch( codepage )
131 {
132 case LIBUNA_CODEPAGE_ASCII:
133 case LIBUNA_CODEPAGE_ISO_8859_1:
134 case LIBUNA_CODEPAGE_ISO_8859_2:
135 case LIBUNA_CODEPAGE_ISO_8859_3:
136 case LIBUNA_CODEPAGE_ISO_8859_4:
137 case LIBUNA_CODEPAGE_ISO_8859_5:
138 case LIBUNA_CODEPAGE_ISO_8859_6:
139 case LIBUNA_CODEPAGE_ISO_8859_7:
140 case LIBUNA_CODEPAGE_ISO_8859_8:
141 case LIBUNA_CODEPAGE_ISO_8859_9:
142 case LIBUNA_CODEPAGE_ISO_8859_10:
143 case LIBUNA_CODEPAGE_ISO_8859_11:
144 case LIBUNA_CODEPAGE_ISO_8859_13:
145 case LIBUNA_CODEPAGE_ISO_8859_14:
146 case LIBUNA_CODEPAGE_ISO_8859_15:
147 case LIBUNA_CODEPAGE_ISO_8859_16:
148 case LIBUNA_CODEPAGE_KOI8_R:
149 case LIBUNA_CODEPAGE_KOI8_U:
150 case LIBUNA_CODEPAGE_WINDOWS_874:
151 case LIBUNA_CODEPAGE_WINDOWS_1250:
152 case LIBUNA_CODEPAGE_WINDOWS_1251:
153 case LIBUNA_CODEPAGE_WINDOWS_1252:
154 case LIBUNA_CODEPAGE_WINDOWS_1253:
155 case LIBUNA_CODEPAGE_WINDOWS_1254:
156 case LIBUNA_CODEPAGE_WINDOWS_1255:
157 case LIBUNA_CODEPAGE_WINDOWS_1256:
158 case LIBUNA_CODEPAGE_WINDOWS_1257:
159 case LIBUNA_CODEPAGE_WINDOWS_1258:
160 safe_byte_stream_character_size += 1;
161 break;
162
163 case LIBUNA_CODEPAGE_WINDOWS_932:
164 result = libuna_codepage_windows_932_unicode_character_size_to_byte_stream(
165 unicode_character,
166 &safe_byte_stream_character_size,
167 error );
168 break;
169
170 case LIBUNA_CODEPAGE_WINDOWS_936:
171 result = libuna_codepage_windows_936_unicode_character_size_to_byte_stream(
172 unicode_character,
173 &safe_byte_stream_character_size,
174 error );
175 break;
176
177 case LIBUNA_CODEPAGE_WINDOWS_949:
178 result = libuna_codepage_windows_949_unicode_character_size_to_byte_stream(
179 unicode_character,
180 &safe_byte_stream_character_size,
181 error );
182 break;
183
184 case LIBUNA_CODEPAGE_WINDOWS_950:
185 result = libuna_codepage_windows_950_unicode_character_size_to_byte_stream(
186 unicode_character,
187 &safe_byte_stream_character_size,
188 error );
189 break;
190
191 default:
192 libcerror_error_set(
193 error,
194 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
195 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
196 "%s: unsupported codepage: %d.",
197 function,
198 codepage );
199
200 return( -1 );
201 }
202 if( result != 1 )
203 {
204 libcerror_error_set(
205 error,
206 LIBCERROR_ERROR_DOMAIN_RUNTIME,
207 LIBCERROR_RUNTIME_ERROR_GET_FAILED,
208 "%s: unable to determine byte stream character size.",
209 function );
210
211 return( -1 );
212 }
213 *byte_stream_character_size = safe_byte_stream_character_size;
214
215 return( result );
216 }
217
218 /* Copies an Unicode character from a byte stream
219 * Returns 1 if successful or -1 on error
220 */
libuna_unicode_character_copy_from_byte_stream(libuna_unicode_character_t * unicode_character,const uint8_t * byte_stream,size_t byte_stream_size,size_t * byte_stream_index,int codepage,libcerror_error_t ** error)221 int libuna_unicode_character_copy_from_byte_stream(
222 libuna_unicode_character_t *unicode_character,
223 const uint8_t *byte_stream,
224 size_t byte_stream_size,
225 size_t *byte_stream_index,
226 int codepage,
227 libcerror_error_t **error )
228 {
229 static char *function = "libuna_unicode_character_copy_from_byte_stream";
230 libuna_unicode_character_t safe_unicode_character = 0;
231 size_t safe_byte_stream_index = 0;
232 uint8_t byte_stream_character = 0;
233 int result = 1;
234
235 if( unicode_character == NULL )
236 {
237 libcerror_error_set(
238 error,
239 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
240 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
241 "%s: invalid Unicode character.",
242 function );
243
244 return( -1 );
245 }
246 if( byte_stream == NULL )
247 {
248 libcerror_error_set(
249 error,
250 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
251 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
252 "%s: invalid byte stream.",
253 function );
254
255 return( -1 );
256 }
257 if( byte_stream_size > (size_t) SSIZE_MAX )
258 {
259 libcerror_error_set(
260 error,
261 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
262 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
263 "%s: invalid byte stream size value exceeds maximum.",
264 function );
265
266 return( -1 );
267 }
268 if( byte_stream_index == NULL )
269 {
270 libcerror_error_set(
271 error,
272 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
273 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
274 "%s: invalid byte stream index.",
275 function );
276
277 return( -1 );
278 }
279 safe_byte_stream_index = *byte_stream_index;
280
281 if( safe_byte_stream_index >= byte_stream_size )
282 {
283 libcerror_error_set(
284 error,
285 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
286 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
287 "%s: byte stream too small.",
288 function );
289
290 return( -1 );
291 }
292 byte_stream_character = byte_stream[ safe_byte_stream_index ];
293
294 switch( codepage )
295 {
296 case LIBUNA_CODEPAGE_ASCII:
297 if( byte_stream_character < 0x80 )
298 {
299 safe_unicode_character = byte_stream_character;
300 }
301 else
302 {
303 safe_unicode_character = 0xfffd;
304 }
305 safe_byte_stream_index += 1;
306
307 break;
308
309 case LIBUNA_CODEPAGE_ISO_8859_1:
310 safe_unicode_character = byte_stream_character;
311
312 safe_byte_stream_index += 1;
313
314 break;
315
316 case LIBUNA_CODEPAGE_ISO_8859_2:
317 if( byte_stream_character < 0xa0 )
318 {
319 safe_unicode_character = byte_stream_character;
320 }
321 else
322 {
323 byte_stream_character -= 0xa0;
324
325 safe_unicode_character = libuna_codepage_iso_8859_2_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
326 }
327 safe_byte_stream_index += 1;
328
329 break;
330
331 case LIBUNA_CODEPAGE_ISO_8859_3:
332 if( byte_stream_character < 0xa0 )
333 {
334 safe_unicode_character = byte_stream_character;
335 }
336 else
337 {
338 byte_stream_character -= 0xa0;
339
340 safe_unicode_character = libuna_codepage_iso_8859_3_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
341 }
342 safe_byte_stream_index += 1;
343
344 break;
345
346 case LIBUNA_CODEPAGE_ISO_8859_4:
347 if( byte_stream_character < 0xa0 )
348 {
349 safe_unicode_character = byte_stream_character;
350 }
351 else
352 {
353 byte_stream_character -= 0xa0;
354
355 safe_unicode_character = libuna_codepage_iso_8859_4_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
356 }
357 safe_byte_stream_index += 1;
358
359 break;
360
361 case LIBUNA_CODEPAGE_ISO_8859_5:
362 if( byte_stream_character < 0xa0 )
363 {
364 safe_unicode_character = byte_stream_character;
365 }
366 else
367 {
368 byte_stream_character -= 0xa0;
369
370 safe_unicode_character = libuna_codepage_iso_8859_5_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
371 }
372 safe_byte_stream_index += 1;
373
374 break;
375
376 case LIBUNA_CODEPAGE_ISO_8859_6:
377 if( byte_stream_character < 0xa0 )
378 {
379 safe_unicode_character = byte_stream_character;
380 }
381 else
382 {
383 byte_stream_character -= 0xa0;
384
385 safe_unicode_character = libuna_codepage_iso_8859_6_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
386 }
387 safe_byte_stream_index += 1;
388
389 break;
390
391 case LIBUNA_CODEPAGE_ISO_8859_7:
392 if( byte_stream_character < 0xa0 )
393 {
394 safe_unicode_character = byte_stream_character;
395 }
396 else
397 {
398 byte_stream_character -= 0xa0;
399
400 safe_unicode_character = libuna_codepage_iso_8859_7_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
401 }
402 safe_byte_stream_index += 1;
403
404 break;
405
406 case LIBUNA_CODEPAGE_ISO_8859_8:
407 if( byte_stream_character < 0xa0 )
408 {
409 safe_unicode_character = byte_stream_character;
410 }
411 else
412 {
413 byte_stream_character -= 0xa0;
414
415 safe_unicode_character = libuna_codepage_iso_8859_8_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
416 }
417 safe_byte_stream_index += 1;
418
419 break;
420
421 case LIBUNA_CODEPAGE_ISO_8859_9:
422 if( byte_stream_character < 0xd0 )
423 {
424 safe_unicode_character = byte_stream_character;
425 }
426 else
427 {
428 byte_stream_character -= 0xd0;
429
430 safe_unicode_character = libuna_codepage_iso_8859_9_byte_stream_to_unicode_base_0xd0[ byte_stream_character ];
431 }
432 safe_byte_stream_index += 1;
433
434 break;
435
436 case LIBUNA_CODEPAGE_ISO_8859_10:
437 if( byte_stream_character < 0xa0 )
438 {
439 safe_unicode_character = byte_stream_character;
440 }
441 else
442 {
443 byte_stream_character -= 0xa0;
444
445 safe_unicode_character = libuna_codepage_iso_8859_10_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
446 }
447 safe_byte_stream_index += 1;
448
449 break;
450
451 case LIBUNA_CODEPAGE_ISO_8859_11:
452 if( byte_stream_character < 0xa1 )
453 {
454 safe_unicode_character = byte_stream_character;
455 }
456 else if( byte_stream_character < 0xdb )
457 {
458 safe_unicode_character = byte_stream_character + 0x0d60;
459 }
460 else if( byte_stream_character < 0xdf )
461 {
462 safe_unicode_character = 0xfffd;
463 }
464 else if( byte_stream_character < 0xfc )
465 {
466 safe_unicode_character = byte_stream_character + 0x0d60;
467 }
468 else
469 {
470 safe_unicode_character = 0xfffd;
471 }
472 safe_byte_stream_index += 1;
473
474 break;
475
476 case LIBUNA_CODEPAGE_ISO_8859_13:
477 if( byte_stream_character < 0xa0 )
478 {
479 safe_unicode_character = byte_stream_character;
480 }
481 else
482 {
483 byte_stream_character -= 0xa0;
484
485 safe_unicode_character = libuna_codepage_iso_8859_13_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
486 }
487 safe_byte_stream_index += 1;
488
489 break;
490
491 case LIBUNA_CODEPAGE_ISO_8859_14:
492 if( byte_stream_character < 0xa0 )
493 {
494 safe_unicode_character = byte_stream_character;
495 }
496 else
497 {
498 byte_stream_character -= 0xa0;
499
500 safe_unicode_character = libuna_codepage_iso_8859_14_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
501 }
502 safe_byte_stream_index += 1;
503
504 break;
505
506 case LIBUNA_CODEPAGE_ISO_8859_15:
507 if( ( byte_stream_character >= 0xa0 )
508 && ( byte_stream_character < 0xc0 ) )
509 {
510 byte_stream_character -= 0xa0;
511
512 safe_unicode_character = libuna_codepage_iso_8859_15_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
513 }
514 else
515 {
516 safe_unicode_character = byte_stream_character;
517 }
518 safe_byte_stream_index += 1;
519
520 break;
521
522 case LIBUNA_CODEPAGE_ISO_8859_16:
523 if( byte_stream_character < 0xa0 )
524 {
525 safe_unicode_character = byte_stream_character;
526 }
527 else
528 {
529 byte_stream_character -= 0xa0;
530
531 safe_unicode_character = libuna_codepage_iso_8859_16_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
532 }
533 safe_byte_stream_index += 1;
534
535 break;
536
537 case LIBUNA_CODEPAGE_KOI8_R:
538 if( byte_stream_character < 0x80 )
539 {
540 safe_unicode_character = byte_stream_character;
541 }
542 else
543 {
544 byte_stream_character -= 0x80;
545
546 safe_unicode_character = libuna_codepage_koi8_r_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
547 }
548 safe_byte_stream_index += 1;
549
550 break;
551
552 case LIBUNA_CODEPAGE_KOI8_U:
553 if( byte_stream_character < 0x80 )
554 {
555 safe_unicode_character = byte_stream_character;
556 }
557 else
558 {
559 byte_stream_character -= 0x80;
560
561 safe_unicode_character = libuna_codepage_koi8_u_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
562 }
563 safe_byte_stream_index += 1;
564
565 break;
566
567 case LIBUNA_CODEPAGE_WINDOWS_874:
568 if( byte_stream_character < 0x80 )
569 {
570 safe_unicode_character = byte_stream_character;
571 }
572 else
573 {
574 byte_stream_character -= 0x80;
575
576 safe_unicode_character = libuna_codepage_windows_874_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
577 }
578 safe_byte_stream_index += 1;
579
580 break;
581
582 case LIBUNA_CODEPAGE_WINDOWS_932:
583 result = libuna_codepage_windows_932_copy_from_byte_stream(
584 &safe_unicode_character,
585 byte_stream,
586 byte_stream_size,
587 &safe_byte_stream_index,
588 error );
589 break;
590
591 case LIBUNA_CODEPAGE_WINDOWS_936:
592 result = libuna_codepage_windows_936_copy_from_byte_stream(
593 &safe_unicode_character,
594 byte_stream,
595 byte_stream_size,
596 &safe_byte_stream_index,
597 error );
598 break;
599
600 case LIBUNA_CODEPAGE_WINDOWS_949:
601 result = libuna_codepage_windows_949_copy_from_byte_stream(
602 &safe_unicode_character,
603 byte_stream,
604 byte_stream_size,
605 &safe_byte_stream_index,
606 error );
607 break;
608
609 case LIBUNA_CODEPAGE_WINDOWS_950:
610 result = libuna_codepage_windows_950_copy_from_byte_stream(
611 &safe_unicode_character,
612 byte_stream,
613 byte_stream_size,
614 &safe_byte_stream_index,
615 error );
616 break;
617
618 case LIBUNA_CODEPAGE_WINDOWS_1250:
619 if( byte_stream_character < 0x80 )
620 {
621 safe_unicode_character = byte_stream_character;
622 }
623 else
624 {
625 byte_stream_character -= 0x80;
626
627 safe_unicode_character = libuna_codepage_windows_1250_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
628 }
629 safe_byte_stream_index += 1;
630
631 break;
632
633 case LIBUNA_CODEPAGE_WINDOWS_1251:
634 if( byte_stream_character < 0x80 )
635 {
636 safe_unicode_character = byte_stream_character;
637 }
638 else
639 {
640 byte_stream_character -= 0x80;
641
642 safe_unicode_character = libuna_codepage_windows_1251_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
643 }
644 safe_byte_stream_index += 1;
645
646 break;
647
648 case LIBUNA_CODEPAGE_WINDOWS_1252:
649 if( ( byte_stream_character < 0x80 )
650 || ( byte_stream_character >= 0xa0 ) )
651 {
652 safe_unicode_character = byte_stream_character;
653 }
654 else
655 {
656 byte_stream_character -= 0x80;
657
658 safe_unicode_character = libuna_codepage_windows_1252_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
659 }
660 safe_byte_stream_index += 1;
661
662 break;
663
664 case LIBUNA_CODEPAGE_WINDOWS_1253:
665 if( byte_stream_character < 0x80 )
666 {
667 safe_unicode_character = byte_stream_character;
668 }
669 else
670 {
671 byte_stream_character -= 0x80;
672
673 safe_unicode_character = libuna_codepage_windows_1253_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
674 }
675 safe_byte_stream_index += 1;
676
677 break;
678
679 case LIBUNA_CODEPAGE_WINDOWS_1254:
680 if( byte_stream_character < 0x80 )
681 {
682 safe_unicode_character = byte_stream_character;
683 }
684 else if( byte_stream_character < 0xa0 )
685 {
686 byte_stream_character -= 0x80;
687
688 safe_unicode_character = libuna_codepage_windows_1254_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
689 }
690 else if( byte_stream_character < 0xd0 )
691 {
692 safe_unicode_character = byte_stream_character;
693 }
694 else if( byte_stream_character < 0xe0 )
695 {
696 byte_stream_character -= 0xd0;
697
698 safe_unicode_character = libuna_codepage_windows_1254_byte_stream_to_unicode_base_0xd0[ byte_stream_character ];
699 }
700 else if( byte_stream_character < 0xf0 )
701 {
702 safe_unicode_character = byte_stream_character;
703 }
704 else
705 {
706 byte_stream_character -= 0xf0;
707
708 safe_unicode_character = libuna_codepage_windows_1254_byte_stream_to_unicode_base_0xf0[ byte_stream_character ];
709 }
710 safe_byte_stream_index += 1;
711
712 break;
713
714 case LIBUNA_CODEPAGE_WINDOWS_1255:
715 if( byte_stream_character < 0x80 )
716 {
717 safe_unicode_character = byte_stream_character;
718 }
719 else
720 {
721 byte_stream_character -= 0x80;
722
723 safe_unicode_character = libuna_codepage_windows_1255_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
724 }
725 safe_byte_stream_index += 1;
726
727 break;
728
729 case LIBUNA_CODEPAGE_WINDOWS_1256:
730 if( byte_stream_character < 0x80 )
731 {
732 safe_unicode_character = byte_stream_character;
733 }
734 else
735 {
736 byte_stream_character -= 0x80;
737
738 safe_unicode_character = libuna_codepage_windows_1256_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
739 }
740 safe_byte_stream_index += 1;
741
742 break;
743
744 case LIBUNA_CODEPAGE_WINDOWS_1257:
745 if( byte_stream_character < 0x80 )
746 {
747 safe_unicode_character = byte_stream_character;
748 }
749 else
750 {
751 byte_stream_character -= 0x80;
752
753 safe_unicode_character = libuna_codepage_windows_1257_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
754 }
755 safe_byte_stream_index += 1;
756
757 break;
758
759 case LIBUNA_CODEPAGE_WINDOWS_1258:
760 if( byte_stream_character < 0x80 )
761 {
762 safe_unicode_character = byte_stream_character;
763 }
764 else
765 {
766 byte_stream_character -= 0x80;
767
768 safe_unicode_character = libuna_codepage_windows_1258_byte_stream_to_unicode_base_0x80[ byte_stream_character ];
769 }
770 safe_byte_stream_index += 1;
771
772 break;
773
774 default:
775 libcerror_error_set(
776 error,
777 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
778 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
779 "%s: unsupported codepage: %d.",
780 function,
781 codepage );
782
783 return( -1 );
784 }
785 if( result != 1 )
786 {
787 libcerror_error_set(
788 error,
789 LIBCERROR_ERROR_DOMAIN_RUNTIME,
790 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
791 "%s: unable to copy Unicode character from byte stream.",
792 function );
793
794 return( -1 );
795 }
796 *unicode_character = safe_unicode_character;
797 *byte_stream_index = safe_byte_stream_index;
798
799 return( result );
800 }
801
802 /* Copies an Unicode character to a byte stream
803 * Returns 1 if successful or -1 on error
804 */
libuna_unicode_character_copy_to_byte_stream(libuna_unicode_character_t unicode_character,uint8_t * byte_stream,size_t byte_stream_size,size_t * byte_stream_index,int codepage,libcerror_error_t ** error)805 int libuna_unicode_character_copy_to_byte_stream(
806 libuna_unicode_character_t unicode_character,
807 uint8_t *byte_stream,
808 size_t byte_stream_size,
809 size_t *byte_stream_index,
810 int codepage,
811 libcerror_error_t **error )
812 {
813 static char *function = "libuna_unicode_character_copy_to_byte_stream";
814 size_t safe_byte_stream_index = 0;
815 int result = 1;
816
817 if( byte_stream == NULL )
818 {
819 libcerror_error_set(
820 error,
821 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
822 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
823 "%s: invalid byte stream.",
824 function );
825
826 return( -1 );
827 }
828 if( byte_stream_size > (size_t) SSIZE_MAX )
829 {
830 libcerror_error_set(
831 error,
832 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
833 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
834 "%s: invalid byte stream size value exceeds maximum.",
835 function );
836
837 return( -1 );
838 }
839 if( byte_stream_index == NULL )
840 {
841 libcerror_error_set(
842 error,
843 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
844 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
845 "%s: invalid byte stream index.",
846 function );
847
848 return( -1 );
849 }
850 safe_byte_stream_index = *byte_stream_index;
851
852 if( safe_byte_stream_index >= byte_stream_size )
853 {
854 libcerror_error_set(
855 error,
856 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
857 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
858 "%s: byte stream too small.",
859 function );
860
861 return( -1 );
862 }
863 switch( codepage )
864 {
865 case LIBUNA_CODEPAGE_ASCII:
866 if( unicode_character < 0x0080 )
867 {
868 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
869 }
870 else
871 {
872 byte_stream[ safe_byte_stream_index ] = 0x1a;
873 }
874 safe_byte_stream_index += 1;
875
876 break;
877
878 case LIBUNA_CODEPAGE_ISO_8859_1:
879 if( unicode_character < 0x0100 )
880 {
881 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
882 }
883 else
884 {
885 byte_stream[ safe_byte_stream_index ] = 0x1a;
886 }
887 safe_byte_stream_index += 1;
888
889 break;
890
891 case LIBUNA_CODEPAGE_ISO_8859_2:
892 if( unicode_character < 0x00a0 )
893 {
894 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
895 }
896 else if( ( unicode_character >= 0x00a0 )
897 && ( unicode_character < 0x0120 ) )
898 {
899 unicode_character -= 0x00a0;
900
901 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
902 }
903 else if( ( unicode_character >= 0x0138 )
904 && ( unicode_character < 0x0180 ) )
905 {
906 unicode_character -= 0x0138;
907
908 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x0138[ unicode_character ];
909 }
910 else if( ( unicode_character >= 0x02d8 )
911 && ( unicode_character < 0x02e0 ) )
912 {
913 unicode_character -= 0x02d8;
914
915 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x02d8[ unicode_character ];
916 }
917 else if( unicode_character == 0x02c7 )
918 {
919 byte_stream[ safe_byte_stream_index ] = 0xb7;
920 }
921 else
922 {
923 byte_stream[ safe_byte_stream_index ] = 0x1a;
924 }
925 safe_byte_stream_index += 1;
926
927 break;
928
929 case LIBUNA_CODEPAGE_ISO_8859_3:
930 if( unicode_character < 0x00a0 )
931 {
932 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
933 }
934 else if( ( unicode_character >= 0x00a0 )
935 && ( unicode_character < 0x0100 ) )
936 {
937 unicode_character -= 0x00a0;
938
939 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
940 }
941 else if( ( unicode_character >= 0x0108 )
942 && ( unicode_character < 0x0110 ) )
943 {
944 unicode_character -= 0x0108;
945
946 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0108[ unicode_character ];
947 }
948 else if( ( unicode_character >= 0x0118 )
949 && ( unicode_character < 0x0128 ) )
950 {
951 unicode_character -= 0x0118;
952
953 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0118[ unicode_character ];
954 }
955 else if( ( unicode_character >= 0x0130 )
956 && ( unicode_character < 0x0138 ) )
957 {
958 unicode_character -= 0x0130;
959
960 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0130[ unicode_character ];
961 }
962 else if( ( unicode_character >= 0x0158 )
963 && ( unicode_character < 0x0160 ) )
964 {
965 unicode_character -= 0x0158;
966
967 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0158[ unicode_character ];
968 }
969 else switch( unicode_character )
970 {
971 case 0x016c:
972 byte_stream[ safe_byte_stream_index ] = 0xdd;
973 break;
974
975 case 0x016d:
976 byte_stream[ safe_byte_stream_index ] = 0xfd;
977 break;
978
979 case 0x017b:
980 byte_stream[ safe_byte_stream_index ] = 0xaf;
981 break;
982
983 case 0x017c:
984 byte_stream[ safe_byte_stream_index ] = 0xbf;
985 break;
986
987 case 0x02d8:
988 byte_stream[ safe_byte_stream_index ] = 0xa2;
989 break;
990
991 case 0x02d9:
992 byte_stream[ safe_byte_stream_index ] = 0xff;
993 break;
994
995 default:
996 byte_stream[ safe_byte_stream_index ] = 0x1a;
997 break;
998 }
999 safe_byte_stream_index += 1;
1000
1001 break;
1002
1003 case LIBUNA_CODEPAGE_ISO_8859_4:
1004 if( unicode_character < 0x00a0 )
1005 {
1006 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1007 }
1008 else if( ( unicode_character >= 0x00a0 )
1009 && ( unicode_character < 0x0158 ) )
1010 {
1011 unicode_character -= 0x00a0;
1012
1013 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_4_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1014 }
1015 else if( ( unicode_character >= 0x0160 )
1016 && ( unicode_character < 0x0180 ) )
1017 {
1018 unicode_character -= 0x0160;
1019
1020 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_4_unicode_to_byte_stream_base_0x0160[ unicode_character ];
1021 }
1022 else switch( unicode_character )
1023 {
1024 case 0x02c7:
1025 byte_stream[ safe_byte_stream_index ] = 0xb7;
1026 break;
1027
1028 case 0x02d9:
1029 byte_stream[ safe_byte_stream_index ] = 0xff;
1030 break;
1031
1032 case 0x02db:
1033 byte_stream[ safe_byte_stream_index ] = 0xb2;
1034 break;
1035
1036 default:
1037 byte_stream[ safe_byte_stream_index ] = 0x1a;
1038 break;
1039 }
1040 safe_byte_stream_index += 1;
1041
1042 break;
1043
1044 case LIBUNA_CODEPAGE_ISO_8859_5:
1045 if( unicode_character < 0x00a1 )
1046 {
1047 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1048 }
1049 else if( ( unicode_character >= 0x0400 )
1050 && ( unicode_character < 0x0460 ) )
1051 {
1052 unicode_character -= 0x0400;
1053
1054 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_5_unicode_to_byte_stream_base_0x0400[ unicode_character ];
1055 }
1056 else switch( unicode_character )
1057 {
1058 case 0x00a7:
1059 byte_stream[ safe_byte_stream_index ] = 0xfd;
1060 break;
1061
1062 case 0x00ad:
1063 byte_stream[ safe_byte_stream_index ] = 0xad;
1064 break;
1065
1066 case 0x2116:
1067 byte_stream[ safe_byte_stream_index ] = 0xf0;
1068 break;
1069
1070 default:
1071 byte_stream[ safe_byte_stream_index ] = 0x1a;
1072 break;
1073 }
1074 safe_byte_stream_index += 1;
1075
1076 break;
1077
1078 case LIBUNA_CODEPAGE_ISO_8859_6:
1079 if( unicode_character < 0x00a1 )
1080 {
1081 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1082 }
1083 else if( ( unicode_character >= 0x0618 )
1084 && ( unicode_character < 0x658 ) )
1085 {
1086 unicode_character -= 0x0618;
1087
1088 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_6_unicode_to_byte_stream_base_0x0618[ unicode_character ];
1089 }
1090 else switch( unicode_character )
1091 {
1092 case 0x00a4:
1093 byte_stream[ safe_byte_stream_index ] = 0xa4;
1094 break;
1095
1096 case 0x00ad:
1097 byte_stream[ safe_byte_stream_index ] = 0xad;
1098 break;
1099
1100 case 0x060c:
1101 byte_stream[ safe_byte_stream_index ] = 0xac;
1102 break;
1103
1104 default:
1105 byte_stream[ safe_byte_stream_index ] = 0x1a;
1106 break;
1107 }
1108 safe_byte_stream_index += 1;
1109
1110 break;
1111
1112 case LIBUNA_CODEPAGE_ISO_8859_7:
1113 if( unicode_character < 0x00a0 )
1114 {
1115 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1116 }
1117 else if( ( unicode_character >= 0x00a0 )
1118 && ( unicode_character < 0x00b8 ) )
1119 {
1120 unicode_character -= 0x00a0;
1121
1122 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_7_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1123 }
1124 else if( ( unicode_character >= 0x0380 )
1125 && ( unicode_character < 0x03d0 ) )
1126 {
1127 unicode_character -= 0x0380;
1128
1129 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_7_unicode_to_byte_stream_base_0x0380[ unicode_character ];
1130 }
1131 else switch( unicode_character )
1132 {
1133 case 0x00bb:
1134 byte_stream[ safe_byte_stream_index ] = 0xbb;
1135 break;
1136
1137 case 0x00bd:
1138 byte_stream[ safe_byte_stream_index ] = 0xbd;
1139 break;
1140
1141 case 0x037a:
1142 byte_stream[ safe_byte_stream_index ] = 0xaa;
1143 break;
1144
1145 case 0x2015:
1146 byte_stream[ safe_byte_stream_index ] = 0xaf;
1147 break;
1148
1149 case 0x2018:
1150 byte_stream[ safe_byte_stream_index ] = 0xa1;
1151 break;
1152
1153 case 0x2019:
1154 byte_stream[ safe_byte_stream_index ] = 0xa2;
1155 break;
1156
1157 case 0x20ac:
1158 byte_stream[ safe_byte_stream_index ] = 0xa4;
1159 break;
1160
1161 case 0x20af:
1162 byte_stream[ safe_byte_stream_index ] = 0xa5;
1163 break;
1164
1165 default:
1166 byte_stream[ safe_byte_stream_index ] = 0x1a;
1167 break;
1168 }
1169 safe_byte_stream_index += 1;
1170
1171 break;
1172
1173 case LIBUNA_CODEPAGE_ISO_8859_8:
1174 if( unicode_character < 0x00a0 )
1175 {
1176 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1177 }
1178 else if( ( unicode_character >= 0x00a0 )
1179 && ( unicode_character < 0x00c0 ) )
1180 {
1181 unicode_character -= 0x00a0;
1182
1183 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_8_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1184 }
1185 else if( ( unicode_character >= 0x05d0 )
1186 && ( unicode_character < 0x05f0 ) )
1187 {
1188 unicode_character -= 0x05d0;
1189
1190 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_8_unicode_to_byte_stream_base_0x05d0[ unicode_character ];
1191 }
1192 else switch( unicode_character )
1193 {
1194 case 0x00d7:
1195 byte_stream[ safe_byte_stream_index ] = 0xaa;
1196 break;
1197
1198 case 0x00f7:
1199 byte_stream[ safe_byte_stream_index ] = 0xba;
1200 break;
1201
1202 case 0x200e:
1203 byte_stream[ safe_byte_stream_index ] = 0xfd;
1204 break;
1205
1206 case 0x200f:
1207 byte_stream[ safe_byte_stream_index ] = 0xfe;
1208 break;
1209
1210 case 0x2017:
1211 byte_stream[ safe_byte_stream_index ] = 0xdf;
1212 break;
1213
1214 default:
1215 byte_stream[ safe_byte_stream_index ] = 0x1a;
1216 break;
1217 }
1218 safe_byte_stream_index += 1;
1219
1220 break;
1221
1222 case LIBUNA_CODEPAGE_ISO_8859_9:
1223 if( unicode_character < 0x00d0 )
1224 {
1225 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1226 }
1227 else if( ( unicode_character >= 0x00d0 )
1228 && ( unicode_character < 0x0100 ) )
1229 {
1230 unicode_character -= 0x00d0;
1231
1232 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_9_unicode_to_byte_stream_base_0x00d0[ unicode_character ];
1233 }
1234 else switch( unicode_character )
1235 {
1236 case 0x011e:
1237 byte_stream[ safe_byte_stream_index ] = 0xd0;
1238 break;
1239
1240 case 0x011f:
1241 byte_stream[ safe_byte_stream_index ] = 0xf0;
1242 break;
1243
1244 case 0x0130:
1245 byte_stream[ safe_byte_stream_index ] = 0xdd;
1246 break;
1247
1248 case 0x0131:
1249 byte_stream[ safe_byte_stream_index ] = 0xfd;
1250 break;
1251
1252 case 0x015e:
1253 byte_stream[ safe_byte_stream_index ] = 0xde;
1254 break;
1255
1256 case 0x015f:
1257 byte_stream[ safe_byte_stream_index ] = 0xfe;
1258 break;
1259
1260 default:
1261 byte_stream[ safe_byte_stream_index ] = 0x1a;
1262 break;
1263 }
1264 safe_byte_stream_index += 1;
1265
1266 break;
1267
1268 case LIBUNA_CODEPAGE_ISO_8859_10:
1269 if( unicode_character < 0x00a1 )
1270 {
1271 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1272 }
1273 else if( ( unicode_character >= 0x00c0 )
1274 && ( unicode_character < 0x0150 ) )
1275 {
1276 unicode_character -= 0x00c0;
1277
1278 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_10_unicode_to_byte_stream_base_0x00c0[ unicode_character ];
1279 }
1280 else if( ( unicode_character >= 0x0160 )
1281 && ( unicode_character < 0x0170 ) )
1282 {
1283 unicode_character -= 0x0160;
1284
1285 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_10_unicode_to_byte_stream_base_0x0160[ unicode_character ];
1286 }
1287 else switch( unicode_character )
1288 {
1289 case 0x00a7:
1290 byte_stream[ safe_byte_stream_index ] = 0xa7;
1291 break;
1292
1293 case 0x00ad:
1294 byte_stream[ safe_byte_stream_index ] = 0xad;
1295 break;
1296
1297 case 0x00b0:
1298 byte_stream[ safe_byte_stream_index ] = 0xb0;
1299 break;
1300
1301 case 0x00b7:
1302 byte_stream[ safe_byte_stream_index ] = 0xb7;
1303 break;
1304
1305 case 0x0172:
1306 byte_stream[ safe_byte_stream_index ] = 0xd9;
1307 break;
1308
1309 case 0x0173:
1310 byte_stream[ safe_byte_stream_index ] = 0xf9;
1311 break;
1312
1313 case 0x017d:
1314 byte_stream[ safe_byte_stream_index ] = 0xac;
1315 break;
1316
1317 case 0x017e:
1318 byte_stream[ safe_byte_stream_index ] = 0xbc;
1319 break;
1320
1321 case 0x2015:
1322 byte_stream[ safe_byte_stream_index ] = 0xbd;
1323 break;
1324
1325 default:
1326 byte_stream[ safe_byte_stream_index ] = 0x1a;
1327 break;
1328 }
1329 safe_byte_stream_index += 1;
1330
1331 break;
1332
1333 case LIBUNA_CODEPAGE_ISO_8859_11:
1334 if( unicode_character < 0x00a1 )
1335 {
1336 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1337 }
1338 else if( ( unicode_character >= 0x0e01 )
1339 && ( unicode_character < 0x0e3b ) )
1340 {
1341 byte_stream[ safe_byte_stream_index ] = (uint8_t) ( unicode_character - 0x0d60 );
1342 }
1343 else if( ( unicode_character >= 0x0e3f )
1344 && ( unicode_character < 0x0e5c ) )
1345 {
1346 byte_stream[ safe_byte_stream_index ] = (uint8_t) ( unicode_character - 0x0d60 );
1347 }
1348 else
1349 {
1350 byte_stream[ safe_byte_stream_index ] = 0x1a;
1351 }
1352 safe_byte_stream_index += 1;
1353
1354 break;
1355
1356 case LIBUNA_CODEPAGE_ISO_8859_13:
1357 if( unicode_character < 0x00a0 )
1358 {
1359 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1360 }
1361 else if( ( unicode_character >= 0x00a0 )
1362 && ( unicode_character < 0x0180 ) )
1363 {
1364 unicode_character -= 0x00a0;
1365
1366 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_13_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1367 }
1368 else if( ( unicode_character >= 0x2018 )
1369 && ( unicode_character < 0x2020 ) )
1370 {
1371 unicode_character -= 0x2018;
1372
1373 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_13_unicode_to_byte_stream_base_0x2018[ unicode_character ];
1374 }
1375 else
1376 {
1377 byte_stream[ safe_byte_stream_index ] = 0x1a;
1378 }
1379 safe_byte_stream_index += 1;
1380
1381 break;
1382
1383 case LIBUNA_CODEPAGE_ISO_8859_14:
1384 if( unicode_character < 0x00a1 )
1385 {
1386 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1387 }
1388 else if( ( unicode_character >= 0x00c0 )
1389 && ( unicode_character < 0x0100 ) )
1390 {
1391 unicode_character -= 0x00c0;
1392
1393 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x00c0[ unicode_character ];
1394 }
1395 else if( ( unicode_character >= 0x0170 )
1396 && ( unicode_character < 0x0178 ) )
1397 {
1398 unicode_character -= 0x0170;
1399
1400 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x0170[ unicode_character ];
1401 }
1402 else if( ( unicode_character >= 0x1e80 )
1403 && ( unicode_character < 0x1e88 ) )
1404 {
1405 unicode_character -= 0x1e80;
1406
1407 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x1e80[ unicode_character ];
1408 }
1409 else switch( unicode_character )
1410 {
1411 case 0x00a3:
1412 byte_stream[ safe_byte_stream_index ] = 0xa3;
1413 break;
1414
1415 case 0x00a7:
1416 byte_stream[ safe_byte_stream_index ] = 0xa7;
1417 break;
1418
1419 case 0x00a9:
1420 byte_stream[ safe_byte_stream_index ] = 0xa9;
1421 break;
1422
1423 case 0x00ad:
1424 byte_stream[ safe_byte_stream_index ] = 0xad;
1425 break;
1426
1427 case 0x00ae:
1428 byte_stream[ safe_byte_stream_index ] = 0xae;
1429 break;
1430
1431 case 0x00b6:
1432 byte_stream[ safe_byte_stream_index ] = 0xb6;
1433 break;
1434
1435 case 0x010a:
1436 byte_stream[ safe_byte_stream_index ] = 0xa4;
1437 break;
1438
1439 case 0x010b:
1440 byte_stream[ safe_byte_stream_index ] = 0xa5;
1441 break;
1442
1443 case 0x0120:
1444 byte_stream[ safe_byte_stream_index ] = 0xb2;
1445 break;
1446
1447 case 0x0121:
1448 byte_stream[ safe_byte_stream_index ] = 0xb3;
1449 break;
1450
1451 case 0x0178:
1452 byte_stream[ safe_byte_stream_index ] = 0xaf;
1453 break;
1454
1455 case 0x1e02:
1456 byte_stream[ safe_byte_stream_index ] = 0xa1;
1457 break;
1458
1459 case 0x1e03:
1460 byte_stream[ safe_byte_stream_index ] = 0xa2;
1461 break;
1462
1463 case 0x1e0a:
1464 byte_stream[ safe_byte_stream_index ] = 0xa6;
1465 break;
1466
1467 case 0x1e0b:
1468 byte_stream[ safe_byte_stream_index ] = 0xab;
1469 break;
1470
1471 case 0x1e1e:
1472 byte_stream[ safe_byte_stream_index ] = 0xb0;
1473 break;
1474
1475 case 0x1e1f:
1476 byte_stream[ safe_byte_stream_index ] = 0xb1;
1477 break;
1478
1479 case 0x1e40:
1480 byte_stream[ safe_byte_stream_index ] = 0xb4;
1481 break;
1482
1483 case 0x1e41:
1484 byte_stream[ safe_byte_stream_index ] = 0xb5;
1485 break;
1486
1487 case 0x1e56:
1488 byte_stream[ safe_byte_stream_index ] = 0xb7;
1489 break;
1490
1491 case 0x1e57:
1492 byte_stream[ safe_byte_stream_index ] = 0xb9;
1493 break;
1494
1495 case 0x1e60:
1496 byte_stream[ safe_byte_stream_index ] = 0xbb;
1497 break;
1498
1499 case 0x1e61:
1500 byte_stream[ safe_byte_stream_index ] = 0xbf;
1501 break;
1502
1503 case 0x1e6a:
1504 byte_stream[ safe_byte_stream_index ] = 0xd7;
1505 break;
1506
1507 case 0x1e6b:
1508 byte_stream[ safe_byte_stream_index ] = 0xf7;
1509 break;
1510
1511 case 0x1ef2:
1512 byte_stream[ safe_byte_stream_index ] = 0xac;
1513 break;
1514
1515 case 0x1ef3:
1516 byte_stream[ safe_byte_stream_index ] = 0xbc;
1517 break;
1518
1519 default:
1520 byte_stream[ safe_byte_stream_index ] = 0x1a;
1521 break;
1522 }
1523 safe_byte_stream_index += 1;
1524
1525 break;
1526
1527 case LIBUNA_CODEPAGE_ISO_8859_15:
1528 if( unicode_character < 0x00a0 )
1529 {
1530 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1531 }
1532 else if( ( unicode_character >= 0x00a0 )
1533 && ( unicode_character < 0x00c0 ) )
1534 {
1535 unicode_character -= 0x00a0;
1536
1537 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_15_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1538 }
1539 else if( unicode_character < 0x0100 )
1540 {
1541 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1542 }
1543 else switch( unicode_character )
1544 {
1545 case 0x0152:
1546 byte_stream[ safe_byte_stream_index ] = 0xbc;
1547 break;
1548
1549 case 0x0153:
1550 byte_stream[ safe_byte_stream_index ] = 0xbd;
1551 break;
1552
1553 case 0x0160:
1554 byte_stream[ safe_byte_stream_index ] = 0xa6;
1555 break;
1556
1557 case 0x0161:
1558 byte_stream[ safe_byte_stream_index ] = 0xa8;
1559 break;
1560
1561 case 0x0178:
1562 byte_stream[ safe_byte_stream_index ] = 0xbe;
1563 break;
1564
1565 case 0x017d:
1566 byte_stream[ safe_byte_stream_index ] = 0xb4;
1567 break;
1568
1569 case 0x017e:
1570 byte_stream[ safe_byte_stream_index ] = 0xb8;
1571 break;
1572
1573 case 0x20ac:
1574 byte_stream[ safe_byte_stream_index ] = 0xa4;
1575 break;
1576
1577 default:
1578 byte_stream[ safe_byte_stream_index ] = 0x1a;
1579 break;
1580 }
1581 safe_byte_stream_index += 1;
1582
1583 break;
1584
1585 case LIBUNA_CODEPAGE_ISO_8859_16:
1586 if( unicode_character < 0x00a1 )
1587 {
1588 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1589 }
1590 else if( ( unicode_character >= 0x00a8 )
1591 && ( unicode_character < 0x0108 ) )
1592 {
1593 unicode_character -= 0x00a8;
1594
1595 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x00a8[ unicode_character ];
1596 }
1597 else if( ( unicode_character >= 0x0140 )
1598 && ( unicode_character < 0x0148 ) )
1599 {
1600 unicode_character -= 0x0140;
1601
1602 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0140[ unicode_character ];
1603 }
1604 else if( ( unicode_character >= 0x0150 )
1605 && ( unicode_character < 0x0158 ) )
1606 {
1607 unicode_character -= 0x0150;
1608
1609 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0150[ unicode_character ];
1610 }
1611 else if( ( unicode_character >= 0x0178 )
1612 && ( unicode_character < 0x0180 ) )
1613 {
1614 unicode_character -= 0x0178;
1615
1616 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0178[ unicode_character ];
1617 }
1618 else if( ( unicode_character >= 0x0218 )
1619 && ( unicode_character < 0x0220 ) )
1620 {
1621 unicode_character -= 0x0218;
1622
1623 byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0218[ unicode_character ];
1624 }
1625 else switch( unicode_character )
1626 {
1627 case 0x00a7:
1628 byte_stream[ safe_byte_stream_index ] = 0xa7;
1629 break;
1630
1631 case 0x010c:
1632 byte_stream[ safe_byte_stream_index ] = 0xb2;
1633 break;
1634
1635 case 0x010d:
1636 byte_stream[ safe_byte_stream_index ] = 0xb9;
1637 break;
1638
1639 case 0x0110:
1640 byte_stream[ safe_byte_stream_index ] = 0xd0;
1641 break;
1642
1643 case 0x0111:
1644 byte_stream[ safe_byte_stream_index ] = 0xf0;
1645 break;
1646
1647 case 0x0118:
1648 byte_stream[ safe_byte_stream_index ] = 0xdd;
1649 break;
1650
1651 case 0x0119:
1652 byte_stream[ safe_byte_stream_index ] = 0xfd;
1653 break;
1654
1655 case 0x015a:
1656 byte_stream[ safe_byte_stream_index ] = 0xd7;
1657 break;
1658
1659 case 0x015b:
1660 byte_stream[ safe_byte_stream_index ] = 0xf7;
1661 break;
1662
1663 case 0x0160:
1664 byte_stream[ safe_byte_stream_index ] = 0xa6;
1665 break;
1666
1667 case 0x0161:
1668 byte_stream[ safe_byte_stream_index ] = 0xa8;
1669 break;
1670
1671 case 0x0170:
1672 byte_stream[ safe_byte_stream_index ] = 0xd8;
1673 break;
1674
1675 case 0x0171:
1676 byte_stream[ safe_byte_stream_index ] = 0xf8;
1677 break;
1678
1679 case 0x201d:
1680 byte_stream[ safe_byte_stream_index ] = 0xb5;
1681 break;
1682
1683 case 0x201e:
1684 byte_stream[ safe_byte_stream_index ] = 0xa5;
1685 break;
1686
1687 case 0x20ac:
1688 byte_stream[ safe_byte_stream_index ] = 0xa4;
1689 break;
1690
1691 default:
1692 byte_stream[ safe_byte_stream_index ] = 0x1a;
1693 break;
1694 }
1695 safe_byte_stream_index += 1;
1696
1697 break;
1698
1699 case LIBUNA_CODEPAGE_KOI8_R:
1700 if( unicode_character < 0x0080 )
1701 {
1702 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1703 }
1704 else if( ( unicode_character >= 0x0410 )
1705 && ( unicode_character < 0x0450 ) )
1706 {
1707 unicode_character -= 0x0410;
1708
1709 byte_stream[ safe_byte_stream_index ] = libuna_codepage_koi8_r_unicode_to_byte_stream_base_0x0410[ unicode_character ];
1710 }
1711 else if( ( unicode_character >= 0x2550 )
1712 && ( unicode_character < 0x2570 ) )
1713 {
1714 unicode_character -= 0x2550;
1715
1716 byte_stream[ safe_byte_stream_index ] = libuna_codepage_koi8_r_unicode_to_byte_stream_base_0x2550[ unicode_character ];
1717 }
1718 else switch( unicode_character )
1719 {
1720 case 0x00a0:
1721 byte_stream[ safe_byte_stream_index ] = 0x9a;
1722 break;
1723
1724 case 0x00a9:
1725 byte_stream[ safe_byte_stream_index ] = 0xbf;
1726 break;
1727
1728 case 0x00b0:
1729 byte_stream[ safe_byte_stream_index ] = 0x9c;
1730 break;
1731
1732 case 0x00b2:
1733 byte_stream[ safe_byte_stream_index ] = 0x9d;
1734 break;
1735
1736 case 0x00b7:
1737 byte_stream[ safe_byte_stream_index ] = 0x9e;
1738 break;
1739
1740 case 0x00f7:
1741 byte_stream[ safe_byte_stream_index ] = 0x9f;
1742 break;
1743
1744 case 0x0401:
1745 byte_stream[ safe_byte_stream_index ] = 0xb3;
1746 break;
1747
1748 case 0x0451:
1749 byte_stream[ safe_byte_stream_index ] = 0xa3;
1750 break;
1751
1752 case 0x2219:
1753 byte_stream[ safe_byte_stream_index ] = 0x95;
1754 break;
1755
1756 case 0x221a:
1757 byte_stream[ safe_byte_stream_index ] = 0x96;
1758 break;
1759
1760 case 0x2248:
1761 byte_stream[ safe_byte_stream_index ] = 0x97;
1762 break;
1763
1764 case 0x2264:
1765 byte_stream[ safe_byte_stream_index ] = 0x98;
1766 break;
1767
1768 case 0x2265:
1769 byte_stream[ safe_byte_stream_index ] = 0x99;
1770 break;
1771
1772 case 0x2320:
1773 byte_stream[ safe_byte_stream_index ] = 0x93;
1774 break;
1775
1776 case 0x2321:
1777 byte_stream[ safe_byte_stream_index ] = 0x9b;
1778 break;
1779
1780 case 0x2500:
1781 byte_stream[ safe_byte_stream_index ] = 0x80;
1782 break;
1783
1784 case 0x2502:
1785 byte_stream[ safe_byte_stream_index ] = 0x81;
1786 break;
1787
1788 case 0x250c:
1789 byte_stream[ safe_byte_stream_index ] = 0x82;
1790 break;
1791
1792 case 0x2510:
1793 byte_stream[ safe_byte_stream_index ] = 0x83;
1794 break;
1795
1796 case 0x2514:
1797 byte_stream[ safe_byte_stream_index ] = 0x84;
1798 break;
1799
1800 case 0x2518:
1801 byte_stream[ safe_byte_stream_index ] = 0x85;
1802 break;
1803
1804 case 0x251c:
1805 byte_stream[ safe_byte_stream_index ] = 0x86;
1806 break;
1807
1808 case 0x2524:
1809 byte_stream[ safe_byte_stream_index ] = 0x87;
1810 break;
1811
1812 case 0x252c:
1813 byte_stream[ safe_byte_stream_index ] = 0x88;
1814 break;
1815
1816 case 0x2534:
1817 byte_stream[ safe_byte_stream_index ] = 0x89;
1818 break;
1819
1820 case 0x253c:
1821 byte_stream[ safe_byte_stream_index ] = 0x8a;
1822 break;
1823
1824 case 0x2580:
1825 byte_stream[ safe_byte_stream_index ] = 0x8b;
1826 break;
1827
1828 case 0x2584:
1829 byte_stream[ safe_byte_stream_index ] = 0x8c;
1830 break;
1831
1832 case 0x2588:
1833 byte_stream[ safe_byte_stream_index ] = 0x8d;
1834 break;
1835
1836 case 0x258c:
1837 byte_stream[ safe_byte_stream_index ] = 0x8e;
1838 break;
1839
1840 case 0x2590:
1841 byte_stream[ safe_byte_stream_index ] = 0x8f;
1842 break;
1843
1844 case 0x2591:
1845 byte_stream[ safe_byte_stream_index ] = 0x90;
1846 break;
1847
1848 case 0x2592:
1849 byte_stream[ safe_byte_stream_index ] = 0x91;
1850 break;
1851
1852 case 0x2593:
1853 byte_stream[ safe_byte_stream_index ] = 0x92;
1854 break;
1855
1856 case 0x25a0:
1857 byte_stream[ safe_byte_stream_index ] = 0x94;
1858 break;
1859
1860 default:
1861 byte_stream[ safe_byte_stream_index ] = 0x1a;
1862 break;
1863 }
1864 safe_byte_stream_index += 1;
1865
1866 break;
1867
1868 case LIBUNA_CODEPAGE_KOI8_U:
1869 if( unicode_character < 0x0080 )
1870 {
1871 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1872 }
1873 else if( ( unicode_character >= 0x0410 )
1874 && ( unicode_character < 0x0450 ) )
1875 {
1876 unicode_character -= 0x0410;
1877
1878 byte_stream[ safe_byte_stream_index ] = libuna_codepage_koi8_u_unicode_to_byte_stream_base_0x0410[ unicode_character ];
1879 }
1880 else if( ( unicode_character >= 0x2550 )
1881 && ( unicode_character < 0x2570 ) )
1882 {
1883 unicode_character -= 0x2550;
1884
1885 byte_stream[ safe_byte_stream_index ] = libuna_codepage_koi8_u_unicode_to_byte_stream_base_0x2550[ unicode_character ];
1886 }
1887 else switch( unicode_character )
1888 {
1889 case 0x00a0:
1890 byte_stream[ safe_byte_stream_index ] = 0x9a;
1891 break;
1892
1893 case 0x00a9:
1894 byte_stream[ safe_byte_stream_index ] = 0xbf;
1895 break;
1896
1897 case 0x00b0:
1898 byte_stream[ safe_byte_stream_index ] = 0x9c;
1899 break;
1900
1901 case 0x00b2:
1902 byte_stream[ safe_byte_stream_index ] = 0x9d;
1903 break;
1904
1905 case 0x00b7:
1906 byte_stream[ safe_byte_stream_index ] = 0x9e;
1907 break;
1908
1909 case 0x00f7:
1910 byte_stream[ safe_byte_stream_index ] = 0x9f;
1911 break;
1912
1913 case 0x0401:
1914 byte_stream[ safe_byte_stream_index ] = 0xb3;
1915 break;
1916
1917 case 0x0404:
1918 byte_stream[ safe_byte_stream_index ] = 0xb4;
1919 break;
1920
1921 case 0x0406:
1922 byte_stream[ safe_byte_stream_index ] = 0xb6;
1923 break;
1924
1925 case 0x0407:
1926 byte_stream[ safe_byte_stream_index ] = 0xb7;
1927 break;
1928
1929 case 0x0451:
1930 byte_stream[ safe_byte_stream_index ] = 0xa3;
1931 break;
1932
1933 case 0x0454:
1934 byte_stream[ safe_byte_stream_index ] = 0xa4;
1935 break;
1936
1937 case 0x0456:
1938 byte_stream[ safe_byte_stream_index ] = 0xa6;
1939 break;
1940
1941 case 0x0457:
1942 byte_stream[ safe_byte_stream_index ] = 0xa7;
1943 break;
1944
1945 case 0x0490:
1946 byte_stream[ safe_byte_stream_index ] = 0xbd;
1947 break;
1948
1949 case 0x0491:
1950 byte_stream[ safe_byte_stream_index ] = 0xad;
1951 break;
1952
1953 case 0x2219:
1954 byte_stream[ safe_byte_stream_index ] = 0x95;
1955 break;
1956
1957 case 0x221a:
1958 byte_stream[ safe_byte_stream_index ] = 0x96;
1959 break;
1960
1961 case 0x2248:
1962 byte_stream[ safe_byte_stream_index ] = 0x97;
1963 break;
1964
1965 case 0x2264:
1966 byte_stream[ safe_byte_stream_index ] = 0x98;
1967 break;
1968
1969 case 0x2265:
1970 byte_stream[ safe_byte_stream_index ] = 0x99;
1971 break;
1972
1973 case 0x2320:
1974 byte_stream[ safe_byte_stream_index ] = 0x93;
1975 break;
1976
1977 case 0x2321:
1978 byte_stream[ safe_byte_stream_index ] = 0x9b;
1979 break;
1980
1981 case 0x2500:
1982 byte_stream[ safe_byte_stream_index ] = 0x80;
1983 break;
1984
1985 case 0x2502:
1986 byte_stream[ safe_byte_stream_index ] = 0x81;
1987 break;
1988
1989 case 0x250c:
1990 byte_stream[ safe_byte_stream_index ] = 0x82;
1991 break;
1992
1993 case 0x2510:
1994 byte_stream[ safe_byte_stream_index ] = 0x83;
1995 break;
1996
1997 case 0x2514:
1998 byte_stream[ safe_byte_stream_index ] = 0x84;
1999 break;
2000
2001 case 0x2518:
2002 byte_stream[ safe_byte_stream_index ] = 0x85;
2003 break;
2004
2005 case 0x251c:
2006 byte_stream[ safe_byte_stream_index ] = 0x86;
2007 break;
2008
2009 case 0x2524:
2010 byte_stream[ safe_byte_stream_index ] = 0x87;
2011 break;
2012
2013 case 0x252c:
2014 byte_stream[ safe_byte_stream_index ] = 0x88;
2015 break;
2016
2017 case 0x2534:
2018 byte_stream[ safe_byte_stream_index ] = 0x89;
2019 break;
2020
2021 case 0x253c:
2022 byte_stream[ safe_byte_stream_index ] = 0x8a;
2023 break;
2024
2025 case 0x2580:
2026 byte_stream[ safe_byte_stream_index ] = 0x8b;
2027 break;
2028
2029 case 0x2584:
2030 byte_stream[ safe_byte_stream_index ] = 0x8c;
2031 break;
2032
2033 case 0x2588:
2034 byte_stream[ safe_byte_stream_index ] = 0x8d;
2035 break;
2036
2037 case 0x258c:
2038 byte_stream[ safe_byte_stream_index ] = 0x8e;
2039 break;
2040
2041 case 0x2590:
2042 byte_stream[ safe_byte_stream_index ] = 0x8f;
2043 break;
2044
2045 case 0x2591:
2046 byte_stream[ safe_byte_stream_index ] = 0x90;
2047 break;
2048
2049 case 0x2592:
2050 byte_stream[ safe_byte_stream_index ] = 0x91;
2051 break;
2052
2053 case 0x2593:
2054 byte_stream[ safe_byte_stream_index ] = 0x92;
2055 break;
2056
2057 case 0x25a0:
2058 byte_stream[ safe_byte_stream_index ] = 0x94;
2059 break;
2060
2061 default:
2062 byte_stream[ safe_byte_stream_index ] = 0x1a;
2063 break;
2064 }
2065 safe_byte_stream_index += 1;
2066
2067 break;
2068
2069 case LIBUNA_CODEPAGE_WINDOWS_874:
2070 if( ( unicode_character < 0x0080 )
2071 || ( unicode_character == 0x00a0 ) )
2072 {
2073 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
2074 }
2075 else if( ( unicode_character >= 0x0e00 )
2076 && ( unicode_character < 0x0e60 ) )
2077 {
2078 unicode_character -= 0x0e00;
2079
2080 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_874_unicode_to_byte_stream_base_0x0e00[ unicode_character ];
2081 }
2082 else if( ( unicode_character >= 0x2018 )
2083 && ( unicode_character < 0x2020 ) )
2084 {
2085 unicode_character -= 0x2018;
2086
2087 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_874_unicode_to_byte_stream_base_0x2018[ unicode_character ];
2088 }
2089 else switch( unicode_character )
2090 {
2091 case 0x2013:
2092 byte_stream[ safe_byte_stream_index ] = 0x96;
2093 break;
2094
2095 case 0x2014:
2096 byte_stream[ safe_byte_stream_index ] = 0x97;
2097 break;
2098
2099 case 0x2022:
2100 byte_stream[ safe_byte_stream_index ] = 0x95;
2101 break;
2102
2103 case 0x2026:
2104 byte_stream[ safe_byte_stream_index ] = 0x85;
2105 break;
2106
2107 case 0x20ac:
2108 byte_stream[ safe_byte_stream_index ] = 0x80;
2109 break;
2110
2111 default:
2112 byte_stream[ safe_byte_stream_index ] = 0x1a;
2113 break;
2114 }
2115 safe_byte_stream_index += 1;
2116
2117 break;
2118
2119 case LIBUNA_CODEPAGE_WINDOWS_932:
2120 result = libuna_codepage_windows_932_copy_to_byte_stream(
2121 unicode_character,
2122 byte_stream,
2123 byte_stream_size,
2124 &safe_byte_stream_index,
2125 error );
2126 break;
2127
2128 case LIBUNA_CODEPAGE_WINDOWS_936:
2129 result = libuna_codepage_windows_936_copy_to_byte_stream(
2130 unicode_character,
2131 byte_stream,
2132 byte_stream_size,
2133 &safe_byte_stream_index,
2134 error );
2135 break;
2136
2137 case LIBUNA_CODEPAGE_WINDOWS_949:
2138 result = libuna_codepage_windows_949_copy_to_byte_stream(
2139 unicode_character,
2140 byte_stream,
2141 byte_stream_size,
2142 &safe_byte_stream_index,
2143 error );
2144 break;
2145
2146 case LIBUNA_CODEPAGE_WINDOWS_950:
2147 result = libuna_codepage_windows_950_copy_to_byte_stream(
2148 unicode_character,
2149 byte_stream,
2150 byte_stream_size,
2151 &safe_byte_stream_index,
2152 error );
2153 break;
2154
2155 case LIBUNA_CODEPAGE_WINDOWS_1250:
2156 if( unicode_character < 0x0080 )
2157 {
2158 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
2159 }
2160 else if( ( unicode_character >= 0x00a0 )
2161 && ( unicode_character < 0x0120 ) )
2162 {
2163 unicode_character -= 0x00a0;
2164
2165 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1250_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
2166 }
2167 else if( ( unicode_character >= 0x0138 )
2168 && ( unicode_character < 0x0180 ) )
2169 {
2170 unicode_character -= 0x0138;
2171
2172 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1250_unicode_to_byte_stream_base_0x0138[ unicode_character ];
2173 }
2174 else if( ( unicode_character >= 0x02d8 )
2175 && ( unicode_character < 0x02e0 ) )
2176 {
2177 unicode_character -= 0x02d8;
2178
2179 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1250_unicode_to_byte_stream_base_0x02d8[ unicode_character ];
2180 }
2181 else if( ( unicode_character >= 0x2010 )
2182 && ( unicode_character < 0x2028 ) )
2183 {
2184 unicode_character -= 0x2010;
2185
2186 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1250_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2187 }
2188 else if( ( unicode_character >= 0x2030 )
2189 && ( unicode_character < 0x2040 ) )
2190 {
2191 unicode_character -= 0x2030;
2192
2193 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1250_unicode_to_byte_stream_base_0x2030[ unicode_character ];
2194 }
2195 else switch( unicode_character )
2196 {
2197 case 0x02c7:
2198 byte_stream[ safe_byte_stream_index ] = 0xa1;
2199 break;
2200
2201 case 0x20ac:
2202 byte_stream[ safe_byte_stream_index ] = 0x80;
2203 break;
2204
2205 case 0x2122:
2206 byte_stream[ safe_byte_stream_index ] = 0x99;
2207 break;
2208
2209 default:
2210 byte_stream[ safe_byte_stream_index ] = 0x1a;
2211 break;
2212 }
2213 safe_byte_stream_index += 1;
2214
2215 break;
2216
2217 case LIBUNA_CODEPAGE_WINDOWS_1251:
2218 if( unicode_character < 0x0080 )
2219 {
2220 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
2221 }
2222 else if( ( unicode_character >= 0x00a0 )
2223 && ( unicode_character < 0x00c0 ) )
2224 {
2225 unicode_character -= 0x00a0;
2226
2227 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1251_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
2228 }
2229 else if( ( unicode_character >= 0x0400 )
2230 && ( unicode_character < 0x0460 ) )
2231 {
2232 unicode_character -= 0x0400;
2233
2234 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1251_unicode_to_byte_stream_base_0x0400[ unicode_character ];
2235 }
2236 else if( ( unicode_character >= 0x2010 )
2237 && ( unicode_character < 0x2028 ) )
2238 {
2239 unicode_character -= 0x2010;
2240
2241 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1251_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2242 }
2243 else switch( unicode_character )
2244 {
2245 case 0x0490:
2246 byte_stream[ safe_byte_stream_index ] = 0xa5;
2247 break;
2248
2249 case 0x0491:
2250 byte_stream[ safe_byte_stream_index ] = 0xb4;
2251 break;
2252
2253 case 0x2030:
2254 byte_stream[ safe_byte_stream_index ] = 0x89;
2255 break;
2256
2257 case 0x2039:
2258 byte_stream[ safe_byte_stream_index ] = 0x8b;
2259 break;
2260
2261 case 0x203a:
2262 byte_stream[ safe_byte_stream_index ] = 0x9b;
2263 break;
2264
2265 case 0x20ac:
2266 byte_stream[ safe_byte_stream_index ] = 0x88;
2267 break;
2268
2269 case 0x2116:
2270 byte_stream[ safe_byte_stream_index ] = 0xb9;
2271 break;
2272
2273 case 0x2122:
2274 byte_stream[ safe_byte_stream_index ] = 0x99;
2275 break;
2276
2277 default:
2278 byte_stream[ safe_byte_stream_index ] = 0x1a;
2279 break;
2280 }
2281 safe_byte_stream_index += 1;
2282
2283 break;
2284
2285 case LIBUNA_CODEPAGE_WINDOWS_1252:
2286 if( unicode_character < 0x0080 )
2287 {
2288 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
2289 }
2290 else if( ( unicode_character >= 0x00a0 )
2291 && ( unicode_character < 0x0100 ) )
2292 {
2293 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
2294 }
2295 else if( ( unicode_character >= 0x2010 )
2296 && ( unicode_character < 0x2028 ) )
2297 {
2298 unicode_character -= 0x2010;
2299
2300 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1252_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2301 }
2302 else switch( unicode_character )
2303 {
2304 case 0x0152:
2305 byte_stream[ safe_byte_stream_index ] = 0x8c;
2306 break;
2307
2308 case 0x0153:
2309 byte_stream[ safe_byte_stream_index ] = 0x9c;
2310 break;
2311
2312 case 0x0160:
2313 byte_stream[ safe_byte_stream_index ] = 0x8a;
2314 break;
2315
2316 case 0x0161:
2317 byte_stream[ safe_byte_stream_index ] = 0x9a;
2318 break;
2319
2320 case 0x0178:
2321 byte_stream[ safe_byte_stream_index ] = 0x9f;
2322 break;
2323
2324 case 0x017d:
2325 byte_stream[ safe_byte_stream_index ] = 0x8e;
2326 break;
2327
2328 case 0x017e:
2329 byte_stream[ safe_byte_stream_index ] = 0x9e;
2330 break;
2331
2332 case 0x0192:
2333 byte_stream[ safe_byte_stream_index ] = 0x83;
2334 break;
2335
2336 case 0x02c6:
2337 byte_stream[ safe_byte_stream_index ] = 0x88;
2338 break;
2339
2340 case 0x02dc:
2341 byte_stream[ safe_byte_stream_index ] = 0x98;
2342 break;
2343
2344 case 0x2030:
2345 byte_stream[ safe_byte_stream_index ] = 0x89;
2346 break;
2347
2348 case 0x2039:
2349 byte_stream[ safe_byte_stream_index ] = 0x8b;
2350 break;
2351
2352 case 0x203a:
2353 byte_stream[ safe_byte_stream_index ] = 0x9b;
2354 break;
2355
2356 case 0x20ac:
2357 byte_stream[ safe_byte_stream_index ] = 0x80;
2358 break;
2359
2360 case 0x2122:
2361 byte_stream[ safe_byte_stream_index ] = 0x99;
2362 break;
2363
2364 default:
2365 byte_stream[ safe_byte_stream_index ] = 0x1a;
2366 break;
2367 }
2368 safe_byte_stream_index += 1;
2369
2370 break;
2371
2372 case LIBUNA_CODEPAGE_WINDOWS_1253:
2373 if( unicode_character < 0x0080 )
2374 {
2375 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
2376 }
2377 else if( ( unicode_character >= 0x00a0 )
2378 && ( unicode_character < 0x00c0 ) )
2379 {
2380 unicode_character -= 0x00a0;
2381
2382 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1253_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
2383 }
2384 else if( ( unicode_character >= 0x0380 )
2385 && ( unicode_character < 0x03d0 ) )
2386 {
2387 unicode_character -= 0x0380;
2388
2389 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1253_unicode_to_byte_stream_base_0x0380[ unicode_character ];
2390 }
2391 else if( ( unicode_character >= 0x2010 )
2392 && ( unicode_character < 0x2028 ) )
2393 {
2394 unicode_character -= 0x2010;
2395
2396 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1253_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2397 }
2398 else switch( unicode_character )
2399 {
2400 case 0x0192:
2401 byte_stream[ safe_byte_stream_index ] = 0x83;
2402 break;
2403
2404 case 0x2030:
2405 byte_stream[ safe_byte_stream_index ] = 0x89;
2406 break;
2407
2408 case 0x2039:
2409 byte_stream[ safe_byte_stream_index ] = 0x8b;
2410 break;
2411
2412 case 0x203a:
2413 byte_stream[ safe_byte_stream_index ] = 0x9b;
2414 break;
2415
2416 case 0x20ac:
2417 byte_stream[ safe_byte_stream_index ] = 0x80;
2418 break;
2419
2420 case 0x2122:
2421 byte_stream[ safe_byte_stream_index ] = 0x99;
2422 break;
2423
2424 default:
2425 byte_stream[ safe_byte_stream_index ] = 0x1a;
2426 break;
2427 }
2428 safe_byte_stream_index += 1;
2429
2430 break;
2431
2432 case LIBUNA_CODEPAGE_WINDOWS_1254:
2433 if( unicode_character < 0x0080 )
2434 {
2435 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
2436 }
2437 else if( ( unicode_character >= 0x00a0 )
2438 && ( unicode_character < 0x00d0 ) )
2439 {
2440 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
2441 }
2442 else if( ( unicode_character >= 0x00d0 )
2443 && ( unicode_character < 0x0100 ) )
2444 {
2445 unicode_character -= 0x00d0;
2446
2447 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1254_unicode_to_byte_stream_base_0x00d0[ unicode_character ];
2448 }
2449 else if( ( unicode_character >= 0x2010 )
2450 && ( unicode_character < 0x2028 ) )
2451 {
2452 unicode_character -= 0x2010;
2453
2454 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1254_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2455 }
2456 else switch( unicode_character )
2457 {
2458 case 0x011e:
2459 byte_stream[ safe_byte_stream_index ] = 0xd0;
2460 break;
2461
2462 case 0x011f:
2463 byte_stream[ safe_byte_stream_index ] = 0xf0;
2464 break;
2465
2466 case 0x0130:
2467 byte_stream[ safe_byte_stream_index ] = 0xdd;
2468 break;
2469
2470 case 0x0131:
2471 byte_stream[ safe_byte_stream_index ] = 0xfd;
2472 break;
2473
2474 case 0x0152:
2475 byte_stream[ safe_byte_stream_index ] = 0x8c;
2476 break;
2477
2478 case 0x0153:
2479 byte_stream[ safe_byte_stream_index ] = 0x9c;
2480 break;
2481
2482 case 0x015e:
2483 byte_stream[ safe_byte_stream_index ] = 0xde;
2484 break;
2485
2486 case 0x015f:
2487 byte_stream[ safe_byte_stream_index ] = 0xfe;
2488 break;
2489
2490 case 0x0160:
2491 byte_stream[ safe_byte_stream_index ] = 0x8a;
2492 break;
2493
2494 case 0x0161:
2495 byte_stream[ safe_byte_stream_index ] = 0x9a;
2496 break;
2497
2498 case 0x0178:
2499 byte_stream[ safe_byte_stream_index ] = 0x9f;
2500 break;
2501
2502 case 0x0192:
2503 byte_stream[ safe_byte_stream_index ] = 0x83;
2504 break;
2505
2506 case 0x02c6:
2507 byte_stream[ safe_byte_stream_index ] = 0x88;
2508 break;
2509
2510 case 0x02dc:
2511 byte_stream[ safe_byte_stream_index ] = 0x98;
2512 break;
2513
2514 case 0x2030:
2515 byte_stream[ safe_byte_stream_index ] = 0x89;
2516 break;
2517
2518 case 0x2039:
2519 byte_stream[ safe_byte_stream_index ] = 0x8b;
2520 break;
2521
2522 case 0x203a:
2523 byte_stream[ safe_byte_stream_index ] = 0x9b;
2524 break;
2525
2526 case 0x20ac:
2527 byte_stream[ safe_byte_stream_index ] = 0x80;
2528 break;
2529
2530 case 0x2122:
2531 byte_stream[ safe_byte_stream_index ] = 0x99;
2532 break;
2533
2534 default:
2535 byte_stream[ safe_byte_stream_index ] = 0x1a;
2536 break;
2537 }
2538 safe_byte_stream_index += 1;
2539
2540 break;
2541
2542 case LIBUNA_CODEPAGE_WINDOWS_1255:
2543 if( unicode_character < 0x0080 )
2544 {
2545 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
2546 }
2547 else if( ( unicode_character >= 0x00a0 )
2548 && ( unicode_character < 0x00c0 ) )
2549 {
2550 unicode_character -= 0x00a0;
2551
2552 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1255_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
2553 }
2554 else if( ( unicode_character >= 0x05b0 )
2555 && ( unicode_character < 0x05c8 ) )
2556 {
2557 unicode_character -= 0x05b0;
2558
2559 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1255_unicode_to_byte_stream_base_0x05b0[ unicode_character ];
2560 }
2561 else if( ( unicode_character >= 0x05d0 )
2562 && ( unicode_character < 0x05f8 ) )
2563 {
2564 unicode_character -= 0x05d0;
2565
2566 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1255_unicode_to_byte_stream_base_0x05d0[ unicode_character ];
2567 }
2568 else if( ( unicode_character >= 0x2010 )
2569 && ( unicode_character < 0x2028 ) )
2570 {
2571 unicode_character -= 0x2010;
2572
2573 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1255_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2574 }
2575 else switch( unicode_character )
2576 {
2577 case 0x00d7:
2578 byte_stream[ safe_byte_stream_index ] = 0xaa;
2579 break;
2580
2581 case 0x00f7:
2582 byte_stream[ safe_byte_stream_index ] = 0xba;
2583 break;
2584
2585 case 0x0192:
2586 byte_stream[ safe_byte_stream_index ] = 0x83;
2587 break;
2588
2589 case 0x02c6:
2590 byte_stream[ safe_byte_stream_index ] = 0x88;
2591 break;
2592
2593 case 0x02dc:
2594 byte_stream[ safe_byte_stream_index ] = 0x98;
2595 break;
2596
2597 case 0x200e:
2598 byte_stream[ safe_byte_stream_index ] = 0xfd;
2599 break;
2600
2601 case 0x200f:
2602 byte_stream[ safe_byte_stream_index ] = 0xfe;
2603 break;
2604
2605 case 0x2030:
2606 byte_stream[ safe_byte_stream_index ] = 0x89;
2607 break;
2608
2609 case 0x2039:
2610 byte_stream[ safe_byte_stream_index ] = 0x8b;
2611 break;
2612
2613 case 0x203a:
2614 byte_stream[ safe_byte_stream_index ] = 0x9b;
2615 break;
2616
2617 case 0x20aa:
2618 byte_stream[ safe_byte_stream_index ] = 0xa4;
2619 break;
2620
2621 case 0x20ac:
2622 byte_stream[ safe_byte_stream_index ] = 0x80;
2623 break;
2624
2625 case 0x2122:
2626 byte_stream[ safe_byte_stream_index ] = 0x99;
2627 break;
2628
2629 default:
2630 byte_stream[ safe_byte_stream_index ] = 0x1a;
2631 break;
2632 }
2633 safe_byte_stream_index += 1;
2634
2635 break;
2636
2637 case LIBUNA_CODEPAGE_WINDOWS_1256:
2638 if( unicode_character < 0x0080 )
2639 {
2640 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
2641 }
2642 else if( ( unicode_character >= 0x00a0 )
2643 && ( unicode_character < 0x00c0 ) )
2644 {
2645 unicode_character -= 0x00a0;
2646
2647 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1256_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
2648 }
2649 else if( ( unicode_character >= 0x00e0 )
2650 && ( unicode_character < 0x0100 ) )
2651 {
2652 unicode_character -= 0x00e0;
2653
2654 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1256_unicode_to_byte_stream_base_0x00e0[ unicode_character ];
2655 }
2656 else if( ( unicode_character >= 0x0618 )
2657 && ( unicode_character < 0x0658 ) )
2658 {
2659 unicode_character -= 0x0618;
2660
2661 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1256_unicode_to_byte_stream_base_0x0618[ unicode_character ];
2662 }
2663 else if( ( unicode_character >= 0x2008 )
2664 && ( unicode_character < 0x2028 ) )
2665 {
2666 unicode_character -= 0x2008;
2667
2668 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1256_unicode_to_byte_stream_base_0x2008[ unicode_character ];
2669 }
2670 else switch( unicode_character )
2671 {
2672 case 0x00d7:
2673 byte_stream[ safe_byte_stream_index ] = 0xd7;
2674 break;
2675
2676 case 0x0152:
2677 byte_stream[ safe_byte_stream_index ] = 0x8c;
2678 break;
2679
2680 case 0x0153:
2681 byte_stream[ safe_byte_stream_index ] = 0x9c;
2682 break;
2683
2684 case 0x0192:
2685 byte_stream[ safe_byte_stream_index ] = 0x83;
2686 break;
2687
2688 case 0x02c6:
2689 byte_stream[ safe_byte_stream_index ] = 0x88;
2690 break;
2691
2692 case 0x060c:
2693 byte_stream[ safe_byte_stream_index ] = 0xa1;
2694 break;
2695
2696 case 0x0679:
2697 byte_stream[ safe_byte_stream_index ] = 0x8a;
2698 break;
2699
2700 case 0x067e:
2701 byte_stream[ safe_byte_stream_index ] = 0x81;
2702 break;
2703
2704 case 0x0686:
2705 byte_stream[ safe_byte_stream_index ] = 0x8d;
2706 break;
2707
2708 case 0x0688:
2709 byte_stream[ safe_byte_stream_index ] = 0x8f;
2710 break;
2711
2712 case 0x0691:
2713 byte_stream[ safe_byte_stream_index ] = 0x9a;
2714 break;
2715
2716 case 0x0698:
2717 byte_stream[ safe_byte_stream_index ] = 0x8e;
2718 break;
2719
2720 case 0x06a9:
2721 byte_stream[ safe_byte_stream_index ] = 0x98;
2722 break;
2723
2724 case 0x06af:
2725 byte_stream[ safe_byte_stream_index ] = 0x90;
2726 break;
2727
2728 case 0x06ba:
2729 byte_stream[ safe_byte_stream_index ] = 0x9f;
2730 break;
2731
2732 case 0x06be:
2733 byte_stream[ safe_byte_stream_index ] = 0xaa;
2734 break;
2735
2736 case 0x06c1:
2737 byte_stream[ safe_byte_stream_index ] = 0xc0;
2738 break;
2739
2740 case 0x06d2:
2741 byte_stream[ safe_byte_stream_index ] = 0xff;
2742 break;
2743
2744 case 0x2030:
2745 byte_stream[ safe_byte_stream_index ] = 0x89;
2746 break;
2747
2748 case 0x2039:
2749 byte_stream[ safe_byte_stream_index ] = 0x8b;
2750 break;
2751
2752 case 0x203a:
2753 byte_stream[ safe_byte_stream_index ] = 0x9b;
2754 break;
2755
2756 case 0x20ac:
2757 byte_stream[ safe_byte_stream_index ] = 0x80;
2758 break;
2759
2760 case 0x2122:
2761 byte_stream[ safe_byte_stream_index ] = 0x99;
2762 break;
2763
2764 default:
2765 byte_stream[ safe_byte_stream_index ] = 0x1a;
2766 break;
2767 }
2768 safe_byte_stream_index += 1;
2769
2770 break;
2771
2772 case LIBUNA_CODEPAGE_WINDOWS_1257:
2773 if( unicode_character < 0x0080 )
2774 {
2775 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
2776 }
2777 else if( ( unicode_character >= 0x00a0 )
2778 && ( unicode_character < 0x0180 ) )
2779 {
2780 unicode_character -= 0x00a0;
2781
2782 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1257_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
2783 }
2784 else if( ( unicode_character >= 0x2010 )
2785 && ( unicode_character < 0x2028 ) )
2786 {
2787 unicode_character -= 0x2010;
2788
2789 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1257_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2790 }
2791 else switch( unicode_character )
2792 {
2793 case 0x02c7:
2794 byte_stream[ safe_byte_stream_index ] = 0x8e;
2795 break;
2796
2797 case 0x02d9:
2798 byte_stream[ safe_byte_stream_index ] = 0xff;
2799 break;
2800
2801 case 0x02db:
2802 byte_stream[ safe_byte_stream_index ] = 0x9e;
2803 break;
2804
2805 case 0x2030:
2806 byte_stream[ safe_byte_stream_index ] = 0x89;
2807 break;
2808
2809 case 0x2039:
2810 byte_stream[ safe_byte_stream_index ] = 0x8b;
2811 break;
2812
2813 case 0x203a:
2814 byte_stream[ safe_byte_stream_index ] = 0x9b;
2815 break;
2816
2817 case 0x20ac:
2818 byte_stream[ safe_byte_stream_index ] = 0x80;
2819 break;
2820
2821 case 0x2122:
2822 byte_stream[ safe_byte_stream_index ] = 0x99;
2823 break;
2824
2825 default:
2826 byte_stream[ safe_byte_stream_index ] = 0x1a;
2827 break;
2828 }
2829 safe_byte_stream_index += 1;
2830
2831 break;
2832
2833 case LIBUNA_CODEPAGE_WINDOWS_1258:
2834 if( unicode_character < 0x0080 )
2835 {
2836 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
2837 }
2838 else if( ( unicode_character >= 0x00a0 )
2839 && ( unicode_character < 0x00c0 ) )
2840 {
2841 byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
2842 }
2843 else if( ( unicode_character >= 0x00c0 )
2844 && ( unicode_character < 0x0108 ) )
2845 {
2846 unicode_character -= 0x00c0;
2847
2848 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1258_unicode_to_byte_stream_base_0x00c0[ unicode_character ];
2849 }
2850 else if( ( unicode_character >= 0x2010 )
2851 && ( unicode_character < 0x2028 ) )
2852 {
2853 unicode_character -= 0x2010;
2854
2855 byte_stream[ safe_byte_stream_index ] = libuna_codepage_windows_1258_unicode_to_byte_stream_base_0x2010[ unicode_character ];
2856 }
2857 else switch( unicode_character )
2858 {
2859 case 0x0110:
2860 byte_stream[ safe_byte_stream_index ] = 0xd0;
2861 break;
2862
2863 case 0x0111:
2864 byte_stream[ safe_byte_stream_index ] = 0xf0;
2865 break;
2866
2867 case 0x0152:
2868 byte_stream[ safe_byte_stream_index ] = 0x8c;
2869 break;
2870
2871 case 0x0153:
2872 byte_stream[ safe_byte_stream_index ] = 0x9c;
2873 break;
2874
2875 case 0x0178:
2876 byte_stream[ safe_byte_stream_index ] = 0x9f;
2877 break;
2878
2879 case 0x0192:
2880 byte_stream[ safe_byte_stream_index ] = 0x83;
2881 break;
2882
2883 case 0x01a0:
2884 byte_stream[ safe_byte_stream_index ] = 0xd5;
2885 break;
2886
2887 case 0x01a1:
2888 byte_stream[ safe_byte_stream_index ] = 0xf5;
2889 break;
2890
2891 case 0x01af:
2892 byte_stream[ safe_byte_stream_index ] = 0xdd;
2893 break;
2894
2895 case 0x01b0:
2896 byte_stream[ safe_byte_stream_index ] = 0xfd;
2897 break;
2898
2899 case 0x02c6:
2900 byte_stream[ safe_byte_stream_index ] = 0x88;
2901 break;
2902
2903 case 0x02dc:
2904 byte_stream[ safe_byte_stream_index ] = 0x98;
2905 break;
2906
2907 case 0x0300:
2908 byte_stream[ safe_byte_stream_index ] = 0xcc;
2909 break;
2910
2911 case 0x0301:
2912 byte_stream[ safe_byte_stream_index ] = 0xec;
2913 break;
2914
2915 case 0x0303:
2916 byte_stream[ safe_byte_stream_index ] = 0xde;
2917 break;
2918
2919 case 0x0309:
2920 byte_stream[ safe_byte_stream_index ] = 0xd2;
2921 break;
2922
2923 case 0x0323:
2924 byte_stream[ safe_byte_stream_index ] = 0xf2;
2925 break;
2926
2927 case 0x2030:
2928 byte_stream[ safe_byte_stream_index ] = 0x89;
2929 break;
2930
2931 case 0x2039:
2932 byte_stream[ safe_byte_stream_index ] = 0x8b;
2933 break;
2934
2935 case 0x203a:
2936 byte_stream[ safe_byte_stream_index ] = 0x9b;
2937 break;
2938
2939 case 0x20ab:
2940 byte_stream[ safe_byte_stream_index ] = 0xfe;
2941 break;
2942
2943 case 0x20ac:
2944 byte_stream[ safe_byte_stream_index ] = 0x80;
2945 break;
2946
2947 case 0x2122:
2948 byte_stream[ safe_byte_stream_index ] = 0x99;
2949 break;
2950
2951 default:
2952 byte_stream[ safe_byte_stream_index ] = 0x1a;
2953 break;
2954 }
2955 safe_byte_stream_index += 1;
2956
2957 break;
2958
2959 default:
2960 libcerror_error_set(
2961 error,
2962 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2963 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2964 "%s: unsupported codepage: %d.",
2965 function,
2966 codepage );
2967
2968 return( -1 );
2969 }
2970 if( result != 1 )
2971 {
2972 libcerror_error_set(
2973 error,
2974 LIBCERROR_ERROR_DOMAIN_RUNTIME,
2975 LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
2976 "%s: unable to copy Unicode character to byte stream.",
2977 function );
2978
2979 return( -1 );
2980 }
2981 *byte_stream_index = safe_byte_stream_index;
2982
2983 return( 1 );
2984 }
2985
2986 /* Determines the size of an UTF-7 stream character from an Unicode character
2987 * Adds the size to the UTF-7 stream character size value
2988 * Returns 1 if successful or -1 on error
2989 */
libuna_unicode_character_size_to_utf7_stream(libuna_unicode_character_t unicode_character,size_t * utf7_stream_character_size,uint32_t * utf7_stream_base64_data,libcerror_error_t ** error)2990 int libuna_unicode_character_size_to_utf7_stream(
2991 libuna_unicode_character_t unicode_character,
2992 size_t *utf7_stream_character_size,
2993 uint32_t *utf7_stream_base64_data,
2994 libcerror_error_t **error )
2995 {
2996 static char *function = "libuna_unicode_character_size_to_utf7_stream";
2997 libuna_utf16_character_t utf16_surrogate = 0;
2998 size_t safe_utf7_stream_character_size = 0;
2999 uint32_t base64_triplet = 0;
3000 uint32_t safe_utf7_stream_base64_data = 0;
3001 uint8_t base64_encode_character = 0;
3002 uint8_t byte_bit_shift = 0;
3003 uint8_t current_byte = 0;
3004 uint8_t number_of_bytes = 0;
3005
3006 if( utf7_stream_character_size == NULL )
3007 {
3008 libcerror_error_set(
3009 error,
3010 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3011 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3012 "%s: invalid UTF-7 stream character size.",
3013 function );
3014
3015 return( -1 );
3016 }
3017 if( utf7_stream_base64_data == NULL )
3018 {
3019 libcerror_error_set(
3020 error,
3021 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3022 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3023 "%s: invalid UTF-7 stream base64 data.",
3024 function );
3025
3026 return( -1 );
3027 }
3028 safe_utf7_stream_character_size = *utf7_stream_character_size;
3029 safe_utf7_stream_base64_data = *utf7_stream_base64_data;
3030
3031 /* Determine if the Unicode character is valid
3032 */
3033 if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
3034 {
3035 unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
3036 }
3037 /* The + character must be escaped
3038 */
3039 if( unicode_character == (libuna_unicode_character_t) '+' )
3040 {
3041 }
3042 /* Allow for the end of string character
3043 */
3044 else if( unicode_character == 0 )
3045 {
3046 }
3047 else if( ( unicode_character >= 256 )
3048 || ( libuna_unicode_character_utf7_valid_directly_encoded_character[ (uint8_t) unicode_character ] == 0 ) )
3049 {
3050 base64_encode_character = 1;
3051 }
3052 if( base64_encode_character == 0 )
3053 {
3054 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3055 {
3056 safe_utf7_stream_base64_data = 0;
3057 }
3058 safe_utf7_stream_character_size += 1;
3059
3060 /* The + character must be escaped
3061 */
3062 if( unicode_character == (libuna_unicode_character_t) '+' )
3063 {
3064 safe_utf7_stream_character_size += 1;
3065 }
3066 }
3067 else
3068 {
3069 /* Escape the base64 encoded characters with a +
3070 */
3071 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
3072 {
3073 safe_utf7_stream_character_size += 1;
3074 }
3075 /* Otherwise continue the previously base64 encoded characters
3076 */
3077 else
3078 {
3079 base64_triplet = safe_utf7_stream_base64_data & 0x00ffffff;
3080 number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
3081 current_byte = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
3082
3083 if( number_of_bytes > 0 )
3084 {
3085 if( safe_utf7_stream_character_size < (size_t) ( number_of_bytes + 1 ) )
3086 {
3087 libcerror_error_set(
3088 error,
3089 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3090 LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
3091 "%s: invalid UTF-7 stream character size value out of bounds.",
3092 function );
3093
3094 return( -1 );
3095 }
3096 /* Correct the size for the last partial base64 stream
3097 */
3098 safe_utf7_stream_character_size -= number_of_bytes + 1;
3099 }
3100 if( safe_utf7_stream_character_size < 1 )
3101 {
3102 libcerror_error_set(
3103 error,
3104 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3105 LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
3106 "%s: invalid UTF-7 stream character size value out of bounds.",
3107 function );
3108
3109 return( -1 );
3110 }
3111 /* Correct the size for the base64 stream termination character
3112 */
3113 safe_utf7_stream_character_size -= 1;
3114 }
3115 safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3116
3117 if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
3118 {
3119 unicode_character -= 0x010000;
3120
3121 utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
3122
3123 byte_bit_shift = 16 - ( current_byte * 8 );
3124 base64_triplet += (uint32_t) ( ( utf16_surrogate >> 8 ) & 0xff ) << byte_bit_shift;
3125 current_byte += 1;
3126 number_of_bytes += 1;
3127
3128 if( number_of_bytes == 3 )
3129 {
3130 safe_utf7_stream_character_size += 4;
3131 number_of_bytes = 0;
3132 current_byte = 0;
3133 base64_triplet = 0;
3134 }
3135 byte_bit_shift = 16 - ( current_byte * 8 );
3136 base64_triplet += (uint32_t) ( utf16_surrogate & 0xff ) << byte_bit_shift;
3137 current_byte += 1;
3138 number_of_bytes += 1;
3139
3140 if( number_of_bytes == 3 )
3141 {
3142 safe_utf7_stream_character_size += 4;
3143 number_of_bytes = 0;
3144 current_byte = 0;
3145 base64_triplet = 0;
3146 }
3147 unicode_character = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
3148 }
3149 byte_bit_shift = 16 - ( current_byte * 8 );
3150 base64_triplet += (uint32_t) ( ( unicode_character >> 8 ) & 0xff ) << byte_bit_shift;
3151 current_byte += 1;
3152 number_of_bytes += 1;
3153
3154 if( number_of_bytes == 3 )
3155 {
3156 safe_utf7_stream_character_size += 4;
3157 number_of_bytes = 0;
3158 current_byte = 0;
3159 base64_triplet = 0;
3160 }
3161 byte_bit_shift = 16 - ( current_byte * 8 );
3162 base64_triplet += (uint32_t) ( unicode_character & 0xff ) << byte_bit_shift;
3163 current_byte += 1;
3164 number_of_bytes += 1;
3165
3166 if( number_of_bytes == 3 )
3167 {
3168 safe_utf7_stream_character_size += 4;
3169 number_of_bytes = 0;
3170 current_byte = 0;
3171 base64_triplet = 0;
3172 }
3173 /* Terminate the base64 encoded characters
3174 */
3175 if( number_of_bytes > 0 )
3176 {
3177 safe_utf7_stream_character_size += number_of_bytes + 1;
3178 }
3179 safe_utf7_stream_character_size += 1;
3180 }
3181 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3182 {
3183 safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3184 safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
3185 safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
3186 safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
3187 }
3188 *utf7_stream_character_size = safe_utf7_stream_character_size;
3189 *utf7_stream_base64_data = safe_utf7_stream_base64_data;
3190
3191 return( 1 );
3192 }
3193
3194 /* Copies an Unicode character from an UTF-7 stream
3195 * The bits of the base64 data contain:
3196 * 0 - 23 the base64 triplet
3197 * 24 - 25 the number of bytes in the triplet
3198 * 26 - 27 unused
3199 * 28 - 29 the current byte
3200 * 30 unused
3201 * 31 flag to indicate the current UTF-7 characters are (modified) base64 encoded
3202 *
3203 * Returns 1 if successful or -1 on error
3204 */
libuna_unicode_character_copy_from_utf7_stream(libuna_unicode_character_t * unicode_character,const uint8_t * utf7_stream,size_t utf7_stream_size,size_t * utf7_stream_index,uint32_t * utf7_stream_base64_data,libcerror_error_t ** error)3205 int libuna_unicode_character_copy_from_utf7_stream(
3206 libuna_unicode_character_t *unicode_character,
3207 const uint8_t *utf7_stream,
3208 size_t utf7_stream_size,
3209 size_t *utf7_stream_index,
3210 uint32_t *utf7_stream_base64_data,
3211 libcerror_error_t **error )
3212 {
3213 static char *function = "libuna_unicode_character_copy_from_utf7_stream";
3214 libuna_unicode_character_t safe_unicode_character = 0;
3215 libuna_utf16_character_t utf16_surrogate = 0;
3216 size_t safe_utf7_stream_index = 0;
3217 uint32_t base64_triplet = 0;
3218 uint32_t safe_utf7_stream_base64_data = 0;
3219 uint8_t byte_bit_shift = 0;
3220 uint8_t current_byte = 0;
3221 uint8_t number_of_bytes = 0;
3222 uint8_t padding_size = 0;
3223 uint8_t utf7_character_value = 0;
3224
3225 if( unicode_character == NULL )
3226 {
3227 libcerror_error_set(
3228 error,
3229 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3230 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3231 "%s: invalid Unicode character.",
3232 function );
3233
3234 return( -1 );
3235 }
3236 if( utf7_stream == NULL )
3237 {
3238 libcerror_error_set(
3239 error,
3240 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3241 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3242 "%s: invalid UTF-7 stream.",
3243 function );
3244
3245 return( -1 );
3246 }
3247 if( utf7_stream_size > (size_t) SSIZE_MAX )
3248 {
3249 libcerror_error_set(
3250 error,
3251 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3252 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3253 "%s: invalid UTF-7 stream size value exceeds maximum.",
3254 function );
3255
3256 return( -1 );
3257 }
3258 if( utf7_stream_index == NULL )
3259 {
3260 libcerror_error_set(
3261 error,
3262 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3263 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3264 "%s: invalid UTF-7 stream index.",
3265 function );
3266
3267 return( -1 );
3268 }
3269 if( utf7_stream_base64_data == NULL )
3270 {
3271 libcerror_error_set(
3272 error,
3273 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3274 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3275 "%s: invalid UTF-7 base64 data.",
3276 function );
3277
3278 return( -1 );
3279 }
3280 safe_utf7_stream_index = *utf7_stream_index;
3281 safe_utf7_stream_base64_data = *utf7_stream_base64_data;
3282
3283 if( safe_utf7_stream_index >= utf7_stream_size )
3284 {
3285 libcerror_error_set(
3286 error,
3287 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3288 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3289 "%s: UTF-7 stream too small.",
3290 function );
3291
3292 return( -1 );
3293 }
3294 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3295 {
3296 base64_triplet = safe_utf7_stream_base64_data & 0x00ffffff;
3297 number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
3298 current_byte = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
3299
3300 if( current_byte >= number_of_bytes )
3301 {
3302 if( safe_utf7_stream_index >= utf7_stream_size )
3303 {
3304 libcerror_error_set(
3305 error,
3306 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3307 LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
3308 "%s: invalid UTF-7 stream character size value out of bounds.",
3309 function );
3310
3311 return( -1 );
3312 }
3313 utf7_character_value = utf7_stream[ safe_utf7_stream_index ];
3314
3315 /* Any character not in the modified base64 alphabet terminates the base64 encoded sequence
3316 */
3317 if( libuna_unicode_character_utf7_valid_base64_character[ utf7_character_value ] == 0 )
3318 {
3319 safe_utf7_stream_base64_data = 0;
3320 }
3321 }
3322 }
3323 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
3324 {
3325 if( safe_utf7_stream_index >= utf7_stream_size )
3326 {
3327 libcerror_error_set(
3328 error,
3329 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3330 LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
3331 "%s: invalid UTF-7 stream character size value out of bounds.",
3332 function );
3333
3334 return( -1 );
3335 }
3336 utf7_character_value = utf7_stream[ safe_utf7_stream_index ];
3337
3338 /* Determine if the character is modified base64 encoded
3339 * or a + character
3340 */
3341 if( utf7_character_value == (uint8_t) '+' )
3342 {
3343 if( ( safe_utf7_stream_index + 1 ) >= utf7_stream_size )
3344 {
3345 libcerror_error_set(
3346 error,
3347 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3348 LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
3349 "%s: invalid UTF-7 stream character size value out of bounds.",
3350 function );
3351
3352 return( -1 );
3353 }
3354 if( utf7_stream[ safe_utf7_stream_index + 1 ] != (uint8_t) '-' )
3355 {
3356 safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3357
3358 safe_utf7_stream_index++;
3359 }
3360 }
3361 /* Allow for the end of string character
3362 */
3363 else if( utf7_character_value == 0 )
3364 {
3365 }
3366 else if( libuna_unicode_character_utf7_valid_directly_encoded_character[ utf7_character_value ] == 0 )
3367 {
3368 libcerror_error_set(
3369 error,
3370 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3371 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3372 "%s: invalid directly encoded UTF-7 character byte: 0x%02" PRIx8 ".",
3373 function,
3374 utf7_character_value );
3375
3376 return( -1 );
3377 }
3378 }
3379 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
3380 {
3381 safe_unicode_character = utf7_stream[ safe_utf7_stream_index++ ];
3382
3383 if( ( safe_unicode_character == (libuna_unicode_character_t) '+' )
3384 && ( utf7_stream[ safe_utf7_stream_index ] == (uint8_t) '-' ) )
3385 {
3386 safe_utf7_stream_index++;
3387 }
3388 }
3389 else if( ( number_of_bytes == 0 )
3390 || ( current_byte >= number_of_bytes ) )
3391 {
3392 if( libuna_base64_triplet_copy_from_base64_stream(
3393 &base64_triplet,
3394 utf7_stream,
3395 utf7_stream_size - 1,
3396 &safe_utf7_stream_index,
3397 &padding_size,
3398 LIBUNA_BASE64_VARIANT_UTF7,
3399 error ) != 1 )
3400 {
3401 libcerror_error_set(
3402 error,
3403 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3404 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3405 "%s: unable to copy base64 encoded UTF-7 characters.",
3406 function );
3407
3408 return( -1 );
3409 }
3410 if( padding_size > 2 )
3411 {
3412 libcerror_error_set(
3413 error,
3414 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3415 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3416 "%s: unsupported padding in base64 encoded UTF-7 characters.",
3417 function );
3418
3419 return( -1 );
3420 }
3421 number_of_bytes = 3 - padding_size;
3422 current_byte = 0;
3423 }
3424 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3425 {
3426 byte_bit_shift = 16 - ( current_byte * 8 );
3427 safe_unicode_character = ( ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL ) << 8;
3428 current_byte += 1;
3429
3430 if( current_byte >= number_of_bytes )
3431 {
3432 if( libuna_base64_triplet_copy_from_base64_stream(
3433 &base64_triplet,
3434 utf7_stream,
3435 utf7_stream_size - 1,
3436 &safe_utf7_stream_index,
3437 &padding_size,
3438 LIBUNA_BASE64_VARIANT_UTF7,
3439 error ) != 1 )
3440 {
3441 libcerror_error_set(
3442 error,
3443 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3444 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3445 "%s: unable to copy base64 encoded UTF-7 characters.",
3446 function );
3447
3448 return( -1 );
3449 }
3450 if( padding_size > 2 )
3451 {
3452 libcerror_error_set(
3453 error,
3454 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3455 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3456 "%s: unsupported padding in base64 encoded UTF-7 characters.",
3457 function );
3458
3459 return( -1 );
3460 }
3461 number_of_bytes = 3 - padding_size;
3462 current_byte = 0;
3463 }
3464 byte_bit_shift = 16 - ( current_byte * 8 );
3465 safe_unicode_character += ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL;
3466 current_byte += 1;
3467
3468 if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
3469 && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
3470 {
3471 if( current_byte >= number_of_bytes )
3472 {
3473 if( libuna_base64_triplet_copy_from_base64_stream(
3474 &base64_triplet,
3475 utf7_stream,
3476 utf7_stream_size - 1,
3477 &safe_utf7_stream_index,
3478 &padding_size,
3479 LIBUNA_BASE64_VARIANT_UTF7,
3480 error ) != 1 )
3481 {
3482 libcerror_error_set(
3483 error,
3484 LIBCERROR_ERROR_DOMAIN_CONVERSION,
3485 LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3486 "%s: unable to copy base64 encoded UTF-7 characters.",
3487 function );
3488
3489 return( -1 );
3490 }
3491 if( padding_size > 2 )
3492 {
3493 libcerror_error_set(
3494 error,
3495 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3496 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3497 "%s: unsupported padding in base64 encoded UTF-7 characters.",
3498 function );
3499
3500 return( -1 );
3501 }
3502 number_of_bytes = 3 - padding_size;
3503 current_byte = 0;
3504 }
3505 byte_bit_shift = 16 - ( current_byte * 8 );
3506 utf16_surrogate = ( ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL ) << 8;
3507 current_byte += 1;
3508
3509 if( current_byte >= number_of_bytes )
3510 {
3511 if( libuna_base64_triplet_copy_from_base64_stream(
3512 &base64_triplet,
3513 utf7_stream,
3514 utf7_stream_size - 1,
3515 &safe_utf7_stream_index,
3516 &padding_size,
3517 LIBUNA_BASE64_VARIANT_UTF7,
3518 error ) != 1 )
3519 {
3520 libcerror_error_set(
3521 error,
3522 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3523 LIBCERROR_RUNTIME_ERROR_GET_FAILED,
3524 "%s: unable to retrieve base64 encoded UTF-7 characters.",
3525 function );
3526
3527 return( -1 );
3528 }
3529 if( padding_size > 2 )
3530 {
3531 libcerror_error_set(
3532 error,
3533 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3534 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3535 "%s: unsupported padding in base64 encoded UTF-7 characters.",
3536 function );
3537
3538 return( -1 );
3539 }
3540 number_of_bytes = 3 - padding_size;
3541 current_byte = 0;
3542 }
3543 byte_bit_shift = 16 - ( current_byte * 8 );
3544 utf16_surrogate += ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL;
3545 current_byte += 1;
3546
3547 /* Determine if the UTF-16 character is within the low surrogate range
3548 */
3549 if( ( utf16_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
3550 && ( utf16_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
3551 {
3552 safe_unicode_character -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
3553 safe_unicode_character <<= 10;
3554 safe_unicode_character += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
3555 safe_unicode_character += 0x010000;
3556 }
3557 else
3558 {
3559 safe_unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
3560 }
3561 }
3562 if( safe_utf7_stream_index >= utf7_stream_size )
3563 {
3564 libcerror_error_set(
3565 error,
3566 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3567 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3568 "%s: UTF-7 stream too small.",
3569 function );
3570
3571 return( -1 );
3572 }
3573 if( ( current_byte >= number_of_bytes )
3574 && ( utf7_stream[ safe_utf7_stream_index ] == (uint8_t) '-' ) )
3575 {
3576 safe_utf7_stream_base64_data = 0;
3577
3578 safe_utf7_stream_index++;
3579 }
3580 }
3581 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3582 {
3583 safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3584 safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
3585 safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
3586 safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
3587 }
3588 *unicode_character = safe_unicode_character;
3589 *utf7_stream_index = safe_utf7_stream_index;
3590 *utf7_stream_base64_data = safe_utf7_stream_base64_data;
3591
3592 return( 1 );
3593 }
3594
3595 /* Copies an Unicode character into a UTF-7 stream
3596 * The bits of the base64 data contain:
3597 * 0 - 23 the base64 triplet
3598 * 24 - 25 the number of bytes in the triplet
3599 * 26 - 27 unused
3600 * 28 - 29 the current byte
3601 * 30 unused
3602 * 31 flag to indicate the current UTF-7 characters are (modified) base64 encoded
3603 *
3604 * Returns 1 if successful or -1 on error
3605 */
libuna_unicode_character_copy_to_utf7_stream(libuna_unicode_character_t unicode_character,uint8_t * utf7_stream,size_t utf7_stream_size,size_t * utf7_stream_index,uint32_t * utf7_stream_base64_data,libcerror_error_t ** error)3606 int libuna_unicode_character_copy_to_utf7_stream(
3607 libuna_unicode_character_t unicode_character,
3608 uint8_t *utf7_stream,
3609 size_t utf7_stream_size,
3610 size_t *utf7_stream_index,
3611 uint32_t *utf7_stream_base64_data,
3612 libcerror_error_t **error )
3613 {
3614 static char *function = "libuna_unicode_character_copy_to_utf7_stream";
3615 libuna_utf16_character_t utf16_surrogate = 0;
3616 size_t safe_utf7_stream_index = 0;
3617 uint32_t base64_triplet = 0;
3618 uint32_t safe_utf7_stream_base64_data = 0;
3619 uint8_t base64_encode_character = 0;
3620 uint8_t byte_bit_shift = 0;
3621 uint8_t current_byte = 0;
3622 uint8_t number_of_bytes = 0;
3623
3624 if( utf7_stream == NULL )
3625 {
3626 libcerror_error_set(
3627 error,
3628 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3629 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3630 "%s: invalid UTF-7 stream.",
3631 function );
3632
3633 return( -1 );
3634 }
3635 if( utf7_stream_size > (size_t) SSIZE_MAX )
3636 {
3637 libcerror_error_set(
3638 error,
3639 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3640 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3641 "%s: invalid UTF-7 stream size value exceeds maximum.",
3642 function );
3643
3644 return( -1 );
3645 }
3646 if( utf7_stream_index == NULL )
3647 {
3648 libcerror_error_set(
3649 error,
3650 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3651 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3652 "%s: invalid UTF-7 stream index.",
3653 function );
3654
3655 return( -1 );
3656 }
3657 if( utf7_stream_base64_data == NULL )
3658 {
3659 libcerror_error_set(
3660 error,
3661 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3662 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3663 "%s: invalid UTF-7 stream base64 data.",
3664 function );
3665
3666 return( -1 );
3667 }
3668 safe_utf7_stream_index = *utf7_stream_index;
3669 safe_utf7_stream_base64_data = *utf7_stream_base64_data;
3670
3671 /* Determine if the Unicode character is valid
3672 */
3673 if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
3674 {
3675 unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
3676 }
3677 /* A-Z is not a continous range on an EBCDIC based system
3678 * it consists of the ranges: A-I, J-R, S-Z
3679 */
3680 if( ( unicode_character >= 0x41 )
3681 && ( unicode_character <= 0x49 ) )
3682 {
3683 unicode_character = ( unicode_character - 0x41 ) + (libuna_unicode_character_t) 'A';
3684 }
3685 else if( ( unicode_character >= 0x4a )
3686 && ( unicode_character <= 0x52 ) )
3687 {
3688 unicode_character = ( unicode_character - 0x4a ) + (libuna_unicode_character_t) 'J';
3689 }
3690 else if( ( unicode_character >= 0x53 )
3691 && ( unicode_character <= 0x5a ) )
3692 {
3693 unicode_character = ( unicode_character - 0x53 ) + (libuna_unicode_character_t) 'S';
3694 }
3695 /* a-z is not a continous range on an EBCDIC based system
3696 * it consists of the ranges: a-i, j-r, s-z
3697 */
3698 else if( ( unicode_character >= 0x61 )
3699 && ( unicode_character <= 0x69 ) )
3700 {
3701 unicode_character = ( unicode_character - 0x61 ) + (libuna_unicode_character_t) 'a';
3702 }
3703 else if( ( unicode_character >= 0x6a )
3704 && ( unicode_character <= 0x72 ) )
3705 {
3706 unicode_character = ( unicode_character - 0x6a ) + (libuna_unicode_character_t) 'j';
3707 }
3708 else if( ( unicode_character >= 0x73 )
3709 && ( unicode_character <= 0x7a ) )
3710 {
3711 unicode_character = ( unicode_character - 0x73 ) + (libuna_unicode_character_t) 's';
3712 }
3713 /* 0-9
3714 */
3715 else if( ( unicode_character >= 0x30 )
3716 && ( unicode_character <= 0x39 ) )
3717 {
3718 unicode_character = ( unicode_character - 0x30 ) + (libuna_unicode_character_t) '0';
3719 }
3720 /* The + character must be escaped
3721 */
3722 else if( unicode_character == (libuna_unicode_character_t) '+' )
3723 {
3724 }
3725 /* Allow for the end of string character
3726 */
3727 else if( unicode_character == 0 )
3728 {
3729 }
3730 else if( ( unicode_character >= 256 )
3731 || ( libuna_unicode_character_utf7_valid_directly_encoded_character[ (uint8_t) unicode_character ] == 0 ) )
3732 {
3733 base64_encode_character = 1;
3734 }
3735 if( base64_encode_character == 0 )
3736 {
3737 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3738 {
3739 safe_utf7_stream_base64_data = 0;
3740 }
3741 if( safe_utf7_stream_index >= utf7_stream_size )
3742 {
3743 libcerror_error_set(
3744 error,
3745 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3746 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3747 "%s: UTF-7 stream too small.",
3748 function );
3749
3750 return( -1 );
3751 }
3752 utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) unicode_character;
3753
3754 /* The + character must be escaped
3755 */
3756 if( unicode_character == (libuna_unicode_character_t) '+' )
3757 {
3758 if( safe_utf7_stream_index >= utf7_stream_size )
3759 {
3760 libcerror_error_set(
3761 error,
3762 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3763 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3764 "%s: UTF-7 stream too small.",
3765 function );
3766
3767 return( -1 );
3768 }
3769 utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '-';
3770 }
3771 }
3772 else
3773 {
3774 /* Escape the base64 encoded chracters with a +
3775 */
3776 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
3777 {
3778 if( safe_utf7_stream_index >= utf7_stream_size )
3779 {
3780 libcerror_error_set(
3781 error,
3782 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3783 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3784 "%s: UTF-7 stream too small.",
3785 function );
3786
3787 return( -1 );
3788 }
3789 utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '+';
3790 }
3791 /* Otherwise continue the previously base64 encoded characters
3792 */
3793 else
3794 {
3795 base64_triplet = safe_utf7_stream_base64_data & 0x00ffffff;
3796 number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
3797 current_byte = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
3798
3799 if( number_of_bytes > 0 )
3800 {
3801 /* Correct the index for the last partial base64 stream
3802 */
3803 safe_utf7_stream_index -= number_of_bytes + 1;
3804 }
3805 /* Correct the index for the base64 stream termination character
3806 */
3807 safe_utf7_stream_index -= 1;
3808 }
3809 safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3810
3811 if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
3812 {
3813 unicode_character -= 0x010000;
3814
3815 utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 )
3816 + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
3817
3818 byte_bit_shift = 16 - ( current_byte * 8 );
3819 base64_triplet += (uint32_t) ( ( utf16_surrogate >> 8 ) & 0xff ) << byte_bit_shift;
3820 current_byte += 1;
3821 number_of_bytes += 1;
3822
3823 if( number_of_bytes == 3 )
3824 {
3825 if( libuna_base64_triplet_copy_to_base64_stream(
3826 base64_triplet,
3827 utf7_stream,
3828 utf7_stream_size,
3829 &safe_utf7_stream_index,
3830 0,
3831 LIBUNA_BASE64_VARIANT_UTF7,
3832 error ) != 1 )
3833 {
3834 libcerror_error_set(
3835 error,
3836 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3837 LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3838 "%s: unable to set base64 encoded UTF-7 characters.",
3839 function );
3840
3841 return( -1 );
3842 }
3843 number_of_bytes = 0;
3844 current_byte = 0;
3845 base64_triplet = 0;
3846 }
3847 byte_bit_shift = 16 - ( current_byte * 8 );
3848 base64_triplet += (uint32_t) ( utf16_surrogate & 0xff ) << byte_bit_shift;
3849 current_byte += 1;
3850 number_of_bytes += 1;
3851
3852 if( number_of_bytes == 3 )
3853 {
3854 if( libuna_base64_triplet_copy_to_base64_stream(
3855 base64_triplet,
3856 utf7_stream,
3857 utf7_stream_size,
3858 &safe_utf7_stream_index,
3859 0,
3860 LIBUNA_BASE64_VARIANT_UTF7,
3861 error ) != 1 )
3862 {
3863 libcerror_error_set(
3864 error,
3865 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3866 LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3867 "%s: unable to set base64 encoded UTF-7 characters.",
3868 function );
3869
3870 return( -1 );
3871 }
3872 number_of_bytes = 0;
3873 current_byte = 0;
3874 base64_triplet = 0;
3875 }
3876 unicode_character = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff )
3877 + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
3878 }
3879 byte_bit_shift = 16 - ( current_byte * 8 );
3880 base64_triplet += (uint32_t) ( ( unicode_character >> 8 ) & 0xff ) << byte_bit_shift;
3881 current_byte += 1;
3882 number_of_bytes += 1;
3883
3884 if( number_of_bytes == 3 )
3885 {
3886 if( libuna_base64_triplet_copy_to_base64_stream(
3887 base64_triplet,
3888 utf7_stream,
3889 utf7_stream_size,
3890 &safe_utf7_stream_index,
3891 0,
3892 LIBUNA_BASE64_VARIANT_UTF7,
3893 error ) != 1 )
3894 {
3895 libcerror_error_set(
3896 error,
3897 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3898 LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3899 "%s: unable to set base64 encoded UTF-7 characters.",
3900 function );
3901
3902 return( -1 );
3903 }
3904 number_of_bytes = 0;
3905 current_byte = 0;
3906 base64_triplet = 0;
3907 }
3908 byte_bit_shift = 16 - ( current_byte * 8 );
3909 base64_triplet += (uint32_t) ( unicode_character & 0xff ) << byte_bit_shift;
3910 current_byte += 1;
3911 number_of_bytes += 1;
3912
3913 if( number_of_bytes == 3 )
3914 {
3915 if( libuna_base64_triplet_copy_to_base64_stream(
3916 base64_triplet,
3917 utf7_stream,
3918 utf7_stream_size,
3919 &safe_utf7_stream_index,
3920 0,
3921 LIBUNA_BASE64_VARIANT_UTF7,
3922 error ) != 1 )
3923 {
3924 libcerror_error_set(
3925 error,
3926 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3927 LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3928 "%s: unable to set base64 encoded UTF-7 characters.",
3929 function );
3930
3931 return( -1 );
3932 }
3933 number_of_bytes = 0;
3934 current_byte = 0;
3935 base64_triplet = 0;
3936 }
3937 /* Terminate the base64 encoded characters
3938 */
3939 if( number_of_bytes > 0 )
3940 {
3941 if( libuna_base64_triplet_copy_to_base64_stream(
3942 base64_triplet,
3943 utf7_stream,
3944 utf7_stream_size,
3945 &safe_utf7_stream_index,
3946 3 - number_of_bytes,
3947 LIBUNA_BASE64_VARIANT_UTF7,
3948 error ) != 1 )
3949 {
3950 libcerror_error_set(
3951 error,
3952 LIBCERROR_ERROR_DOMAIN_RUNTIME,
3953 LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3954 "%s: unable to set base64 encoded UTF-7 characters.",
3955 function );
3956
3957 return( -1 );
3958 }
3959 }
3960 if( safe_utf7_stream_index >= utf7_stream_size )
3961 {
3962 libcerror_error_set(
3963 error,
3964 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3965 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3966 "%s: UTF-7 stream too small.",
3967 function );
3968
3969 return( -1 );
3970 }
3971 utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '-';
3972 }
3973 if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3974 {
3975 safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3976 safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
3977 safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
3978 safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
3979 }
3980 *utf7_stream_index = safe_utf7_stream_index;
3981 *utf7_stream_base64_data = safe_utf7_stream_base64_data;
3982
3983 return( 1 );
3984 }
3985
3986 /* Determines the size of an UTF-8 character from an Unicode character
3987 * Adds the size to the UTF-8 character size value
3988 * Returns 1 if successful or -1 on error
3989 */
libuna_unicode_character_size_to_utf8(libuna_unicode_character_t unicode_character,size_t * utf8_character_size,libcerror_error_t ** error)3990 int libuna_unicode_character_size_to_utf8(
3991 libuna_unicode_character_t unicode_character,
3992 size_t *utf8_character_size,
3993 libcerror_error_t **error )
3994 {
3995 static char *function = "libuna_unicode_character_size_to_utf8";
3996 size_t safe_utf8_character_size = 0;
3997
3998 if( utf8_character_size == NULL )
3999 {
4000 libcerror_error_set(
4001 error,
4002 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4003 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4004 "%s: invalid UTF-8 character size.",
4005 function );
4006
4007 return( -1 );
4008 }
4009 if( unicode_character < 0x00000080UL )
4010 {
4011 safe_utf8_character_size += 1;
4012 }
4013 else if( unicode_character < 0x00000800UL )
4014 {
4015 safe_utf8_character_size += 2;
4016 }
4017 else if( unicode_character < 0x00010000UL )
4018 {
4019 safe_utf8_character_size += 3;
4020 }
4021 else if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
4022 {
4023 safe_utf8_character_size += 3;
4024 }
4025 else
4026 {
4027 safe_utf8_character_size += 4;
4028 }
4029
4030 /* If UTF-8 USC support is needed it should be implemented in
4031 * utf8_usc or something, but for now leave this here as a reminder
4032
4033 else if( unicode_character < 0x010000 )
4034 {
4035 safe_utf8_character_size += 3;
4036 }
4037 else if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
4038 {
4039 safe_utf8_character_size += 2;
4040 }
4041 else if( unicode_character < 0x0200000 )
4042 {
4043 safe_utf8_character_size += 4;
4044 }
4045 else if( unicode_character < 0x0400000 )
4046 {
4047 safe_utf8_character_size += 5;
4048 }
4049 else
4050 {
4051 safe_utf8_character_size += 6;
4052 }
4053 */
4054 *utf8_character_size += safe_utf8_character_size;
4055
4056 return( 1 );
4057 }
4058
4059 /* Copies an Unicode character from an UTF-8 string
4060 * Returns 1 if successful or -1 on error
4061 */
libuna_unicode_character_copy_from_utf8(libuna_unicode_character_t * unicode_character,const libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,libcerror_error_t ** error)4062 int libuna_unicode_character_copy_from_utf8(
4063 libuna_unicode_character_t *unicode_character,
4064 const libuna_utf8_character_t *utf8_string,
4065 size_t utf8_string_size,
4066 size_t *utf8_string_index,
4067 libcerror_error_t **error )
4068 {
4069 static char *function = "libuna_unicode_character_copy_from_utf8";
4070 libuna_unicode_character_t safe_unicode_character = 0;
4071 size_t safe_utf8_string_index = 0;
4072 uint8_t byte_value1 = 0;
4073 uint8_t byte_value2 = 0;
4074 uint8_t byte_value3 = 0;
4075 uint8_t byte_value4 = 0;
4076 uint8_t byte_value5 = 0;
4077 uint8_t utf8_character_additional_bytes = 0;
4078
4079 if( unicode_character == NULL )
4080 {
4081 libcerror_error_set(
4082 error,
4083 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4084 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4085 "%s: invalid Unicode character.",
4086 function );
4087
4088 return( -1 );
4089 }
4090 if( utf8_string == NULL )
4091 {
4092 libcerror_error_set(
4093 error,
4094 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4095 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4096 "%s: invalid UTF-8 string.",
4097 function );
4098
4099 return( -1 );
4100 }
4101 if( utf8_string_size > (size_t) SSIZE_MAX )
4102 {
4103 libcerror_error_set(
4104 error,
4105 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4106 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4107 "%s: invalid UTF-8 string size value exceeds maximum.",
4108 function );
4109
4110 return( -1 );
4111 }
4112 if( utf8_string_index == NULL )
4113 {
4114 libcerror_error_set(
4115 error,
4116 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4117 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4118 "%s: invalid UTF-8 string index.",
4119 function );
4120
4121 return( -1 );
4122 }
4123 safe_utf8_string_index = *utf8_string_index;
4124
4125 if( safe_utf8_string_index >= utf8_string_size )
4126 {
4127 libcerror_error_set(
4128 error,
4129 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4130 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4131 "%s: UTF-8 string too small.",
4132 function );
4133
4134 return( -1 );
4135 }
4136 /* Determine the number of additional bytes of the UTF-8 character
4137 */
4138 byte_value1 = utf8_string[ safe_utf8_string_index ];
4139
4140 if( byte_value1 < 0xc0 )
4141 {
4142 utf8_character_additional_bytes = 0;
4143 }
4144 else if( byte_value1 < 0xe0 )
4145 {
4146 utf8_character_additional_bytes = 1;
4147 }
4148 else if( byte_value1 < 0xf0 )
4149 {
4150 utf8_character_additional_bytes = 2;
4151 }
4152 else if( byte_value1 < 0xf8 )
4153 {
4154 utf8_character_additional_bytes = 3;
4155 }
4156 else if( byte_value1 < 0xfc )
4157 {
4158 utf8_character_additional_bytes = 4;
4159 }
4160 else
4161 {
4162 utf8_character_additional_bytes = 5;
4163 }
4164 if( ( ( (size_t) utf8_character_additional_bytes + 1 ) > utf8_string_size )
4165 || ( safe_utf8_string_index > ( utf8_string_size - ( utf8_character_additional_bytes + 1 ) ) ) )
4166 {
4167 libcerror_error_set(
4168 error,
4169 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4170 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4171 "%s: missing UTF-8 character bytes.",
4172 function );
4173
4174 return( -1 );
4175 }
4176 /* Determine the UTF-8 character and make sure it is valid
4177 * Unicode limits the UTF-8 character to consist of a maximum of 4 bytes
4178 * while ISO 10646 Universal Character Set (UCS) allows up to 6 bytes
4179 */
4180 if( byte_value1 > 0xf4 )
4181 {
4182 libcerror_error_set(
4183 error,
4184 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4185 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4186 "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
4187 function,
4188 byte_value1 );
4189
4190 return( -1 );
4191 }
4192 safe_unicode_character = byte_value1;
4193
4194 if( utf8_character_additional_bytes == 0 )
4195 {
4196 if( ( byte_value1 >= 0x80 )
4197 && ( byte_value1 < 0xc2 ) )
4198 {
4199 libcerror_error_set(
4200 error,
4201 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4202 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4203 "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
4204 function,
4205 byte_value1 );
4206
4207 return( -1 );
4208 }
4209 }
4210 if( utf8_character_additional_bytes >= 1 )
4211 {
4212 byte_value2 = utf8_string[ safe_utf8_string_index + 1 ];
4213
4214 if( byte_value2 > 0xbf )
4215 {
4216 libcerror_error_set(
4217 error,
4218 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4219 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4220 "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4221 function,
4222 byte_value2 );
4223
4224 return( -1 );
4225 }
4226 if( ( byte_value1 == 0xe0 )
4227 && ( byte_value2 < 0xa0 ) )
4228 {
4229 libcerror_error_set(
4230 error,
4231 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4232 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4233 "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4234 function,
4235 byte_value2 );
4236
4237 return( -1 );
4238 }
4239 else if( ( byte_value1 == 0xed )
4240 && ( byte_value2 > 0x9f ) )
4241 {
4242 libcerror_error_set(
4243 error,
4244 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4245 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4246 "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4247 function,
4248 byte_value2 );
4249
4250 return( -1 );
4251 }
4252 else if( ( byte_value1 == 0xf0 )
4253 && ( byte_value2 < 0x90 ) )
4254 {
4255 libcerror_error_set(
4256 error,
4257 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4258 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4259 "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4260 function,
4261 byte_value2 );
4262
4263 return( -1 );
4264 }
4265 else if( ( byte_value1 == 0xf4 )
4266 && ( byte_value2 > 0x8f ) )
4267 {
4268 libcerror_error_set(
4269 error,
4270 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4271 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4272 "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4273 function,
4274 byte_value2 );
4275
4276 return( -1 );
4277 }
4278 else if( byte_value2 < 0x80 )
4279 {
4280 libcerror_error_set(
4281 error,
4282 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4283 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4284 "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4285 function,
4286 byte_value2 );
4287
4288 return( -1 );
4289 }
4290 safe_unicode_character <<= 6;
4291 safe_unicode_character += byte_value2;
4292
4293 if( utf8_character_additional_bytes == 1 )
4294 {
4295 safe_unicode_character -= 0x03080;
4296 }
4297 }
4298 if( utf8_character_additional_bytes >= 2 )
4299 {
4300 byte_value3 = utf8_string[ safe_utf8_string_index + 2 ];
4301
4302 if( ( byte_value3 < 0x80 )
4303 || ( byte_value3 > 0xbf ) )
4304 {
4305 libcerror_error_set(
4306 error,
4307 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4308 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4309 "%s: invalid 3rd UTF-8 character byte: 0x%02" PRIx8 ".",
4310 function,
4311 byte_value3 );
4312
4313 return( -1 );
4314 }
4315 safe_unicode_character <<= 6;
4316 safe_unicode_character += byte_value3;
4317
4318 if( utf8_character_additional_bytes == 2 )
4319 {
4320 safe_unicode_character -= 0x0e2080;
4321 }
4322 }
4323 if( utf8_character_additional_bytes >= 3 )
4324 {
4325 byte_value4 = utf8_string[ safe_utf8_string_index + 3 ];
4326
4327 if( ( byte_value4 < 0x80 )
4328 || ( byte_value4 > 0xbf ) )
4329 {
4330 libcerror_error_set(
4331 error,
4332 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4333 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4334 "%s: invalid 4th UTF-8 character byte: 0x%02" PRIx8 ".",
4335 function,
4336 byte_value4 );
4337
4338 return( -1 );
4339 }
4340 safe_unicode_character <<= 6;
4341 safe_unicode_character += byte_value4;
4342
4343 if( utf8_character_additional_bytes == 3 )
4344 {
4345 safe_unicode_character -= 0x03c82080;
4346 }
4347 }
4348 if( utf8_character_additional_bytes >= 4 )
4349 {
4350 byte_value5 = utf8_string[ safe_utf8_string_index + 4 ];
4351
4352 if( ( byte_value5 < 0x80 )
4353 || ( byte_value5 > 0xbf ) )
4354 {
4355 libcerror_error_set(
4356 error,
4357 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4358 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4359 "%s: invalid 5th UTF-8 character byte: 0x%02" PRIx8 ".",
4360 function,
4361 byte_value5 );
4362
4363 return( -1 );
4364 }
4365 safe_unicode_character <<= 6;
4366 safe_unicode_character += byte_value5;
4367
4368 if( utf8_character_additional_bytes == 4 )
4369 {
4370 safe_unicode_character -= 0x0fa082080;
4371 }
4372 }
4373 if( utf8_character_additional_bytes == 5 )
4374 {
4375 if( ( utf8_string[ safe_utf8_string_index + 5 ] < 0x80 )
4376 || ( utf8_string[ safe_utf8_string_index + 5 ] > 0xbf ) )
4377 {
4378 libcerror_error_set(
4379 error,
4380 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4381 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4382 "%s: invalid 6th UTF-8 character byte: 0x%02" PRIx8 ".",
4383 function,
4384 utf8_string[ safe_utf8_string_index + 5 ] );
4385
4386 return( -1 );
4387 }
4388 safe_unicode_character <<= 6;
4389 safe_unicode_character += utf8_string[ safe_utf8_string_index + 5 ];
4390 safe_unicode_character -= 0x082082080;
4391 }
4392 /* Determine if the Unicode character is valid
4393 */
4394 if( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
4395 {
4396 safe_unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
4397 }
4398 *unicode_character = safe_unicode_character;
4399 *utf8_string_index = safe_utf8_string_index + 1 + utf8_character_additional_bytes;
4400
4401 return( 1 );
4402 }
4403
4404 /* Copies an Unicode character into a UTF-8 string
4405 * Returns 1 if successful or -1 on error
4406 */
libuna_unicode_character_copy_to_utf8(libuna_unicode_character_t unicode_character,libuna_utf8_character_t * utf8_string,size_t utf8_string_size,size_t * utf8_string_index,libcerror_error_t ** error)4407 int libuna_unicode_character_copy_to_utf8(
4408 libuna_unicode_character_t unicode_character,
4409 libuna_utf8_character_t *utf8_string,
4410 size_t utf8_string_size,
4411 size_t *utf8_string_index,
4412 libcerror_error_t **error )
4413 {
4414 static char *function = "libuna_unicode_character_copy_to_utf8";
4415 size_t safe_utf8_string_index = 0;
4416 size_t utf8_character_iterator = 0;
4417 uint8_t utf8_character_additional_bytes = 0;
4418 uint8_t utf8_first_character_mark = 0;
4419
4420 if( utf8_string == NULL )
4421 {
4422 libcerror_error_set(
4423 error,
4424 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4425 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4426 "%s: invalid UTF-8 string.",
4427 function );
4428
4429 return( -1 );
4430 }
4431 if( utf8_string_size > (size_t) SSIZE_MAX )
4432 {
4433 libcerror_error_set(
4434 error,
4435 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4436 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4437 "%s: invalid UTF-8 string size value exceeds maximum.",
4438 function );
4439
4440 return( -1 );
4441 }
4442 if( utf8_string_index == NULL )
4443 {
4444 libcerror_error_set(
4445 error,
4446 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4447 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4448 "%s: invalid UTF-8 string index.",
4449 function );
4450
4451 return( -1 );
4452 }
4453 safe_utf8_string_index = *utf8_string_index;
4454
4455 if( safe_utf8_string_index >= utf8_string_size )
4456 {
4457 libcerror_error_set(
4458 error,
4459 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4460 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4461 "%s: UTF-8 string too small.",
4462 function );
4463
4464 return( -1 );
4465 }
4466 /* Determine if the Unicode character is valid
4467 */
4468 if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
4469 {
4470 unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
4471 }
4472 /* Determine how many UTF-8 character bytes are required
4473 */
4474 if( unicode_character < 0x080 )
4475 {
4476 utf8_character_additional_bytes = 0;
4477 utf8_first_character_mark = 0;
4478 }
4479 else if( unicode_character < 0x0800 )
4480 {
4481 utf8_character_additional_bytes = 1;
4482 utf8_first_character_mark = 0x0c0;
4483 }
4484 else if( unicode_character < 0x010000 )
4485 {
4486 utf8_character_additional_bytes = 2;
4487 utf8_first_character_mark = 0x0e0;
4488 }
4489 else if( unicode_character < 0x0200000 )
4490 {
4491 utf8_character_additional_bytes = 3;
4492 utf8_first_character_mark = 0x0f0;
4493 }
4494 else if( unicode_character < 0x0400000 )
4495 {
4496 utf8_character_additional_bytes = 4;
4497 utf8_first_character_mark = 0x0f8;
4498 }
4499 else
4500 {
4501 utf8_character_additional_bytes = 5;
4502 utf8_first_character_mark = 0x0fc;
4503 }
4504 /* Convert Unicode character into UTF-8 character bytes
4505 */
4506 if( ( utf8_character_additional_bytes > utf8_string_size )
4507 || ( safe_utf8_string_index >= ( utf8_string_size - utf8_character_additional_bytes ) ) )
4508 {
4509 libcerror_error_set(
4510 error,
4511 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4512 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4513 "%s: UTF-8 string too small.",
4514 function );
4515
4516 return( -1 );
4517 }
4518 for( utf8_character_iterator = safe_utf8_string_index + utf8_character_additional_bytes;
4519 utf8_character_iterator > safe_utf8_string_index;
4520 utf8_character_iterator-- )
4521 {
4522 utf8_string[ utf8_character_iterator ] = (libuna_utf8_character_t) ( ( unicode_character & 0x0bf ) | 0x080 );
4523
4524 unicode_character >>= 6;
4525 }
4526 utf8_string[ safe_utf8_string_index ] = (libuna_utf8_character_t) ( unicode_character | utf8_first_character_mark );
4527
4528 *utf8_string_index = safe_utf8_string_index + 1 + utf8_character_additional_bytes;
4529
4530 return( 1 );
4531 }
4532
4533 /* Determines the size of an UTF-16 character from an Unicode character
4534 * Adds the size to the UTF-16 character size value
4535 * Returns 1 if successful or -1 on error
4536 */
libuna_unicode_character_size_to_utf16(libuna_unicode_character_t unicode_character,size_t * utf16_character_size,libcerror_error_t ** error)4537 int libuna_unicode_character_size_to_utf16(
4538 libuna_unicode_character_t unicode_character,
4539 size_t *utf16_character_size,
4540 libcerror_error_t **error )
4541 {
4542 static char *function = "libuna_unicode_character_size_to_utf16";
4543
4544 if( utf16_character_size == NULL )
4545 {
4546 libcerror_error_set(
4547 error,
4548 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4549 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4550 "%s: invalid UTF-16 character size.",
4551 function );
4552
4553 return( -1 );
4554 }
4555 if( ( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
4556 && ( unicode_character <= LIBUNA_UTF16_CHARACTER_MAX ) )
4557 {
4558 *utf16_character_size += 2;
4559 }
4560 else
4561 {
4562 *utf16_character_size += 1;
4563 }
4564 return( 1 );
4565 }
4566
4567 /* Copies an Unicode character from an UTF-16 string
4568 * Returns 1 if successful or -1 on error
4569 */
libuna_unicode_character_copy_from_utf16(libuna_unicode_character_t * unicode_character,const libuna_utf16_character_t * utf16_string,size_t utf16_string_size,size_t * utf16_string_index,libcerror_error_t ** error)4570 int libuna_unicode_character_copy_from_utf16(
4571 libuna_unicode_character_t *unicode_character,
4572 const libuna_utf16_character_t *utf16_string,
4573 size_t utf16_string_size,
4574 size_t *utf16_string_index,
4575 libcerror_error_t **error )
4576 {
4577 static char *function = "libuna_unicode_character_copy_from_utf16";
4578 libuna_unicode_character_t safe_unicode_character = 0;
4579 libuna_utf16_character_t utf16_surrogate = 0;
4580 size_t safe_utf16_string_index = 0;
4581
4582 if( unicode_character == NULL )
4583 {
4584 libcerror_error_set(
4585 error,
4586 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4587 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4588 "%s: invalid Unicode character.",
4589 function );
4590
4591 return( -1 );
4592 }
4593 if( utf16_string == NULL )
4594 {
4595 libcerror_error_set(
4596 error,
4597 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4598 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4599 "%s: invalid UTF-16 string.",
4600 function );
4601
4602 return( -1 );
4603 }
4604 if( utf16_string_size > (size_t) SSIZE_MAX )
4605 {
4606 libcerror_error_set(
4607 error,
4608 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4609 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4610 "%s: invalid UTF-16 string size value exceeds maximum.",
4611 function );
4612
4613 return( -1 );
4614 }
4615 if( utf16_string_index == NULL )
4616 {
4617 libcerror_error_set(
4618 error,
4619 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4620 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4621 "%s: invalid UTF-16 string index.",
4622 function );
4623
4624 return( -1 );
4625 }
4626 safe_utf16_string_index = *utf16_string_index;
4627
4628 if( safe_utf16_string_index >= utf16_string_size )
4629 {
4630 libcerror_error_set(
4631 error,
4632 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4633 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4634 "%s: UTF-16 string too small.",
4635 function );
4636
4637 return( -1 );
4638 }
4639 safe_unicode_character = utf16_string[ safe_utf16_string_index ];
4640 safe_utf16_string_index += 1;
4641
4642 /* Determine if the UTF-16 character is within the high surrogate range
4643 */
4644 if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4645 && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
4646 {
4647 if( safe_utf16_string_index >= utf16_string_size )
4648 {
4649 libcerror_error_set(
4650 error,
4651 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4652 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4653 "%s: missing surrogate UTF-16 character bytes.",
4654 function );
4655
4656 return( -1 );
4657 }
4658 utf16_surrogate = utf16_string[ safe_utf16_string_index ];
4659 safe_utf16_string_index += 1;
4660
4661 /* Determine if the UTF-16 character is within the low surrogate range
4662 */
4663 if( ( utf16_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
4664 && ( utf16_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4665 {
4666 safe_unicode_character -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
4667 safe_unicode_character <<= 10;
4668 safe_unicode_character += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
4669 safe_unicode_character += 0x010000;
4670 }
4671 else
4672 {
4673 safe_unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
4674 }
4675 }
4676 /* Determine if the Unicode character is valid
4677 */
4678 else if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
4679 && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4680 {
4681 safe_unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
4682 }
4683 *unicode_character = safe_unicode_character;
4684 *utf16_string_index = safe_utf16_string_index;
4685
4686 return( 1 );
4687 }
4688
4689 /* Copies an Unicode character into a UTF-16 string
4690 * Returns 1 if successful or -1 on error
4691 */
libuna_unicode_character_copy_to_utf16(libuna_unicode_character_t unicode_character,libuna_utf16_character_t * utf16_string,size_t utf16_string_size,size_t * utf16_string_index,libcerror_error_t ** error)4692 int libuna_unicode_character_copy_to_utf16(
4693 libuna_unicode_character_t unicode_character,
4694 libuna_utf16_character_t *utf16_string,
4695 size_t utf16_string_size,
4696 size_t *utf16_string_index,
4697 libcerror_error_t **error )
4698 {
4699 static char *function = "libuna_unicode_character_copy_to_utf16";
4700 size_t safe_utf16_string_index = 0;
4701
4702 if( utf16_string == NULL )
4703 {
4704 libcerror_error_set(
4705 error,
4706 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4707 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4708 "%s: invalid UTF-16 string.",
4709 function );
4710
4711 return( -1 );
4712 }
4713 if( utf16_string_size > (size_t) SSIZE_MAX )
4714 {
4715 libcerror_error_set(
4716 error,
4717 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4718 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4719 "%s: invalid UTF-16 string size value exceeds maximum.",
4720 function );
4721
4722 return( -1 );
4723 }
4724 if( utf16_string_index == NULL )
4725 {
4726 libcerror_error_set(
4727 error,
4728 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4729 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4730 "%s: invalid UTF-16 string index.",
4731 function );
4732
4733 return( -1 );
4734 }
4735 safe_utf16_string_index = *utf16_string_index;
4736
4737 if( safe_utf16_string_index >= utf16_string_size )
4738 {
4739 libcerror_error_set(
4740 error,
4741 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4742 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4743 "%s: UTF-16 string too small.",
4744 function );
4745
4746 return( -1 );
4747 }
4748 /* Determine if the Unicode character is valid
4749 */
4750 if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4751 && ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4752 || ( unicode_character > LIBUNA_UTF16_CHARACTER_MAX ) )
4753 {
4754 unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
4755 }
4756 if( unicode_character <= LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
4757 {
4758 utf16_string[ safe_utf16_string_index++ ] = (libuna_utf16_character_t) unicode_character;
4759 }
4760 else
4761 {
4762 if( ( utf16_string_size < 2 )
4763 || ( safe_utf16_string_index > ( utf16_string_size - 2 ) ) )
4764 {
4765 libcerror_error_set(
4766 error,
4767 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4768 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4769 "%s: UTF-16 string too small.",
4770 function );
4771
4772 return( -1 );
4773 }
4774 unicode_character -= 0x010000;
4775 utf16_string[ safe_utf16_string_index++ ] = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
4776 utf16_string[ safe_utf16_string_index++ ] = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
4777 }
4778 *utf16_string_index = safe_utf16_string_index;
4779
4780 return( 1 );
4781 }
4782
4783 /* Copies an Unicode character from an UTF-16 stream
4784 * Returns 1 if successful or -1 on error
4785 */
libuna_unicode_character_copy_from_utf16_stream(libuna_unicode_character_t * unicode_character,const uint8_t * utf16_stream,size_t utf16_stream_size,size_t * utf16_stream_index,int byte_order,libcerror_error_t ** error)4786 int libuna_unicode_character_copy_from_utf16_stream(
4787 libuna_unicode_character_t *unicode_character,
4788 const uint8_t *utf16_stream,
4789 size_t utf16_stream_size,
4790 size_t *utf16_stream_index,
4791 int byte_order,
4792 libcerror_error_t **error )
4793 {
4794 static char *function = "libuna_unicode_character_copy_from_utf16_stream";
4795 libuna_unicode_character_t safe_unicode_character = 0;
4796 libuna_utf16_character_t utf16_surrogate = 0;
4797 size_t safe_utf16_stream_index = 0;
4798
4799 if( unicode_character == NULL )
4800 {
4801 libcerror_error_set(
4802 error,
4803 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4804 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4805 "%s: invalid Unicode character.",
4806 function );
4807
4808 return( -1 );
4809 }
4810 if( utf16_stream == NULL )
4811 {
4812 libcerror_error_set(
4813 error,
4814 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4815 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4816 "%s: invalid UTF-16 stream.",
4817 function );
4818
4819 return( -1 );
4820 }
4821 if( utf16_stream_size > (size_t) SSIZE_MAX )
4822 {
4823 libcerror_error_set(
4824 error,
4825 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4826 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4827 "%s: invalid UTF-16 stream size value exceeds maximum.",
4828 function );
4829
4830 return( -1 );
4831 }
4832 if( utf16_stream_index == NULL )
4833 {
4834 libcerror_error_set(
4835 error,
4836 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4837 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4838 "%s: invalid UTF-16 stream index.",
4839 function );
4840
4841 return( -1 );
4842 }
4843 if( ( byte_order != LIBUNA_ENDIAN_BIG )
4844 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
4845 {
4846 libcerror_error_set(
4847 error,
4848 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4849 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4850 "%s: unsupported byte order.",
4851 function );
4852
4853 return( -1 );
4854 }
4855 safe_utf16_stream_index = *utf16_stream_index;
4856
4857 if( ( utf16_stream_size < 2 )
4858 || ( safe_utf16_stream_index > ( utf16_stream_size - 2 ) ) )
4859 {
4860 libcerror_error_set(
4861 error,
4862 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4863 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4864 "%s: UTF-16 stream too small.",
4865 function );
4866
4867 return( -1 );
4868 }
4869 if( byte_order == LIBUNA_ENDIAN_BIG )
4870 {
4871 safe_unicode_character = utf16_stream[ safe_utf16_stream_index ];
4872 safe_unicode_character <<= 8;
4873 safe_unicode_character += utf16_stream[ safe_utf16_stream_index + 1 ];
4874 }
4875 else if( byte_order == LIBUNA_ENDIAN_LITTLE )
4876 {
4877 safe_unicode_character = utf16_stream[ safe_utf16_stream_index + 1 ];
4878 safe_unicode_character <<= 8;
4879 safe_unicode_character += utf16_stream[ safe_utf16_stream_index ];
4880 }
4881 safe_utf16_stream_index += 2;
4882
4883 /* Determine if the Unicode character is valid
4884 */
4885 if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
4886 && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4887 {
4888 libcerror_error_set(
4889 error,
4890 LIBCERROR_ERROR_DOMAIN_RUNTIME,
4891 LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4892 "%s: unsupported UTF-16 character.",
4893 function );
4894
4895 return( -1 );
4896 }
4897 /* Determine if the UTF-16 character is within the high surrogate range
4898 */
4899 if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4900 && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
4901 {
4902 if( safe_utf16_stream_index > ( utf16_stream_size - 2 ) )
4903 {
4904 libcerror_error_set(
4905 error,
4906 LIBCERROR_ERROR_DOMAIN_RUNTIME,
4907 LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4908 "%s: missing surrogate UTF-16 character bytes.",
4909 function );
4910
4911 return( -1 );
4912 }
4913 if( byte_order == LIBUNA_ENDIAN_BIG )
4914 {
4915 utf16_surrogate = utf16_stream[ safe_utf16_stream_index ];
4916 utf16_surrogate <<= 8;
4917 utf16_surrogate += utf16_stream[ safe_utf16_stream_index + 1 ];
4918 }
4919 else if( byte_order == LIBUNA_ENDIAN_LITTLE )
4920 {
4921 utf16_surrogate = utf16_stream[ safe_utf16_stream_index + 1 ];
4922 utf16_surrogate <<= 8;
4923 utf16_surrogate += utf16_stream[ safe_utf16_stream_index ];
4924 }
4925 safe_utf16_stream_index += 2;
4926
4927 /* Determine if the UTF-16 character is within the low surrogate range
4928 */
4929 if( ( utf16_surrogate < LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
4930 || ( utf16_surrogate > LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4931 {
4932 libcerror_error_set(
4933 error,
4934 LIBCERROR_ERROR_DOMAIN_RUNTIME,
4935 LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4936 "%s: unsupported low surrogate UTF-16 character.",
4937 function );
4938
4939 return( -1 );
4940 }
4941 safe_unicode_character -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
4942 safe_unicode_character <<= 10;
4943 safe_unicode_character += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
4944 safe_unicode_character += 0x010000;
4945 }
4946 *unicode_character = safe_unicode_character;
4947 *utf16_stream_index = safe_utf16_stream_index;
4948
4949 return( 1 );
4950 }
4951
4952 /* Copies an Unicode character to an UTF-16 stream
4953 * Returns 1 if successful or -1 on error
4954 */
libuna_unicode_character_copy_to_utf16_stream(libuna_unicode_character_t unicode_character,uint8_t * utf16_stream,size_t utf16_stream_size,size_t * utf16_stream_index,int byte_order,libcerror_error_t ** error)4955 int libuna_unicode_character_copy_to_utf16_stream(
4956 libuna_unicode_character_t unicode_character,
4957 uint8_t *utf16_stream,
4958 size_t utf16_stream_size,
4959 size_t *utf16_stream_index,
4960 int byte_order,
4961 libcerror_error_t **error )
4962 {
4963 static char *function = "libuna_unicode_character_copy_to_utf16_stream";
4964 libuna_utf16_character_t utf16_surrogate = 0;
4965 size_t safe_utf16_stream_index = 0;
4966
4967 if( utf16_stream == NULL )
4968 {
4969 libcerror_error_set(
4970 error,
4971 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4972 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4973 "%s: invalid UTF-16 stream.",
4974 function );
4975
4976 return( -1 );
4977 }
4978 if( utf16_stream_size > (size_t) SSIZE_MAX )
4979 {
4980 libcerror_error_set(
4981 error,
4982 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4983 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4984 "%s: invalid UTF-16 stream size value exceeds maximum.",
4985 function );
4986
4987 return( -1 );
4988 }
4989 if( utf16_stream_index == NULL )
4990 {
4991 libcerror_error_set(
4992 error,
4993 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4994 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4995 "%s: invalid UTF-16 stream index.",
4996 function );
4997
4998 return( -1 );
4999 }
5000 if( ( byte_order != LIBUNA_ENDIAN_BIG )
5001 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
5002 {
5003 libcerror_error_set(
5004 error,
5005 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5006 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5007 "%s: unsupported byte order.",
5008 function );
5009
5010 return( -1 );
5011 }
5012 safe_utf16_stream_index = *utf16_stream_index;
5013
5014 /* Determine if the Unicode character is valid
5015 */
5016 if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5017 && ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5018 || ( unicode_character > LIBUNA_UTF16_CHARACTER_MAX ) )
5019 {
5020 libcerror_error_set(
5021 error,
5022 LIBCERROR_ERROR_DOMAIN_RUNTIME,
5023 LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5024 "%s: unsupported Unicode character.",
5025 function );
5026
5027 return( -1 );
5028 }
5029 if( unicode_character <= LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
5030 {
5031 if( ( utf16_stream_size < 2 )
5032 || ( safe_utf16_stream_index > ( utf16_stream_size - 2 ) ) )
5033 {
5034 libcerror_error_set(
5035 error,
5036 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5037 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5038 "%s: UTF-16 stream too small.",
5039 function );
5040
5041 return( -1 );
5042 }
5043 if( byte_order == LIBUNA_ENDIAN_BIG )
5044 {
5045 utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5046 unicode_character >>= 8;
5047 utf16_stream[ safe_utf16_stream_index ] = (uint8_t) ( unicode_character & 0xff );
5048 }
5049 else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5050 {
5051 utf16_stream[ safe_utf16_stream_index ] = (uint8_t) ( unicode_character & 0xff );
5052 unicode_character >>= 8;
5053 utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5054 }
5055 safe_utf16_stream_index += 2;
5056 }
5057 else
5058 {
5059 if( ( utf16_stream_size < 4 )
5060 || ( safe_utf16_stream_index > ( utf16_stream_size - 4 ) ) )
5061 {
5062 libcerror_error_set(
5063 error,
5064 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5065 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5066 "%s: UTF-16 stream too small.",
5067 function );
5068
5069 return( -1 );
5070 }
5071 unicode_character -= 0x010000;
5072
5073 utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
5074
5075 if( byte_order == LIBUNA_ENDIAN_BIG )
5076 {
5077 utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5078 utf16_surrogate >>= 8;
5079 utf16_stream[ safe_utf16_stream_index ] = (uint8_t) ( utf16_surrogate & 0xff );
5080 }
5081 else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5082 {
5083 utf16_stream[ safe_utf16_stream_index ] = (uint8_t) ( utf16_surrogate & 0xff );
5084 utf16_surrogate >>= 8;
5085 utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5086 }
5087 safe_utf16_stream_index += 2;
5088
5089 utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
5090
5091 if( byte_order == LIBUNA_ENDIAN_BIG )
5092 {
5093 utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5094 utf16_surrogate >>= 8;
5095 utf16_stream[ safe_utf16_stream_index ] = (uint8_t) ( utf16_surrogate & 0xff );
5096 }
5097 else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5098 {
5099 utf16_stream[ safe_utf16_stream_index ] = (uint8_t) ( utf16_surrogate & 0xff );
5100 utf16_surrogate >>= 8;
5101 utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5102 }
5103 safe_utf16_stream_index += 2;
5104 }
5105 *utf16_stream_index = safe_utf16_stream_index;
5106
5107 return( 1 );
5108 }
5109
5110 /* Determines the size of an UTF-32 character from an Unicode character
5111 * Adds the size to the UTF-32 character size value
5112 * Returns 1 if successful or -1 on error
5113 */
libuna_unicode_character_size_to_utf32(libuna_unicode_character_t unicode_character LIBUNA_ATTRIBUTE_UNUSED,size_t * utf32_character_size,libcerror_error_t ** error)5114 int libuna_unicode_character_size_to_utf32(
5115 libuna_unicode_character_t unicode_character LIBUNA_ATTRIBUTE_UNUSED,
5116 size_t *utf32_character_size,
5117 libcerror_error_t **error )
5118 {
5119 static char *function = "libuna_unicode_character_size_to_utf32";
5120
5121 LIBUNA_UNREFERENCED_PARAMETER( unicode_character )
5122
5123 if( utf32_character_size == NULL )
5124 {
5125 libcerror_error_set(
5126 error,
5127 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5128 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5129 "%s: invalid UTF-32 character size.",
5130 function );
5131
5132 return( -1 );
5133 }
5134 *utf32_character_size += 1;
5135
5136 return( 1 );
5137 }
5138
5139 /* Copies an Unicode character from an UTF-32 string
5140 * Returns 1 if successful or -1 on error
5141 */
libuna_unicode_character_copy_from_utf32(libuna_unicode_character_t * unicode_character,const libuna_utf32_character_t * utf32_string,size_t utf32_string_size,size_t * utf32_string_index,libcerror_error_t ** error)5142 int libuna_unicode_character_copy_from_utf32(
5143 libuna_unicode_character_t *unicode_character,
5144 const libuna_utf32_character_t *utf32_string,
5145 size_t utf32_string_size,
5146 size_t *utf32_string_index,
5147 libcerror_error_t **error )
5148 {
5149 static char *function = "libuna_unicode_character_copy_from_utf32";
5150 libuna_unicode_character_t safe_unicode_character = 0;
5151 size_t safe_utf32_string_index = 0;
5152
5153 if( unicode_character == NULL )
5154 {
5155 libcerror_error_set(
5156 error,
5157 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5158 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5159 "%s: invalid Unicode character.",
5160 function );
5161
5162 return( -1 );
5163 }
5164 if( utf32_string == NULL )
5165 {
5166 libcerror_error_set(
5167 error,
5168 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5169 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5170 "%s: invalid UTF-32 string.",
5171 function );
5172
5173 return( -1 );
5174 }
5175 if( utf32_string_size > (size_t) SSIZE_MAX )
5176 {
5177 libcerror_error_set(
5178 error,
5179 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5180 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5181 "%s: invalid UTF-32 string size value exceeds maximum.",
5182 function );
5183
5184 return( -1 );
5185 }
5186 if( utf32_string_index == NULL )
5187 {
5188 libcerror_error_set(
5189 error,
5190 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5191 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5192 "%s: invalid UTF-32 string index.",
5193 function );
5194
5195 return( -1 );
5196 }
5197 safe_utf32_string_index = *utf32_string_index;
5198
5199 if( safe_utf32_string_index >= utf32_string_size )
5200 {
5201 libcerror_error_set(
5202 error,
5203 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5204 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5205 "%s: UTF-32 string too small.",
5206 function );
5207
5208 return( -1 );
5209 }
5210 /* Determine if the Unicode character is valid
5211 */
5212 if( ( utf32_string[ safe_utf32_string_index ] >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
5213 && ( utf32_string[ safe_utf32_string_index ] <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5214 {
5215 safe_unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
5216 }
5217 else
5218 {
5219 safe_unicode_character = utf32_string[ safe_utf32_string_index ];
5220 }
5221 *unicode_character = safe_unicode_character;
5222 *utf32_string_index = safe_utf32_string_index + 1;
5223
5224 return( 1 );
5225 }
5226
5227 /* Copies an Unicode character into a UTF-32 string
5228 * Returns 1 if successful or -1 on error
5229 */
libuna_unicode_character_copy_to_utf32(libuna_unicode_character_t unicode_character,libuna_utf32_character_t * utf32_string,size_t utf32_string_size,size_t * utf32_string_index,libcerror_error_t ** error)5230 int libuna_unicode_character_copy_to_utf32(
5231 libuna_unicode_character_t unicode_character,
5232 libuna_utf32_character_t *utf32_string,
5233 size_t utf32_string_size,
5234 size_t *utf32_string_index,
5235 libcerror_error_t **error )
5236 {
5237 static char *function = "libuna_unicode_character_copy_to_utf32";
5238 size_t safe_utf32_string_index = 0;
5239
5240 if( utf32_string == NULL )
5241 {
5242 libcerror_error_set(
5243 error,
5244 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5245 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5246 "%s: invalid UTF-32 string.",
5247 function );
5248
5249 return( -1 );
5250 }
5251 if( utf32_string_size > (size_t) SSIZE_MAX )
5252 {
5253 libcerror_error_set(
5254 error,
5255 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5256 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5257 "%s: invalid UTF-32 string size value exceeds maximum.",
5258 function );
5259
5260 return( -1 );
5261 }
5262 if( utf32_string_index == NULL )
5263 {
5264 libcerror_error_set(
5265 error,
5266 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5267 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5268 "%s: invalid UTF-32 string index.",
5269 function );
5270
5271 return( -1 );
5272 }
5273 safe_utf32_string_index = *utf32_string_index;
5274
5275 if( safe_utf32_string_index >= utf32_string_size )
5276 {
5277 libcerror_error_set(
5278 error,
5279 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5280 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5281 "%s: UTF-32 string too small.",
5282 function );
5283
5284 return( -1 );
5285 }
5286 /* Determine if the Unicode character is valid
5287 */
5288 if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5289 && ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5290 || ( unicode_character > LIBUNA_UTF32_CHARACTER_MAX ) )
5291 {
5292 utf32_string[ safe_utf32_string_index ] = (libuna_utf32_character_t) LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
5293 }
5294 else
5295 {
5296 utf32_string[ safe_utf32_string_index ] = (libuna_utf32_character_t) unicode_character;
5297 }
5298 *utf32_string_index = safe_utf32_string_index + 1;
5299
5300 return( 1 );
5301 }
5302
5303 /* Copies an Unicode character from an UTF-32 stream
5304 * Returns 1 if successful or -1 on error
5305 */
libuna_unicode_character_copy_from_utf32_stream(libuna_unicode_character_t * unicode_character,const uint8_t * utf32_stream,size_t utf32_stream_size,size_t * utf32_stream_index,int byte_order,libcerror_error_t ** error)5306 int libuna_unicode_character_copy_from_utf32_stream(
5307 libuna_unicode_character_t *unicode_character,
5308 const uint8_t *utf32_stream,
5309 size_t utf32_stream_size,
5310 size_t *utf32_stream_index,
5311 int byte_order,
5312 libcerror_error_t **error )
5313 {
5314 static char *function = "libuna_unicode_character_copy_from_utf32_stream";
5315 libuna_unicode_character_t safe_unicode_character = 0;
5316 size_t safe_utf32_stream_index = 0;
5317
5318 if( unicode_character == NULL )
5319 {
5320 libcerror_error_set(
5321 error,
5322 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5323 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5324 "%s: invalid Unicode character.",
5325 function );
5326
5327 return( -1 );
5328 }
5329 if( utf32_stream == NULL )
5330 {
5331 libcerror_error_set(
5332 error,
5333 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5334 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5335 "%s: invalid UTF-32 stream.",
5336 function );
5337
5338 return( -1 );
5339 }
5340 if( utf32_stream_size > (size_t) SSIZE_MAX )
5341 {
5342 libcerror_error_set(
5343 error,
5344 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5345 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5346 "%s: invalid UTF-32 stream size value exceeds maximum.",
5347 function );
5348
5349 return( -1 );
5350 }
5351 if( utf32_stream_index == NULL )
5352 {
5353 libcerror_error_set(
5354 error,
5355 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5356 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5357 "%s: invalid UTF-32 stream index.",
5358 function );
5359
5360 return( -1 );
5361 }
5362 safe_utf32_stream_index = *utf32_stream_index;
5363
5364 if( ( utf32_stream_size < 4 )
5365 || ( safe_utf32_stream_index > ( utf32_stream_size - 4 ) ) )
5366 {
5367 libcerror_error_set(
5368 error,
5369 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5370 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5371 "%s: UTF-32 stream too small.",
5372 function );
5373
5374 return( -1 );
5375 }
5376 if( ( byte_order != LIBUNA_ENDIAN_BIG )
5377 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
5378 {
5379 libcerror_error_set(
5380 error,
5381 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5382 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5383 "%s: unsupported byte order.",
5384 function );
5385
5386 return( -1 );
5387 }
5388 if( byte_order == LIBUNA_ENDIAN_BIG )
5389 {
5390 safe_unicode_character = utf32_stream[ safe_utf32_stream_index ];
5391 safe_unicode_character <<= 8;
5392 safe_unicode_character += utf32_stream[ safe_utf32_stream_index + 1 ];
5393 safe_unicode_character <<= 8;
5394 safe_unicode_character += utf32_stream[ safe_utf32_stream_index + 2 ];
5395 safe_unicode_character <<= 8;
5396 safe_unicode_character += utf32_stream[ safe_utf32_stream_index + 3 ];
5397 }
5398 else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5399 {
5400 safe_unicode_character = utf32_stream[ safe_utf32_stream_index + 3 ];
5401 safe_unicode_character <<= 8;
5402 safe_unicode_character += utf32_stream[ safe_utf32_stream_index + 2 ];
5403 safe_unicode_character <<= 8;
5404 safe_unicode_character += utf32_stream[ safe_utf32_stream_index + 1 ];
5405 safe_unicode_character <<= 8;
5406 safe_unicode_character += utf32_stream[ safe_utf32_stream_index ];
5407 }
5408 /* Determine if the Unicode character is valid
5409 */
5410 if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
5411 && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5412 {
5413 safe_unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
5414 }
5415 *unicode_character = safe_unicode_character;
5416 *utf32_stream_index = safe_utf32_stream_index + 4;
5417
5418 return( 1 );
5419 }
5420
5421 /* Copies an Unicode character to an UTF-32 stream
5422 * Returns 1 if successful or -1 on error
5423 */
libuna_unicode_character_copy_to_utf32_stream(libuna_unicode_character_t unicode_character,uint8_t * utf32_stream,size_t utf32_stream_size,size_t * utf32_stream_index,int byte_order,libcerror_error_t ** error)5424 int libuna_unicode_character_copy_to_utf32_stream(
5425 libuna_unicode_character_t unicode_character,
5426 uint8_t *utf32_stream,
5427 size_t utf32_stream_size,
5428 size_t *utf32_stream_index,
5429 int byte_order,
5430 libcerror_error_t **error )
5431 {
5432 static char *function = "libuna_unicode_character_copy_to_utf32_stream";
5433 size_t safe_utf32_stream_index = 0;
5434
5435 if( utf32_stream == NULL )
5436 {
5437 libcerror_error_set(
5438 error,
5439 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5440 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5441 "%s: invalid UTF-32 stream.",
5442 function );
5443
5444 return( -1 );
5445 }
5446 if( utf32_stream_size > (size_t) SSIZE_MAX )
5447 {
5448 libcerror_error_set(
5449 error,
5450 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5451 LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5452 "%s: invalid UTF-32 stream size value exceeds maximum.",
5453 function );
5454
5455 return( -1 );
5456 }
5457 if( utf32_stream_index == NULL )
5458 {
5459 libcerror_error_set(
5460 error,
5461 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5462 LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5463 "%s: invalid UTF-32 stream index.",
5464 function );
5465
5466 return( -1 );
5467 }
5468 safe_utf32_stream_index = *utf32_stream_index;
5469
5470 if( ( utf32_stream_size < 4 )
5471 || ( safe_utf32_stream_index > ( utf32_stream_size - 4 ) ) )
5472 {
5473 libcerror_error_set(
5474 error,
5475 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5476 LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5477 "%s: UTF-32 stream too small.",
5478 function );
5479
5480 return( -1 );
5481 }
5482 if( ( byte_order != LIBUNA_ENDIAN_BIG )
5483 && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
5484 {
5485 libcerror_error_set(
5486 error,
5487 LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5488 LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5489 "%s: unsupported byte order.",
5490 function );
5491
5492 return( -1 );
5493 }
5494 /* Determine if the Unicode character is valid
5495 */
5496 if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5497 && ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5498 || ( unicode_character > LIBUNA_UTF32_CHARACTER_MAX ) )
5499 {
5500 unicode_character = LIBUNA_UNICODE_REPLACEMENT_CHARACTER;
5501 }
5502 if( byte_order == LIBUNA_ENDIAN_BIG )
5503 {
5504 utf32_stream[ safe_utf32_stream_index + 3 ] = (uint8_t) ( unicode_character & 0xff );
5505 unicode_character >>= 8;
5506 utf32_stream[ safe_utf32_stream_index + 2 ] = (uint8_t) ( unicode_character & 0xff );
5507 unicode_character >>= 8;
5508 utf32_stream[ safe_utf32_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5509 unicode_character >>= 8;
5510 utf32_stream[ safe_utf32_stream_index ] = (uint8_t) ( unicode_character & 0xff );
5511 }
5512 else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5513 {
5514 utf32_stream[ safe_utf32_stream_index ] = (uint8_t) ( unicode_character & 0xff );
5515 unicode_character >>= 8;
5516 utf32_stream[ safe_utf32_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5517 unicode_character >>= 8;
5518 utf32_stream[ safe_utf32_stream_index + 2 ] = (uint8_t) ( unicode_character & 0xff );
5519 unicode_character >>= 8;
5520 utf32_stream[ safe_utf32_stream_index + 3 ] = (uint8_t) ( unicode_character & 0xff );
5521 }
5522 *utf32_stream_index = safe_utf32_stream_index + 4;
5523
5524 return( 1 );
5525 }
5526
5527