1 /*
2
3 G N O K I I
4
5 A Linux/Unix toolset and driver for the mobile phones.
6
7 This file is part of gnokii.
8
9 Gnokii is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
13
14 Gnokii is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with gnokii; if not, write to the Free Software
21 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
23 Copyright (C) 1999-2000 Pavel Janik ml.
24 Copyright (C) 2001-2011 Pawel Kot
25 Copyright (C) 2002 Markus Plail, Manfred Jonsson
26 Copyright (C) 2002-2004 BORBELY Zoltan
27 Copyright (C) 2003 Martin Goldhahn
28
29 Functions for encoding SMS, calendar and other things.
30
31 */
32
33 #include "config.h"
34
35 #include <stdlib.h>
36 #include <stdio.h>
37 #include <string.h>
38
39 #include "compat.h"
40 #include "misc.h"
41 #include "gnokii.h"
42 #include "gnokii-internal.h"
43
44 #ifdef HAVE_ICONV
45 # include <iconv.h>
46 #endif
47 #ifdef HAVE_LANGINFO_CODESET
48 # include <langinfo.h>
49 #endif
50 #ifdef HAVE_LOCALE_CHARSET
51 # include <libcharset.h>
52 #else
53 /* FIXME: We should include here somehow ../intl/localcharset.h, but it may
54 * cause problems with MSVC. */
55 extern const char *locale_charset(void); /* from ../intl/localcharset.c */
56 #endif
57
58 #include <glib.h>
59
60 /**
61 * base64_alphabet:
62 *
63 * Mapping from 8-bit binary values to base 64 encoding.
64 */
65 static const char *base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
66
67 /**
68 * bcd_digits:
69 *
70 * Mapping from ASCII to BCD digits representing phone numbers and vice versa.
71 * BCD digits are those from Table 10.5.118 of 3GPP TS 04.08 with 'a' replaced by 'p'.
72 */
73 static const char *bcd_digits = "0123456789*#pbc";
74
75 /**
76 * GN_CHAR_UNI_ALPHABET_SIZE:
77 *
78 * Number of characters in GSM default alphabet (for UCS-2 encoding).
79 */
80 #define GN_CHAR_UNI_ALPHABET_SIZE 128
81
82 /**
83 * GN_CHAR_UNI_ESCAPE:
84 *
85 * Value of the escape character for the GSM Alphabet (in UCS-2 encoding).
86 */
87 #define GN_CHAR_UNI_ESCAPE 0x001b
88
89 /**
90 * gsm_default_unicode_alphabet:
91 *
92 * Mapping from GSM default alphabet to UCS-2.
93 *
94 * ETSI GSM 03.38, version 6.0.1, section 6.2.1; Default alphabet. Mapping to UCS-2.
95 * Mapping according to http://unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
96 */
97 static unsigned int gsm_default_unicode_alphabet[GN_CHAR_UNI_ALPHABET_SIZE] = {
98 /* @ £ $ ¥ è é ù ì */
99 0x0040, 0x00a3, 0x0024, 0x00a5, 0x00e8, 0x00e9, 0x00f9, 0x00ec,
100 /* ò Ç \n Ø ø \r Å å */
101 0x00f2, 0x00c7, 0x000a, 0x00d8, 0x00f8, 0x000d, 0x00c5, 0x00e5,
102 /* Δ _ Φ Γ Λ Ω Π Ψ */
103 0x0394, 0x005f, 0x03a6, 0x0393, 0x039b, 0x03a9, 0x03a0, 0x03a8,
104 /* Σ Θ Ξ NBSP Æ æ ß É */
105 0x03a3, 0x0398, 0x039e, 0x00a0, 0x00c6, 0x00e6, 0x00df, 0x00c9,
106 /* ' ' ! " # ¤ % & ' */
107 0x0020, 0x0021, 0x0022, 0x0023, 0x00a4, 0x0025, 0x0026, 0x0027,
108 /* ( ) * + , - . / */
109 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
110 /* 0 1 2 3 4 5 6 7 */
111 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
112 /* 8 9 : ; < = > ? */
113 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
114 /* ¡ A B C D E F G */
115 0x00a1, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
116 /* H I J K L M N O */
117 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
118 /* P Q R S T U V W */
119 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
120 /* X Y Z Ä Ö Ñ Ü § */
121 0x0058, 0x0059, 0x005a, 0x00c4, 0x00d6, 0x00d1, 0x00dc, 0x00a7,
122 /* ¿ a b c d e f g */
123 0x00bf, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
124 /* h i j k l m n o */
125 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
126 /* p q r s t u v w */
127 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
128 /* x y z ä ö ñ ü à */
129 0x0078, 0x0079, 0x007a, 0x00e4, 0x00f6, 0x00f1, 0x00fc, 0x00e0
130 };
131
132 static char application_encoding[64] = "";
133
134 /**
135 * char_def_alphabet:
136 * @value: the UCS-2 character to validate
137 *
138 * Returns: true if the given character matches default alphabet, false otherwise
139 *
140 * It could be possibly optimized but let's face it: nowadays full
141 * lookup of 128 elements table is not that time consuming.
142 */
char_def_alphabet(unsigned int value)143 static int char_def_alphabet(unsigned int value)
144 {
145 int i;
146 for (i = 0; i < GN_CHAR_UNI_ALPHABET_SIZE; i++) {
147 if (gsm_default_unicode_alphabet[i] == value) {
148 return true;
149 }
150 }
151 return false;
152 }
153
154 /**
155 * char_is_escape:
156 * @value: the char to test
157 *
158 * Returns: non zero if @value is an escape character, zero otherwise
159 *
160 * Determines if @value is an escape character for GSM Alphabet.
161 */
char_is_escape(unsigned int value)162 static bool char_is_escape(unsigned int value)
163 {
164 return (value == GN_CHAR_UNI_ESCAPE);
165 }
166
/**
 * get_langinfo_codeset:
 *
 * Returns: a constant string representing a charset encoding
 *
 * Gets the current charset encoding: nl_langinfo(CODESET) when
 * HAVE_LANGINFO_CODESET is defined, locale_charset() otherwise.
 * The first result is cached in a static variable and returned by all
 * later calls.
 */
static const char *get_langinfo_codeset(void)
{
	static const char *cached = NULL;

	if (cached)
		return cached;
#ifdef HAVE_LANGINFO_CODESET
	cached = nl_langinfo(CODESET);
#else
	cached = locale_charset();
#endif
	return cached;
}
188
189 /**
190 * gn_char_get_encoding:
191 *
192 * Returns: a constant string representing a charset encoding
193 *
194 * Gets the encoding set by the application or the default one.
195 */
gn_char_get_encoding()196 GNOKII_API const char *gn_char_get_encoding()
197 {
198 const char *coding;
199 if (*application_encoding)
200 coding = application_encoding; /* app has overriden encoding setting */
201 else
202 coding = get_langinfo_codeset(); /* return default codeset */
203 return coding;
204 }
205
206 /**
207 * gn_char_set_encoding:
208 * @encoding: a string representing the name of a charset encoding
209 *
210 * Sets the encoding preferred by the application.
211 */
gn_char_set_encoding(const char * encoding)212 void gn_char_set_encoding(const char* encoding)
213 {
214 snprintf(application_encoding, sizeof(application_encoding), "%s", encoding);
215 }
216
/**
 * char_mblen:
 * @src: the multibyte string to measure
 *
 * Returns: the length of the string as reported by mbstowcs(), converted
 * to int
 *
 * Counts the characters (not the bytes) of a multibyte string, so that
 * characters like "umlaute" are counted once.
 */
int char_mblen(const char *src)
{
	int len;

	len = mbstowcs(NULL, src, 0);
	dprintf("char_mblen(%s): %i\n", src, len);

	return len;
}
231
232 #ifndef ICONV_CONST
233 # define ICONV_CONST const
234 #endif
235
236 /**
237 * char_mbtowc:
238 * @wchar_t: buffer for the converted wide char string
239 * @src: buffer with the multibyte string to be converted
240 * @maxlen: size of @wchar_t buffer
241 * @mbs: pointer to a variable holding the shift state
242 * or NULL to use a global variable
243 *
244 * Returns: the number of bytes from @src that have been used
245 * or -1 in case of error
246 *
247 * Converts a multibyte string to a wide char string.
248 * Uses iconv() if it is available and iconv_open() succeeds, else mbrtowc()
249 * if available, else mbtowc().
250 */
char_mbtowc(wchar_t * dst,const char * src,int maxlen,MBSTATE * mbs)251 static int char_mbtowc(wchar_t *dst, const char *src, int maxlen, MBSTATE *mbs)
252 {
253 #ifdef HAVE_ICONV
254 size_t nconv;
255 ICONV_CONST char *pin;
256 char *pout;
257 size_t inlen;
258 size_t outlen;
259 iconv_t cd;
260
261 pin = (char *)src;
262 pout = (char *)dst;
263 /* Let's assume that we have at most 4-bytes wide characters */
264 inlen = maxlen;
265 outlen = maxlen * sizeof(wchar_t);
266
267 cd = iconv_open("WCHAR_T", gn_char_get_encoding());
268 if (cd == (iconv_t)-1)
269 goto fallback;
270 nconv = iconv(cd, &pin, &inlen, &pout, &outlen);
271 if ((nconv == (size_t)-1) && (pin == src))
272 perror("char_mbtowc/iconv");
273 iconv_close(cd);
274
275 return (char*)dst == pout ? -1 : pin-src;
276 fallback:
277 #endif
278 if (maxlen >= MB_CUR_MAX)
279 maxlen = MB_CUR_MAX - 1;
280 #ifdef HAVE_WCRTOMB
281 return mbrtowc(dst, src, maxlen, mbs);
282 #else
283 return mbtowc(dst, src, maxlen);
284 #endif
285 }
286
287 /**
288 * char_wctomb:
289 * @dst: buffer for the converted multibyte string
290 * @src: buffer with the wide char string to be converted
291 * @mbs: pointer to a variable holding the shift state
292 * or NULL to use a global variable
293 *
294 * Returns: the number of bytes from @src that have been used
295 * or -1 in case of error
296 *
297 * Converts a wide char string to a multibyte string.
298 * Uses iconv() if it is available and iconv_open() succeeds, else wcrtomb()
299 * if available, else wctomb().
300 */
char_wctomb(char * dst,wchar_t src,MBSTATE * mbs)301 static int char_wctomb(char *dst, wchar_t src, MBSTATE *mbs)
302 {
303 #ifdef HAVE_ICONV
304 size_t nconv;
305 ICONV_CONST char *pin;
306 char *pout;
307 size_t inlen;
308 size_t outlen;
309 iconv_t cd;
310
311 pin = (char *)&src;
312 pout = (char *)dst;
313 inlen = sizeof(wchar_t);
314 outlen = 4;
315
316 cd = iconv_open(gn_char_get_encoding(), "WCHAR_T");
317 if (cd == (iconv_t)-1)
318 goto fallback;
319 nconv = iconv(cd, &pin, &inlen, &pout, &outlen);
320 if (nconv == (size_t)-1)
321 perror("char_wctomb/iconv");
322 iconv_close(cd);
323
324 return nconv == -1 ? -1 : pout-dst;
325 fallback:
326 #endif
327 #ifdef HAVE_WCRTOMB
328 return wcrtomb(dst, src, mbs);
329 #else
330 return wctomb(dst, src);
331 #endif
332 }
333
/**
 * char_def_alphabet_ext:
 * @value: the character to test UCS-2 encoded
 *
 * Returns: non zero if the character can be represented with the Extended GSM Alphabet,
 * zero otherwise
 *
 * Checks if @value is a character defined by the Extended GSM Alphabet.
 *
 * The GSM specification defines 10 characters in the extension of the
 * default alphabet. They do not follow any pattern and are unlikely to
 * change, so they are listed here explicitly.
 */
bool char_def_alphabet_ext(unsigned int value)
{
	switch (value) {
	case 0x0c:	/* form feed */
	case '^':
	case '{':
	case '}':
	case '\\':
	case '[':
	case '~':
	case ']':
	case '|':
	case 0x20ac:	/* euro */
		return true;
	default:
		return false;
	}
}
361
/**
 * char_def_alphabet_ext_count:
 * @input: input string
 * @length: number of bytes of @input to examine
 *
 * Returns: number of bytes in @input for which char_def_alphabet_ext()
 * is true
 */
int char_def_alphabet_ext_count(unsigned char *input, int length)
{
	const unsigned char *cursor = input;
	int remaining, count = 0;

	for (remaining = length; remaining > 0; remaining--) {
		if (char_def_alphabet_ext(*cursor++))
			count++;
	}
	return count;
}
377
/**
 * char_def_alphabet_ext_decode:
 * @value: the character to decode
 *
 * Returns: the decoded character, or '?' if @value can't be decoded
 *
 * Converts a character from Extended GSM Alphabet to UCS-2.
 */
static unsigned int char_def_alphabet_ext_decode(unsigned char value)
{
	static const struct {
		unsigned char gsm;
		unsigned int ucs2;
	} ext_table[] = {
		{ 0x0a, 0x000c },	/* form feed */
		{ 0x14, 0x005e },	/* ^ */
		{ 0x28, 0x007b },	/* { */
		{ 0x29, 0x007d },	/* } */
		{ 0x2f, 0x005c },	/* \ */
		{ 0x3c, 0x005b },	/* [ */
		{ 0x3d, 0x007e },	/* ~ */
		{ 0x3e, 0x005d },	/* ] */
		{ 0x40, 0x007c },	/* | */
		{ 0x65, 0x20ac }	/* € */
	};
	size_t k;

	dprintf("Default extended alphabet\n");
	for (k = 0; k < sizeof(ext_table) / sizeof(ext_table[0]); k++) {
		if (ext_table[k].gsm == value)
			return ext_table[k].ucs2;
	}
	return 0x003f;	/* invalid character, set ? */
}
403
/**
 * char_def_alphabet_ext_encode:
 * @value: the UCS-2 character to encode
 *
 * Returns: the encoded character, or 0 if @value can't be encoded
 *
 * Converts a character from UCS-2 to Extended GSM Alphabet.
 */
static unsigned char char_def_alphabet_ext_encode(unsigned int value)
{
	static const struct {
		unsigned int ucs2;
		unsigned char gsm;
	} ext_table[] = {
		{ 0x0c,   0x0a },	/* form feed */
		{ '^',    0x14 },
		{ '{',    0x28 },
		{ '}',    0x29 },
		{ '\\',   0x2f },
		{ '[',    0x3c },
		{ '~',    0x3d },
		{ ']',    0x3e },
		{ '|',    0x40 },
		{ 0x20ac, 0x65 }	/* euro */
	};
	size_t k;

	for (k = 0; k < sizeof(ext_table) / sizeof(ext_table[0]); k++) {
		if (ext_table[k].ucs2 == value)
			return ext_table[k].gsm;
	}
	return 0x00;	/* invalid character */
}
428
429 /**
430 * gn_char_def_alphabet:
431 * @string: the string to test
432 *
433 * Returns: %true if the string can be represented with the GSM Alphabet,
434 * %false otherwise
435 *
436 * Checks if @value is a string composed only by characters defined by
437 * the default GSM alphabet or its extension.
438 */
gn_char_def_alphabet(unsigned char * string)439 GNOKII_API int gn_char_def_alphabet(unsigned char *string)
440 {
441 unsigned int i, ucs2len, inlen = strlen(string);
442 char *ucs2str;
443
444 /* First, let's know the encoding. We convert it from something to UCS-2 */
445 ucs2str = calloc(2 * inlen, sizeof(unsigned char));
446 if (!ucs2str)
447 /* We are in trouble here. Whatever would be returned is irrelevant */
448 return true;
449 ucs2len = ucs2_encode(ucs2str, 2 * inlen, string, inlen);
450
451 /* It means we couldn't encode the input string */
452 if (ucs2len < 0) {
453 dprintf("gn_char_def_alphabet: failed to encode input string\n");
454 return false;
455 }
456
457 for (i = 0; i < ucs2len / 2; i++) {
458 unsigned int a = 0xff & ucs2str[2 * i], b = 0xff & ucs2str[2 * i + 1];
459 /*
460 * We need the following tests:
461 * - check in the default alphabet table
462 * - check in the extended default alphabet table
463 */
464 if (!char_def_alphabet(256 * a + b) &&
465 !char_def_alphabet_ext(256 * a + b)) {
466 free(ucs2str);
467 return false;
468 }
469 }
470 free(ucs2str);
471 return true;
472 }
473
474 /**
475 * char_def_alphabet_encode:
476 * @value: the character to encode
477 *
478 * Returns: the encoded character, or '?' if @value can't be encoded
479 *
480 * Converts a character from UCS-2 to Default GSM Alphabet.
481 * It could be possibly optimized but let's face it: nowadays full
482 * lookup of 128 elements table is not that time consuming.
483 */
char_def_alphabet_encode(unsigned int value)484 unsigned char char_def_alphabet_encode(unsigned int value)
485 {
486 int i;
487 for (i = 0; i < GN_CHAR_UNI_ALPHABET_SIZE; i++) {
488 if (gsm_default_unicode_alphabet[i] == value) {
489 return i;
490 }
491 }
492 return '?';
493 }
494
495 /**
496 * char_def_alphabet_decode:
497 * @value: the character to decode
498 *
499 * Returns: the decoded character or '?' if @value can't be decoded
500 *
501 * Converts a character from Default GSM Alphabet to UCS-2.
502 */
char_def_alphabet_decode(unsigned char value)503 unsigned int char_def_alphabet_decode(unsigned char value)
504 {
505 if (value < GN_CHAR_UNI_ALPHABET_SIZE) {
506 return gsm_default_unicode_alphabet[value];
507 } else {
508 return 0x003f; /* '?' */
509 }
510 }
511
512 /**
513 * char_def_alphabet_string_stats:
514 * @str: string to get statistics encoded in utf8
515 * @enc_chars: calculated number of the characters from the input string
516 * @ext_chars: calculated number of the characters to be encoded in gsm extended default alphabet
517 *
518 * Returns: GN_SMS_DCS_DefaultAlphabet, if @str can be encoded in GSM default alphabet, GN_SMS_DCS_UCS2 otherwise.
519 *
520 * Calculates statistics and encoding of the input string.
521 */
char_def_alphabet_string_stats(char * str,int * enc_chars,int * ext_chars)522 gn_sms_dcs_alphabet_type char_def_alphabet_string_stats(char *str, int *enc_chars, int *ext_chars)
523 {
524 gn_sms_dcs_alphabet_type enc = GN_SMS_DCS_DefaultAlphabet;
525 char *iter = str;
526 gunichar chr;
527
528 *enc_chars = 0;
529 *ext_chars = 0;
530 if (!g_utf8_validate(iter, -1, NULL)) {
531 dprintf("Not valid UTF8 string\n");
532 return enc;
533 }
534 do {
535 chr = g_utf8_get_char(iter);
536 if (!chr)
537 break;
538 if (char_def_alphabet_ext(chr))
539 (*ext_chars)++;
540 else if (!char_def_alphabet(chr))
541 enc = GN_SMS_DCS_UCS2;
542 (*enc_chars)++;
543 } while (iter = g_utf8_next_char(iter));
544 return enc;
545 }
546
547 /**
548 * char_def_alphabet_copy:
549 * @dest: room for the destination string
550 * @src: source utf-8 string to copy
551 * @len: number of utf-8 characters to copy
552 * @offset: number of utf-8 characters from input to skip
553 *
554 * Returns: number of characters copied
555 *
556 * Function copies @len characters from @src utf-8 string, starting at @offset character to @dest.
557 *
558 */
char_def_alphabet_string_copy(char * dest,const char * src,int len,int offset)559 int char_def_alphabet_string_copy(char *dest, const char *src, int len, int offset)
560 {
561 int i, to_copy = 0;
562 gunichar chr;
563 char *src_offset = g_utf8_offset_to_pointer(src, offset);
564 char *iter = src_offset;
565
566 if (!g_utf8_validate(iter, -1, NULL)) {
567 dprintf("Not valid UTF8 string\n");
568 return to_copy;
569 }
570 for (i = 0; i < len; i++) {
571 chr = g_utf8_get_char(iter);
572 if (!chr)
573 break;
574 if (char_def_alphabet_ext(chr))
575 i++;
576 if (i < len)
577 to_copy++;
578 iter = g_utf8_next_char(iter);
579 }
580 g_utf8_strncpy(dest, src_offset, to_copy);
581 return to_copy;
582 }
583
/* Mask selecting the low 'bits' bits; relies on a local variable named 'bits' */
#define GN_BYTE_MASK ((1 << bits) - 1)

/**
 * char_7bit_unpack:
 * @offset: the bit offset inside the first byte of @input from which to start reading data
 * @in_length: length of @input in bytes
 * @out_length: size of @output in bytes
 * @input: buffer with the string to be converted
 * @output: buffer for the converted string, not NUL terminated
 *
 * Returns: the number of bytes used in @output, never more than @out_length
 *
 * Converts a packed sequence of 7-bit characters from @input into an array
 * of 8-bit characters in @output.
 * Source characters are stored in a char array of @in_length elements.
 * Unpacking stops as soon as either @input is exhausted or @output is full.
 */
int char_7bit_unpack(unsigned int offset, unsigned int in_length, unsigned int out_length,
		     unsigned char *input, unsigned char *output)
{
	unsigned int in_pos = 0;	/* Index of the next octet to read */
	unsigned int out_pos = 0;	/* Index of the next character to write */
	unsigned char rest = 0x00;	/* High bits carried over to the next character */
	int bits;

	bits = offset ? offset : 7;

	/*
	 * The output bound is checked here as well: after a group of 7 octets
	 * an extra character is emitted, which may fill the output buffer.
	 */
	while (in_pos < in_length && out_pos < out_length) {
		unsigned char octet = input[in_pos];

		output[out_pos] = ((octet & GN_BYTE_MASK) << (7 - bits)) | rest;
		rest = octet >> bits;

		/* If we don't start from 0th bit, we shouldn't go to the
		   next char: the first octet holds only the first part of
		   the char, which is completed by the next octet. */
		if ((in_pos != 0) || (bits == 7))
			out_pos++;
		in_pos++;

		if (out_pos >= out_length)
			break;

		/* After reading 7 octets we have read 7 full characters but
		   we have 7 bits as well. This is the next character */
		if (bits == 1) {
			output[out_pos] = rest;
			out_pos++;
			bits = 7;
			rest = 0x00;
		} else {
			bits--;
		}
	}

	return out_pos;
}
637
638 /**
639 * char_7bit_pack:
640 * @offset: the bit offset inside the first byte of @output from which to start writing data
641 * @input: buffer with the string to be converted
642 * @output: buffer for the converted string, not NUL terminated
643 * @in_len: length of @input to be set; includes extended alphabet escape char
644 *
645 * Returns: the number of bytes used in @output
646 *
647 * Converts an array of 8-bit characters from @input into a packed sequence
648 * of 7-bit characters in @output.
649 */
char_7bit_pack(unsigned int offset,unsigned char * input,unsigned char * output,unsigned int * in_len)650 int char_7bit_pack(unsigned int offset, unsigned char *input,
651 unsigned char *output, unsigned int *in_len)
652 {
653
654 unsigned char *out_num = output; /* Current pointer to the output buffer */
655 unsigned int in_num;
656 int bits; /* Number of bits directly copied to output buffer */
657 unsigned int ucs2len, i = 0, len = strlen(input);
658 char *ucs2str;
659
660 /* First, let's know the encoding. We convert it from something to UCS-2 */
661 ucs2str = calloc(2 * len, sizeof(unsigned char));
662 if (!ucs2str)
663 return 0;
664 ucs2len = ucs2_encode(ucs2str, 2 * len, input, len);
665
666 /* Encoding failed */
667 if (ucs2len < 0) {
668 dprintf("gn_char_def_alphabet: failed to encode input string\n");
669 return 0;
670 }
671
672 bits = (7 + offset) % 8;
673
674 /* If we don't begin with 0th bit, we will write only a part of the
675 first octet */
676 if (offset) {
677 *out_num = 0x00;
678 out_num++;
679 }
680
681 *in_len = 0;
682
683 while (i < ucs2len / 2) {
684 unsigned char byte;
685 bool double_char = false;
686 unsigned int a = 0xff & ucs2str[2 * i], b = 0xff & ucs2str[2 * i + 1];
687
688 in_num = 256 * a + b;
689 if (char_def_alphabet_ext(in_num)) {
690 byte = GN_CHAR_UNI_ESCAPE;
691 double_char = true;
692 goto skip;
693 next_char:
694 byte = char_def_alphabet_ext_encode(in_num);
695 double_char = false;
696 (*in_len) += 2;
697 } else {
698 byte = char_def_alphabet_encode(in_num);
699 (*in_len)++;
700 }
701 skip:
702 *out_num = byte >> (7 - bits);
703 /* If we don't write at 0th bit of the octet, we should write
704 a second part of the previous octet */
705 if (bits != 7)
706 *(out_num-1) |= (byte & ((1 << (7-bits)) - 1)) << (bits+1);
707
708 bits--;
709
710 if (bits == -1)
711 bits = 7;
712 else
713 out_num++;
714
715 if (double_char)
716 goto next_char;
717
718 i++;
719 }
720
721 free(ucs2str);
722 return (out_num - output);
723 }
724
725 /**
726 * char_default_alphabet_decode:
727 * @dest: buffer for the converted string, NUL terminated
728 * @src: buffer with the string to be converted
729 * @len: length of @src in bytes
730 *
731 * Converts a string from GSM Alphabet to ISO/IEC 8859-1.
732 * In the worst case where each character in @src must be converted from the
733 * Extended GSM Alphabet, size of @dest must be @len + 1; in general it must be
734 * at least @len - number_of_escape_chars + 1
735 */
char_default_alphabet_decode(unsigned char * dest,const unsigned char * src,int len)736 int char_default_alphabet_decode(unsigned char* dest, const unsigned char* src, int len)
737 {
738 int j, pos = 0;
739 MBSTATE mbs;
740
741 MBSTATE_DEC_CLEAR(mbs);
742
743 for (j = 0; j < len; j++) {
744 wchar_t wc;
745 int length;
746
747 if (char_is_escape(src[j])) {
748 wc = char_def_alphabet_ext_decode(src[++j]);
749 } else {
750 wc = char_def_alphabet_decode(src[j]);
751 }
752 length = char_uni_alphabet_decode(wc, dest, &mbs);
753 dest += length;
754 pos += length;
755 }
756 *dest = 0;
757 return pos;
758 }
759
760 /**
761 * char_ascii_encode:
762 * @dest: buffer for the converted string, not NUL terminated
763 * @dest_len: size of @dest in bytes, must be 2 * @len in the worst case
764 * @src: buffer with the string to be converted
765 * @len: length of @src in bytes
766 *
767 * Returns: the number of bytes used in the @dest buffer for the converted string
768 *
769 * Converts a string from ISO/IEC 8859-1 to GSM Alphabet.
770 * In the worst case where each character in @src must be converted in the
771 * Extended GSM Alphabet, @dest_len must be @len * 2; in general it must be
772 * at least @len + number_of_escape_chars
773 */
char_ascii_encode(char * dest,size_t dest_len,const char * src,size_t len)774 size_t char_ascii_encode(char *dest, size_t dest_len, const char *src, size_t len)
775 {
776 size_t i, j, extra = 0;
777
778 for (i = 0, j = 0; i < dest_len && j < len; i++, j++) {
779 if (char_def_alphabet_ext(src[j])) {
780 dest[i++] = GN_CHAR_UNI_ESCAPE;
781 dest[i] = char_def_alphabet_ext_encode(src[j]);
782 extra++;
783 } else {
784 dest[i] = char_def_alphabet_encode(src[j]);
785 }
786 }
787 return len + extra;
788 }
789
/**
 * char_hex_decode:
 * @dest: buffer for the converted string, NUL terminated
 * @src: buffer with the string to be converted
 * @len: length of @src in bytes, length of @dest must be at least (@len / 2) + 1
 *
 * Converts a string from GSM Alphabet in ASCII-encoded hexadecimal bytes to ISO/IEC 8859-1.
 * Every pair of hexadecimal digits is one GSM character. Characters whose
 * UCS-2 value does not fit in one byte are stored as '?'.
 */
void char_hex_decode(unsigned char* dest, const unsigned char* src, int len)
{
	int i;
	char buf[3];

	buf[2] = '\0';
	for (i = 0; i < (len / 2); i++) {
		unsigned int ucs2;

		buf[0] = src[i * 2];
		buf[1] = src[i * 2 + 1];
		ucs2 = char_def_alphabet_decode(strtol(buf, NULL, 16));
		/* Do not silently truncate values above 0xff */
		dest[i] = (ucs2 > 0xff) ? '?' : ucs2;
	}
	dest[i] = 0;
	return;
}
811
/**
 * char_hex_encode:
 * @dest: buffer for the converted string, NUL terminated
 * @dest_len: length of @dest in bytes, must be at least (@len * 2) + 1
 * @src: buffer with the string to be converted
 * @len: length of @src in bytes
 *
 * Returns: the number of bytes used in the @dest buffer for the converted string,
 * not counting the terminating NUL
 *
 * Converts a string from ISO/IEC 8859-1 to GSM Alphabet in ASCII-encoded hexadecimal bytes.
 * Only as many characters are converted as fit into @dest together with
 * the terminating NUL. Nothing is written if @dest_len is 0.
 */
size_t char_hex_encode(char *dest, size_t dest_len, const char *src, size_t len)
{
	static const char hexdigits[] = "0123456789ABCDEF";
	size_t i, n;

	if (dest_len == 0)
		return 0;

	/* Keep one byte for the terminating NUL */
	n = (dest_len - 1) / 2;
	if (n > len)
		n = len;

	for (i = 0; i < n; i++) {
		/* Go through unsigned char: bytes >= 0x80 must not be sign extended */
		unsigned char code = char_def_alphabet_encode((unsigned char)src[i]);

		dest[2 * i] = hexdigits[code >> 4];
		dest[2 * i + 1] = hexdigits[code & 0x0f];
	}
	dest[2 * n] = '\0';
	return 2 * n;
}
831
832 /**
833 * char_uni_alphabet_encode:
834 * @value: pointer to the character to be converted
835 * @n: maximum number of bytes of @value that will be examined
836 * @dest: buffer for the converted character
837 * @mbs: pointer to a variable holding the shift state
838 * or NULL to use a global variable
839 *
840 * Returns: the number of bytes from @value used by the converted string
841 * or -1 in case of error
842 *
843 * Converts a character from multibyte to wide.
844 */
char_uni_alphabet_encode(const char * value,size_t n,wchar_t * dest,MBSTATE * mbs)845 size_t char_uni_alphabet_encode(const char *value, size_t n, wchar_t *dest, MBSTATE *mbs)
846 {
847 int length;
848
849 length = char_mbtowc(dest, value, n, mbs);
850 return length;
851 }
852
853 /**
854 * char_uni_alphabet_decode:
855 * @value: the character to be converted
856 * @dest: buffer for the converted character
857 * @mbs: pointer to a variable holding the shift state
858 * or NULL to use a global variable
859 *
860 * Returns: the number of bytes from @value that have been used
861 * or -1 in case of error
862 *
863 * Converts a character from wide to multibyte.
864 */
char_uni_alphabet_decode(wchar_t value,unsigned char * dest,MBSTATE * mbs)865 int char_uni_alphabet_decode(wchar_t value, unsigned char *dest, MBSTATE *mbs)
866 {
867 int length;
868
869 switch (length = char_wctomb(dest, value, mbs)) {
870 case -1:
871 *dest = '?';
872 length = 1;
873 default:
874 return length;
875 }
876 }
877
878 /**
879 * char_ucs2_decode:
880 * @dest: buffer for the converted string, NUL terminated
881 * @src: buffer with the string to be converted
882 * @len: length of @src in bytes, size of @dest must be at least (@len / 4) + 1
883 *
884 * Converts a string from UCS-2 encoded as ASCII-encoded hexadecimal bytes to ISO/IEC 8859-1.
885 * @len must be a multiple of 4.
886 * Used in AT driver for UCS2 encoding commands.
887 */
char_ucs2_decode(unsigned char * dest,const unsigned char * src,int len)888 void char_ucs2_decode(unsigned char* dest, const unsigned char* src, int len)
889 {
890 int i_len = 0, o_len = 0, length;
891 char buf[5];
892 MBSTATE mbs;
893
894 MBSTATE_DEC_CLEAR(mbs);
895 buf[4] = '\0';
896 for (i_len = 0; i_len < len ; i_len++) {
897 buf[0] = *(src + i_len * 4);
898 buf[1] = *(src + i_len * 4 + 1);
899 buf[2] = *(src + i_len * 4 + 2);
900 buf[3] = *(src + i_len * 4 + 3);
901 switch (length = char_uni_alphabet_decode(strtol(buf, NULL, 16), dest + o_len, &mbs)) {
902 case -1:
903 o_len++;
904 length = 1;
905 break;
906 default:
907 o_len += length;
908 break;
909 }
910 if ((length == 1) && (dest[o_len-1] == 0))
911 return;
912 }
913 dest[o_len] = 0;
914 return;
915 }
916
917 /**
918 * char_ucs2_encode:
919 * @dest: buffer for the converted string, NUL terminated
920 * @dest_len: size of @dest
921 * @src: buffer with the string to be converted
922 * @len: length of @src in bytes, size of @dest must be at least (@len * 4) + 1
923 *
924 * Returns: the number of bytes of @dest that have been used
925 *
926 * Converts a string from ISO/IEC 8859-1 to UCS-2 encoded as ASCII-encoded hexadecimal bytes.
927 * This function should convert "ABC" to "004100420043"
928 * Used only in AT driver for UCS2 encoding commands.
929 * It reads char by char from the input.
930 */
931 #define UCS2_SIZE 4
char_ucs2_encode(char * dest,size_t dest_len,const char * src,size_t len)932 size_t char_ucs2_encode(char *dest, size_t dest_len, const char *src, size_t len)
933 {
934 wchar_t wc;
935 int i, o_len, length;
936 MBSTATE mbs;
937
938 MBSTATE_ENC_CLEAR(mbs);
939 for (i = 0, o_len = 0; i < len && o_len < dest_len / UCS2_SIZE; o_len++, i++) {
940 /*
941 * We read input by convertible chunks. 'length' is length of
942 * the read chunk.
943 */
944 length = char_uni_alphabet_encode(src + i, 1, &wc, &mbs);
945 /* We stop reading after first unreadable input */
946 if (length < 1)
947 return o_len * UCS2_SIZE;
948 /* We write here 4 chars + NULL termination */
949 /* XXX: We should probably check wchar_t size. */
950 snprintf(dest + (o_len * UCS2_SIZE), UCS2_SIZE + 1, "%04X", wc);
951 }
952 return o_len * UCS2_SIZE;
953 }
954
955 /**
956 * char_unicode_decode:
957 * @dest: buffer for the converted string, NUL terminated
958 * @src: buffer with the string to be converted
959 * @len: length of @src in bytes
960 *
961 * Returns: the number of bytes of @dest that have been used
962 *
963 * Converts a string from UTF-8 to ISO/IEC 8859-1.
964 */
char_unicode_decode(unsigned char * dest,const unsigned char * src,int len)965 unsigned int char_unicode_decode(unsigned char* dest, const unsigned char* src, int len)
966 {
967 int i, length = 0, pos = 0;
968 MBSTATE mbs;
969
970 MBSTATE_DEC_CLEAR(mbs);
971 for (i = 0; i < len / 2; i++) {
972 wchar_t wc = src[i * 2] << 8 | src[(i * 2) + 1];
973 length = char_uni_alphabet_decode(wc, dest, &mbs);
974 dest += length;
975 pos += length;
976 }
977 *dest = 0;
978 return pos;
979 }
980
981 /**
982 * char_unicode_encode:
983 * @dest: buffer for the converted string, not NUL terminated
984 * @src: buffer with the string to be converted
985 * @len: length of @src in bytes
986 *
987 * Returns: the number of bytes of @dest that have been used
988 *
989 * Converts a string from ISO/IEC 8859-1 to UTF-8.
990 */
char_unicode_encode(unsigned char * dest,const unsigned char * src,int len)991 unsigned int char_unicode_encode(unsigned char* dest, const unsigned char* src, int len)
992 {
993 int pos = 0;
994 MBSTATE mbs;
995 #ifndef HAVE_ICONV
996 int length, offset = 0;
997 wchar_t wc;
998 #endif
999
1000 MBSTATE_ENC_CLEAR(mbs);
1001 #ifdef HAVE_ICONV
1002 pos = ucs2_encode(dest, 2 * len, src, len);
1003 #else
1004 while (offset < len) {
1005 length = char_uni_alphabet_encode(src + offset, len - offset, &wc, &mbs);
1006 switch (length) {
1007 case -1:
1008 dest[pos++] = wc >> 8 & 0xFF;
1009 dest[pos++] = wc & 0xFF;
1010 offset++;
1011 break;
1012 case 0: /* Avoid infinite loop */
1013 offset++;
1014 break;
1015 default:
1016 dest[pos++] = wc >> 8 & 0xFF;
1017 dest[pos++] = wc & 0xFF;
1018 offset += length;
1019 break;
1020 }
1021 }
1022 #endif
1023 return pos;
1024 }
1025
1026 /* Conversion bin -> hex and hex -> bin */
1027
/* Returns the value (0-15) of one ASCII hexadecimal digit, or -1 if @c is not one. */
static int hex2bin_digit(unsigned char c)
{
	if (c >= '0' && c <= '9') return c - '0';
	if (c >= 'a' && c <= 'f') return c - 'a' + 10;
	if (c >= 'A' && c <= 'F') return c - 'A' + 10;
	return -1;
}

/**
 * hex2bin:
 * @dest: buffer for the converted bytes, must hold at least @len bytes
 * @src: buffer with the hexadecimal digits to be converted, two per output
 *       byte; 2 * @len characters are read
 * @len: number of bytes to produce in @dest
 *
 * Converts from ASCII-encoded hexadecimal bytes to binary. Both upper and
 * lower case digits are accepted. Nothing is done when @dest is NULL.
 * When a character that is not a hexadecimal digit is met the conversion
 * stops and @dest[0] is set to 0; bytes already stored at other positions
 * are left as they are.
 */
void hex2bin(unsigned char *dest, const unsigned char *src, unsigned int len)
{
	unsigned int i;	/* same type as @len, so the comparison is never mixed-sign */

	if (!dest) return;

	for (i = 0; i < len; i++) {
		int high, low;

		high = hex2bin_digit(src[2 * i]);
		if (high < 0) {
			dest[0] = 0;
			return;
		}
		dest[i] = high << 4;

		low = hex2bin_digit(src[2 * i + 1]);
		if (low < 0) {
			dest[0] = 0;
			return;
		}
		dest[i] |= low;
	}
}
1064
/**
 * bin2hex:
 * @dest: buffer for the converted string, not NUL terminated
 * @src: buffer with the bytes to be converted
 * @len: length of @src, size of @dest must be at least @len * 2
 *
 * Converts from binary to ASCII-encoded hexadecimal bytes. Every input byte
 * becomes two upper case digits, high nibble first. Nothing is done when
 * @dest is NULL.
 */
void bin2hex(unsigned char *dest, const unsigned char *src, unsigned int len)
{
	static const char digits[] = "0123456789ABCDEF";
	unsigned char *out = dest;
	const unsigned char *in = src;
	unsigned int left;

	if (!dest) return;

	for (left = len; left > 0; left--, in++) {
		*out++ = digits[(*in & 0xf0) >> 4];
		*out++ = digits[*in & 0x0f];
	}
}
1088
1089 /**
1090 * char_semi_octet_pack:
1091 * @number: string containing the phone number to convert
1092 * @output: buffer for the converted phone number, not NUL terminated
1093 * @type: type of the phone number (eg. %GN_GSM_NUMBER_International)
1094 *
1095 * Returns: the number of semi octects used by the whole encoded string
1096 *
1097 * This function implements packing of numbers (SMS Center number and
1098 * destination number) for SMS sending function.
1099 */
char_semi_octet_pack(char * number,unsigned char * output,gn_gsm_number_type type)1100 int char_semi_octet_pack(char *number, unsigned char *output, gn_gsm_number_type type)
1101 {
1102 char *in_num = number; /* Pointer to the input number */
1103 unsigned char *out_num = output; /* Pointer to the output */
1104 int count = 0; /* This variable is used to notify us about count of already
1105 packed numbers. */
1106
1107 /* The first byte in the Semi-octet representation of the address field is
1108 the Type-of-Address. This field is described in the official GSM
1109 specification 03.40 version 6.1.0, section 9.1.2.5, page 33. We support
1110 only international, unknown and alphanumeric number. */
1111
1112 *out_num++ = type;
1113
1114 if (((type & GN_GSM_NUMBER_Type_Mask) & GN_GSM_NUMBER_Alphanumeric_Mask) == GN_GSM_NUMBER_Alphanumeric_Mask) {
1115 count = strlen(number);
1116 return 2 * char_7bit_pack(0, number, out_num, &count);
1117 }
1118
1119 if ((((type & GN_GSM_NUMBER_International_Mask) == GN_GSM_NUMBER_International_Mask) ||
1120 (type & GN_GSM_NUMBER_Type_Mask) == 0) && /* Unknown */
1121 *in_num == '+')
1122 in_num++; /* skip leading '+' */
1123
1124 /* The next field is the number. It is in semi-octet representation - see
1125 GSM specification 03.40 version 6.1.0, section 9.1.2.3, page 31. */
1126 while (*in_num) {
1127 if (count & 0x01) {
1128 *out_num = *out_num | ((*in_num - '0') << 4);
1129 out_num++;
1130 }
1131 else
1132 *out_num = *in_num - '0';
1133 count++; in_num++;
1134 }
1135
1136 /* We should also fill in the most significant bits of the last byte with
1137 0x0f (1111 binary) if the number is represented with odd number of
1138 digits. */
1139 if (count & 0x01) {
1140 *out_num = *out_num | 0xf0;
1141 out_num++;
1142 }
1143
1144 return (2 * (out_num - output - 1) - (count % 2));
1145 }
1146
1147 /**
1148 * char_bcd_number_get:
1149 * @number: a phone number encoded in BCD format
1150 *
1151 * Returns: a static buffer with the converted phone number, NUL terminated
1152 *
1153 * This function implements unpacking of numbers (SMS Center number and
1154 * destination number) for SMS receiving function.
1155 */
char_bcd_number_get(u8 * number)1156 char *char_bcd_number_get(u8 *number)
1157 {
1158 static char buffer[GN_BCD_STRING_MAX_LENGTH] = "";
1159 int length = number[0]; /* This is the length of BCD coded number */
1160 int count, digit, i = 0;
1161
1162 if (length > GN_BCD_STRING_MAX_LENGTH) length = GN_BCD_STRING_MAX_LENGTH;
1163 switch (number[1]) {
1164 case GN_GSM_NUMBER_Alphanumeric:
1165 char_7bit_unpack(0, length, length, number + 2, buffer);
1166 buffer[length] = 0;
1167 break;
1168 case GN_GSM_NUMBER_International:
1169 snprintf(buffer, sizeof(buffer), "+");
1170 i++;
1171 if (length == GN_BCD_STRING_MAX_LENGTH)
1172 length--; /* avoid overflow */
1173 case GN_GSM_NUMBER_Unknown:
1174 case GN_GSM_NUMBER_National:
1175 case GN_GSM_NUMBER_Network:
1176 case GN_GSM_NUMBER_Subscriber:
1177 case GN_GSM_NUMBER_Abbreviated:
1178 default:
1179 /* start at 2 to skip length and TON (we can't overflow the buffer because i <= GN_BCD_STRING_MAX_LENGTH - 2) */
1180 for (count = 2; count <= length; count++) {
1181 digit = number[count] & 0x0f;
1182 if (digit < 0x0f)
1183 buffer[i++] = bcd_digits[digit];
1184 digit = number[count] >> 4;
1185 if (digit < 0x0f)
1186 buffer[i++] = bcd_digits[digit];
1187 }
1188 buffer[i] = '\0';
1189 break;
1190 }
1191 return buffer;
1192 }
1193
1194 /* UTF-8 conversion functions */
1195
/**
 * utf8_decode:
 * @outstring: buffer for the converted string
 * @outlen: size of @outstring
 * @instring: buffer with the string to be converted
 * @inlen: length of @instring
 *
 * Returns: the number of bytes used in @outstring, or -1 in case of errors
 *
 * Converts a string from UTF-8 to the encoding reported by
 * gn_char_get_encoding(). Uses iconv() if available, else uses internal
 * replacement code that copies ASCII and turns every other sequence into a
 * single '?'.
 *
 * With iconv a NUL is stored right after the converted data, so @outstring
 * needs one byte beyond @outlen when the conversion fills it completely.
 * The replacement code does not terminate the output; it stops after
 * copying a NUL found in the input.
 */
int utf8_decode(char *outstring, size_t outlen, const char *instring, size_t inlen)
{
	int retval = -1;
	size_t nconv;

#if defined(HAVE_ICONV)
	ICONV_CONST char *pin;
	char *pout;
	iconv_t cd;

	pin = (char *)instring;
	pout = outstring;

	cd = iconv_open(gn_char_get_encoding(), "UTF-8");
	if (cd == (iconv_t)-1)
		return -1;
	nconv = iconv(cd, &pin, &inlen, &pout, &outlen);
	if (nconv == (size_t)-1)
		perror("utf8_decode/iconv");
	else
		retval = pout - outstring;
	*pout = 0;
	/* release the descriptor on the error path as well */
	iconv_close(cd);
#else
	const unsigned char *pin = (const unsigned char *)instring;
	char *pout = outstring;

	while (inlen > 0 && outlen > 0) {
		unsigned char lead = *pin;

		/* The lead byte tells how many input bytes the sequence spans. */
		if (lead < 0x80) {
			*pout = (char)lead;
			nconv = 1;
		} else {
			*pout = '?';
			if (lead < 0xc0)
				nconv = 1;	/* stray continuation byte */
			else if (lead < 0xe0)
				nconv = 2;
			else if (lead < 0xf0)
				nconv = 3;
			else if (lead < 0xf8)
				nconv = 4;
			else if (lead < 0xfc)
				nconv = 5;
			else
				nconv = 6;
		}
		/* A sequence cut short by the end of the input must not make
		   the unsigned counter wrap around. */
		if (nconv > inlen)
			nconv = inlen;
		inlen -= nconv;
		outlen--;
		pin += nconv;
		if (*pout++ == '\0')
			break;
	}
	retval = pout - outstring;
#endif
	return retval;
}
1270
/**
 * utf8_encode:
 * @outstring: buffer for the converted string
 * @outlen: size of @outstring
 * @instring: buffer with the string to be converted
 * @inlen: length of @instring
 *
 * Returns: the number of bytes used in @outstring, or -1 in case of errors
 *
 * Converts a string from the encoding reported by gn_char_get_encoding()
 * to UTF-8. Uses iconv() if available, else uses internal replacement code
 * that copies ASCII and stores '?' for every byte with the high bit set.
 *
 * With iconv a NUL is stored right after the converted data. The
 * replacement code does not terminate the output; it stops after copying a
 * NUL found in the input.
 */
int utf8_encode(char *outstring, int outlen, const char *instring, int inlen)
{
	int retval = -1;
#if defined(HAVE_ICONV)
	ICONV_CONST char *pin = (char *)instring;
	char *pout = outstring;
	size_t inleft = inlen;
	size_t outleft = outlen;
	size_t nconv;
	iconv_t cd;

	cd = iconv_open("UTF-8", gn_char_get_encoding());
	if (cd == (iconv_t)-1)
		return -1;

	nconv = iconv(cd, &pin, &inleft, &pout, &outleft);
	if (nconv != (size_t)-1)
		retval = (char *)pout - outstring;
	else
		perror("utf8_encode/iconv");
	*pout = 0;
	iconv_close(cd);
#else
	const unsigned char *pin = (const unsigned char *)instring;
	char *pout = outstring;

	for (; inlen > 0 && outlen > 0; inlen--, outlen--) {
		unsigned char ch = *pin++;

		*pout++ = (ch >= 0x80) ? '?' : (char)ch;
		if (ch == '\0')
			break;	/* the NUL has been copied, nothing follows */
	}
	retval = pout - outstring;
#endif
	return retval;
}
1331
1332 /* UCS-2 functions */
1333
/**
 * ucs2_encode:
 * @outstring: buffer for the converted string, not NUL terminated
 * @outlen: size of @outstring
 * @instring: buffer with the string to be converted
 * @inlen: length of @instring
 *
 * Returns: the number of bytes used in @outstring, or -1 in case of errors;
 * without iconv, the value returned by char_unicode_encode()
 *
 * Converts a string from the encoding reported by gn_char_get_encoding()
 * to big-endian UCS-2. Uses iconv() if available, else hands the work to
 * char_unicode_encode(), in which case @outlen is not consulted.
 */
int ucs2_encode(char *outstring, int outlen, const char *instring, int inlen)
{
#if defined(HAVE_ICONV)
	ICONV_CONST char *pin = (char *)instring;
	char *pout = outstring;
	size_t inleft = inlen;
	size_t outleft = outlen;
	size_t nconv;
	int retval = -1;
	iconv_t cd;

	cd = iconv_open("UCS-2BE", gn_char_get_encoding());
	if (cd == (iconv_t)-1)
		return -1;

	nconv = iconv(cd, &pin, &inleft, &pout, &outleft);
	if (nconv != (size_t)-1)
		retval = (char *)pout - outstring;
	else
		perror("ucs2_encode/iconv");
	iconv_close(cd);
	return retval;
#else
	return char_unicode_encode(outstring, instring, inlen);
#endif
}
1375
/**
 * ucs2_decode:
 * @outstring: buffer for the converted string
 * @outlen: size of @outstring
 * @instring: buffer with the big-endian UCS-2 string to be converted
 * @inlen: length of @instring in bytes
 *
 * Returns: the number of bytes used in @outstring, or -1 in case of errors;
 * without iconv, the value returned by char_unicode_decode()
 *
 * Converts a string from big-endian UCS-2 to the encoding reported by
 * gn_char_get_encoding(). Uses iconv() if available, in which case the
 * output is not NUL terminated. Otherwise the work is handed to
 * char_unicode_decode() and @outlen is not consulted.
 */
int ucs2_decode(char *outstring, int outlen, const char *instring, int inlen)
{
#if defined(HAVE_ICONV)
	size_t outleft, inleft, nconv;
	ICONV_CONST char *pin;
	char *pout;
	iconv_t cd;
	int retval = -1;

	outleft = outlen;
	inleft = inlen;
	pin = (char *)instring;
	pout = outstring;

	cd = iconv_open(gn_char_get_encoding(), "UCS-2BE");
	if (cd == (iconv_t)-1)
		return -1;

	nconv = iconv(cd, &pin, &inleft, &pout, &outleft);
	if (nconv == (size_t)-1)
		perror("ucs2_decode/iconv");	/* label names this function, not ucs2_encode */
	else
		retval = (char *)pout - outstring;
	iconv_close(cd);
	return retval;
#else
	return char_unicode_decode(outstring, instring, inlen);
#endif
}
1405
1406 /* BASE64 functions */
1407
/**
 * string_base64:
 * @instring: the NUL terminated string to check
 *
 * Returns: 1 if the string must be encoded in base 64, 0 otherwise
 *
 * A string needs base 64 as soon as one of its bytes has the high bit set.
 */
int string_base64(const char *instring)
{
	const char *p = instring;

	while (*p != '\0') {
		if ((*p & 0x80) != 0)
			return 1;
		p++;
	}
	return 0;
}
1423
1424 /**
1425 * base64_encode:
1426 * @outstring: buffer for the converted string, will be NUL terminated
1427 * @outlen: size of @outstring
1428 * @instring: buffer with the string to be converted
1429 * @inlen: length of @instring
1430 *
1431 * Returns: the length of the converted string
1432 *
1433 * Converts a generic string to base64 encoding.
1434 * @outlen needs to be at least 4 / 3 times + 1 bigger than @inlen to hold
1435 * the converted string and the terminator.
1436 */
base64_encode(char * outstring,int outlen,const char * instring,int inlen)1437 int base64_encode(char *outstring, int outlen, const char *instring, int inlen)
1438 {
1439 const char *pin;
1440 char *pout;
1441 char *outtemp = NULL;
1442 int inleft, outleft;
1443
1444 pout = outstring;
1445 inleft = inlen;
1446 outleft = outlen;
1447 pin = instring;
1448
1449 /* This is in case someone passes a buffer not appropriate for outstring */
1450 while (outleft > 3 && inleft > 0) {
1451 int a, b, c;
1452 unsigned int i1, i2, i3, i4;
1453
1454 a = *pin++;
1455 b = (inleft > 1) ? *(pin++) : 0;
1456 c = (inleft > 2) ? *(pin++) : 0;
1457
1458 /* calculate the indexes */
1459 i1 = (a & 0xfc) >> 2;
1460 *(pout++) = base64_alphabet[i1];
1461
1462 i2 = ((a & 0x03) << 4) | ((b & 0xf0) >> 4);
1463 *(pout++) = base64_alphabet[i2];
1464
1465 inleft--;
1466
1467 i3 = ((b & 0x0f) << 2) | ((c & 0xc0) >> 6);
1468 if (!inleft) {
1469 *(pout++) = '=';
1470 } else {
1471 *(pout++) = base64_alphabet[i3];
1472 inleft--;
1473 }
1474
1475 i4 = c & 0x3f;
1476 if (!inleft)
1477 *(pout++) = '=';
1478 else {
1479 *(pout++) = base64_alphabet[i4];
1480 inleft--;
1481 }
1482
1483 outleft -= 4;
1484 }
1485
1486 /* terminate the output string */
1487 *pout = 0;
1488
1489 free(outtemp);
1490
1491 return pout - outstring;
1492 }
1493
/**
 * base64_decode:
 * @dest: buffer for the converted string, will be NUL terminated
 * @destlen: size of @dest, terminator included
 * @source: buffer with the string to be converted
 * @inlen: length of @source
 *
 * Returns: the number of bytes used in @dest, terminator excluded
 *
 * Converts a generic string from base 64 encoding.
 * @destlen needs to be at least 3 / 4 + 1 of @inlen to hold the converted
 * string and the terminator; output that does not fit is dropped.
 * Characters outside the base 64 alphabet are skipped. Decoding stops at a
 * NUL byte, at the end of @source, or after a group that carries '='
 * padding. An incomplete trailing group is discarded. Nothing is written
 * when @destlen is not positive.
 */
int base64_decode(char *dest, int destlen, const char *source, int inlen)
{
	unsigned char dtable[256];
	int i;
	int dpos = 0;
	int spos = 0;

	if (destlen <= 0)
		return 0;

	/* 0x80 marks a byte that is not part of the alphabet. */
	memset(dtable, 0x80, sizeof(dtable));
	for (i = 'A'; i <= 'Z'; i++)
		dtable[i] = 0 + (i - 'A');
	for (i = 'a'; i <= 'z'; i++)
		dtable[i] = 26 + (i - 'a');
	for (i = '0'; i <= '9'; i++)
		dtable[i] = 52 + (i - '0');
	dtable['+'] = 62;
	dtable['/'] = 63;
	dtable['='] = 0;

	/* The last byte of @dest is kept for the terminator. */
	while (dpos < destlen - 1) {
		unsigned char a[4], b[4], o[3];
		int count;

		/* Collect the next four alphabet characters. */
		for (i = 0; i < 4; ) {
			unsigned char c;

			if (spos >= inlen)
				goto endloop;
			/* index the table with an unsigned value, never a negative char */
			c = (unsigned char)source[spos++];
			if (c == 0)
				goto endloop;
			if (dtable[c] & 0x80)
				continue;	/* Ignoring errors: discard invalid character. */
			a[i] = c;
			b[i] = dtable[c];
			i++;
		}
		o[0] = (unsigned char)((b[0] << 2) | (b[1] >> 4));
		o[1] = (unsigned char)((b[1] << 4) | (b[2] >> 2));
		o[2] = (unsigned char)((b[2] << 6) | b[3]);

		/* Padding tells how many of the three bytes carry data. */
		count = a[2] == '=' ? 1 : (a[3] == '=' ? 2 : 3);
		for (i = 0; i < count && dpos < destlen - 1; i++)
			dest[dpos++] = (char)o[i];
		if (count < 3)
			break;
	}
endloop:
	dest[dpos] = 0;
	return dpos;
}
1569
/**
 * utf8_base64_encode:
 * @dest: buffer for the converted string, NUL terminated
 * @destlen: size of @dest
 * @in: buffer with the string to be converted
 * @inlen: length of @in
 *
 * Returns: the number of bytes used by the converted string, or a negative
 * value if @destlen is negative, the scratch buffer cannot be allocated or
 * utf8_encode() fails
 *
 * Converts a string from application default encoding to UTF-8 then to base 64.
 * @dest must be valid and big enough to hold the converted string.
 * The intermediate UTF-8 text is limited to @destlen bytes.
 */
int utf8_base64_encode(char *dest, int destlen, const char *in, int inlen)
{
	char *aux;
	int retval;

	if (destlen < 0)
		return -1;

	/* one spare byte for the terminator utf8_encode() may append */
	aux = calloc(destlen + 1, sizeof(char));
	if (!aux)
		return -1;

	retval = utf8_encode(aux, destlen, in, inlen);
	if (retval >= 0)
		retval = base64_encode(dest, destlen, aux, retval);

	free(aux);
	return retval;
}
1596
/**
 * utf8_base64_decode:
 * @dest: buffer for the converted string
 * @destlen: size of @dest
 * @in: buffer with the string to be converted
 * @inlen: length of @in
 *
 * Returns: the value returned by utf8_decode() for the decoded text, or -1
 * if @destlen is negative or the scratch buffer cannot be allocated
 *
 * Converts a string from base 64 to UTF-8 then to application default encoding.
 * @dest must be valid and big enough to hold the converted string.
 * The intermediate UTF-8 text is limited to @destlen bytes.
 */
int utf8_base64_decode(char *dest, int destlen, const char *in, int inlen)
{
	char *aux;
	int retval;

	if (destlen < 0)
		return -1;

	/* one spare byte so the decoded text always has room for a terminator */
	aux = calloc(destlen + 1, sizeof(char));
	if (!aux)
		return -1;

	retval = base64_decode(aux, destlen, in, inlen);
	if (retval >= 0)
		retval = utf8_decode(dest, destlen, aux, retval);

	free(aux);
	return retval;
}
1623
/**
 * add_slashes:
 * @dest: buffer for the converted string, NUL terminated
 * @src: buffer with the string to be converted
 * @maxlen: size of @dest, must be 2 * @len + 1 in the worst case
 * @len: length of @src
 *
 * Returns: the number of bytes used by the converted string, terminator
 * excluded
 *
 * Escapes the following characters (according to rfc 2426):
 * '\n', '\r', ';', ',', '\'.
 * Newline and carriage return become the two characters "\n" and "\r",
 * the others are prefixed with a backslash. When @dest is too small the
 * output stops before the first character (or escape pair) that would not
 * fit together with the terminator. Nothing is written when @maxlen is not
 * positive.
 */
int add_slashes(char *dest, char *src, int maxlen, int len)
{
	int i, j = 0;

	if (maxlen <= 0)
		return 0;

	for (i = 0; i < len; i++) {
		char c = src[i];
		int escape = 1;

		switch (c) {
		case '\n':
			c = 'n';
			break;
		case '\r':
			c = 'r';
			break;
		case '\\':
		case ';':
		case ',':
			break;
		default:
			escape = 0;
			break;
		}

		/* An escape pair is written whole or not at all, and one byte
		   always stays free for the terminator. */
		if (j + escape + 1 >= maxlen)
			break;
		if (escape)
			dest[j++] = '\\';
		dest[j++] = c;
	}
	dest[j] = 0;
	return j;
}
1662
/**
 * strip_slashes:
 * @dest: buffer for the converted string, NUL terminated
 * @src: buffer with the string to be converted
 * @maxlen: size of @dest, must be @len + 1 in the worst case
 * @len: length of @src
 *
 * Returns: the number of bytes used by the converted string, terminator
 * excluded
 *
 * Unescapes the characters escaped by add_slashes().
 * A backslash followed by any other character is kept together with that
 * character; a backslash that ends the input is dropped. When @dest is too
 * small the output stops before the first piece that would not fit
 * together with the terminator. Nothing is written when @maxlen is not
 * positive.
 */
int strip_slashes(char *dest, const char *src, int maxlen, int len)
{
	int i, j = 0, slash_state = 0;

	if (maxlen <= 0)
		return 0;

	for (i = 0; i < len; i++) {
		char c = src[i];
		char piece[2];
		int n = 0, k;

		if (!slash_state) {
			if (c == '\\') {
				/* remember the backslash, the next character decides */
				slash_state = 1;
				continue;
			}
			piece[n++] = c;
		} else {
			slash_state = 0;
			switch (c) {
			case 'n':
				piece[n++] = '\n';
				break;
			case 'r':
				piece[n++] = '\r';
				break;
			case '\\':
			case ';':
			case ',':
				piece[n++] = c;
				break;
			default:
				/* not an escape we produce: keep both characters */
				piece[n++] = '\\';
				piece[n++] = c;
				break;
			}
		}

		/* one byte always stays free for the terminator */
		if (j + n >= maxlen)
			break;
		for (k = 0; k < n; k++)
			dest[j++] = piece[k];
	}
	dest[j] = 0;
	return j;
}
1723