1 /*
2 
3   G N O K I I
4 
5   A Linux/Unix toolset and driver for the mobile phones.
6 
7   This file is part of gnokii.
8 
9   Gnokii is free software; you can redistribute it and/or modify
10   it under the terms of the GNU General Public License as published by
11   the Free Software Foundation; either version 2 of the License, or
12   (at your option) any later version.
13 
14   Gnokii is distributed in the hope that it will be useful,
15   but WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   GNU General Public License for more details.
18 
19   You should have received a copy of the GNU General Public License
20   along with gnokii; if not, write to the Free Software
21   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22 
23   Copyright (C) 1999-2000 Pavel Janik ml.
24   Copyright (C) 2001-2011 Pawel Kot
25   Copyright (C) 2002      Markus Plail, Manfred Jonsson
26   Copyright (C) 2002-2004 BORBELY Zoltan
27   Copyright (C) 2003      Martin Goldhahn
28 
29   Functions for encoding SMS, calendar and other things.
30 
31 */
32 
33 #include "config.h"
34 
35 #include <stdlib.h>
36 #include <stdio.h>
37 #include <string.h>
38 
39 #include "compat.h"
40 #include "misc.h"
41 #include "gnokii.h"
42 #include "gnokii-internal.h"
43 
44 #ifdef HAVE_ICONV
45 #  include <iconv.h>
46 #endif
47 #ifdef HAVE_LANGINFO_CODESET
48 #  include <langinfo.h>
49 #endif
50 #ifdef HAVE_LOCALE_CHARSET
51 #  include <libcharset.h>
52 #else
53 /* FIXME: We should include here somehow ../intl/localcharset.h, but it may
54  * cause problems with MSVC. */
55 extern const char *locale_charset(void); /* from ../intl/localcharset.c */
56 #endif
57 
58 #include <glib.h>
59 
/**
 * base64_alphabet:
 *
 * Mapping from 6-bit binary values to base 64 encoding characters.
 * Both the characters and the pointer itself are read-only.
 */
static const char *const base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/**
 * bcd_digits:
 *
 * Mapping from ASCII to BCD digits representing phone numbers and vice versa.
 * BCD digits are those from Table 10.5.118 of 3GPP TS 04.08 with 'a' replaced by 'p'.
 * Both the characters and the pointer itself are read-only.
 */
static const char *const bcd_digits = "0123456789*#pbc";
74 
/**
 * GN_CHAR_UNI_ALPHABET_SIZE:
 *
 * Number of characters in GSM default alphabet (for UCS-2 encoding).
 */
#define GN_CHAR_UNI_ALPHABET_SIZE 128

/**
 * GN_CHAR_UNI_ESCAPE:
 *
 * Value of the escape character for the GSM Alphabet (in UCS-2 encoding).
 */
#define GN_CHAR_UNI_ESCAPE 0x001b

/**
 * gsm_default_unicode_alphabet:
 *
 * Mapping from GSM default alphabet to UCS-2: the array index is the GSM
 * code, the stored value is the UCS-2 code point.
 * The table is a pure lookup table and is never modified at run time.
 *
 * ETSI GSM 03.38, version 6.0.1, section 6.2.1; Default alphabet. Mapping to UCS-2.
 * Mapping according to http://unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
 */
static const unsigned int gsm_default_unicode_alphabet[GN_CHAR_UNI_ALPHABET_SIZE] = {
	/* @       £       $       ¥       è       é       ù       ì */
	0x0040, 0x00a3, 0x0024, 0x00a5, 0x00e8, 0x00e9, 0x00f9, 0x00ec,
	/* ò       Ç       \n      Ø       ø       \r      Å       å */
	0x00f2, 0x00c7, 0x000a, 0x00d8, 0x00f8, 0x000d, 0x00c5, 0x00e5,
	/* Δ       _       Φ       Γ       Λ       Ω       Π       Ψ */
	0x0394, 0x005f, 0x03a6, 0x0393, 0x039b, 0x03a9, 0x03a0, 0x03a8,
	/* Σ       Θ       Ξ      NBSP     Æ       æ       ß       É */
	0x03a3, 0x0398, 0x039e, 0x00a0, 0x00c6, 0x00e6, 0x00df, 0x00c9,
	/* ' '     !       "       #       ¤       %       &       ' */
	0x0020, 0x0021, 0x0022, 0x0023, 0x00a4, 0x0025, 0x0026, 0x0027,
	/* (       )       *       +       ,       -       .       / */
	0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
	/* 0       1       2       3       4       5       6       7 */
	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
	/* 8       9       :       ;       <       =       >       ? */
	0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
	/* ¡       A       B       C       D       E       F       G */
	0x00a1, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
	/* H       I       J       K       L       M       N       O */
	0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
	/* P       Q       R       S       T       U       V       W */
	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
	/* X       Y       Z       Ä       Ö       Ñ       Ü       § */
	0x0058, 0x0059, 0x005a, 0x00c4, 0x00d6, 0x00d1, 0x00dc, 0x00a7,
	/* ¿       a       b       c       d       e       f       g */
	0x00bf, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
	/* h       i       j       k       l       m       n       o */
	0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
	/* p       q       r       s       t       u       v       w */
	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
	/* x       y       z       ä       ö       ñ       ü       à */
	0x0078, 0x0079, 0x007a, 0x00e4, 0x00f6, 0x00f1, 0x00fc, 0x00e0
};

/* Encoding name chosen by the application; an empty string means "not set". */
static char application_encoding[64] = "";
133 
134 /**
135  * char_def_alphabet:
136  * @value: the UCS-2 character to validate
137  *
138  * Returns: true if the given character matches default alphabet, false otherwise
139  *
140  * It could be possibly optimized but let's face it: nowadays full
141  * lookup of 128 elements table is not that time consuming.
142  */
char_def_alphabet(unsigned int value)143 static int char_def_alphabet(unsigned int value)
144 {
145 	int i;
146 	for (i = 0; i < GN_CHAR_UNI_ALPHABET_SIZE; i++) {
147 		if (gsm_default_unicode_alphabet[i] == value) {
148 			return true;
149 		}
150 	}
151 	return false;
152 }
153 
154 /**
155  * char_is_escape:
156  * @value: the char to test
157  *
158  * Returns: non zero if @value is an escape character, zero otherwise
159  *
160  * Determines if @value is an escape character for GSM Alphabet.
161  */
char_is_escape(unsigned int value)162 static bool char_is_escape(unsigned int value)
163 {
164 	return (value == GN_CHAR_UNI_ESCAPE);
165 }
166 
/**
 * get_langinfo_codeset:
 *
 * Returns: a constant string representing a charset encoding
 *
 * Gets the current charset encoding.
 * Uses nl_langinfo(CODESET) when HAVE_LANGINFO_CODESET is defined and
 * locale_charset() otherwise.
 *
 * The first non-empty name obtained is copied into a private static buffer
 * and returned on every later call. The copy is needed because the string
 * returned by nl_langinfo() may be overwritten by later calls to
 * nl_langinfo() or setlocale(), so keeping only the pointer is not safe.
 * A NULL, empty or oversized name is returned as is and not cached.
 */
static const char *get_langinfo_codeset(void)
{
	static char cached[64] = "";
	const char *codeset;

	if (*cached)
		return cached;

#ifdef HAVE_LANGINFO_CODESET
	codeset = nl_langinfo(CODESET);
#else
	codeset = locale_charset();
#endif
	/* Nothing useful to remember: hand the library's answer straight back */
	if (!codeset || !*codeset || strlen(codeset) >= sizeof(cached))
		return codeset;

	snprintf(cached, sizeof(cached), "%s", codeset);
	return cached;
}
188 
189 /**
190  * gn_char_get_encoding:
191  *
192  * Returns: a constant string representing a charset encoding
193  *
194  * Gets the encoding set by the application or the default one.
195  */
gn_char_get_encoding()196 GNOKII_API const char *gn_char_get_encoding()
197 {
198 	const char *coding;
199 	if (*application_encoding)
200 		coding = application_encoding; /* app has overriden encoding setting */
201 	else
202 		coding = get_langinfo_codeset(); /* return default codeset */
203 	return coding;
204 }
205 
206 /**
207  * gn_char_set_encoding:
208  * @encoding: a string representing the name of a charset encoding
209  *
210  * Sets the encoding preferred by the application.
211  */
gn_char_set_encoding(const char * encoding)212 void gn_char_set_encoding(const char* encoding)
213 {
214 	snprintf(application_encoding, sizeof(application_encoding), "%s", encoding);
215 }
216 
/**
 * char_mblen:
 * @src: the multibyte string to measure
 *
 * Returns: the length of the string in characters, as computed by
 * mbstowcs() and converted to int
 *
 * Detects the correct length of a string (also for multibyte chars like "umlaute").
 * The result is also written to the debug output.
 */
int char_mblen(const char *src)
{
	size_t wide_chars = mbstowcs(NULL, src, 0);
	int len = wide_chars;

	dprintf("char_mblen(%s): %i\n", src, len);
	return len;
}
231 
232 #ifndef ICONV_CONST
233 #  define ICONV_CONST const
234 #endif
235 
236 /**
237  * char_mbtowc:
238  * @wchar_t: buffer for the converted wide char string
239  * @src: buffer with the multibyte string to be converted
240  * @maxlen: size of @wchar_t buffer
241  * @mbs: pointer to a variable holding the shift state
242  * or NULL to use a global variable
243  *
244  * Returns: the number of bytes from @src that have been used
245  * or -1 in case of error
246  *
247  * Converts a multibyte string to a wide char string.
248  * Uses iconv() if it is available and iconv_open() succeeds, else mbrtowc()
249  * if available, else mbtowc().
250  */
char_mbtowc(wchar_t * dst,const char * src,int maxlen,MBSTATE * mbs)251 static int char_mbtowc(wchar_t *dst, const char *src, int maxlen, MBSTATE *mbs)
252 {
253 #ifdef HAVE_ICONV
254 	size_t nconv;
255 	ICONV_CONST char *pin;
256 	char *pout;
257 	size_t inlen;
258 	size_t outlen;
259 	iconv_t cd;
260 
261 	pin = (char *)src;
262 	pout = (char *)dst;
263 	/* Let's assume that we have at most 4-bytes wide characters */
264 	inlen = maxlen;
265 	outlen = maxlen * sizeof(wchar_t);
266 
267 	cd = iconv_open("WCHAR_T", gn_char_get_encoding());
268 	if (cd == (iconv_t)-1)
269 		goto fallback;
270 	nconv = iconv(cd, &pin, &inlen, &pout, &outlen);
271 	if ((nconv == (size_t)-1) && (pin == src))
272 		perror("char_mbtowc/iconv");
273 	iconv_close(cd);
274 
275 	return (char*)dst == pout ? -1 : pin-src;
276 fallback:
277 #endif
278 	if (maxlen >= MB_CUR_MAX)
279 		maxlen = MB_CUR_MAX - 1;
280 #ifdef HAVE_WCRTOMB
281 	return mbrtowc(dst, src, maxlen, mbs);
282 #else
283 	return mbtowc(dst, src, maxlen);
284 #endif
285 }
286 
287 /**
288  * char_wctomb:
289  * @dst: buffer for the converted multibyte string
290  * @src: buffer with the wide char string to be converted
291  * @mbs: pointer to a variable holding the shift state
292  * or NULL to use a global variable
293  *
294  * Returns: the number of bytes from @src that have been used
295  * or -1 in case of error
296  *
297  * Converts a wide char string to a multibyte string.
298  * Uses iconv() if it is available and iconv_open() succeeds, else wcrtomb()
299  * if available, else wctomb().
300  */
char_wctomb(char * dst,wchar_t src,MBSTATE * mbs)301 static int char_wctomb(char *dst, wchar_t src, MBSTATE *mbs)
302 {
303 #ifdef HAVE_ICONV
304 	size_t nconv;
305 	ICONV_CONST char *pin;
306 	char *pout;
307 	size_t inlen;
308 	size_t outlen;
309 	iconv_t cd;
310 
311 	pin = (char *)&src;
312 	pout = (char *)dst;
313 	inlen = sizeof(wchar_t);
314 	outlen = 4;
315 
316 	cd = iconv_open(gn_char_get_encoding(), "WCHAR_T");
317 	if (cd == (iconv_t)-1)
318 		goto fallback;
319 	nconv = iconv(cd, &pin, &inlen, &pout, &outlen);
320 	if (nconv == (size_t)-1)
321 		perror("char_wctomb/iconv");
322 	iconv_close(cd);
323 
324 	return nconv == -1 ? -1 : pout-dst;
325 fallback:
326 #endif
327 #ifdef HAVE_WCRTOMB
328 	return wcrtomb(dst, src, mbs);
329 #else
330 	return wctomb(dst, src);
331 #endif
332 }
333 
/**
 * char_def_alphabet_ext:
 * @value: the character to test, UCS-2 encoded
 *
 * Returns: true if the character can be represented with the Extended GSM Alphabet,
 * false otherwise
 *
 * Checks if @value is one of the characters of the Extended GSM Alphabet:
 * form feed, ^ { } \ [ ~ ] | and the euro sign.
 *
 * The GSM specification defines only these 10 characters in the extension
 * of the default alphabet and they are unlikely to ever change, so they are
 * simply listed here.
 */
bool char_def_alphabet_ext(unsigned int value)
{
	switch (value) {
	case 0x0c:	/* form feed */
	case '^':
	case '{':
	case '}':
	case '\\':
	case '[':
	case '~':
	case ']':
	case '|':
	case 0x20ac:	/* euro */
		return true;
	default:
		return false;
	}
}
361 
/**
 * char_def_alphabet_ext_count:
 * @input: input buffer, examined byte by byte
 * @length: number of bytes of @input to examine
 *
 * Returns: number of bytes in @input for which char_def_alphabet_ext()
 * reports an extended GSM alphabet character
 */
int char_def_alphabet_ext_count(unsigned char *input, int length)
{
	const unsigned char *cursor = input;
	int remaining = length;
	int found = 0;

	while (remaining-- > 0) {
		if (char_def_alphabet_ext(*cursor++))
			found++;
	}
	return found;
}
377 
/**
 * char_def_alphabet_ext_decode:
 * @value: the Extended GSM Alphabet code to decode
 *
 * Returns: the decoded character, or '?' if @value can't be decoded
 *
 * Converts a character from Extended GSM Alphabet to UCS-2.
 */
static unsigned int char_def_alphabet_ext_decode(unsigned char value)
{
	unsigned int ucs2;

	dprintf("Default extended alphabet\n");
	switch (value) {
	case 0x0a: ucs2 = 0x000c; break; /* form feed */
	case 0x14: ucs2 = 0x005e; break; /* ^ */
	case 0x28: ucs2 = 0x007b; break; /* { */
	case 0x29: ucs2 = 0x007d; break; /* } */
	case 0x2f: ucs2 = 0x005c; break; /* \ */
	case 0x3c: ucs2 = 0x005b; break; /* [ */
	case 0x3d: ucs2 = 0x007e; break; /* ~ */
	case 0x3e: ucs2 = 0x005d; break; /* ] */
	case 0x40: ucs2 = 0x007c; break; /* | */
	case 0x65: ucs2 = 0x20ac; break; /* € */
	default:   ucs2 = 0x003f; break; /* invalid character, set ? */
	}
	return ucs2;
}
403 
/**
 * char_def_alphabet_ext_encode:
 * @value: the UCS-2 character to encode
 *
 * Returns: the encoded character, or 0 if @value can't be encoded
 *
 * Converts a character from UCS-2 to Extended GSM Alphabet by looking it up
 * in a small table of (UCS-2, GSM) pairs.
 */
static unsigned char char_def_alphabet_ext_encode(unsigned int value)
{
	static const struct {
		unsigned int ucs2;
		unsigned char gsm;
	} ext_map[] = {
		{ 0x0c,   0x0a },	/* form feed */
		{ '^',    0x14 },
		{ '{',    0x28 },
		{ '}',    0x29 },
		{ '\\',   0x2f },
		{ '[',    0x3c },
		{ '~',    0x3d },
		{ ']',    0x3e },
		{ '|',    0x40 },
		{ 0x20ac, 0x65 }	/* euro */
	};
	size_t k;

	for (k = 0; k < sizeof(ext_map) / sizeof(ext_map[0]); k++) {
		if (ext_map[k].ucs2 == value)
			return ext_map[k].gsm;
	}
	return 0x00; /* invalid character */
}
428 
429 /**
430  * gn_char_def_alphabet:
431  * @string: the string to test
432  *
433  * Returns: %true if the string can be represented with the GSM Alphabet,
434  * %false otherwise
435  *
436  * Checks if @value is a string composed only by characters defined by
437  * the default GSM alphabet or its extension.
438  */
gn_char_def_alphabet(unsigned char * string)439 GNOKII_API int gn_char_def_alphabet(unsigned char *string)
440 {
441 	unsigned int i, ucs2len, inlen = strlen(string);
442 	char *ucs2str;
443 
444 	/* First, let's know the encoding. We convert it from something to UCS-2 */
445 	ucs2str = calloc(2 * inlen, sizeof(unsigned char));
446 	if (!ucs2str)
447 		/* We are in trouble here. Whatever would be returned is irrelevant */
448 		return true;
449 	ucs2len = ucs2_encode(ucs2str, 2 * inlen, string, inlen);
450 
451 	/* It means we couldn't encode the input string */
452 	if (ucs2len < 0) {
453 		dprintf("gn_char_def_alphabet: failed to encode input string\n");
454 		return false;
455 	}
456 
457 	for (i = 0; i < ucs2len / 2; i++) {
458 		unsigned int a = 0xff & ucs2str[2 * i], b = 0xff & ucs2str[2 * i + 1];
459 		/*
460 		 * We need the following tests:
461 		 *  - check in the default alphabet table
462 		 *  - check in the extended default alphabet table
463 		 */
464 		if (!char_def_alphabet(256 * a + b) &&
465 		    !char_def_alphabet_ext(256 * a + b)) {
466 			free(ucs2str);
467 			return false;
468 		}
469 	}
470 	free(ucs2str);
471 	return true;
472 }
473 
474 /**
475  * char_def_alphabet_encode:
476  * @value: the character to encode
477  *
478  * Returns: the encoded character, or '?' if @value can't be encoded
479  *
480  * Converts a character from UCS-2 to Default GSM Alphabet.
481  * It could be possibly optimized but let's face it: nowadays full
482  * lookup of 128 elements table is not that time consuming.
483  */
char_def_alphabet_encode(unsigned int value)484 unsigned char char_def_alphabet_encode(unsigned int value)
485 {
486 	int i;
487 	for (i = 0; i < GN_CHAR_UNI_ALPHABET_SIZE; i++) {
488 		if (gsm_default_unicode_alphabet[i] == value) {
489 			return i;
490 		}
491 	}
492 	return '?';
493 }
494 
495 /**
496  * char_def_alphabet_decode:
497  * @value: the character to decode
498  *
499  * Returns: the decoded character or '?' if @value can't be decoded
500  *
501  * Converts a character from Default GSM Alphabet to UCS-2.
502  */
char_def_alphabet_decode(unsigned char value)503 unsigned int char_def_alphabet_decode(unsigned char value)
504 {
505 	if (value < GN_CHAR_UNI_ALPHABET_SIZE) {
506 		return gsm_default_unicode_alphabet[value];
507 	} else {
508 		return 0x003f; /* '?' */
509 	}
510 }
511 
512 /**
513  * char_def_alphabet_string_stats:
514  * @str: string to get statistics encoded in utf8
515  * @enc_chars: calculated number of the characters from the input string
516  * @ext_chars: calculated number of the characters to be encoded in gsm extended default alphabet
517  *
518  * Returns: GN_SMS_DCS_DefaultAlphabet, if @str can be encoded in GSM default alphabet, GN_SMS_DCS_UCS2 otherwise.
519  *
520  * Calculates statistics and encoding of the input string.
521  */
char_def_alphabet_string_stats(char * str,int * enc_chars,int * ext_chars)522 gn_sms_dcs_alphabet_type char_def_alphabet_string_stats(char *str, int *enc_chars, int *ext_chars)
523 {
524 	gn_sms_dcs_alphabet_type enc = GN_SMS_DCS_DefaultAlphabet;
525 	char *iter = str;
526 	gunichar chr;
527 
528 	*enc_chars = 0;
529 	*ext_chars = 0;
530 	if (!g_utf8_validate(iter, -1, NULL)) {
531 		dprintf("Not valid UTF8 string\n");
532 		return enc;
533 	}
534 	do {
535 		chr = g_utf8_get_char(iter);
536 		if (!chr)
537 			break;
538 		if (char_def_alphabet_ext(chr))
539 			(*ext_chars)++;
540 		else if (!char_def_alphabet(chr))
541 			enc = GN_SMS_DCS_UCS2;
542 		(*enc_chars)++;
543 	} while (iter = g_utf8_next_char(iter));
544 	return enc;
545 }
546 
547 /**
548  * char_def_alphabet_copy:
549  * @dest: room for the destination string
550  * @src: source utf-8 string to copy
551  * @len: number of utf-8 characters to copy
552  * @offset: number of utf-8 characters from input to skip
553  *
554  * Returns: number of characters copied
555  *
556  * Function copies @len characters from @src utf-8 string, starting at @offset character to @dest.
557  *
558  */
char_def_alphabet_string_copy(char * dest,const char * src,int len,int offset)559 int char_def_alphabet_string_copy(char *dest, const char *src, int len, int offset)
560 {
561 	int i, to_copy = 0;
562 	gunichar chr;
563 	char *src_offset = g_utf8_offset_to_pointer(src, offset);
564 	char *iter = src_offset;
565 
566 	if (!g_utf8_validate(iter, -1, NULL)) {
567 		dprintf("Not valid UTF8 string\n");
568 		return to_copy;
569 	}
570 	for (i = 0; i < len; i++) {
571 		chr = g_utf8_get_char(iter);
572 		if (!chr)
573 			break;
574 		if (char_def_alphabet_ext(chr))
575 			i++;
576 		if (i < len)
577 			to_copy++;
578 		iter = g_utf8_next_char(iter);
579 	}
580 	g_utf8_strncpy(dest, src_offset, to_copy);
581 	return to_copy;
582 }
583 
#define GN_BYTE_MASK ((1 << bits) - 1)

/**
 * char_7bit_unpack:
 * @offset: the bit offset inside the first byte of @input from which to start reading data
 * @in_length: length of @input in bytes
 * @out_length: size of @output in bytes
 * @input: buffer with the string to be converted
 * @output: buffer for the converted string, not NUL terminated
 *
 * Returns: the number of bytes used in @output, never more than @out_length
 *
 * Converts a packed sequence of 7-bit characters from @input into an array
 * of 8-bit characters in @output.
 * Source characters are stored in a char array of @in_length elements.
 * Conversion stops when either @input is exhausted or @output is full;
 * nothing is ever written at or beyond @output + @out_length.
 */
int char_7bit_unpack(unsigned int offset, unsigned int in_length, unsigned int out_length,
		     unsigned char *input, unsigned char *output)
{
	unsigned char *out_num = output; /* Current pointer to the output buffer */
	unsigned char *in_num = input;  /* Current pointer to the input buffer */
	unsigned char rest = 0x00;	/* High bits of the previous octet */
	int bits;			/* Bits of the current octet that complete a character */

	bits = offset ? offset : 7;

	/* Both bounds are checked before every write at the top of the loop */
	while ((unsigned int)(in_num - input) < in_length &&
	       (unsigned int)(out_num - output) < out_length) {

		*out_num = ((*in_num & GN_BYTE_MASK) << (7 - bits)) | rest;
		rest = *in_num >> bits;

		/* If we don't start from 0th bit, we shouldn't go to the
		   next char. Under *out_num we have now 0 and under rest -
		   _first_ part of the char. */
		if ((in_num != input) || (bits == 7))
			out_num++;
		in_num++;

		if ((unsigned int)(out_num - output) >= out_length)
			break;

		/* After reading 7 octets we have read 7 full characters but
		   we have 7 bits as well. This is the next character */
		if (bits == 1) {
			*out_num = rest;
			out_num++;
			bits = 7;
			rest = 0x00;
		} else {
			bits--;
		}
	}

	return out_num - output;
}
637 
638 /**
639  * char_7bit_pack:
640  * @offset: the bit offset inside the first byte of @output from which to start writing data
641  * @input: buffer with the string to be converted
642  * @output: buffer for the converted string, not NUL terminated
643  * @in_len: length of @input to be set; includes extended alphabet escape char
644  *
645  * Returns: the number of bytes used in @output
646  *
647  * Converts an array of 8-bit characters from @input into a packed sequence
648  * of 7-bit characters in @output.
649  */
char_7bit_pack(unsigned int offset,unsigned char * input,unsigned char * output,unsigned int * in_len)650 int char_7bit_pack(unsigned int offset, unsigned char *input,
651 		   unsigned char *output, unsigned int *in_len)
652 {
653 
654 	unsigned char *out_num = output; /* Current pointer to the output buffer */
655 	unsigned int in_num;
656 	int bits;		     /* Number of bits directly copied to output buffer */
657 	unsigned int ucs2len, i = 0, len = strlen(input);
658 	char *ucs2str;
659 
660 	/* First, let's know the encoding. We convert it from something to UCS-2 */
661 	ucs2str = calloc(2 * len, sizeof(unsigned char));
662 	if (!ucs2str)
663 		return 0;
664 	ucs2len = ucs2_encode(ucs2str, 2 * len, input, len);
665 
666 	/* Encoding failed */
667 	if (ucs2len < 0) {
668 		dprintf("gn_char_def_alphabet: failed to encode input string\n");
669 		return 0;
670 	}
671 
672 	bits = (7 + offset) % 8;
673 
674 	/* If we don't begin with 0th bit, we will write only a part of the
675 	   first octet */
676 	if (offset) {
677 		*out_num = 0x00;
678 		out_num++;
679 	}
680 
681 	*in_len = 0;
682 
683 	while (i < ucs2len / 2) {
684 		unsigned char byte;
685 		bool double_char = false;
686 		unsigned int a = 0xff & ucs2str[2 * i], b = 0xff & ucs2str[2 * i + 1];
687 
688 		in_num = 256 * a + b;
689 		if (char_def_alphabet_ext(in_num)) {
690 			byte = GN_CHAR_UNI_ESCAPE;
691 			double_char = true;
692 			goto skip;
693 next_char:
694 			byte = char_def_alphabet_ext_encode(in_num);
695 			double_char = false;
696 			(*in_len) += 2;
697 		} else {
698 			byte = char_def_alphabet_encode(in_num);
699 			(*in_len)++;
700 		}
701 skip:
702 		*out_num = byte >> (7 - bits);
703 		/* If we don't write at 0th bit of the octet, we should write
704 		   a second part of the previous octet */
705 		if (bits != 7)
706 			*(out_num-1) |= (byte & ((1 << (7-bits)) - 1)) << (bits+1);
707 
708 		bits--;
709 
710 		if (bits == -1)
711 			bits = 7;
712 		else
713 			out_num++;
714 
715 		if (double_char)
716 			goto next_char;
717 
718 		i++;
719 	}
720 
721 	free(ucs2str);
722 	return (out_num - output);
723 }
724 
725 /**
726  * char_default_alphabet_decode:
727  * @dest: buffer for the converted string, NUL terminated
728  * @src: buffer with the string to be converted
729  * @len: length of @src in bytes
730  *
731  * Converts a string from GSM Alphabet to ISO/IEC 8859-1.
732  * In the worst case where each character in @src must be converted from the
733  * Extended GSM Alphabet, size of @dest must be @len + 1; in general it must be
734  * at least @len - number_of_escape_chars + 1
735  */
char_default_alphabet_decode(unsigned char * dest,const unsigned char * src,int len)736 int char_default_alphabet_decode(unsigned char* dest, const unsigned char* src, int len)
737 {
738 	int j, pos = 0;
739 	MBSTATE mbs;
740 
741 	MBSTATE_DEC_CLEAR(mbs);
742 
743 	for (j = 0; j < len; j++) {
744 		wchar_t wc;
745 		int length;
746 
747 		if (char_is_escape(src[j])) {
748 			wc = char_def_alphabet_ext_decode(src[++j]);
749 		} else {
750 			wc = char_def_alphabet_decode(src[j]);
751 		}
752 		length = char_uni_alphabet_decode(wc, dest, &mbs);
753 		dest += length;
754 		pos += length;
755 	}
756 	*dest = 0;
757 	return pos;
758 }
759 
760 /**
761  * char_ascii_encode:
762  * @dest: buffer for the converted string, not NUL terminated
763  * @dest_len: size of @dest in bytes, must be 2 * @len in the worst case
764  * @src: buffer with the string to be converted
765  * @len: length of @src in bytes
766  *
767  * Returns: the number of bytes used in the @dest buffer for the converted string
768  *
769  * Converts a string from ISO/IEC 8859-1 to GSM Alphabet.
770  * In the worst case where each character in @src must be converted in the
771  * Extended GSM Alphabet, @dest_len must be @len * 2; in general it must be
772  * at least @len + number_of_escape_chars
773  */
char_ascii_encode(char * dest,size_t dest_len,const char * src,size_t len)774 size_t char_ascii_encode(char *dest, size_t dest_len, const char *src, size_t len)
775 {
776 	size_t i, j, extra = 0;
777 
778 	for (i = 0, j = 0; i < dest_len && j < len; i++, j++) {
779 		if (char_def_alphabet_ext(src[j])) {
780 			dest[i++] = GN_CHAR_UNI_ESCAPE;
781 			dest[i] = char_def_alphabet_ext_encode(src[j]);
782 			extra++;
783 		} else {
784 			dest[i] = char_def_alphabet_encode(src[j]);
785 		}
786 	}
787 	return len + extra;
788 }
789 
/**
 * char_hex_decode:
 * @dest: buffer for the converted string, NUL terminated
 * @src: buffer with the string to be converted
 * @len: length of @src in bytes, length of @dest must be at least (@len / 2) + 1
 *
 * Converts a string from GSM Alphabet in ASCII-encoded hexadecimal bytes:
 * every pair of hexadecimal digits is parsed with strtol(), decoded with
 * char_def_alphabet_decode() and stored as one byte of @dest.
 */
void char_hex_decode(unsigned char* dest, const unsigned char* src, int len)
{
	char digits[3] = { '\0', '\0', '\0' };
	int pairs = len / 2;
	int n = 0;

	while (n < pairs) {
		digits[0] = src[2 * n];
		digits[1] = src[2 * n + 1];
		dest[n] = char_def_alphabet_decode(strtol(digits, NULL, 16));
		n++;
	}
	dest[n] = 0;
}
811 
/**
 * char_hex_encode:
 * @dest: buffer for the converted string
 * @dest_len: length of @dest in bytes, must be at least (@len * 2) + 1
 * for the result to be complete and NUL terminated
 * @src: buffer with the string to be converted
 * @len: length of @src in bytes
 *
 * Returns: the number of bytes used in the @dest buffer for the converted
 * string, not counting the terminating NUL
 *
 * Converts a string from ISO/IEC 8859-1 to GSM Alphabet in ASCII-encoded
 * hexadecimal bytes (two uppercase digits per character).
 * At most @dest_len / 2 characters are converted. The terminating NUL is
 * written only if there is room left for it, so @dest is never overrun.
 * Bytes of @src are treated as unsigned values so that characters above
 * 0x7f are looked up correctly.
 */
size_t char_hex_encode(char *dest, size_t dest_len, const char *src, size_t len)
{
	static const char hex_digits[] = "0123456789ABCDEF";
	size_t i, n = dest_len / 2 >= len ? len : dest_len / 2;

	for (i = 0; i < n; i++) {
		unsigned char code = char_def_alphabet_encode((unsigned char)src[i]);

		dest[2 * i] = hex_digits[code >> 4];
		dest[2 * i + 1] = hex_digits[code & 0x0f];
	}
	if (2 * n < dest_len)
		dest[2 * n] = '\0';
	return 2 * n;
}
831 
832 /**
833  * char_uni_alphabet_encode:
834  * @value: pointer to the character to be converted
835  * @n: maximum number of bytes of @value that will be examined
836  * @dest: buffer for the converted character
837  * @mbs: pointer to a variable holding the shift state
838  * or NULL to use a global variable
839  *
840  * Returns: the number of bytes from @value used by the converted string
841  * or -1 in case of error
842  *
843  * Converts a character from multibyte to wide.
844  */
char_uni_alphabet_encode(const char * value,size_t n,wchar_t * dest,MBSTATE * mbs)845 size_t char_uni_alphabet_encode(const char *value, size_t n, wchar_t *dest, MBSTATE *mbs)
846 {
847 	int length;
848 
849 	length = char_mbtowc(dest, value, n, mbs);
850 	return length;
851 }
852 
853 /**
854  * char_uni_alphabet_decode:
855  * @value: the character to be converted
856  * @dest: buffer for the converted character
857  * @mbs: pointer to a variable holding the shift state
858  * or NULL to use a global variable
859  *
860  * Returns: the number of bytes from @value that have been used
861  * or -1 in case of error
862  *
863  * Converts a character from wide to multibyte.
864  */
char_uni_alphabet_decode(wchar_t value,unsigned char * dest,MBSTATE * mbs)865 int char_uni_alphabet_decode(wchar_t value, unsigned char *dest, MBSTATE *mbs)
866 {
867 	int length;
868 
869     switch (length = char_wctomb(dest, value, mbs)) {
870 	case -1:
871 		*dest = '?';
872 		length = 1;
873 	default:
874 		return length;
875 	}
876 }
877 
878 /**
879  * char_ucs2_decode:
880  * @dest: buffer for the converted string, NUL terminated
881  * @src: buffer with the string to be converted
882  * @len: length of @src in bytes, size of @dest must be at least (@len / 4) + 1
883  *
884  * Converts a string from UCS-2 encoded as ASCII-encoded hexadecimal bytes to ISO/IEC 8859-1.
885  * @len must be a multiple of 4.
886  * Used in AT driver for UCS2 encoding commands.
887  */
char_ucs2_decode(unsigned char * dest,const unsigned char * src,int len)888 void char_ucs2_decode(unsigned char* dest, const unsigned char* src, int len)
889 {
890 	int i_len = 0, o_len = 0, length;
891 	char buf[5];
892 	MBSTATE mbs;
893 
894 	MBSTATE_DEC_CLEAR(mbs);
895 	buf[4] = '\0';
896 	for (i_len = 0; i_len < len ; i_len++) {
897 		buf[0] = *(src + i_len * 4);
898 		buf[1] = *(src + i_len * 4 + 1);
899 		buf[2] = *(src + i_len * 4 + 2);
900 		buf[3] = *(src + i_len * 4 + 3);
901 		switch (length = char_uni_alphabet_decode(strtol(buf, NULL, 16), dest + o_len, &mbs)) {
902 		case -1:
903 			o_len++;
904 			length = 1;
905 			break;
906 		default:
907 			o_len += length;
908 			break;
909 		}
910 		if ((length == 1) && (dest[o_len-1] == 0))
911 			return;
912 	}
913 	dest[o_len] = 0;
914 	return;
915 }
916 
917 /**
918  * char_ucs2_encode:
919  * @dest: buffer for the converted string, NUL terminated
920  * @dest_len: size of @dest
921  * @src: buffer with the string to be converted
922  * @len: length of @src in bytes, size of @dest must be at least (@len * 4) + 1
923  *
924  * Returns: the number of bytes of @dest that have been used
925  *
926  * Converts a string from ISO/IEC 8859-1 to UCS-2 encoded as ASCII-encoded hexadecimal bytes.
927  * This function should convert "ABC" to "004100420043"
928  * Used only in AT driver for UCS2 encoding commands.
929  * It reads char by char from the input.
930  */
931 #define UCS2_SIZE	4
char_ucs2_encode(char * dest,size_t dest_len,const char * src,size_t len)932 size_t char_ucs2_encode(char *dest, size_t dest_len, const char *src, size_t len)
933 {
934 	wchar_t wc;
935 	int i, o_len, length;
936 	MBSTATE mbs;
937 
938 	MBSTATE_ENC_CLEAR(mbs);
939 	for (i = 0, o_len = 0; i < len && o_len < dest_len / UCS2_SIZE; o_len++, i++) {
940 		/*
941 		 * We read input by convertible chunks. 'length' is length of
942 		 * the read chunk.
943 		 */
944 		length = char_uni_alphabet_encode(src + i, 1, &wc, &mbs);
945 		/* We stop reading after first unreadable input */
946 		if (length < 1)
947 			return o_len * UCS2_SIZE;
948 		/* We write here 4 chars + NULL termination */
949 		/* XXX: We should probably check wchar_t size. */
950 		snprintf(dest + (o_len * UCS2_SIZE), UCS2_SIZE + 1, "%04X", wc);
951 	}
952 	return o_len * UCS2_SIZE;
953 }
954 
955 /**
956  * char_unicode_decode:
957  * @dest: buffer for the converted string, NUL terminated
958  * @src: buffer with the string to be converted
959  * @len: length of @src in bytes
960  *
961  * Returns: the number of bytes of @dest that have been used
962  *
963  * Converts a string from UTF-8 to ISO/IEC 8859-1.
964  */
char_unicode_decode(unsigned char * dest,const unsigned char * src,int len)965 unsigned int char_unicode_decode(unsigned char* dest, const unsigned char* src, int len)
966 {
967 	int i, length = 0, pos = 0;
968 	MBSTATE mbs;
969 
970 	MBSTATE_DEC_CLEAR(mbs);
971 	for (i = 0; i < len / 2; i++) {
972 		wchar_t wc = src[i * 2] << 8 | src[(i * 2) + 1];
973 		length = char_uni_alphabet_decode(wc, dest, &mbs);
974 		dest += length;
975 		pos += length;
976 	}
977 	*dest = 0;
978 	return pos;
979 }
980 
981 /**
982  * char_unicode_encode:
983  * @dest: buffer for the converted string, not NUL terminated
984  * @src: buffer with the string to be converted
985  * @len: length of @src in bytes
986  *
987  * Returns: the number of bytes of @dest that have been used
988  *
989  * Converts a string from ISO/IEC 8859-1 to UTF-8.
990  */
char_unicode_encode(unsigned char * dest,const unsigned char * src,int len)991 unsigned int char_unicode_encode(unsigned char* dest, const unsigned char* src, int len)
992 {
993 	int pos = 0;
994 	MBSTATE mbs;
995 #ifndef HAVE_ICONV
996 	int length, offset = 0;
997 	wchar_t  wc;
998 #endif
999 
1000 	MBSTATE_ENC_CLEAR(mbs);
1001 #ifdef HAVE_ICONV
1002 	pos = ucs2_encode(dest, 2 * len, src, len);
1003 #else
1004 	while (offset < len) {
1005 		length = char_uni_alphabet_encode(src + offset, len - offset, &wc, &mbs);
1006 		switch (length) {
1007 		case -1:
1008 			dest[pos++] =  wc >> 8 & 0xFF;
1009 			dest[pos++] =  wc & 0xFF;
1010 			offset++;
1011 			break;
1012 		case 0: /* Avoid infinite loop */
1013 			offset++;
1014 			break;
1015 		default:
1016 			dest[pos++] =  wc >> 8 & 0xFF;
1017 			dest[pos++] =  wc & 0xFF;
1018 			offset += length;
1019 			break;
1020 		}
1021 	}
1022 #endif
1023 	return pos;
1024 }
1025 
1026 /* Conversion bin -> hex and hex -> bin */
1027 
/* Returns the value of one hexadecimal digit, or -1 if @c is not a hexadecimal digit. */
static int hex2bin_nibble(unsigned char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/**
 * hex2bin:
 * @dest: buffer for the converted string
 * @src: buffer with the string to be converted
 * @len: length of @src, size of @dest must be at least @len / 2
 *
 * Converts from ASCII-encoded hexadecimal bytes to binary.
 * @len must be a multiple of 2 (a trailing odd character is ignored).
 * If a character of @src is not a hexadecimal digit the conversion stops
 * and the first byte of @dest is set to 0.
 */
void hex2bin(unsigned char *dest, const unsigned char *src, unsigned int len)
{
	unsigned int i;

	if (!dest || !src) return;

	/* Two input characters make one output byte, so only @len / 2 bytes are produced */
	for (i = 0; i < len / 2; i++) {
		int high = hex2bin_nibble(src[2 * i]);
		int low = hex2bin_nibble(src[2 * i + 1]);

		if (high < 0 || low < 0) {
			dest[0] = 0;
			return;
		}
		dest[i] = (unsigned char)((high << 4) | low);
	}
}
1064 
/**
 * bin2hex:
 * @dest: buffer for the converted string, not NUL terminated
 * @src: buffer with the bytes to be converted
 * @len: number of bytes in @src, size of @dest must be at least @len * 2
 *
 * Writes every byte of @src as two upper case hexadecimal digits, most
 * significant nibble first. Does nothing when @dest is NULL.
 */
void bin2hex(unsigned char *dest, const unsigned char *src, unsigned int len)
{
	static const char hex_digits[] = "0123456789ABCDEF";
	unsigned int k;

	if (dest == NULL)
		return;

	for (k = 0; k < len; k++) {
		dest[2 * k] = hex_digits[(src[k] & 0xf0) >> 4];
		dest[2 * k + 1] = hex_digits[src[k] & 0x0f];
	}
}
1088 
1089 /**
1090  * char_semi_octet_pack:
1091  * @number: string containing the phone number to convert
1092  * @output: buffer for the converted phone number, not NUL terminated
1093  * @type: type of the phone number (eg. %GN_GSM_NUMBER_International)
1094  *
1095  * Returns: the number of semi octects used by the whole encoded string
1096  *
1097  * This function implements packing of numbers (SMS Center number and
1098  * destination number) for SMS sending function.
1099  */
char_semi_octet_pack(char * number,unsigned char * output,gn_gsm_number_type type)1100 int char_semi_octet_pack(char *number, unsigned char *output, gn_gsm_number_type type)
1101 {
1102 	char *in_num = number;  /* Pointer to the input number */
1103 	unsigned char *out_num = output; /* Pointer to the output */
1104 	int count = 0; /* This variable is used to notify us about count of already
1105 			  packed numbers. */
1106 
1107 	/* The first byte in the Semi-octet representation of the address field is
1108 	   the Type-of-Address. This field is described in the official GSM
1109 	   specification 03.40 version 6.1.0, section 9.1.2.5, page 33. We support
1110 	   only international, unknown and alphanumeric number. */
1111 
1112 	*out_num++ = type;
1113 
1114 	if (((type & GN_GSM_NUMBER_Type_Mask) & GN_GSM_NUMBER_Alphanumeric_Mask) == GN_GSM_NUMBER_Alphanumeric_Mask) {
1115 		count = strlen(number);
1116 		return 2 * char_7bit_pack(0, number, out_num, &count);
1117 	}
1118 
1119 	if ((((type & GN_GSM_NUMBER_International_Mask) == GN_GSM_NUMBER_International_Mask) ||
1120 		(type & GN_GSM_NUMBER_Type_Mask) == 0) && /* Unknown */
1121 		*in_num == '+')
1122 		in_num++; /* skip leading '+' */
1123 
1124 	/* The next field is the number. It is in semi-octet representation - see
1125 	   GSM specification 03.40 version 6.1.0, section 9.1.2.3, page 31. */
1126 	while (*in_num) {
1127 		if (count & 0x01) {
1128 			*out_num = *out_num | ((*in_num - '0') << 4);
1129 			out_num++;
1130 		}
1131 		else
1132 			*out_num = *in_num - '0';
1133 		count++; in_num++;
1134 	}
1135 
1136 	/* We should also fill in the most significant bits of the last byte with
1137 	   0x0f (1111 binary) if the number is represented with odd number of
1138 	   digits. */
1139 	if (count & 0x01) {
1140 		*out_num = *out_num | 0xf0;
1141 		out_num++;
1142 	}
1143 
1144 	return (2 * (out_num - output - 1) - (count % 2));
1145 }
1146 
1147 /**
1148  * char_bcd_number_get:
1149  * @number: a phone number encoded in BCD format
1150  *
1151  * Returns: a static buffer with the converted phone number, NUL terminated
1152  *
1153  * This function implements unpacking of numbers (SMS Center number and
1154  * destination number) for SMS receiving function.
1155  */
char_bcd_number_get(u8 * number)1156 char *char_bcd_number_get(u8 *number)
1157 {
1158 	static char buffer[GN_BCD_STRING_MAX_LENGTH] = "";
1159 	int length = number[0]; /* This is the length of BCD coded number */
1160 	int count, digit, i = 0;
1161 
1162 	if (length > GN_BCD_STRING_MAX_LENGTH) length = GN_BCD_STRING_MAX_LENGTH;
1163 	switch (number[1]) {
1164 	case GN_GSM_NUMBER_Alphanumeric:
1165 		char_7bit_unpack(0, length, length, number + 2, buffer);
1166 		buffer[length] = 0;
1167 		break;
1168 	case GN_GSM_NUMBER_International:
1169 		snprintf(buffer, sizeof(buffer), "+");
1170 		i++;
1171 		if (length == GN_BCD_STRING_MAX_LENGTH)
1172 			length--; /* avoid overflow */
1173 	case GN_GSM_NUMBER_Unknown:
1174 	case GN_GSM_NUMBER_National:
1175 	case GN_GSM_NUMBER_Network:
1176 	case GN_GSM_NUMBER_Subscriber:
1177 	case GN_GSM_NUMBER_Abbreviated:
1178 	default:
1179 		/* start at 2 to skip length and TON (we can't overflow the buffer because i <= GN_BCD_STRING_MAX_LENGTH - 2) */
1180 		for (count = 2; count <= length; count++) {
1181 			digit = number[count] & 0x0f;
1182 			if (digit < 0x0f)
1183 				buffer[i++] = bcd_digits[digit];
1184 			digit = number[count] >> 4;
1185 			if (digit < 0x0f)
1186 				buffer[i++] = bcd_digits[digit];
1187 		}
1188 		buffer[i] = '\0';
1189 		break;
1190 	}
1191 	return buffer;
1192 }
1193 
1194 /* UTF-8 conversion functions */
1195 
/**
 * utf8_decode:
 * @outstring: buffer for the converted string; with iconv() it is NUL
 *             terminated when the terminator fits, otherwise it is not
 *             terminated
 * @outlen: size of @outstring
 * @instring: buffer with the string to be converted
 * @inlen: length of @instring
 *
 * Returns: the number of bytes used in @outstring, or -1 in case of errors
 *
 * Converts a string from UTF-8 to an application specified (or system default) encoding.
 * Uses iconv() if available, else uses internal replacement code which
 * copies ASCII characters and replaces every other sequence with '?'.
 */
int utf8_decode(char *outstring, size_t outlen, const char *instring, size_t inlen)
{
	int retval = -1;
	size_t nconv;

#if defined(HAVE_ICONV)
	ICONV_CONST char *pin;
	char *pout;
	iconv_t cd;

	pin = (char *)instring;
	pout = outstring;

	cd = iconv_open(gn_char_get_encoding(), "UTF-8");
	if (cd == (iconv_t)-1)
		return -1;
	nconv = iconv(cd, &pin, &inlen, &pout, &outlen);
	if (nconv == (size_t)-1)
		perror("utf8_decode/iconv");
	else
		retval = (char *)pout - outstring;
	/* The descriptor must be released on the error path too */
	iconv_close(cd);
	/* iconv() leaves the remaining room in outlen: terminate only if a byte is left */
	if (outlen > 0)
		*pout = 0;
#else
	const unsigned char *pin = (const unsigned char *)instring;
	unsigned char *pout = (unsigned char *)outstring;

	while (inlen > 0 && outlen > 0) {
		/* nconv is the length of the input sequence announced by its first byte */
		if (*pin < 0x80) {
			*pout = *pin;
			nconv = 1;
		} else {
			*pout = '?';
			if (*pin < 0xc0)
				nconv = 1;
			else if (*pin < 0xe0)
				nconv = 2;
			else if (*pin < 0xf0)
				nconv = 3;
			else if (*pin < 0xf8)
				nconv = 4;
			else if (*pin < 0xfc)
				nconv = 5;
			else
				nconv = 6;
		}
		/* A sequence cut short by the end of the input must not make inlen wrap around */
		if (nconv > inlen)
			nconv = inlen;
		inlen -= nconv;
		outlen--;
		pin += nconv;
		/* A NUL is copied (and counted) and ends the conversion */
		if (*pout++ == '\0')
			break;
	}
	retval = (char *)pout - outstring;
#endif
	return retval;
}
1270 
/**
 * utf8_encode:
 * @outstring: buffer for the converted string; with iconv() it is NUL
 *             terminated when the terminator fits, otherwise it is not
 *             terminated
 * @outlen: size of @outstring
 * @instring: buffer with the string to be converted
 * @inlen: length of @instring
 *
 * Returns: the number of bytes used in @outstring, or -1 in case of errors
 *
 * Converts a string from an application specified (or system default) encoding to UTF-8.
 * Uses iconv() if available, else uses internal replacement code which
 * copies ASCII characters and replaces every other byte with '?'.
 */
int utf8_encode(char *outstring, int outlen, const char *instring, int inlen)
{
	int retval = -1;
#if defined(HAVE_ICONV)
	size_t outleft, inleft, nconv;
	ICONV_CONST char *pin;
	char *pout;
	iconv_t cd;

	outleft = outlen;
	inleft = inlen;
	pin = (char *)instring;
	pout = outstring;

	cd = iconv_open("UTF-8", gn_char_get_encoding());
	if (cd == (iconv_t)-1)
		return -1;

	nconv = iconv(cd, &pin, &inleft, &pout, &outleft);
	if (nconv == (size_t)-1)
		perror("utf8_encode/iconv");
	else
		retval = (char *)pout - outstring;
	/* iconv() leaves the remaining room in outleft: terminate only if a byte is left */
	if (outleft > 0)
		*pout = 0;
	iconv_close(cd);
#else
	const unsigned char *pin = (const unsigned char *)instring;
	unsigned char *pout = (unsigned char *)outstring;

	while (inlen > 0 && outlen > 0) {
		/* Without iconv() only ASCII can be represented */
		if (*pin >= 0x80)
			*pout = '?';
		else
			*pout = *pin;

		inlen--;
		outlen--;
		pin++;
		/* A NUL is copied (and counted) and ends the conversion */
		if (*pout++ == '\0') break;
	}
	retval = (char *)pout - outstring;
#endif
	return retval;
}
1331 
1332 /* UCS-2 functions */
1333 
/**
 * ucs2_encode:
 * @outstring: buffer for the converted string, not NUL terminated
 * @outlen: size of @outstring
 * @instring: buffer with the string to be converted
 * @inlen: length of @instring
 *
 * Returns: the number of bytes used in @outstring, or -1 in case of errors
 *
 * Converts a string from an application specified (or system default)
 * encoding to big-endian UCS-2. The conversion is done by iconv() when it
 * is available; without it the call is forwarded to char_unicode_encode(),
 * which does not receive @outlen.
 */
int ucs2_encode(char *outstring, int outlen, const char *instring, int inlen)
{
#if !defined(HAVE_ICONV)
	return char_unicode_encode(outstring, instring, inlen);
#else
	iconv_t conv;
	ICONV_CONST char *in_cursor = (char *)instring;
	char *out_cursor = outstring;
	size_t in_remaining = inlen;
	size_t out_remaining = outlen;
	int used;

	conv = iconv_open("UCS-2BE", gn_char_get_encoding());
	if (conv == (iconv_t)-1)
		return -1;

	if (iconv(conv, &in_cursor, &in_remaining, &out_cursor, &out_remaining) == (size_t)-1) {
		perror("ucs2_encode/iconv");
		used = -1;
	} else {
		used = (char *)out_cursor - outstring;
	}
	iconv_close(conv);
	return used;
#endif
}
1375 
/**
 * ucs2_decode:
 * @outstring: buffer for the converted string, not NUL terminated when
 *             iconv() is used
 * @outlen: size of @outstring
 * @instring: buffer with the string to be converted
 * @inlen: length of @instring
 *
 * Returns: the number of bytes used in @outstring, or -1 in case of errors
 *
 * Converts a string from big-endian UCS-2 to an application specified (or
 * system default) encoding.
 * Uses iconv() if available, else forwards the call to
 * char_unicode_decode(), which does not receive @outlen.
 */
int ucs2_decode(char *outstring, int outlen, const char *instring, int inlen)
{
#if defined(HAVE_ICONV)
	size_t outleft, inleft, nconv;
	ICONV_CONST char *pin;
	char *pout;
	iconv_t cd;
	int retval = -1;

	outleft = outlen;
	inleft = inlen;
	pin = (char *)instring;
	pout = outstring;

	cd = iconv_open(gn_char_get_encoding(), "UCS-2BE");
	if (cd == (iconv_t)-1)
		return -1;

	nconv = iconv(cd, &pin, &inleft, &pout, &outleft);
	if (nconv == (size_t)-1)
		perror("ucs2_decode/iconv"); /* name this function, not ucs2_encode */
	else
		retval = (char *)pout - outstring;
	iconv_close(cd);
	return retval;
#else
	return char_unicode_decode((unsigned char *)outstring, (const unsigned char *)instring, inlen);
#endif
}
1405 
1406 /* BASE64 functions */
1407 
/**
 * string_base64:
 * @instring: the NUL terminated string to check
 *
 * Returns: 1 if the string must be encoded in base 64, 0 otherwise
 *
 * A string needs base 64 encoding as soon as one of its bytes has the most
 * significant bit set.
 */
int string_base64(const char *instring)
{
	const char *cursor = instring;

	while (*cursor != '\0') {
		if ((*cursor & 0x80) != 0)
			return 1;
		cursor++;
	}
	return 0;
}
1423 
1424 /**
1425  * base64_encode:
1426  * @outstring: buffer for the converted string, will be NUL terminated
1427  * @outlen: size of @outstring
1428  * @instring: buffer with the string to be converted
1429  * @inlen: length of @instring
1430  *
1431  * Returns: the length of the converted string
1432  *
1433  * Converts a generic string to base64 encoding.
1434  * @outlen needs to be at least 4 / 3 times + 1 bigger than @inlen to hold
1435  * the converted string and the terminator.
1436  */
base64_encode(char * outstring,int outlen,const char * instring,int inlen)1437 int base64_encode(char *outstring, int outlen, const char *instring, int inlen)
1438 {
1439 	const char *pin;
1440 	char *pout;
1441 	char *outtemp = NULL;
1442 	int inleft, outleft;
1443 
1444 	pout = outstring;
1445 	inleft = inlen;
1446 	outleft = outlen;
1447 	pin = instring;
1448 
1449 	/* This is in case someone passes a buffer not appropriate for outstring */
1450 	while (outleft > 3 && inleft > 0) {
1451 		int a, b, c;
1452 		unsigned int i1, i2, i3, i4;
1453 
1454 		a = *pin++;
1455 		b = (inleft > 1) ? *(pin++) : 0;
1456 		c = (inleft > 2) ? *(pin++) : 0;
1457 
1458 		/* calculate the indexes */
1459 		i1 = (a & 0xfc) >> 2;
1460 		*(pout++) = base64_alphabet[i1];
1461 
1462 		i2 = ((a & 0x03) << 4) | ((b & 0xf0) >> 4);
1463 		*(pout++) = base64_alphabet[i2];
1464 
1465 		inleft--;
1466 
1467 		i3 = ((b & 0x0f) << 2) | ((c & 0xc0) >> 6);
1468 		if (!inleft) {
1469 			*(pout++) = '=';
1470 		} else {
1471 			*(pout++) = base64_alphabet[i3];
1472 			inleft--;
1473 		}
1474 
1475 		i4 = c & 0x3f;
1476 		if (!inleft)
1477 			*(pout++) = '=';
1478 		else {
1479 			*(pout++) = base64_alphabet[i4];
1480 			inleft--;
1481 		}
1482 
1483 		outleft -= 4;
1484 	}
1485 
1486 	/* terminate the output string */
1487 	*pout = 0;
1488 
1489 	free(outtemp);
1490 
1491 	return pout - outstring;
1492 }
1493 
/**
 * base64_decode:
 * @dest: buffer for the converted string, will be NUL terminated
 * @destlen: size of @dest
 * @source: buffer with the string to be converted
 * @inlen: length of @source
 *
 * Returns: the number of bytes used in @dest, not counting the terminator
 *
 * Converts a generic string from base 64 encoding.
 * @destlen needs to be at least 3 / 4 + 1 of @inlen to hold the converted
 * string and the terminator.
 * Characters outside the base 64 alphabet are skipped. Decoding stops at
 * the end of @source, at a NUL byte, after a padded group, or when the next
 * group would not fit in @dest together with the terminator.
 */
int base64_decode(char *dest, int destlen, const char *source, int inlen)
{
	int dtable[256];
	int i;
	int dpos = 0;
	int spos = 0;

	if (!dest || destlen <= 0)
		return 0;
	/* The result is terminated even if no complete group gets decoded */
	dest[0] = 0;
	if (!source)
		return 0;

	/* 0x80 marks the characters that are not part of the alphabet */
	for (i = 0; i < 256; i++) {
		dtable[i] = 0x80;
	}
	for (i = 'A'; i <= 'Z'; i++) {
		dtable[i] = 0 + (i - 'A');
	}
	for (i = 'a'; i <= 'z'; i++) {
		dtable[i] = 26 + (i - 'a');
	}
	for (i = '0'; i <= '9'; i++) {
		dtable[i] = 52 + (i - '0');
	}
	dtable['+'] = 62;
	dtable['/'] = 63;
	dtable['='] = 0;

	for (;;) {
		int a[4], b[4], o[3];
		int count;

		/* Collect the next four valid characters */
		for (i = 0; i < 4; ) {
			unsigned char c;

			if (spos >= inlen)
				return dpos;
			/* Index the table with an unsigned value: plain char may be negative */
			c = (unsigned char)source[spos++];
			if (c == 0)
				return dpos;
			if (dtable[c] & 0x80) {
				/* Ignoring errors: discard invalid character. */
				continue;
			}
			a[i] = c;
			b[i] = dtable[c];
			i++;
		}
		o[0] = ((b[0] << 2) | (b[1] >> 4)) & 0xff;
		o[1] = ((b[1] << 4) | (b[2] >> 2)) & 0xff;
		o[2] = ((b[2] << 6) | b[3]) & 0xff;
		/* Padding tells how many bytes the group really carries */
		count = a[2] == '=' ? 1 : (a[3] == '=' ? 2 : 3);
		/* The decoded bytes and the terminator must fit in @dest */
		if (dpos + count >= destlen)
			return dpos;
		for (i = 0; i < count; i++)
			dest[dpos++] = (char)o[i];
		dest[dpos] = 0;
		/* A padded group is always the last one */
		if (count < 3)
			return dpos;
	}
}
1569 
/**
 * utf8_base64_encode:
 * @dest: buffer for the converted string, NUL terminated
 * @destlen: size of @dest
 * @in: buffer with the string to be converted
 * @inlen: length of @in
 *
 * Returns: the number of bytes used by the converted string, or -1 if the
 * temporary buffer cannot be allocated or the conversion to UTF-8 fails
 *
 * Converts a string from application default encoding to UTF-8 then to base 64.
 * @dest must be valid and big enough to hold the converted string.
 */
int utf8_base64_encode(char *dest, int destlen, const char *in, int inlen)
{
	char *aux;
	int retval;

	if (destlen < 0)
		return -1;

	/* Temporary buffer for the UTF-8 form, one byte bigger than announced */
	aux = calloc((size_t)destlen + 1, sizeof(char));
	if (!aux)
		return -1;

	retval = utf8_encode(aux, destlen, in, inlen);
	if (retval >= 0)
		retval = base64_encode(dest, destlen, aux, retval);

	free(aux);
	return retval;
}
1596 
/**
 * utf8_base64_decode:
 * @dest: buffer for the converted string
 * @destlen: size of @dest
 * @in: buffer with the string to be converted
 * @inlen: length of @in
 *
 * Returns: the number of bytes used by the converted string, or -1 if the
 * temporary buffer cannot be allocated or the conversion from UTF-8 fails
 *
 * Converts a string from base 64 to UTF-8 then to application default encoding.
 * @dest must be valid and big enough to hold the converted string.
 */
int utf8_base64_decode(char *dest, int destlen, const char *in, int inlen)
{
	char *aux;
	int retval;

	if (destlen < 0)
		return -1;

	/* Temporary buffer for the UTF-8 form, one byte bigger than announced */
	aux = calloc((size_t)destlen + 1, sizeof(char));
	if (!aux)
		return -1;

	retval = base64_decode(aux, destlen, in, inlen);
	if (retval >= 0)
		retval = utf8_decode(dest, destlen, aux, retval);

	free(aux);
	return retval;
}
1623 
/**
 * add_slashes:
 * @dest: buffer for the converted string, NUL terminated
 * @src: buffer with the string to be converted
 * @maxlen: size of @dest, must be 2 * @len + 1 in the worst case
 * @len: length of @src
 *
 * Returns: the number of bytes used by the converted string, not counting
 * the terminator
 *
 * Escapes the following characters (according to rfc 2426):
 * '\n', '\r', ';', ',', '\'.
 * If @dest is too small the conversion stops before the first character
 * that does not fit completely; nothing is written when @maxlen is not
 * positive.
 */
int add_slashes(char *dest, char *src, int maxlen, int len)
{
	int i, j = 0;

	if (maxlen <= 0)
		return 0;

	for (i = 0; i < len; i++) {
		/* Character following the backslash, 0 if src[i] is copied unchanged */
		char escaped = 0;

		switch (src[i]) {
		case '\n':
			escaped = 'n';
			break;
		case '\r':
			escaped = 'r';
			break;
		case '\\':
		case ';':
		case ',':
			escaped = src[i];
			break;
		default:
			break;
		}

		/* The whole sequence and the terminator must fit in @dest */
		if (j + (escaped ? 2 : 1) >= maxlen)
			break;

		if (escaped) {
			dest[j++] = '\\';
			dest[j++] = escaped;
		} else {
			dest[j++] = src[i];
		}
	}
	dest[j] = 0;
	return j;
}
1662 
/**
 * strip_slashes:
 * @dest: buffer for the converted string, NUL terminated
 * @src: buffer with the string to be converted
 * @maxlen: size of @dest, must be @len + 1 in the worst case
 * @len: length of @src
 *
 * Returns: the number of bytes used by the converted string, not counting
 * the terminator
 *
 * Unescapes the characters escaped by add_slashes(). A backslash in front
 * of any other character is kept together with that character; a single
 * backslash at the very end of @src is dropped.
 * If @dest is too small the conversion stops before the first character
 * that does not fit completely; nothing is written when @maxlen is not
 * positive.
 */
int strip_slashes(char *dest, const char *src, int maxlen, int len)
{
	int i, j = 0, slash_state = 0;

	if (maxlen <= 0)
		return 0;

	for (i = 0; i < len; i++) {
		char c = src[i];
		int keep_slash = 0;

		/* An unescaped backslash only opens an escape sequence */
		if (c == '\\' && !slash_state) {
			slash_state = 1;
			continue;
		}

		if (slash_state) {
			switch (c) {
			case 'n':
				c = '\n';
				break;
			case 'r':
				c = '\r';
				break;
			case '\\':
			case ';':
			case ',':
				break;
			default:
				/* Not an escape produced by add_slashes(): keep the backslash */
				keep_slash = 1;
				break;
			}
		}

		/* Everything written for this character and the terminator must fit in @dest */
		if (j + keep_slash + 1 >= maxlen)
			break;

		if (keep_slash)
			dest[j++] = '\\';
		dest[j++] = c;
		slash_state = 0;
	}
	dest[j] = 0;
	return j;
}
1723