1 /* FreeTDS - Library of routines accessing Sybase and Microsoft databases
2  * Copyright (C) 2010 Frediano Ziglio
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public
15  * License along with this library; if not, write to the
16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17  * Boston, MA 02111-1307, USA.
18  */
19 #undef NDEBUG
20 #define TDS_DONT_DEFINE_DEFAULT_FUNCTIONS
21 #include "common.h"
22 
23 #include <ctype.h>
24 
25 #include <common/test_assert.h>
26 
/*
 * Largest number of characters found so far in a single string converted
 * by to_utf8().  The count is in decoded characters (one per plain byte or
 * "&#...;" reference read from the input), not in UTF-8 output bytes.
 */
int utf8_max_len = 0;
28 
/**
 * Read one character from a test string and advance the cursor past it.
 *
 * The string is a sequence of plain bytes and numeric character references
 * written as "&#NNN;" (decimal) or "&#xHHH;" / "&#XHHH;" (hexadecimal).
 *
 * @param psrc  in/out cursor into a NUL-terminated string.  On success it is
 *              moved past the byte or reference that was consumed; at the
 *              end of the string it is left unchanged.
 * @return the value of a reference (1..0xFFFF), or the value of a plain byte
 *         as an unsigned char, or -1 if the cursor is at the terminating NUL.
 *
 * A reference that is not terminated by ';' or whose value is outside
 * 1..0xFFFF stops the program through assert().
 */
int
get_unichar(const char **psrc)
{
	const char *src = *psrc;
	int n;

	if (!*src)
		return -1;

	if (src[0] == '&' && src[1] == '#') {
		char *end;
		long value;
		int radix = 10;

		src += 2;
		/* <ctype.h> functions require an unsigned char value; plain char may be negative */
		if (toupper((unsigned char) *src) == 'X') {
			radix = 16;
			++src;
		}
		/*
		 * Check the range on the long returned by strtol(): narrowing to
		 * int first could wrap an oversized number into the valid range.
		 */
		value = strtol(src, &end, radix);
		assert(*end == ';' && value > 0 && value < 0x10000);
		n = (int) value;
		src = end + 1;
	} else {
		n = (unsigned char) *src++;
	}
	*psrc = src;
	return n;
}
54 
55 char *
to_utf8(const char * src,char * dest)56 to_utf8(const char *src, char *dest)
57 {
58 	unsigned char *p = (unsigned char *) dest;
59 	int len = 0, n;
60 
61 	while ((n=get_unichar(&src)) > 0) {
62 		if (n >= 0x2000) {
63 			*p++ = 0xe0 | (n >> 12);
64 			*p++ = 0x80 | ((n >> 6) & 0x3f);
65 			*p++ = 0x80 | (n & 0x3f);
66 		} else if (n >= 0x80) {
67 			*p++ = 0xc0 | (n >> 6);
68 			*p++ = 0x80 | (n & 0x3f);
69 		} else {
70 			*p++ = (unsigned char) n;
71 		}
72 		++len;
73 	}
74 	if (len > utf8_max_len)
75 		utf8_max_len = len;
76 	*p = 0;
77 	return dest;
78 }
79 
80