1 /* utf8conv.c - Converter for Utf-8.
2
3 Copyright (C) 1999 Tom Tromey
4
5 The Gnome Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The Gnome Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the Gnome Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20
21 #include <config.h>
22
23 #include <stdlib.h>
24
25 #include "unicode.h"
26 #include "convert.h"
27 #include "utf8.h"
28
/* Initialise a converter instance for standard UTF-8.

   Stores the constant 1, cast to a pointer, in *PRIVP and returns 1.
   NOTE(review): the stored value is presumably handed back to
   utf8_write as its ARG, where a non-zero value selects the standard
   encoding - confirm against the conversion framework.  */
static int
utf8_std_init (void **privp)
{
  void *std_marker = (void *) 1;

  *privp = std_marker;
  return 1;
}
35
/* Initialise a converter instance for Java-style UTF-8.

   Stores the constant 0, cast to a pointer, in *PRIVP and returns 1.
   NOTE(review): the stored value is presumably handed back to
   utf8_write as its ARG, where zero selects the Java variant -
   confirm against the conversion framework.  */
static int
utf8_java_init (void **privp)
{
  void *java_marker = (void *) 0;

  *privp = java_marker;
  return 1;
}
42
43 static enum unicode_read_result
utf8_read(void * arg,const char ** inbuf,size_t * inbytesleft,unicode_char_t ** outbuf,size_t * outcharsleft)44 utf8_read (void *arg,
45 const char **inbuf, size_t *inbytesleft,
46 unicode_char_t **outbuf, size_t *outcharsleft)
47 {
48 while (*inbytesleft > 0 && *outcharsleft > 0)
49 {
50 int i, mask = 0, len;
51 unsigned char c = (unsigned char) **inbuf;
52
53 UTF8_COMPUTE (c, mask, len);
54 if (len == -1)
55 return unicode_read_error;
56
57 if (*inbytesleft < (unsigned int) len)
58 return unicode_read_incomplete;
59
60 UTF8_GET (**outbuf, *inbuf, i, mask, len);
61 if (**outbuf == (unicode_char_t) -1)
62 return unicode_read_error;
63
64 *inbuf += len;
65 *inbytesleft -= len;
66
67 ++*outbuf;
68 --*outcharsleft;
69 }
70
71 return unicode_read_ok;
72 }
73
74 static enum unicode_write_result
utf8_write(void * arg,unicode_char_t ** inbuf,size_t * incharsleft,char ** outbuf,size_t * outbytesleft)75 utf8_write (void *arg,
76 unicode_char_t **inbuf, size_t *incharsleft,
77 char **outbuf, size_t *outbytesleft)
78 {
79 int is_std = (int) arg;
80
81 while (*incharsleft > 0 && *outbytesleft > 0)
82 {
83 size_t len = 0;
84 int first;
85 int i;
86
87 unicode_char_t c = **inbuf;
88
89 if (c < 0x80 && (is_std || c > 0))
90 {
91 first = 0;
92 len = 1;
93 }
94 else if (c < 0x800)
95 {
96 first = 0xc0;
97 len = 2;
98 }
99 else if (c < 0x10000)
100 {
101 first = 0xe0;
102 len = 3;
103 }
104 else if (c < 0x200000)
105 {
106 first = 0xf0;
107 len = 4;
108 }
109 else if (c < 0x4000000)
110 {
111 first = 0xf8;
112 len = 5;
113 }
114 else
115 {
116 first = 0xfc;
117 len = 6;
118 }
119
120 if (*outbytesleft < len)
121 return unicode_write_more_room;
122
123 for (i = len - 1; i > 0; --i)
124 {
125 (*outbuf)[i] = (c & 0x3f) | 0x80;
126 c >>= 6;
127 }
128 (*outbuf)[0] = c | first;
129
130 *outbuf += len;
131 *outbytesleft -= len;
132
133 ++*inbuf;
134 --*incharsleft;
135 }
136
137 return unicode_write_ok;
138 }
139
140 /* The encoding descriptor for UTF8. */
141 static char *un[] = { "UTF-8", NULL };
142 unicode_encoding_t unicode_utf8_encoding =
143 {
144 un,
145 utf8_std_init, /* Init. */
146 NULL, /* Destroy. */
147 NULL, /* Reset. */
148 utf8_read,
149 utf8_write,
150 NULL
151 };
152
153 /* The encoding descriptor for Java-style UTF8. */
154 static char *jn[] = { "Java-Utf-8", NULL };
155 unicode_encoding_t unicode_java_utf8_encoding =
156 {
157 jn,
158 utf8_java_init, /* Init. */
159 NULL, /* Destroy. */
160 NULL, /* Reset. */
161 utf8_read,
162 utf8_write,
163 NULL
164 };
165