1 /* utf8conv.c - Converter for Utf-8.
2 
3    Copyright (C) 1999 Tom Tromey
4 
5    The Gnome Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Library General Public License as
7    published by the Free Software Foundation; either version 2 of the
8    License, or (at your option) any later version.
9 
10    The Gnome Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Library General Public License for more details.
14 
15    You should have received a copy of the GNU Library General Public
16    License along with the Gnome Library; see the file COPYING.LIB.  If not,
17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18    Boston, MA 02111-1307, USA.  */
19 
20 
21 #include <config.h>
22 
23 #include <stdlib.h>
24 
25 #include "unicode.h"
26 #include "convert.h"
27 #include "utf8.h"
28 
/* Init hook for the standard UTF-8 encoding.

   Stores a non-null marker value in *PRIVP and returns 1.  The marker
   is presumably handed back as the ARG of utf8_write, which treats a
   non-zero value as "standard" mode -- confirm against the converter
   framework.  */
static int
utf8_std_init (void **privp)
{
  void *std_marker = (void *) 1;

  *privp = std_marker;
  return 1;
}
35 
/* Init hook for the Java-style UTF-8 encoding.

   Clears *PRIVP to a null pointer and returns 1.  The null value is
   presumably handed back as the ARG of utf8_write, which then encodes
   U+0000 in two bytes -- confirm against the converter framework.  */
static int
utf8_java_init (void **privp)
{
  *privp = NULL;
  return 1;
}
42 
43 static enum unicode_read_result
utf8_read(void * arg,const char ** inbuf,size_t * inbytesleft,unicode_char_t ** outbuf,size_t * outcharsleft)44 utf8_read (void *arg,
45 	   const char **inbuf,  size_t *inbytesleft,
46 	   unicode_char_t **outbuf, size_t *outcharsleft)
47 {
48   while (*inbytesleft > 0 && *outcharsleft > 0)
49     {
50       int i, mask = 0, len;
51       unsigned char c = (unsigned char) **inbuf;
52 
53       UTF8_COMPUTE (c, mask, len);
54       if (len == -1)
55 	return unicode_read_error;
56 
57       if (*inbytesleft < (unsigned int) len)
58 	return unicode_read_incomplete;
59 
60       UTF8_GET (**outbuf, *inbuf, i, mask, len);
61       if (**outbuf == (unicode_char_t) -1)
62 	return unicode_read_error;
63 
64       *inbuf += len;
65       *inbytesleft -= len;
66 
67       ++*outbuf;
68       --*outcharsleft;
69     }
70 
71   return unicode_read_ok;
72 }
73 
74 static enum unicode_write_result
utf8_write(void * arg,unicode_char_t ** inbuf,size_t * incharsleft,char ** outbuf,size_t * outbytesleft)75 utf8_write (void *arg,
76 	    unicode_char_t **inbuf,  size_t *incharsleft,
77 	    char **outbuf, size_t *outbytesleft)
78 {
79   int is_std = (int) arg;
80 
81   while (*incharsleft > 0 && *outbytesleft > 0)
82     {
83       size_t len = 0;
84       int first;
85       int i;
86 
87       unicode_char_t c = **inbuf;
88 
89       if (c < 0x80 && (is_std || c > 0))
90 	{
91 	  first = 0;
92 	  len = 1;
93 	}
94       else if (c < 0x800)
95 	{
96 	  first = 0xc0;
97 	  len = 2;
98 	}
99       else if (c < 0x10000)
100 	{
101 	  first = 0xe0;
102 	  len = 3;
103 	}
104       else if (c < 0x200000)
105 	{
106 	  first = 0xf0;
107 	  len = 4;
108 	}
109       else if (c < 0x4000000)
110 	{
111 	  first = 0xf8;
112 	  len = 5;
113 	}
114       else
115 	{
116 	  first = 0xfc;
117 	  len = 6;
118 	}
119 
120       if (*outbytesleft < len)
121 	return unicode_write_more_room;
122 
123       for (i = len - 1; i > 0; --i)
124 	{
125 	  (*outbuf)[i] = (c & 0x3f) | 0x80;
126 	  c >>= 6;
127 	}
128       (*outbuf)[0] = c | first;
129 
130       *outbuf += len;
131       *outbytesleft -= len;
132 
133       ++*inbuf;
134       --*incharsleft;
135     }
136 
137   return unicode_write_ok;
138 }
139 
140 /* The encoding descriptor for UTF8.  */
141 static char *un[] = { "UTF-8", NULL };
142 unicode_encoding_t unicode_utf8_encoding =
143 {
144   un,
145   utf8_std_init,		/* Init.  */
146   NULL,				/* Destroy.  */
147   NULL,				/* Reset.  */
148   utf8_read,
149   utf8_write,
150   NULL
151 };
152 
153 /* The encoding descriptor for Java-style UTF8.  */
154 static char *jn[] = { "Java-Utf-8", NULL };
155 unicode_encoding_t unicode_java_utf8_encoding =
156 {
157   jn,
158   utf8_java_init,		/* Init.  */
159   NULL,				/* Destroy.  */
160   NULL,				/* Reset.  */
161   utf8_read,
162   utf8_write,
163   NULL
164 };
165