1 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
2 // use this file except in compliance with the License. You may obtain a copy of
3 // the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10 // License for the specific language governing permissions and limitations under
11 // the License.
12
13 #include <jsapi.h>
14 #include "config.h"
15
16 static int
enc_char(uint8 * utf8Buffer,uint32 ucs4Char)17 enc_char(uint8 *utf8Buffer, uint32 ucs4Char)
18 {
19 int utf8Length = 1;
20
21 if (ucs4Char < 0x80)
22 {
23 *utf8Buffer = (uint8)ucs4Char;
24 }
25 else
26 {
27 int i;
28 uint32 a = ucs4Char >> 11;
29 utf8Length = 2;
30 while(a)
31 {
32 a >>= 5;
33 utf8Length++;
34 }
35 i = utf8Length;
36 while(--i)
37 {
38 utf8Buffer[i] = (uint8)((ucs4Char & 0x3F) | 0x80);
39 ucs4Char >>= 6;
40 }
41 *utf8Buffer = (uint8)(0x100 - (1 << (8-utf8Length)) + ucs4Char);
42 }
43
44 return utf8Length;
45 }
46
47 static JSBool
enc_charbuf(const jschar * src,size_t srclen,char * dst,size_t * dstlenp)48 enc_charbuf(const jschar* src, size_t srclen, char* dst, size_t* dstlenp)
49 {
50 size_t i;
51 size_t utf8Len;
52 size_t dstlen = *dstlenp;
53 size_t origDstlen = dstlen;
54 jschar c;
55 jschar c2;
56 uint32 v;
57 uint8 utf8buf[6];
58
59 if(!dst)
60 {
61 dstlen = origDstlen = (size_t) -1;
62 }
63
64 while(srclen)
65 {
66 c = *src++;
67 srclen--;
68
69 if(c <= 0xD7FF || c >= 0xE000)
70 {
71 v = (uint32) c;
72 }
73 else if(c >= 0xD800 && c <= 0xDBFF)
74 {
75 if(srclen < 1) goto buffer_too_small;
76 c2 = *src++;
77 srclen--;
78 if(c2 >= 0xDC00 && c2 <= 0xDFFF)
79 {
80 v = (uint32) (((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000);
81 }
82 else
83 {
84 // Invalid second half of surrogate pair
85 v = (uint32) 0xFFFD;
86 // Undo our character advancement
87 src--;
88 srclen++;
89 }
90 }
91 else
92 {
93 // Invalid first half surrogate pair
94 v = (uint32) 0xFFFD;
95 }
96
97 if(v < 0x0080)
98 {
99 /* no encoding necessary - performance hack */
100 if(!dstlen) goto buffer_too_small;
101 if(dst) *dst++ = (char) v;
102 utf8Len = 1;
103 }
104 else
105 {
106 utf8Len = enc_char(utf8buf, v);
107 if(utf8Len > dstlen) goto buffer_too_small;
108 if(dst)
109 {
110 for (i = 0; i < utf8Len; i++)
111 {
112 *dst++ = (char) utf8buf[i];
113 }
114 }
115 }
116 dstlen -= utf8Len;
117 }
118
119 *dstlenp = (origDstlen - dstlen);
120 return JS_TRUE;
121
122 buffer_too_small:
123 *dstlenp = (origDstlen - dstlen);
124 return JS_FALSE;
125 }
126
127 char*
enc_string(JSContext * cx,jsval arg,size_t * buflen)128 enc_string(JSContext* cx, jsval arg, size_t* buflen)
129 {
130 JSString* str = NULL;
131 const jschar* src = NULL;
132 char* bytes = NULL;
133 size_t srclen = 0;
134 size_t byteslen = 0;
135
136 str = JS_ValueToString(cx, arg);
137 if(!str) goto error;
138
139 #ifdef HAVE_JS_GET_STRING_CHARS_AND_LENGTH
140 src = JS_GetStringCharsAndLength(cx, str, &srclen);
141 #else
142 src = JS_GetStringChars(str);
143 srclen = JS_GetStringLength(str);
144 #endif
145
146 if(!enc_charbuf(src, srclen, NULL, &byteslen)) goto error;
147
148 bytes = JS_malloc(cx, (byteslen) + 1);
149 bytes[byteslen] = 0;
150
151 if(!enc_charbuf(src, srclen, bytes, &byteslen)) goto error;
152
153 if(buflen) *buflen = byteslen;
154 goto success;
155
156 error:
157 if(bytes != NULL) JS_free(cx, bytes);
158 bytes = NULL;
159
160 success:
161 return bytes;
162 }
163
164 static uint32
dec_char(const uint8 * utf8Buffer,int utf8Length)165 dec_char(const uint8 *utf8Buffer, int utf8Length)
166 {
167 uint32 ucs4Char;
168 uint32 minucs4Char;
169
170 /* from Unicode 3.1, non-shortest form is illegal */
171 static const uint32 minucs4Table[] = {
172 0x00000080, 0x00000800, 0x0001000, 0x0020000, 0x0400000
173 };
174
175 if (utf8Length == 1)
176 {
177 ucs4Char = *utf8Buffer;
178 }
179 else
180 {
181 ucs4Char = *utf8Buffer++ & ((1<<(7-utf8Length))-1);
182 minucs4Char = minucs4Table[utf8Length-2];
183 while(--utf8Length)
184 {
185 ucs4Char = ucs4Char<<6 | (*utf8Buffer++ & 0x3F);
186 }
187 if(ucs4Char < minucs4Char || ucs4Char == 0xFFFE || ucs4Char == 0xFFFF)
188 {
189 ucs4Char = 0xFFFD;
190 }
191 }
192
193 return ucs4Char;
194 }
195
196 static JSBool
dec_charbuf(const char * src,size_t srclen,jschar * dst,size_t * dstlenp)197 dec_charbuf(const char *src, size_t srclen, jschar *dst, size_t *dstlenp)
198 {
199 uint32 v;
200 size_t offset = 0;
201 size_t j;
202 size_t n;
203 size_t dstlen = *dstlenp;
204 size_t origDstlen = dstlen;
205
206 if(!dst) dstlen = origDstlen = (size_t) -1;
207
208 while(srclen)
209 {
210 v = (uint8) *src;
211 n = 1;
212
213 if(v & 0x80)
214 {
215 while(v & (0x80 >> n))
216 {
217 n++;
218 }
219
220 if(n > srclen) goto buffer_too_small;
221 if(n == 1 || n > 6) goto bad_character;
222
223 for(j = 1; j < n; j++)
224 {
225 if((src[j] & 0xC0) != 0x80) goto bad_character;
226 }
227
228 v = dec_char((const uint8 *) src, n);
229 if(v >= 0x10000)
230 {
231 v -= 0x10000;
232
233 if(v > 0xFFFFF || dstlen < 2)
234 {
235 *dstlenp = (origDstlen - dstlen);
236 return JS_FALSE;
237 }
238
239 if(dstlen < 2) goto buffer_too_small;
240
241 if(dst)
242 {
243 *dst++ = (jschar)((v >> 10) + 0xD800);
244 v = (jschar)((v & 0x3FF) + 0xDC00);
245 }
246 dstlen--;
247 }
248 }
249
250 if(!dstlen) goto buffer_too_small;
251 if(dst) *dst++ = (jschar) v;
252
253 dstlen--;
254 offset += n;
255 src += n;
256 srclen -= n;
257 }
258
259 *dstlenp = (origDstlen - dstlen);
260 return JS_TRUE;
261
262 bad_character:
263 *dstlenp = (origDstlen - dstlen);
264 return JS_FALSE;
265
266 buffer_too_small:
267 *dstlenp = (origDstlen - dstlen);
268 return JS_FALSE;
269 }
270
271 JSString*
dec_string(JSContext * cx,const char * bytes,size_t byteslen)272 dec_string(JSContext* cx, const char* bytes, size_t byteslen)
273 {
274 JSString* str = NULL;
275 jschar* chars = NULL;
276 size_t charslen;
277
278 if(!dec_charbuf(bytes, byteslen, NULL, &charslen)) goto error;
279
280 chars = JS_malloc(cx, (charslen + 1) * sizeof(jschar));
281 if(!chars) return NULL;
282 chars[charslen] = 0;
283
284 if(!dec_charbuf(bytes, byteslen, chars, &charslen)) goto error;
285
286 str = JS_NewUCString(cx, chars, charslen - 1);
287 if(!str) goto error;
288
289 goto success;
290
291 error:
292 if(chars != NULL) JS_free(cx, chars);
293 str = NULL;
294
295 success:
296 return str;
297 }
298