1 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
2 // use this file except in compliance with the License. You may obtain a copy of
3 // the License at
4 //
5 //   http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10 // License for the specific language governing permissions and limitations under
11 // the License.
12 
13 #include <jsapi.h>
14 #include "config.h"
15 
16 static int
enc_char(uint8 * utf8Buffer,uint32 ucs4Char)17 enc_char(uint8 *utf8Buffer, uint32 ucs4Char)
18 {
19     int utf8Length = 1;
20 
21     if (ucs4Char < 0x80)
22     {
23         *utf8Buffer = (uint8)ucs4Char;
24     }
25     else
26     {
27         int i;
28         uint32 a = ucs4Char >> 11;
29         utf8Length = 2;
30         while(a)
31         {
32             a >>= 5;
33             utf8Length++;
34         }
35         i = utf8Length;
36         while(--i)
37         {
38             utf8Buffer[i] = (uint8)((ucs4Char & 0x3F) | 0x80);
39             ucs4Char >>= 6;
40         }
41         *utf8Buffer = (uint8)(0x100 - (1 << (8-utf8Length)) + ucs4Char);
42     }
43 
44     return utf8Length;
45 }
46 
47 static JSBool
enc_charbuf(const jschar * src,size_t srclen,char * dst,size_t * dstlenp)48 enc_charbuf(const jschar* src, size_t srclen, char* dst, size_t* dstlenp)
49 {
50     size_t i;
51     size_t utf8Len;
52     size_t dstlen = *dstlenp;
53     size_t origDstlen = dstlen;
54     jschar c;
55     jschar c2;
56     uint32 v;
57     uint8 utf8buf[6];
58 
59     if(!dst)
60     {
61         dstlen = origDstlen = (size_t) -1;
62     }
63 
64     while(srclen)
65     {
66         c = *src++;
67         srclen--;
68 
69         if(c <= 0xD7FF || c >= 0xE000)
70         {
71             v = (uint32) c;
72         }
73         else if(c >= 0xD800 && c <= 0xDBFF)
74         {
75             if(srclen < 1) goto buffer_too_small;
76             c2 = *src++;
77             srclen--;
78             if(c2 >= 0xDC00 && c2 <= 0xDFFF)
79             {
80                 v = (uint32) (((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000);
81             }
82             else
83             {
84                 // Invalid second half of surrogate pair
85                 v = (uint32) 0xFFFD;
86                 // Undo our character advancement
87                 src--;
88                 srclen++;
89             }
90         }
91         else
92         {
93             // Invalid first half surrogate pair
94             v = (uint32) 0xFFFD;
95         }
96 
97         if(v < 0x0080)
98         {
99             /* no encoding necessary - performance hack */
100             if(!dstlen) goto buffer_too_small;
101             if(dst) *dst++ = (char) v;
102             utf8Len = 1;
103         }
104         else
105         {
106             utf8Len = enc_char(utf8buf, v);
107             if(utf8Len > dstlen) goto buffer_too_small;
108             if(dst)
109             {
110                 for (i = 0; i < utf8Len; i++)
111                 {
112                     *dst++ = (char) utf8buf[i];
113                 }
114             }
115         }
116         dstlen -= utf8Len;
117     }
118 
119     *dstlenp = (origDstlen - dstlen);
120     return JS_TRUE;
121 
122 buffer_too_small:
123     *dstlenp = (origDstlen - dstlen);
124     return JS_FALSE;
125 }
126 
127 char*
enc_string(JSContext * cx,jsval arg,size_t * buflen)128 enc_string(JSContext* cx, jsval arg, size_t* buflen)
129 {
130     JSString* str = NULL;
131     const jschar* src = NULL;
132     char* bytes = NULL;
133     size_t srclen = 0;
134     size_t byteslen = 0;
135 
136     str = JS_ValueToString(cx, arg);
137     if(!str) goto error;
138 
139 #ifdef HAVE_JS_GET_STRING_CHARS_AND_LENGTH
140     src = JS_GetStringCharsAndLength(cx, str, &srclen);
141 #else
142     src = JS_GetStringChars(str);
143     srclen = JS_GetStringLength(str);
144 #endif
145 
146     if(!enc_charbuf(src, srclen, NULL, &byteslen)) goto error;
147 
148     bytes = JS_malloc(cx, (byteslen) + 1);
149     bytes[byteslen] = 0;
150 
151     if(!enc_charbuf(src, srclen, bytes, &byteslen)) goto error;
152 
153     if(buflen) *buflen = byteslen;
154     goto success;
155 
156 error:
157     if(bytes != NULL) JS_free(cx, bytes);
158     bytes = NULL;
159 
160 success:
161     return bytes;
162 }
163 
164 static uint32
dec_char(const uint8 * utf8Buffer,int utf8Length)165 dec_char(const uint8 *utf8Buffer, int utf8Length)
166 {
167     uint32 ucs4Char;
168     uint32 minucs4Char;
169 
170     /* from Unicode 3.1, non-shortest form is illegal */
171     static const uint32 minucs4Table[] = {
172         0x00000080, 0x00000800, 0x0001000, 0x0020000, 0x0400000
173     };
174 
175     if (utf8Length == 1)
176     {
177         ucs4Char = *utf8Buffer;
178     }
179     else
180     {
181         ucs4Char = *utf8Buffer++ & ((1<<(7-utf8Length))-1);
182         minucs4Char = minucs4Table[utf8Length-2];
183         while(--utf8Length)
184         {
185             ucs4Char = ucs4Char<<6 | (*utf8Buffer++ & 0x3F);
186         }
187         if(ucs4Char < minucs4Char || ucs4Char == 0xFFFE || ucs4Char == 0xFFFF)
188         {
189             ucs4Char = 0xFFFD;
190         }
191     }
192 
193     return ucs4Char;
194 }
195 
196 static JSBool
dec_charbuf(const char * src,size_t srclen,jschar * dst,size_t * dstlenp)197 dec_charbuf(const char *src, size_t srclen, jschar *dst, size_t *dstlenp)
198 {
199     uint32 v;
200     size_t offset = 0;
201     size_t j;
202     size_t n;
203     size_t dstlen = *dstlenp;
204     size_t origDstlen = dstlen;
205 
206     if(!dst) dstlen = origDstlen = (size_t) -1;
207 
208     while(srclen)
209     {
210         v = (uint8) *src;
211         n = 1;
212 
213         if(v & 0x80)
214         {
215             while(v & (0x80 >> n))
216             {
217                 n++;
218             }
219 
220             if(n > srclen) goto buffer_too_small;
221             if(n == 1 || n > 6) goto bad_character;
222 
223             for(j = 1; j < n; j++)
224             {
225                 if((src[j] & 0xC0) != 0x80) goto bad_character;
226             }
227 
228             v = dec_char((const uint8 *) src, n);
229             if(v >= 0x10000)
230             {
231                 v -= 0x10000;
232 
233                 if(v > 0xFFFFF || dstlen < 2)
234                 {
235                     *dstlenp = (origDstlen - dstlen);
236                     return JS_FALSE;
237                 }
238 
239                 if(dstlen < 2) goto buffer_too_small;
240 
241                 if(dst)
242                 {
243                     *dst++ = (jschar)((v >> 10) + 0xD800);
244                     v = (jschar)((v & 0x3FF) + 0xDC00);
245                 }
246                 dstlen--;
247             }
248         }
249 
250         if(!dstlen) goto buffer_too_small;
251         if(dst) *dst++ = (jschar) v;
252 
253         dstlen--;
254         offset += n;
255         src += n;
256         srclen -= n;
257     }
258 
259     *dstlenp = (origDstlen - dstlen);
260     return JS_TRUE;
261 
262 bad_character:
263     *dstlenp = (origDstlen - dstlen);
264     return JS_FALSE;
265 
266 buffer_too_small:
267     *dstlenp = (origDstlen - dstlen);
268     return JS_FALSE;
269 }
270 
271 JSString*
dec_string(JSContext * cx,const char * bytes,size_t byteslen)272 dec_string(JSContext* cx, const char* bytes, size_t byteslen)
273 {
274     JSString* str = NULL;
275     jschar* chars = NULL;
276     size_t charslen;
277 
278     if(!dec_charbuf(bytes, byteslen, NULL, &charslen)) goto error;
279 
280     chars = JS_malloc(cx, (charslen + 1) * sizeof(jschar));
281     if(!chars) return NULL;
282     chars[charslen] = 0;
283 
284     if(!dec_charbuf(bytes, byteslen, chars, &charslen)) goto error;
285 
286     str = JS_NewUCString(cx, chars, charslen - 1);
287     if(!str) goto error;
288 
289     goto success;
290 
291 error:
292     if(chars != NULL) JS_free(cx, chars);
293     str = NULL;
294 
295 success:
296     return str;
297 }
298