1 /*
2 * Copyright (c) 2007-2014, Lloyd Hilaiel <me@lloyd.io>
3 *
4 * Permission to use, copy, modify, and/or distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 #include "yajl_encode.h"
18
19 #include <assert.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <stdio.h>
23
/*
 * Write the two uppercase hexadecimal digits of c into hexBuf[0] and
 * hexBuf[1] (high nibble first).  No terminator is written.
 */
static void CharToHex(unsigned char c, char * hexBuf)
{
    static const char digits[] = "0123456789ABCDEF";
    const unsigned int highNibble = c >> 4;
    const unsigned int lowNibble = c & 0x0F;

    hexBuf[0] = digits[highNibble];
    hexBuf[1] = digits[lowNibble];
}
30
31 void
yajl_string_encode(const yajl_print_t print,void * ctx,const unsigned char * str,size_t len,int escape_solidus)32 yajl_string_encode(const yajl_print_t print,
33 void * ctx,
34 const unsigned char * str,
35 size_t len,
36 int escape_solidus)
37 {
38 size_t beg = 0;
39 size_t end = 0;
40 char hexBuf[7];
41 hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0';
42 hexBuf[6] = 0;
43
44 while (end < len) {
45 const char * escaped = NULL;
46 switch (str[end]) {
47 case '\r': escaped = "\\r"; break;
48 case '\n': escaped = "\\n"; break;
49 case '\\': escaped = "\\\\"; break;
50 /* it is not required to escape a solidus in JSON:
51 * read sec. 2.5: http://www.ietf.org/rfc/rfc4627.txt
52 * specifically, this production from the grammar:
53 * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
54 */
55 case '/': if (escape_solidus) escaped = "\\/"; break;
56 case '"': escaped = "\\\""; break;
57 case '\f': escaped = "\\f"; break;
58 case '\b': escaped = "\\b"; break;
59 case '\t': escaped = "\\t"; break;
60 default:
61 if ((unsigned char) str[end] < 32) {
62 CharToHex(str[end], hexBuf + 4);
63 escaped = hexBuf;
64 }
65 break;
66 }
67 if (escaped != NULL) {
68 print(ctx, (const char *) (str + beg), end - beg);
69 print(ctx, escaped, (unsigned int)strlen(escaped));
70 beg = ++end;
71 } else {
72 ++end;
73 }
74 }
75 print(ctx, (const char *) (str + beg), end - beg);
76 }
77
/*
 * Read exactly four hexadecimal digits starting at hex and shift them, most
 * significant first, into *val.  *val is not reset first: its previous
 * contents are shifted left by 16 bits in total.
 *
 * Every character must be a hex digit; this is only enforced by assert().
 */
static void hexToDigit(unsigned int * val, const unsigned char * hex)
{
    const unsigned char * const stop = hex + 4;

    while (hex != stop) {
        unsigned char nibble = *hex++;

        if (nibble >= 'A') {
            /* clearing 0x20 folds lowercase onto uppercase; subtracting 7
             * closes the ASCII gap between '9' and 'A' */
            nibble = (unsigned char) ((nibble & ~0x20) - 7);
        }
        nibble = (unsigned char) (nibble - '0');
        assert((nibble & 0xF0) == 0);
        *val = (*val << 4) | nibble;
    }
}
89
/*
 * Encode codepoint as a NUL-terminated UTF-8 sequence in utf8Buf, which must
 * have room for at least five bytes (four data bytes plus the terminator).
 *
 * Values of 0x200000 and above cannot be expressed in four bytes and are
 * written as "?".  Note that codepoint 0 yields an empty string.
 */
static void Utf32toUtf8(unsigned int codepoint, char * utf8Buf)
{
    char * out = utf8Buf;

    if (codepoint >= 0x200000) {
        *out++ = '?';
    } else if (codepoint >= 0x10000) {
        /* four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        *out++ = (char) (0xF0 | (codepoint >> 18));
        *out++ = (char) (0x80 | ((codepoint >> 12) & 0x3F));
        *out++ = (char) (0x80 | ((codepoint >> 6) & 0x3F));
        *out++ = (char) (0x80 | (codepoint & 0x3F));
    } else if (codepoint >= 0x0800) {
        /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        *out++ = (char) (0xE0 | (codepoint >> 12));
        *out++ = (char) (0x80 | ((codepoint >> 6) & 0x3F));
        *out++ = (char) (0x80 | (codepoint & 0x3F));
    } else if (codepoint >= 0x80) {
        /* two bytes: 110xxxxx 10xxxxxx */
        *out++ = (char) (0xC0 | (codepoint >> 6));
        *out++ = (char) (0x80 | (codepoint & 0x3F));
    } else {
        /* plain ASCII */
        *out++ = (char) codepoint;
    }

    *out = 0;
}
115
/*
 * Decode the JSON escape sequences in str[0..len) and append the resulting
 * bytes to buf.  Bytes that are not part of an escape are copied through
 * unchanged.
 *
 *  - \r \n \\ \/ \" \f \b \t become the single byte they stand for.
 *  - \uXXXX is appended as UTF-8.  A high surrogate (D800-DBFF) is combined
 *    with an immediately following \uXXXX low surrogate (DC00-DFFF).  A high
 *    surrogate with no such partner is appended as "?" and decoding resumes
 *    with the byte directly after it, so nothing that follows is lost.
 *  - \u0000 appends a single NUL byte.
 *
 * NOTE(review): every backslash is presumed to begin a complete, well-formed
 * escape (presumably validated before this is called -- confirm against the
 * caller); only the optional second half of a surrogate pair is
 * bounds-checked here.
 */
void yajl_string_decode(yajl_buf buf, const unsigned char * str,
                        size_t len)
{
    size_t beg = 0;
    size_t end = 0;

    while (end < len) {
        if (str[end] == '\\') {
            char utf8Buf[5];
            const char * unescaped = "?";
            /* flush the literal run that precedes this escape */
            yajl_buf_append(buf, str + beg, end - beg);
            switch (str[++end]) {
                case 'r': unescaped = "\r"; break;
                case 'n': unescaped = "\n"; break;
                case '\\': unescaped = "\\"; break;
                case '/': unescaped = "/"; break;
                case '"': unescaped = "\""; break;
                case 'f': unescaped = "\f"; break;
                case 'b': unescaped = "\b"; break;
                case 't': unescaped = "\t"; break;
                case 'u': {
                    unsigned int codepoint = 0;
                    hexToDigit(&codepoint, str + ++end);
                    /* leave end on the last hex digit of this escape */
                    end+=3;
                    /* check if this is a (high) surrogate */
                    if ((codepoint & 0xFC00) == 0xD800) {
                        unsigned int surrogate = 0;
                        int paired = 0;

                        /* the partner escape would occupy
                         * str[end+1 .. end+6]; only inspect it when all six
                         * bytes lie inside the input */
                        if (end + 6 < len &&
                            str[end + 1] == '\\' && str[end + 2] == 'u')
                        {
                            hexToDigit(&surrogate, str + end + 3);
                            paired = ((surrogate & 0xFC00) == 0xDC00);
                        }

                        if (!paired) {
                            /* end has not moved past this escape, so the
                             * bytes that follow are decoded normally */
                            unescaped = "?";
                            break;
                        }

                        codepoint =
                            (((codepoint & 0x3F) << 10) |
                             ((((codepoint >> 6) & 0xF) + 1) << 16) |
                             (surrogate & 0x3FF));
                        /* step onto the last hex digit of the partner */
                        end += 6;
                    }

                    Utf32toUtf8(codepoint, utf8Buf);
                    unescaped = utf8Buf;

                    /* strlen() below cannot see an embedded NUL, so append
                     * it explicitly */
                    if (codepoint == 0) {
                        yajl_buf_append(buf, unescaped, 1);
                        beg = ++end;
                        continue;
                    }

                    break;
                }
                default:
                    assert("this should never happen" && 0);
            }
            yajl_buf_append(buf, unescaped, (unsigned int)strlen(unescaped));
            beg = ++end;
        } else {
            end++;
        }
    }
    /* trailing literal run (possibly empty) */
    yajl_buf_append(buf, str + beg, end - beg);
}
179
/*
 * Check that s[0..len) is structurally well-formed UTF-8: every lead byte
 * announces a one- to four-byte sequence and is followed by the right number
 * of 10xxxxxx continuation bytes, all inside the buffer.
 *
 * Only the byte structure is inspected; the decoded values themselves
 * (e.g. overlong forms) are not checked.
 *
 * Returns 1 when valid (including when len is 0), 0 otherwise.  A NULL s
 * with a non-zero len is invalid.
 */
int yajl_string_validate_utf8(const unsigned char * s, size_t len)
{
    size_t pos = 0;

    if (len == 0) return 1;
    if (s == NULL) return 0;

    while (pos < len) {
        const unsigned char lead = s[pos++];
        size_t trailing;

        if (lead <= 0x7f) {
            /* single byte */
            trailing = 0;
        } else if ((lead >> 5) == 0x6) {
            /* two byte */
            trailing = 1;
        } else if ((lead >> 4) == 0x0e) {
            /* three byte */
            trailing = 2;
        } else if ((lead >> 3) == 0x1e) {
            /* four byte */
            trailing = 3;
        } else {
            /* stray continuation byte or unsupported lead byte */
            return 0;
        }

        for (; trailing > 0; trailing--) {
            /* sequence runs off the end of the input */
            if (pos == len) return 0;
            if ((s[pos++] >> 6) != 0x2) return 0;
        }
    }

    return 1;
}
221