1 /*
2 * Copyright (C) 2013 Nikos Mavrogiannopoulos
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <config.h>
19
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <stddef.h>
23 #include <string.h>
24 #include <unistd.h>
25 #include <syslog.h>
26 #include <common.h>
27 #include <c-strcase.h>
28 #include <c-ctype.h>
29 #include <wchar.h>
30
31 #include "html.h"
32
/*
 * Named character references understood by unescape_html(), stored
 * without the leading '&' and the trailing ';'.
 */
static const struct {
	const char *name;
	unsigned name_len;
	char ch;
} html_entities[] = {
	{ "lt", 2, '<' },
	{ "gt", 2, '>' },
	{ "nbsp", 4, ' ' },
	{ "quot", 4, '"' },
	{ "amp", 3, '&' },
	{ "apos", 4, '\'' },
};

/* Returns the value of the hexadecimal digit c, or -1 if c is not one. */
static int html_digit_value(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/*
 * Tries to match a named reference at s, where s[0] is '&' and avail is
 * the number of readable bytes starting at s. The name is compared
 * case-insensitively. On a match the replacement character is stored
 * in *out and the number of input bytes consumed is returned; otherwise
 * 0 is returned. Never reads beyond s[avail - 1].
 */
static unsigned match_named_entity(const char *s, unsigned avail, char *out)
{
	size_t k;

	for (k = 0; k < sizeof(html_entities) / sizeof(html_entities[0]); k++) {
		/* '&' + name + ';' */
		unsigned total = html_entities[k].name_len + 2;

		if (avail < total)
			continue;
		if (s[total - 1] != ';')
			continue;
		if (c_strncasecmp(s + 1, html_entities[k].name,
				  html_entities[k].name_len) != 0)
			continue;

		*out = html_entities[k].ch;
		return total;
	}

	return 0;
}

/*
 * Tries to parse a numeric reference at s: '&', '#', an optional 'x' or
 * 'X' selecting hexadecimal, at least one digit, then ';'. avail is the
 * number of readable bytes starting at s. On success the code point is
 * stored in *value and the number of input bytes consumed is returned.
 * Returns 0 when the text is not a well-formed reference or its value
 * exceeds WCHAR_MAX. Never reads beyond s[avail - 1].
 */
static unsigned parse_numeric_ref(const char *s, unsigned avail, unsigned long *value)
{
	unsigned long acc = 0;
	unsigned base = 10;
	unsigned first_digit;
	unsigned k = 2;

	/* the shortest well-formed reference is '&' '#' digit ';' */
	if (avail < 4 || s[0] != '&' || s[1] != '#')
		return 0;

	if (s[k] == 'x' || s[k] == 'X') {
		base = 16;
		k++;
	}

	first_digit = k;
	for (; k < avail; k++) {
		int d = html_digit_value(s[k]);

		if (d < 0 || (unsigned)d >= base)
			break;
		/* reject before the accumulator can exceed WCHAR_MAX */
		if (acc > ((unsigned long)WCHAR_MAX - (unsigned long)d) / base)
			return 0;
		acc = acc * base + (unsigned long)d;
	}

	/* require at least one digit and a terminating ';' inside the input */
	if (k == first_digit || k >= avail || s[k] != ';')
		return 0;

	*value = acc;
	return k + 1;
}

/*
 * Decodes HTML character references in the first len bytes of html.
 *
 * The named references lt, gt, nbsp (decoded to a plain space), quot,
 * amp and apos are replaced by their character; names are matched
 * case-insensitively. Decimal ('&#' digits ';') and hexadecimal
 * ('&#x' digits ';') references are converted with wcrtomb() and
 * replaced by the resulting multibyte sequence. Anything that is not a
 * well-formed reference, or whose value exceeds WCHAR_MAX, is copied
 * through unchanged.
 *
 * The input is treated as length-delimited: no byte at or beyond
 * html[len] is read.
 *
 * pool:    talloc context the result is allocated from.
 * html:    input text.
 * len:     number of bytes of html to process.
 * out_len: if not NULL, receives the length of the result, not counting
 *          the terminating NUL.
 *
 * Returns a NUL-terminated buffer allocated with talloc_size(), or NULL
 * if the allocation fails, wcrtomb() cannot encode a value, or the
 * decoded text does not fit in len bytes.
 */
char *unescape_html(void *pool, const char *html, unsigned len, unsigned *out_len)
{
	char *msg;
	unsigned pos = 0;
	unsigned i = 0;

	/* widen first so that len + 1 cannot wrap around */
	msg = talloc_size(pool, (size_t)len + 1);
	if (msg == NULL)
		return NULL;

	while (i < len) {
		unsigned long code = 0;
		unsigned used;
		char plain;

		/* always keep one byte free for the terminating NUL */
		if (pos >= len)
			goto fail;

		if (html[i] != '&') {
			msg[pos++] = html[i++];
			continue;
		}

		used = match_named_entity(&html[i], len - i, &plain);
		if (used != 0) {
			msg[pos++] = plain;
			i += used;
			continue;
		}

		used = parse_numeric_ref(&html[i], len - i, &code);
		if (used == 0) {
			/* not a reference we understand: keep the '&' as is */
			msg[pos++] = html[i++];
			continue;
		}

		{
			char tmpmb[MB_CUR_MAX];
			mbstate_t ps;
			size_t nbytes;

			memset(&ps, 0, sizeof(ps));
			nbytes = wcrtomb(tmpmb, (wchar_t)code, &ps);

			if (nbytes == (size_t)-1 || nbytes > len - pos)
				goto fail;

			memcpy(&msg[pos], tmpmb, nbytes);
			pos += (unsigned)nbytes;
			i += used;
		}
	}

	msg[pos] = 0;
	if (out_len)
		*out_len = pos;

	return msg;

 fail:
	talloc_free(msg);
	return NULL;
}
114
/* Returns the value of the hexadecimal digit c, or -1 if c is not one. */
static int url_hex_value(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/*
 * Decodes a percent-encoded (form-style) string: each '%' followed by
 * two hexadecimal digits becomes the byte with that value, '+' becomes
 * a space, and every other byte is copied through unchanged.
 *
 * The input is treated as length-delimited: no byte at or beyond
 * url[len] is read. A '%' that is not followed by exactly two
 * hexadecimal digits within the first len bytes is an error.
 *
 * pool:    talloc context the result is allocated from.
 * url:     input text.
 * len:     number of bytes of url to process.
 * out_len: if not NULL, receives the length of the result, not counting
 *          the terminating NUL.
 *
 * Returns a NUL-terminated buffer allocated with talloc_size(), or NULL
 * if the allocation fails or the input contains a malformed escape (the
 * latter is also reported through syslog()).
 */
char *unescape_url(void *pool, const char *url, unsigned len, unsigned *out_len)
{
	/* largest precision that can be passed as an int to "%.*s" */
	const unsigned max_prec = (unsigned)-1 >> 1;
	char *msg;
	unsigned pos = 0;
	unsigned i = 0;

	/* widen first so that len + 1 cannot wrap around */
	msg = talloc_size(pool, (size_t)len + 1);
	if (msg == NULL)
		return NULL;

	while (i < len) {
		if (url[i] == '%') {
			int hi, lo;

			/* both hex digits must lie inside the input */
			if (len - i < 3)
				goto fail;

			hi = url_hex_value(url[i + 1]);
			lo = url_hex_value(url[i + 2]);
			if (hi < 0 || lo < 0)
				goto fail;

			msg[pos++] = (char)((hi << 4) | lo);
			i += 3;
		} else if (url[i] == '+') {
			msg[pos++] = ' ';
			i++;
		} else {
			msg[pos++] = url[i++];
		}
	}

	msg[pos] = 0;
	if (out_len)
		*out_len = pos;

	return msg;

 fail:
	talloc_free(msg);
	/* bound the printed text by len; url need not be NUL-terminated */
	syslog(LOG_ERR, "%s: error parsing URL: %.*s", __func__,
	       (int)(len > max_prec ? max_prec : len), url);
	return NULL;
}
155
/*
 * Percent-encodes the first len bytes of url.
 *
 * Bytes accepted by c_isalnum() and the characters '-', '_', '.' and
 * '~' are copied unchanged, a space becomes '+', and every other byte
 * is written as '%' followed by its value as two uppercase hexadecimal
 * digits.
 *
 * pool:    talloc context the result is allocated from.
 * url:     input bytes.
 * len:     number of bytes of url to process.
 * out_len: if not NULL, receives the length of the result, not counting
 *          the terminating NUL.
 *
 * Returns a NUL-terminated buffer allocated with talloc_size(), or NULL
 * if the allocation fails or len is so large that the worst-case output
 * length (3 * len) cannot be represented in an unsigned.
 */
char *escape_url(void *pool, const char *url, unsigned len, unsigned *out_len)
{
	static const char hex_digits[] = "0123456789ABCDEF";
	char *msg;
	unsigned pos = 0;
	unsigned i;

	/* every input byte may expand to three output bytes */
	if (len > ((unsigned)-1 - 1) / 3)
		return NULL;

	msg = talloc_size(pool, (size_t)3 * len + 1);
	if (msg == NULL)
		return NULL;

	for (i = 0; i < len; i++) {
		/*
		 * Work on the byte as unsigned char: where plain char is
		 * signed, bytes >= 0x80 would otherwise sign-extend and
		 * produce the wrong escape.
		 */
		unsigned char c = (unsigned char)url[i];

		if (c_isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') {
			msg[pos++] = (char)c;
		} else if (c == ' ') {
			msg[pos++] = '+';
		} else {
			msg[pos++] = '%';
			msg[pos++] = hex_digits[c >> 4];
			msg[pos++] = hex_digits[c & 0x0f];
		}
	}

	msg[pos] = 0;
	if (out_len)
		*out_len = pos;

	return msg;
}
183
184