1 //
2 // aegis - project change supervisor
3 // Copyright (C) 2004-2008, 2012 Peter Miller
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published
7 // by the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program. If not, see <http://www.gnu.org/licenses/>.
17 //
18
19 #include <common/ac/assert.h>
20 #include <common/ac/ctype.h>
21 #include <common/ac/stdio.h> // for snprintf
22 #include <common/ac/string.h>
23
24 #include <common/nstring.h>
25 #include <common/nstring/accumulator.h>
26
27
//
// Tell whether only white space remains in a string.
//
// Returns true when every character from s up to the terminating NUL
// satisfies isspace() (an empty string qualifies), and false as soon as
// some other character is seen.
//
static bool
ends_soon(const char *s)
{
    // Skip the leading run of white space; what stops us decides the answer.
    while (*s && isspace((unsigned char)*s))
        ++s;
    return !*s;
}
40
//
// Character classes used by the scanners in this file.  Each macro
// expands to a string literal, so classes can be pasted together by
// adjacency and handed to strspn(), strcspn() or strchr() as a set.
//
#define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define LOWER "abcdefghijklmnopqrstuvwxyz"
#define NUMERIC "0123456789"
#define ALPHA UPPER LOWER

// Characters accepted within a host name (the dot is added by the caller).
#define HOSTCHARS "-" UPPER LOWER NUMERIC
46
47
48 static const char *
at_host_dot(const char * cp)49 at_host_dot(const char *cp)
50 {
51 size_t n = strspn(cp, HOSTCHARS ".");
52 if (n < 1)
53 return 0;
54 return (cp + n);
55 }
56
57
58 static const char *
at_host_star(const char * cp)59 at_host_star(const char *cp)
60 {
61 size_t n = strspn(cp, HOSTCHARS);
62 return (cp + n);
63 }
64
65
66 static const char *
at_user(const char * cp)67 at_user(const char *cp)
68 {
69 size_t n = strcspn(cp, "-" ALPHA NUMERIC);
70 if (n < 1)
71 return 0;
72 cp += n;
73
74 if (*cp == ':')
75 {
76 n = strspn(cp + 1, ALPHA NUMERIC "-,?;.:/!%$^*&~\"#'");
77 if (n > 0)
78 cp += 1 + n;
79 }
80 return cp;
81 }
82
83
84 static const char *
at_urlpath(const char * cp)85 at_urlpath(const char *cp)
86 {
87 if (*cp++ != '/')
88 return 0;
89 cp += strspn(cp, "-" ALPHA NUMERIC "_$.+!*(),;:@&=?/~#%");
90 cp += strcspn(cp, "]'.}>) \t\r\n,\\\"");
91 return cp;
92 }
93
94
//
// Scan an unsigned decimal number.
//
// Returns a pointer just past the leading run of decimal digits, or the
// null pointer when cp does not start with a digit.
//
static const char *
at_number(const char *cp)
{
    const char *end = cp;
    while (isdigit((unsigned char)*end))
        ++end;
    return (end == cp ? 0 : end);
}
103
104
105 static const char *
at_url(const char * cp)106 at_url(const char *cp)
107 {
108 if (memcmp(cp, "news", 4) == 0)
109 cp += 4;
110 else if (memcmp(cp, "telnet", 6) == 0)
111 cp += 6;
112 else if (memcmp(cp, "nntp", 4) == 0)
113 cp += 4;
114 else if (memcmp(cp, "http", 4) == 0)
115 {
116 cp += 4;
117 if (*cp == 's')
118 ++cp;
119 }
120 else if (memcmp(cp, "ftp", 3) == 0)
121 {
122 cp += 3;
123 if (*cp == 's')
124 ++cp;
125 }
126 else if (memcmp(cp, "webcal", 6) == 0)
127 cp += 6;
128 else
129 return 0;
130
131 if (*cp++ != ':')
132 return 0;
133 if (*cp++ != '/')
134 return 0;
135 if (*cp++ != '/')
136 return 0;
137
138 const char *end_of_user = at_user(cp);
139 if (end_of_user && *end_of_user == '@')
140 cp = end_of_user + 1;
141 const char *end_of_host = at_host_dot(cp);
142 if (!end_of_host)
143 return 0;
144 cp = end_of_host;
145 if (*cp == ':')
146 {
147 const char *end_of_num = at_number(cp + 1);
148 if (end_of_num)
149 cp = end_of_num;
150 }
151 const char *end_of_path = at_urlpath(cp);
152 if (end_of_path)
153 cp = end_of_path;
154 return cp;
155 }
156
157 static const char *
at_url2(const char * cp)158 at_url2(const char *cp)
159 {
160 if (memcmp(cp, "www", 3) == 0)
161 cp += 3;
162 else if (memcmp(cp, "ftp", 3) == 0)
163 cp += 3;
164 else
165 return 0;
166 cp = at_host_star(cp);
167 if (*cp++ != '.')
168 return 0;
169 const char *end_of_host = at_host_dot(cp);
170 if (!end_of_host)
171 return 0;
172 cp = end_of_host;
173 if (*cp == ':')
174 {
175 const char *end_of_num = at_number(cp + 1);
176 if (end_of_num)
177 cp = end_of_num;
178 }
179 const char *end_of_path = at_urlpath(cp);
180 if (end_of_path)
181 cp = end_of_path;
182 return cp;
183 }
184
185
186 static const char *
at_email_lhs(const char * cp)187 at_email_lhs(const char *cp)
188 {
189 if (strchr(ALPHA NUMERIC, *cp) == 0)
190 return 0;
191 ++cp;
192 size_t n = strspn(cp, ALPHA NUMERIC ".-");
193 return (cp + n);
194 }
195
196
197 static const char *
at_email_rhs(const char * cp)198 at_email_rhs(const char *cp)
199 {
200 if (strchr(ALPHA NUMERIC, *cp) == 0)
201 return 0;
202 ++cp;
203 size_t n = strspn(cp, ALPHA NUMERIC "-");
204 return (cp + n);
205 }
206
207
208 static const char *
at_mailto(const char * cp)209 at_mailto(const char *cp)
210 {
211 cp = at_email_lhs(cp);
212 if (!cp)
213 return 0;
214 if (*cp++ != '@')
215 return 0;
216 cp = at_email_rhs(cp);
217 if (!cp)
218 return 0;
219 int n = 0;
220 for (;;)
221 {
222 if (*cp != '.')
223 return (n > 0 ? cp : 0);
224 const char *end = at_email_rhs(cp + 1);
225 if (!end)
226 return (n > 0 ? cp : 0);
227 cp = end;
228 ++n;
229 }
230 }
231
232
233 static const char *
at_mailto2(const char * cp)234 at_mailto2(const char *cp)
235 {
236 if (memcmp(cp, "mailto:", 7) != 0)
237 return 0;
238 return at_mailto(cp + 7);
239 }
240
241 static const char *
at_news(const char * cp)242 at_news(const char *cp)
243 {
244 if (memcmp(cp, "news:", 5) != 0)
245 return 0;
246 cp += 5;
247 size_t n = strspn(cp, "-" ALPHA NUMERIC "\\^_{|}~!\"#$%&'()*+,./;:=?`");
248 if (n < 1)
249 return 0;
250 cp += n;
251 if (*cp++ != '@')
252 return 0;
253 cp = at_host_dot(cp);
254 if (!cp)
255 return 0;
256 if (*cp == ':')
257 {
258 const char *end = at_number(cp + 1);
259 if (end)
260 cp = end;
261 }
262 return cp;
263 }
264
265
266 static bool
at_anchor(const char * & s,nstring_accumulator & ac)267 at_anchor(const char *&s, nstring_accumulator &ac)
268 {
269 const char *cp = at_url(s);
270 if (cp)
271 {
272 as_is:
273 ac.push_back("<A HREF=\"");
274 ac.push_back(s, cp - s);
275 ac.push_back("\">");
276 ac.push_back(s, cp - s);
277 ac.push_back("</A>");
278 s = cp;
279 return true;
280 }
281 cp = at_url2(s);
282 if (cp)
283 {
284 nstring url(s, cp - s);
285 ac.push_back("<A HREF=\"http://");
286 ac.push_back(url);
287 ac.push_back("\">");
288 ac.push_back(url);
289 ac.push_back("</A>");
290 s = cp;
291 return true;
292 }
293 cp = at_mailto(s);
294 if (cp)
295 {
296 nstring url(s, cp - s);
297 ac.push_back("<A HREF=\"mailto:");
298 ac.push_back(url);
299 ac.push_back("\">");
300 ac.push_back(url);
301 ac.push_back("</A>");
302 s = cp;
303 return true;
304 }
305 cp = at_mailto2(s);
306 if (cp)
307 goto as_is;
308 cp = at_news(s);
309 if (cp)
310 goto as_is;
311 return false;
312 }
313
314
315 nstring
html_quote(bool paragraphs) const316 nstring::html_quote(bool paragraphs)
317 const
318 {
319 const char *s = c_str();
320 int column = 0;
321 static nstring_accumulator sa;
322 sa.clear();
323 for (;;)
324 {
325 unsigned char c = *s++;
326 switch (c)
327 {
328 case '\n':
329 if (ends_soon(s))
330 {
331 case 0:
332 return sa.mkstr();
333 }
334 if (paragraphs)
335 {
336 if (*s == '\n')
337 {
338 ++s;
339 sa.push_back("\n<p>\n");
340 }
341 else
342 {
343 sa.push_back("<br>\n");
344 }
345 while (*s && isspace((unsigned char)*s))
346 ++s;
347 }
348 else
349 {
350 sa.push_back('\n');
351 }
352 column = 0;
353 break;
354
355 case '&':
356 sa.push_back("&");
357 column += 5;
358 break;
359
360 case '<':
361 sa.push_back("<");
362 column += 4;
363 break;
364
365 case '>':
366 sa.push_back(">");
367 column += 4;
368 break;
369
370 case '"':
371 sa.push_back(""");
372 column += 6;
373 break;
374
375 case ' ':
376 case '\t':
377 //
378 // HTTP only allows lines of up to 510 characters,
379 // so break the line once it gets too wide.
380 //
381 if (column > 70)
382 {
383 while (*s && isspace((unsigned char)*s))
384 ++s;
385 sa.push_back('\n');
386 column = 0;
387 }
388 else
389 {
390 sa.push_back(c);
391 ++column;
392 }
393 break;
394
395 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
396 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
397 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
398 case 'V': case 'W': case 'X': case 'Y': case 'Z':
399 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
400 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
401 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
402 case 'v': case 'w': case 'x': case 'y': case 'z':
403 case '0': case '1': case '2': case '3': case '4':
404 case '5': case '6': case '7': case '8': case '9':
405 --s;
406 if (!at_anchor(s, sa))
407 {
408 size_t n = strspn(s, ALPHA NUMERIC);
409 assert(n);
410 sa.push_back(s, n);
411 s += n;
412 }
413 break;
414
415 default:
416 // C locale
417 if (isprint(c))
418 {
419 sa.push_back(c);
420 ++column;
421 }
422 else
423 {
424 char temp[10];
425 snprintf(temp, sizeof(temp), "&#%d;", c);
426 size_t len = strlen(temp);
427 sa.push_back(temp, len);
428 column += len;
429 }
430 break;
431 }
432 }
433 }
434
435
436 // vim: set ts=8 sw=4 et :
437