1 /* $Id: decode.c,v 1.395 2011/09/03 13:25:39 sbajic Exp $ */
2
3 /*
4 DSPAM
5 COPYRIGHT (C) 2002-2012 DSPAM PROJECT
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU Affero General Public License as
9 published by the Free Software Foundation, either version 3 of the
10 License, or (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU Affero General Public License for more details.
16
17 You should have received a copy of the GNU Affero General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20 */
21
22 /*
23 * decode.c - message decoding and parsing
24 *
25 * DESCRIPTION
26 * This set of functions performs parsing and decoding of a message and
27 * embeds its components into a ds_message_t structure, suitable for
28 * logical access.
29 */
30
31 #ifdef HAVE_CONFIG_H
32 #include <auto-config.h>
33 #endif
34
35 #include <stdio.h>
36 #include <string.h>
37 #include <stdlib.h>
38 #include <ctype.h>
39
40 #include "decode.h"
41 #include "error.h"
42 #include "util.h"
43 #include "language.h"
44 #include "buffer.h"
45 #include "base64.h"
46 #include "libdspam.h"
47
48 /*
49 * _ds_actualize_message (const char *message)
50 *
51 * DESCRIPTION
52 * primary message parser
53 *
54 * this function performs all decoding and actualization of the message
55 * into the message structures defined in the .h
56 *
57 * INPUT ARGUMENTS
58 * message message to decode
59 *
60 * RETURN VALUES
61 * pointer to an allocated message structure (ds_message_t), NULL on failure
62 */
63
64 ds_message_t
_ds_actualize_message(const char * message)65 _ds_actualize_message (const char *message)
66 {
67 char *line = NULL;
68 char *in = NULL;
69 char *m_in = NULL;
70 ds_message_part_t current_block;
71 ds_header_t current_heading = NULL;
72 struct nt *boundaries = NULL;
73 ds_message_t out = NULL;
74 int block_position = BP_HEADER;
75 int in_content = 0;
76
77 if (!message || !(*message))
78 goto MEMFAIL;
79
80 if (!(in = strdup(message)))
81 goto MEMFAIL;
82
83 m_in = in;
84
85 boundaries = nt_create (NT_CHAR);
86 if (!boundaries)
87 goto MEMFAIL;
88
89 out = (ds_message_t) calloc (1, sizeof (struct _ds_message));
90 if (!out)
91 goto MEMFAIL;
92
93 out->components = nt_create (NT_PTR);
94 if (!out->components)
95 goto MEMFAIL;
96
97 current_block = _ds_create_message_part ();
98 if (!current_block)
99 goto MEMFAIL;
100
101 if (nt_add (out->components, (void *) current_block) == NULL)
102 goto MEMFAIL;
103
104 /* Read the message from memory */
105
106 line = strsep (&in, "\n");
107 while (line)
108 {
109
110 /* Header processing */
111
112 if (block_position == BP_HEADER)
113 {
114
115 /* If we see two boundaries converged on top of one another */
116
117 if (_ds_match_boundary (boundaries, line))
118 {
119
120 /* Add the boundary as the terminating boundary */
121
122 current_block->terminating_boundary = strdup (line + 2);
123 current_block->original_encoding = current_block->encoding;
124
125 _ds_decode_headers(current_block);
126 current_block = _ds_create_message_part ();
127
128 if (!current_block)
129 goto MEMFAIL;
130
131 if (nt_add (out->components, (void *) current_block) == NULL)
132 goto MEMFAIL;
133
134 block_position = BP_HEADER;
135 }
136
137 /* Concatenate multiline headers to the original header field data */
138
139 else if (line[0] == 32 || line[0] == '\t')
140 {
141 if (current_heading)
142 {
143 char *eow, *ptr;
144
145 ptr = realloc (current_heading->data,
146 strlen (current_heading->data) + strlen (line) + 2);
147 if (ptr)
148 {
149 current_heading->data = ptr;
150 strcat (current_heading->data, "\n");
151 strcat (current_heading->data, line);
152 } else {
153 goto MEMFAIL;
154 }
155
156 /* Our concatenated data doesn't have any whitespace between lines */
157 for(eow=line;eow[0] && isspace((int) eow[0]);eow++) { }
158
159 ptr =
160 realloc (current_heading->concatenated_data,
161 strlen (current_heading->concatenated_data) + strlen (eow) + 1);
162 if (ptr)
163 {
164 current_heading->concatenated_data = ptr;
165 strcat (current_heading->concatenated_data, eow);
166 } else {
167 goto MEMFAIL;
168 }
169
170 if (current_heading->original_data) {
171 ptr =
172 realloc (current_heading->original_data,
173 strlen (current_heading->original_data) +
174 strlen (line) + 2);
175 if (ptr) {
176 current_heading->original_data = ptr;
177 strcat (current_heading->original_data, "\n");
178 strcat (current_heading->original_data, line);
179 } else {
180 goto MEMFAIL;
181 }
182 }
183
184 _ds_analyze_header (current_block, current_heading, boundaries);
185 }
186 }
187
188 /* New header field when LF or CRLF is not found */
189
190 else if (line[0] != 0 && line[0] != 13)
191 {
192 ds_header_t header = _ds_create_header_field (line);
193
194 if (header != NULL)
195 {
196 _ds_analyze_header (current_block, header, boundaries);
197 current_heading = header;
198 nt_add (current_block->headers, header);
199 }
200
201
202 /* line[0] == 0 or line[0] == 13; LF or CRLF, switch to body */
203
204 } else {
205 block_position = BP_BODY;
206 }
207 }
208
209 /* Body processing */
210
211 else if (block_position == BP_BODY)
212 {
213 /* Look for a boundary in the header of a part */
214
215 if (!strncasecmp (line, "Content-Type", 12)
216 || ((line[0] == 32 || line[0] == 9) && in_content))
217 {
218 char boundary[128];
219 in_content = 1;
220 if (!_ds_extract_boundary(boundary, sizeof(boundary), line)) {
221 if (!_ds_match_boundary (boundaries, boundary)) {
222 _ds_push_boundary (boundaries, boundary);
223 free(current_block->boundary);
224 current_block->boundary = strdup (boundary);
225 }
226 } else {
227 _ds_push_boundary (boundaries, "");
228 }
229 } else {
230 in_content = 0;
231 }
232
233 /* Multipart boundary was reached; move onto next block */
234
235 if (_ds_match_boundary (boundaries, line))
236 {
237
238 /* Add the boundary as the terminating boundary */
239
240 current_block->terminating_boundary = strdup (line + 2);
241 current_block->original_encoding = current_block->encoding;
242
243 _ds_decode_headers(current_block);
244 current_block = _ds_create_message_part ();
245
246 if (!current_block)
247 goto MEMFAIL;
248
249 if (nt_add (out->components, (void *) current_block) == NULL)
250 goto MEMFAIL;
251
252 block_position = BP_HEADER;
253 }
254
255 /* Plain old message (or part) body */
256
257 else {
258 buffer_cat (current_block->body, line);
259
260 /* Don't add extra \n at the end of message's body */
261
262 if (in != NULL)
263 buffer_cat (current_block->body, "\n");
264 }
265 }
266
267 line = strsep (&in, "\n");
268 } /* while (line) */
269
270 _ds_decode_headers(current_block);
271
272 free (m_in);
273 nt_destroy (boundaries);
274 return out;
275
276 MEMFAIL:
277 if (m_in) free(m_in);
278 if (boundaries) nt_destroy (boundaries);
279 if (out) _ds_destroy_message(out);
280 LOG (LOG_CRIT, ERR_MEM_ALLOC);
281 return NULL;
282 }
283
284 /*
285 * _ds_create_message_part
286 *
287 * DESCRIPTION
288 * create and initialize a new message block component
289 *
290 * RETURN VALUES
291 * pointer to an allocated message block (ds_message_part_t), NULL on failure
292 *
293 */
294
295 ds_message_part_t
_ds_create_message_part(void)296 _ds_create_message_part (void)
297 {
298 ds_message_part_t block =
299 (ds_message_part_t) calloc (1, sizeof (struct _ds_message_part));
300
301 if (!block)
302 goto MEMFAIL;
303
304 block->headers = nt_create (NT_PTR);
305 if (!block->headers)
306 goto MEMFAIL;
307
308 block->body = buffer_create (NULL);
309 if (!block->body)
310 goto MEMFAIL;
311
312 block->encoding = EN_UNKNOWN;
313 block->media_type = MT_TEXT;
314 block->media_subtype = MST_PLAIN;
315 block->original_encoding = EN_UNKNOWN;
316 block->content_disposition = PCD_UNKNOWN;
317
318 /* Not really necessary, but.. */
319
320 block->boundary = NULL;
321 block->terminating_boundary = NULL;
322 block->original_signed_body = NULL;
323
324
325 return block;
326
327 MEMFAIL:
328 if (block) {
329 buffer_destroy(block->body);
330 nt_destroy(block->headers);
331 free(block);
332 }
333 LOG (LOG_CRIT, ERR_MEM_ALLOC);
334 return NULL;
335 }
336
337 /*
338 * _ds_create_header_field(const char *heading)
339 *
340 * DESCRIPTION
341 * create and initialize a new header structure
342 *
343 * INPUT ARGUMENTS
344 * heading plain text heading (e.g. "To: Mom")
345 *
346 * RETURN VALUES
347 * pointer to an allocated header structure (ds_header_t), NULL on failure
348 */
349
350 ds_header_t
_ds_create_header_field(const char * heading)351 _ds_create_header_field (const char *heading)
352 {
353 char *in = strdup(heading);
354 char *ptr, *m = in, *data;
355 ds_header_t header =
356 (ds_header_t) calloc (1, sizeof (struct _ds_header_field));
357
358 if (!header || !in)
359 goto MEMFAIL;
360
361 ptr = strsep (&in, ":");
362 if (ptr) {
363 header->heading = strdup (ptr);
364 if (!header->heading)
365 goto MEMFAIL;
366 else
367 {
368 if (!in)
369 {
370 LOGDEBUG("%s:%u: unexpected data: header string '%s' doesn't "
371 "contains `:' character", __FILE__, __LINE__, header->heading);
372
373 /* Use empty string as data as fallback for comtinue processing. */
374
375 in = "";
376 }
377 else
378 {
379 /* Skip white space */
380 while (*in == 32 || *in == 9)
381 ++in;
382 }
383
384 data = strdup (in);
385 if (!data)
386 goto MEMFAIL;
387
388 header->data = data;
389 header->concatenated_data = strdup(data);
390 }
391 }
392
393 free (m);
394 return header;
395
396 MEMFAIL:
397 free(header);
398 free(m);
399 LOG (LOG_CRIT, ERR_MEM_ALLOC);
400 return NULL;
401 }
402
403 /*
404 * _ds_decode_headers (ds_message_part_t block)
405 *
406 * DESCRIPTION
407 * decodes in-line encoded headers
408 *
409 * RETURN VALUES
410 * returns 0 on success
411 */
412
413 int
_ds_decode_headers(ds_message_part_t block)414 _ds_decode_headers (ds_message_part_t block) {
415 #ifdef VERBOSE
416 LOGDEBUG("decoding headers in message block");
417 #endif
418 char *ptr, *dptr, *rest, *enc;
419 ds_header_t header;
420 struct nt_node *node_nt;
421 struct nt_c c_nt;
422 long decoded_len;
423
424 node_nt = c_nt_first(block->headers, &c_nt);
425 while(node_nt != NULL) {
426 long enc_offset;
427 header = (ds_header_t) node_nt->ptr;
428
429 for(enc_offset = 0; header->concatenated_data[enc_offset]; enc_offset++)
430 {
431 enc = header->concatenated_data + enc_offset;
432
433 if (!strncmp(enc, "=?", 2)) {
434 int was_null = 0;
435 char *ptrptr, *decoded = NULL;
436 long offset = (long) enc - (long) header->concatenated_data;
437
438 if (header->original_data == NULL) {
439 header->original_data = strdup(header->data);
440 was_null = 1;
441 }
442
443 strtok_r (enc, "?", &ptrptr);
444 strtok_r (NULL, "?", &ptrptr);
445 ptr = strtok_r (NULL, "?", &ptrptr);
446 dptr = strtok_r (NULL, "?", &ptrptr);
447 if (!dptr) {
448 if (was_null && header->original_data != NULL)
449 free(header->original_data);
450 if (was_null)
451 header->original_data = NULL;
452 continue;
453 }
454
455 rest = dptr + strlen (dptr);
456 if (rest[0]!=0) {
457 rest++;
458 if (rest[0]!=0) rest++;
459 }
460
461 if (ptr != NULL && (ptr[0] == 'b' || ptr[0] == 'B'))
462 decoded = _ds_decode_base64 (dptr);
463 else if (ptr != NULL && (ptr[0] == 'q' || ptr[0] == 'Q'))
464 decoded = _ds_decode_quoted (dptr);
465
466 decoded_len = 0;
467
468 /* Append the rest of the message */
469
470 if (decoded)
471 {
472 char *new_alloc;
473
474 decoded_len = strlen(decoded);
475 new_alloc = calloc (1, offset + decoded_len + strlen (rest) + 2);
476 if (new_alloc == NULL) {
477 LOG (LOG_CRIT, ERR_MEM_ALLOC);
478 }
479 else
480 {
481 if (offset)
482 strncpy(new_alloc, header->concatenated_data, offset);
483
484 strcat(new_alloc, decoded);
485 strcat(new_alloc, rest);
486 free(decoded);
487 decoded = new_alloc;
488 }
489 }
490
491 if (decoded) {
492 enc_offset += (decoded_len-1);
493 free(header->concatenated_data);
494 header->concatenated_data = decoded;
495 }
496 else if (was_null && header->original_data) {
497 free(header->original_data);
498 header->original_data = NULL;
499 }
500 else if (was_null) {
501 header->original_data = NULL;
502 }
503 }
504 }
505
506 if (header->original_data != NULL) {
507 free(header->data);
508 header->data = strdup(header->concatenated_data);
509 }
510
511 node_nt = c_nt_next(block->headers, &c_nt);
512 }
513
514 return 0;
515 }
516
517 /*
518 * _ds_analyze_header (ds_message_part_t block, ds_header_t header,
519 * struct nt *boundaries)
520 *
521 * DESCRIPTION
522 * analyzes the header passed in and performs various operations including:
523 * - setting media type and subtype
524 * - setting transfer encoding
525 * - adding newly discovered boundaries
526 *
527 * based on the heading specified. essentially all headers should be
528 * analyzed for future expansion
529 *
530 * INPUT ARGUMENTS
531 * block the message block to which the header belongs
532 * header the header to analyze
533 * boundaries a list of known boundaries found within the block
534 */
535
536 void
_ds_analyze_header(ds_message_part_t block,ds_header_t header,struct nt * boundaries)537 _ds_analyze_header (
538 ds_message_part_t block,
539 ds_header_t header,
540 struct nt *boundaries)
541 {
542 if (!header || !block || !header->data)
543 return;
544
545 /* Content-Type header */
546
547 if (!strcasecmp (header->heading, "Content-Type"))
548 {
549 int len = strlen(header->data);
550 if (!strncasecmp (header->data, "text", 4)) {
551 block->media_type = MT_TEXT;
552 if (len >= 5 && !strncasecmp (header->data + 5, "plain", 5))
553 block->media_subtype = MST_PLAIN;
554 else if (len >= 5 && !strncasecmp (header->data + 5, "html", 4))
555 block->media_subtype = MST_HTML;
556 else
557 block->media_subtype = MST_OTHER;
558 }
559
560 else if (!strncasecmp (header->data, "application", 11))
561 {
562 block->media_type = MT_APPLICATION;
563 if (len >= 12 && !strncasecmp (header->data + 12, "dspam-signature", 15))
564 block->media_subtype = MST_DSPAM_SIGNATURE;
565 else
566 block->media_subtype = MST_OTHER;
567 }
568
569 else if (!strncasecmp (header->data, "message", 7))
570 {
571 block->media_type = MT_MESSAGE;
572 if (len >= 8 && !strncasecmp (header->data + 8, "rfc822", 6))
573 block->media_subtype = MST_RFC822;
574 else if (len >= 8 && !strncasecmp (header->data + 8, "inoculation", 11))
575 block->media_subtype = MST_INOCULATION;
576 else
577 block->media_subtype = MST_OTHER;
578 }
579
580 else if (!strncasecmp (header->data, "multipart", 9))
581 {
582 char boundary[128];
583
584 block->media_type = MT_MULTIPART;
585 if (len >= 10 && !strncasecmp (header->data + 10, "mixed", 5))
586 block->media_subtype = MST_MIXED;
587 else if (len >= 10 && !strncasecmp (header->data + 10, "alternative", 11))
588 block->media_subtype = MST_ALTERNATIVE;
589 else if (len >= 10 && !strncasecmp (header->data + 10, "signed", 6))
590 block->media_subtype = MST_SIGNED;
591 else if (len >= 10 && !strncasecmp (header->data + 10, "encrypted", 9))
592 block->media_subtype = MST_ENCRYPTED;
593 else
594 block->media_subtype = MST_OTHER;
595
596 if (!_ds_extract_boundary(boundary, sizeof(boundary), header->data)) {
597 if (!_ds_match_boundary (boundaries, boundary)) {
598 _ds_push_boundary (boundaries, boundary);
599 free(block->boundary);
600 block->boundary = strdup (boundary);
601 }
602 } else {
603 _ds_push_boundary (boundaries, "");
604 }
605 }
606 else {
607 block->media_type = MT_OTHER;
608 block->media_subtype = MST_OTHER;
609 }
610
611 }
612
613 /* Content-Transfer-Encoding */
614
615 else if (!strcasecmp (header->heading, "Content-Transfer-Encoding"))
616 {
617 if (!strncasecmp (header->data, "7bit", 4))
618 block->encoding = EN_7BIT;
619 else if (!strncasecmp (header->data, "8bit", 4))
620 block->encoding = EN_8BIT;
621 else if (!strncasecmp (header->data, "quoted-printable", 16))
622 block->encoding = EN_QUOTED_PRINTABLE;
623 else if (!strncasecmp (header->data, "base64", 6))
624 block->encoding = EN_BASE64;
625 else if (!strncasecmp (header->data, "binary", 6))
626 block->encoding = EN_BINARY;
627 else
628 block->encoding = EN_OTHER;
629 }
630
631 if (!strcasecmp (header->heading, "Content-Disposition"))
632 {
633 if (!strncasecmp (header->data, "inline", 6))
634 block->content_disposition = PCD_INLINE;
635 else if (!strncasecmp (header->data, "attachment", 10))
636 block->content_disposition = PCD_ATTACHMENT;
637 else
638 block->content_disposition = PCD_OTHER;
639 }
640
641 return;
642 }
643
644 /*
645 * _ds_destroy_message (ds_message_t message)
646 *
647 * DESCRIPTION
648 * destroys a message structure (ds_message_t)
649 *
650 * INPUT ARGUMENTS
651 * message the message structure to be destroyed
652 */
653
654 void
_ds_destroy_message(ds_message_t message)655 _ds_destroy_message (ds_message_t message)
656 {
657 struct nt_node *node_nt;
658 struct nt_c c;
659
660 if (message == NULL)
661 return;
662
663 if (message->components) {
664 node_nt = c_nt_first (message->components, &c);
665 while (node_nt != NULL)
666 {
667 ds_message_part_t block = (ds_message_part_t) node_nt->ptr;
668 _ds_destroy_block(block);
669 node_nt = c_nt_next (message->components, &c);
670 }
671 nt_destroy (message->components);
672 }
673 free (message);
674 return;
675 }
676
677 /*
678 * _ds_destroy_headers (ds_message_part_t block)
679 *
680 * DESCRIPTION
681 * destroys a message block's header pairs
682 * does not free the structures themselves; these are freed at nt_destroy
683 *
684 * INPUT ARGUMENTS
685 * block the message block containing the headers to destsroy
686 */
687
688 void
_ds_destroy_headers(ds_message_part_t block)689 _ds_destroy_headers (ds_message_part_t block)
690 {
691 struct nt_node *node_nt;
692 struct nt_c c;
693
694 if (!block || !block->headers)
695 return;
696
697 node_nt = c_nt_first (block->headers, &c);
698 while (node_nt != NULL)
699 {
700 ds_header_t field = (ds_header_t) node_nt->ptr;
701
702 if (field)
703 {
704 free (field->original_data);
705 free (field->heading);
706 free (field->concatenated_data);
707 free (field->data);
708 }
709 node_nt = c_nt_next (block->headers, &c);
710 }
711
712 return;
713 }
714
715 /*
716 * _ds_destroy_block (ds_message_part_t block)
717 *
718 * DESCRIPTION
719 * destroys a message block
720 *
721 * INPUT ARGUMENTS
722 * block the message block to destroy
723 */
724
725 void
_ds_destroy_block(ds_message_part_t block)726 _ds_destroy_block (ds_message_part_t block)
727 {
728 if (!block)
729 return;
730
731 if (block->headers)
732 {
733 _ds_destroy_headers (block);
734 nt_destroy (block->headers);
735 }
736 buffer_destroy (block->body);
737 buffer_destroy (block->original_signed_body);
738 free (block->boundary);
739 free (block->terminating_boundary);
740 // free (block);
741 return;
742 }
743
744 /*
745 * _ds_decode_block (ds_message_part_t block)
746 *
747 * DESCRIPTION
748 * decodes a message block
749 *
750 * INPUT ARGUMENTS
751 * block the message block to decode
752 *
753 * RETURN VALUES
754 * a pointer to the allocated character array containing the decoded message
755 * NULL on failure
756 */
757
758 char *
_ds_decode_block(ds_message_part_t block)759 _ds_decode_block (ds_message_part_t block)
760 {
761 if (block->encoding == EN_BASE64)
762 return _ds_decode_base64 (block->body->data);
763 else if (block->encoding == EN_QUOTED_PRINTABLE)
764 return _ds_decode_quoted (block->body->data);
765
766 LOG (LOG_WARNING, "decoding of block encoding type %d not supported",
767 block->encoding);
768 return NULL;
769 }
770
771 /*
772 * _ds_decode_{base64,quoted,hex8bit}
773 *
774 * DESCRIPTION
775 * supporting block decoder functions
776 * these function call (or perform) specific decoding functions
777 *
778 * INPUT ARGUMENTS
779 * body encoded message body
780 *
781 * RETURN VALUES
782 * a pointer to the allocated character array containing the decoded body
783 */
784
/* Base64 decoder: hands a non-NULL body to base64decode(), NULL stays NULL. */
char *
_ds_decode_base64 (const char *body)
{
  return (body != NULL) ? base64decode (body) : NULL;
}
793
794 char *
_ds_decode_quoted(const char * body)795 _ds_decode_quoted (const char *body)
796 {
797 #ifdef VERBOSE
798 LOGDEBUG("decoding Quoted Printable encoded buffer");
799 #endif
800 if (!body)
801 return NULL;
802
803 char *n, *out;
804 const char *end, *p;
805
806 n = out = malloc(strlen(body)+1);
807 end = body + strlen(body);
808
809 if (out == NULL) {
810 LOG (LOG_CRIT, ERR_MEM_ALLOC);
811 return NULL;
812 }
813
814 for (p = body; p < end; p++, n++) {
815 if (*p == '=') {
816 if (p[1] == '\r' && p[2] == '\n') {
817 n -= 1;
818 p += 2;
819 } else if (p[1] == '\n') {
820 n -= 1;
821 p += 1;
822 } else if (p[1] && p[2] && isxdigit((unsigned char) p[1]) && isxdigit((unsigned char) p[2])) {
823 *n = ((_ds_hex2dec((unsigned char) p[1])) << 4) | (_ds_hex2dec((unsigned char) p[2]));
824 p += 2;
825 } else
826 *n = *p;
827 } else
828 *n = *p;
829 }
830
831 *n = '\0';
832 return (char *)out;
833 }
834
835 char *
_ds_decode_hex8bit(const char * body)836 _ds_decode_hex8bit (const char *body)
837 {
838 #ifdef VERBOSE
839 LOGDEBUG("decoding hexadecimal 8-bit encodings in message block");
840 #endif
841 if (!body)
842 return NULL;
843
844 char *n, *out;
845 const char *end, *p;
846
847 n = out = malloc(strlen(body)+1);
848 end = body + strlen(body);
849
850 if (out == NULL) {
851 LOG (LOG_CRIT, ERR_MEM_ALLOC);
852 return NULL;
853 }
854
855 for (p = body; p < end; p++, n++) {
856 if (*p == '%')
857 if (p[1] && p[2] && isxdigit((unsigned char) p[1]) && isxdigit((unsigned char) p[2])) {
858 *n = ((_ds_hex2dec((unsigned char) p[1])) << 4) | (_ds_hex2dec((unsigned char) p[2]));
859 p += 2;
860 } else
861 *n = *p;
862 else
863 *n = *p;
864 }
865
866 *n = '\0';
867 return (char *)out;
868 }
869
870 /*
871 * _ds_encode_block (ds_message_part_t block, int encoding)
872 *
873 * DESCRIPTION
874 * encodes a message block using the encoding specified and replaces the
875 * block's message body with the encoded data
876 *
877 * INPUT ARGUMENTS
878 * block the message block to encode
879 * encoding encoding to use (EN_)
880 *
881 * RETURN VALUES
882 * returns 0 on success
883 */
884
885 int
_ds_encode_block(ds_message_part_t block,int encoding)886 _ds_encode_block (ds_message_part_t block, int encoding)
887 {
888 /* we can't encode a block with the same encoding */
889
890 if (block->encoding == encoding)
891 return EINVAL;
892
893 /* we can't encode a block that's already encoded */
894
895 if (block->encoding == EN_BASE64 || block->encoding == EN_QUOTED_PRINTABLE)
896 return EFAILURE;
897
898 if (encoding == EN_BASE64) {
899 char *encoded = _ds_encode_base64 (block->body->data);
900 buffer_destroy (block->body);
901 block->body = buffer_create (encoded);
902 free (encoded);
903 block->encoding = EN_BASE64;
904 }
905 else if (encoding == EN_QUOTED_PRINTABLE) {
906
907 /* TODO */
908
909 return 0;
910 }
911
912 LOGDEBUG("unsupported encoding: %d", encoding);
913 return 0;
914 }
915
/*
 * _ds_encode_base64 (const char *body)
 *
 * DESCRIPTION
 *   supporting block encoder function; passes the body to base64encode()
 *
 * INPUT ARGUMENTS
 *   body   decoded message body
 *
 * RETURN VALUES
 *   a pointer to the allocated character array containing the encoded body,
 *   NULL when body is NULL (mirrors _ds_decode_base64)
 */

char *
_ds_encode_base64 (const char *body)
{
  if (body == NULL)
    return NULL;

  return base64encode (body);
}
935
936 /*
937 * _ds_assemble_message (ds_message_t message)
938 *
939 * DESCRIPTION
940 * assembles a message structure into a flat text message
941 *
942 * INPUT ARGUMENTS
943 * message the message structure (ds_message_t) to assemble
944 *
945 * RETURN VALUES
946 * a pointer to the allocated character array containing the text message
947 */
948
949 char *
_ds_assemble_message(ds_message_t message,const char * newline)950 _ds_assemble_message (ds_message_t message, const char *newline)
951 {
952 buffer *out = buffer_create (NULL);
953 struct nt_node *node_nt, *node_header;
954 struct nt_c c_nt, c_nt2;
955 char *heading;
956 char *copyback;
957 #ifdef VERBOSE
958 int i = 0;
959 #endif
960
961 if (!out) {
962 LOG (LOG_CRIT, ERR_MEM_ALLOC);
963 return NULL;
964 }
965
966 node_nt = c_nt_first (message->components, &c_nt);
967 while (node_nt != NULL && node_nt->ptr != NULL)
968 {
969 ds_message_part_t block =
970 (ds_message_part_t) node_nt->ptr;
971 #ifdef VERBOSE
972 LOGDEBUG ("assembling component %d", i);
973 #endif
974
975 /* Assemble headers */
976
977 if (block->headers != NULL && block->headers->items > 0)
978 {
979 node_header = c_nt_first (block->headers, &c_nt2);
980 while (node_header != NULL)
981 {
982 char *data;
983 ds_header_t current_header =
984 (ds_header_t) node_header->ptr;
985
986 data = (current_header->original_data == NULL) ? current_header->data :
987 current_header->original_data;
988
989 heading = malloc(
990 ((current_header->heading) ? strlen(current_header->heading) : 0)
991 + ((data) ? strlen(data) : 0)
992 + 3 + strlen(newline));
993
994 if (current_header->heading != NULL &&
995 (!strncmp (current_header->heading, "From ", 5) ||
996 !strncmp (current_header->heading, "--", 2)))
997 sprintf (heading, "%s:%s%s",
998 (current_header->heading) ? current_header->heading : "",
999 (data) ? data : "", newline);
1000 else
1001 sprintf (heading, "%s: %s%s",
1002 (current_header->heading) ? current_header->heading : "",
1003 (data) ? data : "", newline);
1004
1005 buffer_cat (out, heading);
1006 free(heading);
1007 node_header = c_nt_next (block->headers, &c_nt2);
1008 }
1009 }
1010
1011 buffer_cat (out, newline);
1012
1013 /* Assemble bodies */
1014
1015 if (block->original_signed_body != NULL && message->protect)
1016 buffer_cat (out, block->original_signed_body->data);
1017 else
1018 buffer_cat (out, block->body->data);
1019
1020 if (block->terminating_boundary != NULL)
1021 {
1022 buffer_cat (out, "--");
1023 buffer_cat (out, block->terminating_boundary);
1024 }
1025
1026 node_nt = c_nt_next (message->components, &c_nt);
1027 #ifdef VERBOSE
1028 i++;
1029 #endif
1030
1031 if (node_nt != NULL && node_nt->ptr != NULL)
1032 buffer_cat (out, newline);
1033 }
1034
1035 copyback = out->data;
1036 out->data = NULL;
1037 buffer_destroy (out);
1038 return copyback;
1039 }
1040
1041 /*
1042 * _ds_{push,pop,match,extract}_boundary
1043 *
1044 * DESCRIPTION
1045 * these functions maintain and service a boundary "stack" on the message
1046 */
1047
1048 int
_ds_push_boundary(struct nt * stack,const char * boundary)1049 _ds_push_boundary (struct nt *stack, const char *boundary)
1050 {
1051 char *y;
1052
1053 if (boundary == NULL || boundary[0] == 0)
1054 return EINVAL;
1055
1056 y = malloc (strlen (boundary) + 3);
1057 if (y == NULL)
1058 return EUNKNOWN;
1059
1060 sprintf (y, "--%s", boundary);
1061 nt_add (stack, (char *) y);
1062 free(y);
1063
1064 return 0;
1065 }
1066
1067 char *
_ds_pop_boundary(struct nt * stack)1068 _ds_pop_boundary (struct nt *stack)
1069 {
1070 struct nt_node *node, *last_node = NULL, *parent_node = NULL;
1071 struct nt_c c;
1072 char *boundary = NULL;
1073
1074 node = c_nt_first (stack, &c);
1075 while (node != NULL)
1076 {
1077 parent_node = last_node;
1078 last_node = node;
1079 node = c_nt_next (stack, &c);
1080 }
1081 if (parent_node != NULL)
1082 parent_node->next = NULL;
1083 else
1084 stack->first = NULL;
1085
1086 if (last_node == NULL)
1087 return NULL;
1088
1089 boundary = strdup (last_node->ptr);
1090
1091 free (last_node->ptr);
1092 free (last_node);
1093
1094 return boundary;
1095 }
1096
1097 int
_ds_match_boundary(struct nt * stack,const char * buff)1098 _ds_match_boundary (struct nt *stack, const char *buff)
1099 {
1100 struct nt_node *node;
1101 struct nt_c c;
1102
1103 node = c_nt_first (stack, &c);
1104 while (node != NULL)
1105 {
1106 if (!strncmp (buff, node->ptr, strlen (node->ptr)))
1107 {
1108 return 1;
1109 }
1110 node = c_nt_next (stack, &c);
1111 }
1112 return 0;
1113 }
1114
1115 int
_ds_extract_boundary(char * buf,size_t size,char * mem)1116 _ds_extract_boundary (char *buf, size_t size, char *mem)
1117 {
1118 char *data, *ptr, *ptrptr;
1119
1120 if (mem == NULL)
1121 return EINVAL;
1122
1123 data = strdup(mem);
1124 if (data == NULL) {
1125 LOG(LOG_CRIT, ERR_MEM_ALLOC);
1126 return EUNKNOWN;
1127 }
1128
1129 for(ptr=data;ptr<(data+strlen(data));ptr++) {
1130 if (!strncasecmp(ptr, "boundary", 8)) {
1131 ptr = strchr(ptr, '=');
1132 if (ptr == NULL) {
1133 free(data);
1134 return EFAILURE;
1135 }
1136 ptr++;
1137 while(isspace((int) ptr[0]))
1138 ptr++;
1139 if (ptr[0] == '"')
1140 ptr++;
1141 strtok_r(ptr, " \";\n\t", &ptrptr);
1142 strlcpy(buf, ptr, size);
1143 free(data);
1144 return 0;
1145 }
1146 }
1147
1148 free(data);
1149 return EFAILURE;
1150 }
1151
1152 /*
1153 * _ds_find_header (ds_message_t message, consr char *heading) {
1154 *
1155 * DESCRIPTION
1156 * finds a header and returns its value
1157 *
1158 * INPUT ARGUMENTS
1159 * message the message structure to search
1160 * heading the heading to search for
1161 * flags optional search flags
1162 *
1163 * RETURN VALUES
1164 * a pointer to the header structure's value
1165 *
1166 */
1167
1168 char *
_ds_find_header(ds_message_t message,const char * heading)1169 _ds_find_header (ds_message_t message, const char *heading) {
1170 ds_message_part_t block;
1171 ds_header_t head;
1172 struct nt_node *node_nt;
1173
1174 if (message->components->first) {
1175 if ((block = message->components->first->ptr)==NULL)
1176 return NULL;
1177 if (block->headers == NULL)
1178 return NULL;
1179 } else {
1180 return NULL;
1181 }
1182
1183 node_nt = block->headers->first;
1184 while(node_nt != NULL) {
1185 head = (ds_header_t) node_nt->ptr;
1186 if (head && !strcasecmp(head->heading, heading)) {
1187 return head->data;
1188 }
1189 node_nt = node_nt->next;
1190 }
1191
1192 return NULL;
1193 }
1194
/*
 * Converts one hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its value
 * 0..15; returns -1 for any other character.
 */
int _ds_hex2dec(unsigned char hex) {
  if (hex >= '0' && hex <= '9')
    return hex - '0';

  if (hex >= 'a' && hex <= 'f')
    return hex - 'a' + 10;

  if (hex >= 'A' && hex <= 'F')
    return hex - 'A' + 10;

  return -1;
}
1216
1217 /*
1218 * _ds_strip_html(const char *html)
1219 *
1220 * DESCRIPTION
1221 * strip html tags from the supplied message
1222 *
1223 * INPUT ARGUMENTS
1224 * html encoded message body
1225 *
1226 * RETURN VALUES
1227 * a pointer to the allocated character array containing the
1228 * stripped message
1229 *
1230 */
1231
1232 char *
_ds_strip_html(const char * html)1233 _ds_strip_html (const char *html)
1234 {
1235 #ifdef VERBOSE
1236 LOGDEBUG("stripping HTML tags from message block");
1237 #endif
1238 size_t j = 0, k = 0, i = 0;
1239 int visible = 1;
1240 int closing_td_tag = 0;
1241 char *html2;
1242 const char *cdata_close_tag = NULL;
1243
1244 if(!html)
1245 return NULL;
1246
1247 static struct {
1248 unsigned int id;
1249 char *entity;
1250 }
1251 charset[] = {
1252 { 32, " " }, { 34, """ }, { 34, """ }, { 38, "&" },
1253 { 38, "&" }, { 39, "'" }, { 60, "<" }, { 60, "<" },
1254 { 62, ">" }, { 62, ">" }, { 160, " " }, { 161, "¡" },
1255 { 162, "¢" }, { 163, "£" }, { 164, "¤" }, { 165, "¥" },
1256 { 166, "¦" }, { 167, "§" }, { 168, "¨" }, { 169, "©" },
1257 { 170, "ª" }, { 171, "«" }, { 172, "¬" }, { 173, "­" },
1258 { 174, "®" }, { 175, "¯" }, { 176, "°" }, { 177, "±" },
1259 { 178, "²" }, { 179, "³" }, { 180, "´" }, { 181, "µ" },
1260 { 182, "¶" }, { 183, "·" }, { 184, "¸" }, { 185, "¹" },
1261 { 186, "º" }, { 187, "»" }, { 188, "¼" }, { 189, "½" },
1262 { 190, "¾" }, { 191, "¿" }, { 192, "À" }, { 193, "Á" },
1263 { 194, "Â" }, { 195, "Ã" }, { 196, "Ä" }, { 197, "Å" },
1264 { 198, "Æ" }, { 199, "Ç" }, { 200, "È" }, { 201, "É" },
1265 { 202, "Ê" }, { 203, "Ë" }, { 204, "Ì" }, { 205, "Í" },
1266 { 206, "Î" }, { 207, "Ï" }, { 208, "Ð" }, { 209, "Ñ" },
1267 { 210, "Ò" }, { 211, "Ó" }, { 212, "Ô" }, { 213, "Õ" },
1268 { 214, "Ö" }, { 215, "×" }, { 216, "Ø" }, { 217, "Ù" },
1269 { 218, "Ú" }, { 219, "Û" }, { 220, "Ü" }, { 221, "Ý" },
1270 { 222, "Þ" }, { 223, "ß" }, { 224, "à" }, { 225, "á" },
1271 { 226, "â" }, { 227, "ã" }, { 228, "ä" }, { 229, "å" },
1272 { 230, "æ" }, { 231, "ç" }, { 232, "è" }, { 233, "é" },
1273 { 234, "ê" }, { 235, "ë" }, { 236, "ì" }, { 237, "í" },
1274 { 238, "î" }, { 239, "ï" }, { 240, "ð" }, { 241, "ñ" },
1275 { 242, "ò" }, { 243, "ó" }, { 244, "ô" }, { 245, "õ" },
1276 { 246, "ö" }, { 247, "÷" }, { 248, "ø" }, { 249, "ù" },
1277 { 250, "ú" }, { 251, "û" }, { 252, "ü" }, { 253, "ý" },
1278 { 254, "þ" }, { 255, "ÿ" }, { 338, "Œ" }, { 339, "œ" },
1279 { 352, "Š" }, { 353, "š" }, { 376, "Ÿ" }, { 402, "ƒ" },
1280 { 710, "ˆ" }, { 732, "˜" }, { 913, "Α" }, { 914, "Β" },
1281 { 915, "Γ" }, { 916, "Δ" }, { 917, "Ε" }, { 918, "Ζ" },
1282 { 919, "Η" }, { 920, "Θ" }, { 921, "Ι" }, { 922, "Κ" },
1283 { 923, "Λ" }, { 924, "Μ" }, { 925, "Ν" }, { 926, "Ξ" },
1284 { 927, "Ο" }, { 928, "Π" }, { 929, "Ρ" }, { 931, "Σ" },
1285 { 932, "Τ" }, { 933, "Υ" }, { 934, "Φ" }, { 935, "Χ" },
1286 { 936, "Ψ" }, { 937, "Ω" }, { 945, "α" }, { 946, "β" },
1287 { 947, "γ" }, { 948, "δ" }, { 949, "ε" }, { 950, "ζ" },
1288 { 951, "η" }, { 952, "θ" }, { 953, "ι" }, { 954, "κ" },
1289 { 955, "λ" }, { 956, "μ" }, { 957, "ν" }, { 958, "ξ" },
1290 { 959, "ο" }, { 960, "π" }, { 961, "ρ" }, { 962, "ς" },
1291 { 963, "σ" }, { 964, "τ" }, { 965, "υ" }, { 966, "φ" },
1292 { 967, "χ" }, { 968, "ψ" }, { 969, "ω" }, { 977, "&thetasym" },
1293 { 978, "ϒ" }, { 982, "ϖ" }, {8194, " " }, {8195, " " },
1294 { 8201, " " }, {8204, "‌" }, {8205, "‍" }, {8206, "‎" },
1295 { 8207, "‏" }, {8211, "–" }, {8212, "—" }, {8216, "‘" },
1296 { 8217, "’" }, {8218, "‚" }, {8220, "“" }, {8221, "”" },
1297 { 8222, "„" }, {8224, "†" }, {8225, "‡" }, {8226, "•" },
1298 { 8230, "…" }, {8240, "‰" }, {8242, "′" }, {8243, "″" },
1299 { 8249, "‹" }, {8250, "›" }, {8254, "‾" }, {8260, "⁄" },
1300 { 8364, "€" }, {8465, "ℑ" }, {8472, "℘" }, {8476, "ℜ" },
1301 { 8482, "™" }, {8501, "ℵ" }, {8592, "←" }, {8593, "↑" },
1302 { 8594, "→" }, {8595, "↓" }, {8596, "↔" }, {8629, "↵" },
1303 { 8656, "⇐" }, {8657, "⇑" }, {8658, "⇒" }, {8659, "⇓" },
1304 { 8660, "⇔" }, {8704, "∀" }, {8706, "∂" }, {8707, "∃" },
1305 { 8709, "∅" }, {8711, "∇" }, {8712, "∈" }, {8713, "∉" },
1306 { 8715, "∋" }, {8719, "∏" }, {8721, "∑" }, {8722, "−" },
1307 { 8727, "∗" }, {8730, "√" }, {8733, "∝" }, {8734, "∞" },
1308 { 8736, "∠" }, {8743, "∧" }, {8744, "∨" }, {8745, "∩" },
1309 { 8746, "∪" }, {8747, "∫" }, {8756, "∴" }, {8764, "∼" },
1310 { 8773, "≅" }, {8776, "≈" }, {8800, "≠" }, {8801, "≡" },
1311 { 8804, "≤" }, {8805, "≥" }, {8834, "⊂" }, {8835, "⊃" },
1312 { 8836, "⊄" }, {8838, "⊆" }, {8839, "⊇" }, {8853, "⊕" },
1313 { 8855, "⊗" }, {8869, "⊥" }, {8901, "⋅" }, {8968, "⌈" },
1314 { 8969, "⌉" }, {8970, "⌊" }, {8971, "⌋" }, {9001, "⟨" },
1315 { 9002, "⟩" }, {9674, "◊" }, {9824, "♠" }, {9827, "♣" },
1316 { 9829, "♥" }, {9830, "♦" }
1317 };
1318 int num_chars = sizeof(charset) / sizeof(charset[0]);
1319
1320 static struct {
1321 char *open_tag;
1322 char *uri_tag;
1323 }
1324 uritag[] = {
1325 { "<a", "href" }, { "<img", "src" }, { "<input", "src" },
1326 { "<iframe", "src" }, { "<frame", "src" }, { "<script", "src" },
1327 { "<form", "action" }, { "<embed", "src" }, { "<area", "href" },
1328 { "<base", "href" }, { "<link", "href" }, { "<source", "src" },
1329 { "<body", "background" }, { "<blockquote", "cite" }, { "<q", "cite" },
1330 { "<ins", "cite" }, { "<del", "cite" }
1331 };
1332 int num_uri = sizeof(uritag) / sizeof(uritag[0]);
1333
1334 size_t len = strlen(html);
1335 html2 = malloc(len+1);
1336
1337 if (html2 == NULL) {
1338 LOG (LOG_CRIT, ERR_MEM_ALLOC);
1339 return NULL;
1340 }
1341
1342 for (i = 0; i < len; i++) {
1343 if (html[i] == '<') {
1344 if (cdata_close_tag) {
1345 if (strncasecmp(html + i, cdata_close_tag, strlen(cdata_close_tag)) == 0) {
1346 i += strlen(cdata_close_tag) - 1;
1347 cdata_close_tag = NULL;
1348 }
1349 continue;
1350 } else if (strncasecmp(html + i, "</td>", 5) == 0) {
1351 i += 4;
1352 closing_td_tag = 1;
1353 continue;
1354 } else if (strncasecmp(html + i, "<td", 3) == 0 && closing_td_tag) {
1355 if (j > 0 && !isspace(html2[j-1])) {
1356 html2[j++]=' ';
1357 }
1358 visible = 0;
1359 } else {
1360 closing_td_tag = 0;
1361 visible = 1;
1362 }
1363 k = i + 1;
1364
1365 if ((k < len) && (!( (html[k] >= 65 && html[k] <= 90) ||
1366 (html[k] >= 97 && html[k] <= 122) ||
1367 (html[k] == 47) ||
1368 (html[k] == 33) ))) {
1369 /* Not a HTML tag. HTML tags start with a letter, forwardslash or exclamation mark */
1370 visible = 1;
1371 html2[j++]=html[i];
1372 i = k;
1373 const char *w = &(html[k]);
1374 while (j < len && (size_t)(w - html) < len && *w != '<') {
1375 html2[j++]=*w;
1376 w++;
1377 i++;
1378 }
1379 continue;
1380 } else if (html[k]) {
1381 /* find the end of the tag */
1382 while (k < len && html[k] != '<' && html[k] != '>') {k++;}
1383
1384 /* if we've got a tag with a uri, save the address to print later. */
1385 char *url_tag = " ";
1386 int tag_offset = 0, x = 0, y = 0;
1387 for (y = 0; y < num_uri; y++) {
1388 x = strlen(uritag[y].open_tag);
1389 if (strncasecmp(html+i,uritag[y].open_tag,x)==0 && (i+x < len && isspace(html[i+x]))) {
1390 url_tag = uritag[y].uri_tag;
1391 tag_offset = i + x + 1;
1392 break;
1393 }
1394 }
1395 /* tag with uri found */
1396 if (tag_offset > 0) {
1397 size_t url_start; /* start of url tag inclusive [ */
1398 size_t url_tag_len = strlen(url_tag);
1399 char delim = ' ';
1400 /* find start of uri */
1401 for (url_start = tag_offset; url_start <= k; url_start++) {
1402 if (strncasecmp(html + url_start, url_tag, url_tag_len) == 0) {
1403 url_start += url_tag_len;
1404 while (html[url_start] && isspace(html[url_start])) {url_start++;} /* remove spaces before = */
1405 if (html[url_start] == '=') {
1406 url_start++;
1407 while (html[url_start] && isspace(html[url_start])) {url_start++;} /* remove spaces after = */
1408 if (html[url_start] == '"') {
1409 delim = '"';
1410 url_start++;
1411 } else if (html[url_start] == '\'') {
1412 delim = '\'';
1413 url_start++;
1414 } else {
1415 delim = '>';
1416 }
1417 break;
1418 } else {
1419 /* Start of uri tag found but no '=' after the tag.
1420 * Skip the whole tag.
1421 */
1422 break;
1423 }
1424 } else if ((url_start - tag_offset) >= 50) {
1425 /* The length of the html tag is over 50 characters long without
1426 * finding the start of the url/uri. Skip the whole tag.
1427 */
1428 break;
1429 }
1430 }
1431 /* find end of uri */
1432 if (delim != ' ') {
1433 if (url_start < len &&
1434 (strncasecmp(html + url_start, "http:", 5) == 0 ||
1435 strncasecmp(html + url_start, "https:", 6) == 0 ||
1436 strncasecmp(html + url_start, "ftp:", 4) == 0)) {
1437 html2[j++]=' ';
1438 const char *w = &(html[url_start]);
1439 /* html2 is a buffer of len + 1, where the +1 is for NULL
1440 * termination. This means we only want to loop to len
1441 * since we will replace html2[j] right after the loop.
1442 */
1443 while (j < len && (size_t)(w - html) < len && *w != delim) {
1444 html2[j++]=*w;
1445 w++;
1446 }
1447 html2[j++]=' ';
1448 }
1449 }
1450 } else if (strncasecmp(html + i, "<p>", 3) == 0
1451 || strncasecmp(html + i, "<p ", 3) == 0
1452 || strncasecmp(html + i, "<p\t", 3) == 0
1453 || strncasecmp(html + i, "<tr", 3) == 0
1454 || strncasecmp(html + i, "<option", 7) == 0
1455 || strncasecmp(html + i, "<br", 3) == 0
1456 || strncasecmp(html + i, "<li", 3) == 0
1457 || strncasecmp(html + i, "<div", 4) == 0
1458 || strncasecmp(html + i, "</select>", 9) == 0
1459 || strncasecmp(html + i, "</table>", 8) == 0) {
1460 if (j > 0 && html2[j-1] != '\n' && html2[j-1] != '\r') {
1461 html2[j++] = '\n';
1462 }
1463 } else if (strncasecmp(html + i, "<applet", 7) == 0) {
1464 cdata_close_tag = "</applet>";
1465 } else if (strncasecmp(html + i, "<embed", 6) == 0) {
1466 cdata_close_tag = "</embed>";
1467 } else if (strncasecmp(html + i, "<frameset", 9) == 0) {
1468 cdata_close_tag = "</frameset>";
1469 } else if (strncasecmp(html + i, "<frame", 6) == 0) {
1470 cdata_close_tag = "</frame>";
1471 } else if (strncasecmp(html + i, "<iframe", 7) == 0) {
1472 cdata_close_tag = "</iframe>";
1473 } else if (strncasecmp(html + i, "<noembed", 8) == 0) {
1474 cdata_close_tag = "</noembed>";
1475 } else if (strncasecmp(html + i, "<noscript", 9) == 0) {
1476 cdata_close_tag = "</noscript>";
1477 } else if (strncasecmp(html + i, "<object", 7) == 0) {
1478 cdata_close_tag = "</object>";
1479 } else if (strncasecmp(html + i, "<script", 7) == 0) {
1480 cdata_close_tag = "</script>";
1481 } else if (strncasecmp(html + i, "<style", 6) == 0) {
1482 cdata_close_tag = "</style>";
1483 }
1484 i = (html[k] == '<' || html[k] == '\0')? k - 1: k;
1485 continue;
1486 }
1487 } else if (cdata_close_tag) {
1488 continue;
1489 } else if (!isspace(html[i])) {
1490 visible = 1;
1491 }
1492
1493 if (strncmp(html+i,"&#",2)==0) {
1494 int x = 0;
1495 const char *w = &(html[i+2]);
1496 while (*w == '0') {i++;w++;}
1497 char n[5];
1498 if (html[i+4] && html[i+4] == ';'
1499 && isdigit(html[i+2])
1500 && isdigit(html[i+3])) {
1501 n[0] = html[i+2];
1502 n[1] = html[i+3];
1503 n[2] = 0;
1504 x = atoi(n);
1505 if (x <= 255 && x >= 32)
1506 html2[j++] = x;
1507 i += 4;
1508 } else if (html[i+6]
1509 && html[i+6] == ';'
1510 && isdigit(html[i+2])
1511 && isdigit(html[i+3])
1512 && isdigit(html[i+4])
1513 && isdigit(html[i+5])) {
1514 n[0] = html[i+2];
1515 n[1] = html[i+3];
1516 n[2] = html[i+4];
1517 n[3] = html[i+5];
1518 n[4] = 0;
1519 x = atoi(n);
1520 if (x <= 255 && x >= 32)
1521 html2[j++] = x;
1522 i += 6;
1523 } else {
1524 const char *w = &(html[i]);
1525 while (*w != ';' && *w != ' ' && *w != '\t' && *w != '\0') {i++;w++;}
1526 }
1527 visible = 0;
1528 continue;
1529 } else if (html[i] == '&') {
1530 int x = 0, y = 0;
1531 for (y = 0; y < num_chars; y++) {
1532 x = strlen(charset[y].entity);
1533 if (strncasecmp(html+i,charset[y].entity,x)==0) {
1534 if (charset[y].id <= 255)
1535 html2[j++] = charset[y].id;
1536 i += x-1;
1537 visible = 0;
1538 continue;
1539 }
1540 }
1541 }
1542
1543 if (j < len && visible)
1544 html2[j++] = html[i];
1545
1546 if (j >= len)
1547 i = j = len;
1548 }
1549
1550 html2[j] = '\0';
1551 return (char *)html2;
1552 }
1553