1 /* mhfixmsg.c -- rewrite a message with various transformations
2 *
3 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
4 * See the COPYRIGHT file in the root directory of the nmh
5 * distribution for complete copyright information.
6 */
7
8 #include <h/mh.h>
9 #include <h/fmt_scan.h>
10 #include <h/mime.h>
11 #include <h/mhparse.h>
12 #include <h/utils.h>
13 #include <h/signals.h>
14 #include "../sbr/m_maildir.h"
15 #include "../sbr/m_mktemp.h"
16 #include "../sbr/mime_type.h"
17 #include "mhfree.h"
18 #include "mhoutsbr.h"
19 #include "mhshowsbr.h"
20 #include <fcntl.h>
21
/*
 * Command-line switches.  Each X() entry gives the switch text (the same
 * table is handed to print_help() and smatch() in main()), a minchars
 * value (0 for every switch here), and the identifier that main() uses as
 * the corresponding case label.
 */
#define MHFIXMSG_SWITCHES \
    X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
    X("nodecodetext", 0, NDECODETEXTSW) \
    X("decodetypes", 0, DECODETYPESW) \
    X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
    X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
    X("textcharset", 0, TEXTCHARSETSW) \
    X("notextcharset", 0, NTEXTCHARSETSW) \
    X("reformat", 0, REFORMATSW) \
    X("noreformat", 0, NREFORMATSW) \
    X("replacetextplain", 0, REPLACETEXTPLAINSW) \
    X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
    X("fixboundary", 0, FIXBOUNDARYSW) \
    X("nofixboundary", 0, NFIXBOUNDARYSW) \
    X("fixcte", 0, FIXCOMPOSITECTESW) \
    X("nofixcte", 0, NFIXCOMPOSITECTESW) \
    X("fixtype mimetype", 0, FIXTYPESW) \
    X("file file", 0, FILESW) \
    X("outfile file", 0, OUTFILESW) \
    X("rmmproc program", 0, RPROCSW) \
    X("normmproc", 0, NRPRCSW) \
    X("changecur", 0, CHGSW) \
    X("nochangecur", 0, NCHGSW) \
    X("verbose", 0, VERBSW) \
    X("noverbose", 0, NVERBSW) \
    X("version", 0, VERSIONSW) \
    X("help", 0, HELPSW) \

/* First expansion: X() yields just the identifier of each switch. */
#define X(sw, minchars, id) id,
DEFINE_SWITCH_ENUM(MHFIXMSG);
#undef X

/* Second expansion: X() yields one initializer per switch for `switches'. */
#define X(sw, minchars, id) { sw, minchars, id },
DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
#undef X
57
58
int verbosw; /* Set by -verbose, cleared by -noverbose; gates the report() calls. */
int debugsw; /* Needed by mhparse.c. */

/* SIGQUIT is routed to the same handler as SIGPIPE (see SIGNAL calls in main). */
#define quitser pipeser

/* mhparse.c */
extern int skip_mp_cte_check; /* flag to InitMultiPart */
extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
extern int bogus_mp_content; /* flag from InitMultiPart */
/* flags to/from parse_header_attrs */
extern int suppress_extraneous_trailing_semicolon_warning;

/* mhmisc.c */
void flush_errors (void);
73
/*
 * Transformation settings, followed by prototypes for this file's
 * functions (all static except mhfixmsgsbr).
 */
typedef struct fix_transformations {
    int fixboundary;       /* -fixboundary / -nofixboundary; on by default */
    int fixcompositecte;   /* -fixcte / -nofixcte; on by default */
    svector_t fixtypes;    /* type/subtype strings from -fixtype, or NULL */
    int reformat;          /* -reformat / -noreformat; on by default */
    int replacetextplain;  /* -replacetextplain / -noreplacetextplain */
    int decodetext;        /* CE_8BIT, CE_7BIT or CE_BINARY from -decodetext;
                              0 after -nodecodetext */
    char *decodetypes;     /* argument of -decodetypes */
    /* Nonzero selects LF rather than CRLF line breaks: -nocrlflinebreaks
       sets it, -crlflinebreaks (and the default) clears it.  CRLF is the
       form described in RFC 2046 Sec. 4.1.1, par.1. */
    int lf_line_endings;
    char *textcharset;     /* argument of -textcharset, NULL when unset */
} fix_transformations;

int mhfixmsgsbr (CT *, char *, const fix_transformations *, FILE **, char *,
                 FILE **);
static int fix_boundary (CT *, int *);
static int copy_input_to_output (const char *, FILE *, const char *, FILE *);
static int get_multipart_boundary (CT, char **);
static int replace_boundary (CT, char *, char *);
static int fix_types (CT, svector_t, int *);
static char *replace_substring (char **, const char *, const char *);
static char *remove_parameter (char *, const char *);
static int fix_composite_cte (CT, int *);
static int set_ce (CT, int);
static int ensure_text_plain (CT *, CT, int *, int);
static int find_textplain_sibling (CT, int, int *);
static int insert_new_text_plain_part (CT, int, CT);
static CT build_text_plain_part (CT);
static int insert_into_new_mp_alt (CT *, int *);
static CT divide_part (CT);
static void copy_ctinfo (CI, CI);
static int decode_part (CT);
static int reformat_part (CT, char *, char *, char *, int);
static CT build_multipart_alt (CT, CT, int, int);
static int boundary_in_content (FILE **, char *, const char *);
static void transfer_noncontent_headers (CT, CT);
static int set_ct_type (CT, int type, int subtype, int encoding);
static int decode_text_parts (CT, int, const char *, int *);
static int should_decode(const char *, const char *, const char *);
static int content_encoding (CT, const char **);
static int strip_crs (CT, int *);
static void update_cte (CT);
static int least_restrictive_encoding (CT);
static int less_restrictive (int, int);
static int convert_charsets (CT, char *, int *);
static int fix_always (CT, int *);
static int fix_filename_param (char *, char *, PM *, PM *);
static int fix_filename_encoding (CT);
static int write_content (CT, const char *, char *, FILE *, int, int);
static void set_text_ctparams(CT, char *, int);
static int remove_file (const char *);
static void report (char *, char *, char *, char *, ...);
static void pipeser (int);
130
131
132 int
main(int argc,char ** argv)133 main (int argc, char **argv) {
134 int msgnum;
135 char *cp, *file = NULL, *folder = NULL;
136 char *maildir = NULL, buf[100], *outfile = NULL;
137 char **argp, **arguments;
138 struct msgs_array msgs = { 0, 0, NULL };
139 struct msgs *mp = NULL;
140 CT *ctp;
141 FILE *fp, *infp = NULL, *outfp = NULL;
142 int using_stdin = 0;
143 int chgflag = 1;
144 int status = OK;
145 fix_transformations fx;
146 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
147 fx.fixtypes = NULL;
148 fx.replacetextplain = 0;
149 fx.decodetext = CE_8BIT;
150 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
151 fx.lf_line_endings = 0;
152 fx.textcharset = NULL;
153
154 if (nmh_init(argv[0], 2)) { return 1; }
155
156 arguments = getarguments (invo_name, argc, argv, 1);
157 argp = arguments;
158
159 /*
160 * Parse arguments
161 */
162 while ((cp = *argp++)) {
163 if (*cp == '-') {
164 switch (smatch (++cp, switches)) {
165 case AMBIGSW:
166 ambigsw (cp, switches);
167 done (1);
168 case UNKWNSW:
169 adios (NULL, "-%s unknown", cp);
170
171 case HELPSW:
172 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
173 invo_name);
174 print_help (buf, switches, 1);
175 done (0);
176 case VERSIONSW:
177 print_version(invo_name);
178 done (0);
179
180 case DECODETEXTSW:
181 if (! (cp = *argp++) || *cp == '-') {
182 adios (NULL, "missing argument to %s", argp[-2]);
183 }
184 if (! strcasecmp (cp, "8bit")) {
185 fx.decodetext = CE_8BIT;
186 } else if (! strcasecmp (cp, "7bit")) {
187 fx.decodetext = CE_7BIT;
188 } else if (! strcasecmp (cp, "binary")) {
189 fx.decodetext = CE_BINARY;
190 } else {
191 adios (NULL, "invalid argument to %s", argp[-2]);
192 }
193 continue;
194 case NDECODETEXTSW:
195 fx.decodetext = 0;
196 continue;
197 case DECODETYPESW:
198 if (! (cp = *argp++) || *cp == '-') {
199 adios (NULL, "missing argument to %s", argp[-2]);
200 }
201 fx.decodetypes = cp;
202 continue;
203 case CRLFLINEBREAKSSW:
204 fx.lf_line_endings = 0;
205 continue;
206 case NCRLFLINEBREAKSSW:
207 fx.lf_line_endings = 1;
208 continue;
209 case TEXTCHARSETSW:
210 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
211 adios (NULL, "missing argument to %s", argp[-2]);
212 }
213 fx.textcharset = cp;
214 continue;
215 case NTEXTCHARSETSW:
216 fx.textcharset = 0;
217 continue;
218 case FIXBOUNDARYSW:
219 fx.fixboundary = 1;
220 continue;
221 case NFIXBOUNDARYSW:
222 fx.fixboundary = 0;
223 continue;
224 case FIXCOMPOSITECTESW:
225 fx.fixcompositecte = 1;
226 continue;
227 case NFIXCOMPOSITECTESW:
228 fx.fixcompositecte = 0;
229 continue;
230 case FIXTYPESW:
231 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
232 adios (NULL, "missing argument to %s", argp[-2]);
233 }
234 if (! strncasecmp (cp, "multipart/", 10) ||
235 ! strncasecmp (cp, "message/", 8)) {
236 adios (NULL, "-fixtype %s not allowed", cp);
237 } else if (! strchr (cp, '/')) {
238 adios (NULL, "-fixtype requires type/subtype");
239 }
240 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
241 svector_push_back (fx.fixtypes, cp);
242 continue;
243 case REFORMATSW:
244 fx.reformat = 1;
245 continue;
246 case NREFORMATSW:
247 fx.reformat = 0;
248 continue;
249 case REPLACETEXTPLAINSW:
250 fx.replacetextplain = 1;
251 continue;
252 case NREPLACETEXTPLAINSW:
253 fx.replacetextplain = 0;
254 continue;
255 case FILESW:
256 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
257 adios (NULL, "missing argument to %s", argp[-2]);
258 }
259 file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
260 continue;
261 case OUTFILESW:
262 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
263 adios (NULL, "missing argument to %s", argp[-2]);
264 }
265 outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
266 continue;
267 case RPROCSW:
268 if (!(rmmproc = *argp++) || *rmmproc == '-') {
269 adios (NULL, "missing argument to %s", argp[-2]);
270 }
271 continue;
272 case NRPRCSW:
273 rmmproc = NULL;
274 continue;
275 case CHGSW:
276 chgflag = 1;
277 continue;
278 case NCHGSW:
279 chgflag = 0;
280 continue;
281 case VERBSW:
282 verbosw = 1;
283 continue;
284 case NVERBSW:
285 verbosw = 0;
286 continue;
287 }
288 }
289 if (*cp == '+' || *cp == '@') {
290 if (folder) {
291 adios (NULL, "only one folder at a time!");
292 } else {
293 folder = pluspath (cp);
294 }
295 } else {
296 if (*cp == '/') {
297 /* Interpret a full path as a filename, not a message. */
298 file = mh_xstrdup (cp);
299 } else {
300 app_msgarg (&msgs, cp);
301 }
302 }
303 }
304
305 SIGNAL (SIGQUIT, quitser);
306 SIGNAL (SIGPIPE, pipeser);
307
308 /*
309 * Read the standard profile setup
310 */
311 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
312 readconfig ((struct node **) 0, fp, cp, 0);
313 fclose (fp);
314 }
315
316 suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
317 suppress_extraneous_trailing_semicolon_warning = 1;
318
319 if (! context_find ("path")) {
320 free (path ("./", TFOLDER));
321 }
322
323 if (file && msgs.size) {
324 adios (NULL, "cannot specify msg and file at same time!");
325 }
326
327 if (outfile) {
328 /* Open the outfile now, so we don't have to risk opening it
329 after running out of fds. */
330 if (strcmp (outfile, "-") == 0) {
331 outfp = stdout;
332 } else if ((outfp = fopen (outfile, "w")) == NULL) {
333 adios (outfile, "unable to open for writing");
334 }
335 }
336
337 /*
338 * check if message is coming from file
339 */
340 if (file) {
341 /* If file is stdin, create a tmp file name before parse_mime()
342 has a chance, because it might put in on a different
343 filesystem than the output file. Instead, put it in the
344 user's preferred tmp directory. */
345 CT ct;
346
347 if (! strcmp ("-", file)) {
348 int fd;
349 char *cp;
350
351 using_stdin = 1;
352
353 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
354 adios (NULL, "unable to create temporary file in %s",
355 get_temp_dir());
356 } else {
357 free (file);
358 file = mh_xstrdup (cp);
359 cpydata (STDIN_FILENO, fd, "-", file);
360 }
361
362 if (close (fd)) {
363 (void) m_unlink (file);
364 adios (NULL, "failed to write temporary file");
365 }
366 }
367
368 cts = mh_xcalloc(2, sizeof *cts);
369 ctp = cts;
370
371 if ((ct = parse_mime (file))) {
372 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
373 *ctp++ = ct;
374 } else {
375 inform("unable to parse message from file %s", file);
376 status = NOTOK;
377
378 /* If there's an outfile, pass the input message unchanged, so the
379 message won't get dropped from a pipeline. */
380 if (outfile) {
381 /* Something went wrong. Output might be expected, such as if
382 this were run as a filter. Just copy the input to the
383 output. */
384 if ((infp = fopen (file, "r")) == NULL) {
385 adios (file, "unable to open for reading");
386 }
387
388 if (copy_input_to_output (file, infp, outfile, outfp) != OK) {
389 inform("unable to copy message to %s, "
390 "it might be lost\n", outfile);
391 }
392
393 fclose (infp);
394 infp = NULL;
395 }
396 }
397 } else {
398 /*
399 * message(s) are coming from a folder
400 */
401 CT ct;
402
403 if (! msgs.size) {
404 app_msgarg(&msgs, "cur");
405 }
406 if (! folder) {
407 folder = getfolder (1);
408 }
409 maildir = mh_xstrdup(m_maildir (folder));
410
411 /* chdir so that error messages, esp. from MIME parser, just
412 refer to the message and not its path. */
413 if (chdir (maildir) == NOTOK) {
414 adios (maildir, "unable to change directory to");
415 }
416
417 /* read folder and create message structure */
418 if (! (mp = folder_read (folder, 1))) {
419 adios (NULL, "unable to read folder %s", folder);
420 }
421
422 /* check for empty folder */
423 if (mp->nummsg == 0) {
424 adios (NULL, "no messages in %s", folder);
425 }
426
427 /* parse all the message ranges/sequences and set SELECTED */
428 for (msgnum = 0; msgnum < msgs.size; msgnum++)
429 if (! m_convert (mp, msgs.msgs[msgnum])) {
430 done (1);
431 }
432 seq_setprev (mp); /* set the previous-sequence */
433
434 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
435 ctp = cts;
436
437 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
438 if (is_selected(mp, msgnum)) {
439 char *msgnam = m_name (msgnum);
440
441 if ((ct = parse_mime (msgnam))) {
442 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
443 *ctp++ = ct;
444 } else {
445 inform("unable to parse message %s", msgnam);
446 status = NOTOK;
447
448 /* If there's an outfile, pass the input message
449 unchanged, so the message won't get dropped from a
450 pipeline. */
451 if (outfile) {
452 /* Something went wrong. Output might be expected,
453 such as if this were run as a filter. Just copy
454 the input to the output. */
455 /* Can't use path() here because 1) it might have been
456 called before and it caches the pwd, and 2) we call
457 chdir() after that. */
458 char *input_filename =
459 concat (maildir, "/", msgnam, NULL);
460
461 if ((infp = fopen (input_filename, "r")) == NULL) {
462 adios (input_filename,
463 "unable to open for reading");
464 }
465
466 if (copy_input_to_output (input_filename, infp,
467 outfile, outfp) != OK) {
468 inform("unable to copy message to %s, "
469 "it might be lost\n", outfile);
470 }
471
472 fclose (infp);
473 infp = NULL;
474 free (input_filename);
475 }
476 }
477 }
478 }
479
480 if (chgflag) {
481 seq_setcur (mp, mp->hghsel); /* update current message */
482 }
483 seq_save (mp); /* synchronize sequences */
484 context_replace (pfolder, folder);/* update current folder */
485 context_save (); /* save the context file */
486 }
487
488 if (*cts) {
489 for (ctp = cts; *ctp; ++ctp) {
490 status += mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp);
491 free_content (*ctp);
492
493 if (using_stdin) {
494 (void) m_unlink (file);
495
496 if (! outfile) {
497 /* Just calling m_backup() unlinks the backup file. */
498 (void) m_backup (file);
499 }
500 }
501 }
502 } else {
503 status = 1;
504 }
505
506 mh_xfree(maildir);
507 free (cts);
508
509 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
510 if (infp) { fclose (infp); } /* even if stdin */
511 if (outfp) { fclose (outfp); } /* even if stdout */
512 free (outfile);
513 free (file);
514 free (folder);
515 free (arguments);
516
517 done (status);
518 return NOTOK;
519 }
520
521
522 /*
523 * Apply transformations to one message.
524 */
525 int
mhfixmsgsbr(CT * ctp,char * maildir,const fix_transformations * fx,FILE ** infp,char * outfile,FILE ** outfp)526 mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
527 FILE **infp, char *outfile, FILE **outfp) {
528 /* Store input filename in case one of the transformations, i.e.,
529 fix_boundary(), rewrites to a tmp file. */
530 char *input_filename = maildir
531 ? concat (maildir, "/", (*ctp)->c_file, NULL)
532 : mh_xstrdup ((*ctp)->c_file);
533 int modify_inplace = 0;
534 int message_mods = 0;
535 int status = OK;
536
537 /* Though the input file won't need to be opened if everything goes
538 well, do it here just in case there's a failure, and that failure is
539 running out of file descriptors. */
540 if ((*infp = fopen (input_filename, "r")) == NULL) {
541 adios (input_filename, "unable to open for reading");
542 }
543
544 if (outfile == NULL) {
545 modify_inplace = 1;
546
547 if ((*ctp)->c_file) {
548 char *tempfile;
549 /* outfp will be closed by the caller */
550 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) ==
551 NULL) {
552 adios (NULL, "unable to create temporary file in %s",
553 get_temp_dir());
554 }
555 outfile = mh_xstrdup (tempfile);
556 } else {
557 adios (NULL, "missing both input and output filenames\n");
558 }
559 } /* else *outfp was defined by caller */
560
561 reverse_alternative_parts (*ctp);
562 status = fix_always (*ctp, &message_mods);
563 if (status == OK && fx->fixboundary) {
564 status = fix_boundary (ctp, &message_mods);
565 }
566 if (status == OK && fx->fixtypes != NULL) {
567 status = fix_types (*ctp, fx->fixtypes, &message_mods);
568 }
569 if (status == OK && fx->fixcompositecte) {
570 status = fix_composite_cte (*ctp, &message_mods);
571 }
572 if (status == OK && fx->reformat) {
573 status =
574 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
575 }
576 if (status == OK && fx->decodetext) {
577 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
578 &message_mods);
579 update_cte (*ctp);
580 }
581 if (status == OK && fx->textcharset != NULL) {
582 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
583 }
584
585 if (status == OK && ! (*ctp)->c_umask) {
586 /* Set the umask for the contents file. This currently
587 isn't used but just in case it is in the future. */
588 struct stat st;
589
590 if (stat ((*ctp)->c_file, &st) != NOTOK) {
591 (*ctp)->c_umask = ~(st.st_mode & 0777);
592 } else {
593 (*ctp)->c_umask = ~m_gmprot();
594 }
595 }
596
597 /*
598 * Write the content to a file
599 */
600 if (status == OK) {
601 status = write_content (*ctp, input_filename, outfile, *outfp,
602 modify_inplace, message_mods);
603 } else if (! modify_inplace) {
604 /* Something went wrong. Output might be expected, such
605 as if this were run as a filter. Just copy the input
606 to the output. */
607 if (copy_input_to_output (input_filename, *infp, outfile,
608 *outfp) != OK) {
609 inform("unable to copy message to %s, it might be lost\n",
610 outfile);
611 }
612 }
613
614 if (modify_inplace) {
615 if (status != OK) { (void) m_unlink (outfile); }
616 free (outfile);
617 outfile = NULL;
618 }
619
620 fclose (*infp);
621 *infp = NULL;
622 free (input_filename);
623
624 return status;
625 }
626
627
628 /*
629 * Copy input message to output. Assumes not modifying in place, so this
630 * might be running as part of a pipeline.
631 */
632 static int
copy_input_to_output(const char * input_filename,FILE * infp,const char * output_filename,FILE * outfp)633 copy_input_to_output (const char *input_filename, FILE *infp,
634 const char *output_filename, FILE *outfp) {
635 int in = fileno (infp);
636 int out = fileno (outfp);
637 int status = OK;
638
639 if (in != -1 && out != -1) {
640 cpydata (in, out, input_filename, output_filename);
641 } else {
642 status = NOTOK;
643 }
644
645 return status;
646 }
647
648
649 /*
650 * Fix mismatched outer level boundary.
651 */
652 static int
fix_boundary(CT * ct,int * message_mods)653 fix_boundary (CT *ct, int *message_mods) {
654 struct multipart *mp;
655 int status = OK;
656
657 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
658 mp = (struct multipart *) (*ct)->c_ctparams;
659
660 /*
661 * 1) Get boundary at end of part.
662 * 2) Get boundary at beginning of part and compare to the end-of-part
663 * boundary.
664 * 3) Write out contents of ct to tmp file, replacing boundary in
665 * header with boundary from part. Set c_unlink to 1.
666 * 4) Free ct.
667 * 5) Call parse_mime() on the tmp file, replacing ct.
668 */
669
670 if (mp && mp->mp_start) {
671 char *part_boundary;
672
673 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
674 char *fixed;
675
676 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
677 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
678 char *filename = mh_xstrdup ((*ct)->c_file);
679 CT fixed_ct;
680
681 free_content (*ct);
682 if ((fixed_ct = parse_mime (fixed))) {
683 *ct = fixed_ct;
684 (*ct)->c_unlink = 1;
685
686 ++*message_mods;
687 if (verbosw) {
688 report (NULL, NULL, filename,
689 "fix multipart boundary");
690 }
691 } else {
692 *ct = NULL;
693 inform("unable to parse fixed part");
694 status = NOTOK;
695 }
696 free (filename);
697 } else {
698 inform("unable to replace broken boundary");
699 status = NOTOK;
700 }
701 } else {
702 inform("unable to create temporary file in %s",
703 get_temp_dir());
704 status = NOTOK;
705 }
706
707 free (part_boundary);
708 } else {
709 /* Couldn't fix the boundary. Report failure so that mhfixmsg
710 doesn't modify the message. */
711 status = NOTOK;
712 }
713 } else {
714 /* No multipart struct, even though the content type is
715 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
716 the message. */
717 status = NOTOK;
718 }
719 }
720
721 return status;
722 }
723
724
725 /*
726 * Find boundary at end of multipart.
727 */
728 static int
get_multipart_boundary(CT ct,char ** part_boundary)729 get_multipart_boundary (CT ct, char **part_boundary) {
730 char buffer[NMH_BUFSIZ];
731 char *end_boundary = NULL;
732 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
733 ? (off_t) (ct->c_end - sizeof buffer)
734 : (off_t) ct->c_begin;
735 size_t bytes_read;
736 int status = OK;
737
738 /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should
739 be big enough, even if it's just 1024, to make that unlikely. */
740
741 /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
742 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
743 advise (ct->c_file, "unable to open for reading");
744 return NOTOK;
745 }
746
747 /* Get boundary at end of multipart. */
748 while (begin >= (off_t) ct->c_begin) {
749 fseeko (ct->c_fp, begin, SEEK_SET);
750 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
751 char *cp = rfind_str (buffer, bytes_read, "--");
752
753 if (cp) {
754 char *end;
755
756 /* Trim off trailing "--" and anything beyond. */
757 *cp-- = '\0';
758 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
759 if (strlen (end) > 3 && *end++ == '\n' &&
760 *end++ == '-' && *end++ == '-') {
761 end_boundary = mh_xstrdup (end);
762 break;
763 }
764 }
765 }
766 }
767
768 if (end_boundary || begin <= (off_t) (ct->c_begin + sizeof buffer))
769 break;
770 begin -= sizeof buffer;
771 }
772
773 /* Get boundary at beginning of multipart. */
774 if (end_boundary) {
775 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
776 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
777 if (bytes_read >= strlen (end_boundary)) {
778 char *cp = find_str (buffer, bytes_read, end_boundary);
779
780 if (cp && cp - buffer >= 2 && *--cp == '-' &&
781 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
782 status = OK;
783 break;
784 }
785 } else {
786 /* The start and end boundaries didn't match, or the
787 start boundary doesn't begin with "\n--" (or "--"
788 if at the beginning of buffer). Keep trying. */
789 status = NOTOK;
790 }
791 }
792 } else {
793 status = NOTOK;
794 }
795
796 if (ct->c_fp) {
797 fclose (ct->c_fp);
798 ct->c_fp = NULL;
799 }
800
801 if (status == OK) {
802 *part_boundary = end_boundary;
803 } else {
804 *part_boundary = NULL;
805 free (end_boundary);
806 }
807
808 return status;
809 }
810
811
812 /*
813 * Open and copy ct->c_file to file, replacing the multipart boundary.
814 */
815 static int
replace_boundary(CT ct,char * file,char * boundary)816 replace_boundary (CT ct, char *file, char *boundary) {
817 FILE *fpin, *fpout;
818 int compnum, state;
819 char buf[NMH_BUFSIZ], name[NAMESZ];
820 char *np, *vp;
821 m_getfld_state_t gstate = 0;
822 int status = OK;
823
824 if (ct->c_file == NULL) {
825 inform("missing input filename");
826 return NOTOK;
827 }
828
829 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
830 advise (ct->c_file, "unable to open for reading");
831 return NOTOK;
832 }
833
834 if ((fpout = fopen (file, "w")) == NULL) {
835 fclose (fpin);
836 advise (file, "unable to open for writing");
837 return NOTOK;
838 }
839
840 for (compnum = 1;;) {
841 int bufsz = (int) sizeof buf;
842
843 switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
844 case FLD:
845 case FLDPLUS:
846 compnum++;
847
848 /* get copies of the buffers */
849 np = mh_xstrdup (name);
850 vp = mh_xstrdup (buf);
851
852 /* if necessary, get rest of field */
853 while (state == FLDPLUS) {
854 bufsz = sizeof buf;
855 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
856 vp = add (buf, vp); /* add to previous value */
857 }
858
859 if (strcasecmp (TYPE_FIELD, np)) {
860 fprintf (fpout, "%s:%s", np, vp);
861 } else {
862 char *new_ctline, *new_params;
863
864 replace_param(&ct->c_ctinfo.ci_first_pm,
865 &ct->c_ctinfo.ci_last_pm, "boundary",
866 boundary, 0);
867
868 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
869 ct->c_ctinfo.ci_subtype, NULL);
870 new_params = output_params(strlen(TYPE_FIELD) +
871 strlen(new_ctline) + 1,
872 ct->c_ctinfo.ci_first_pm, NULL, 0);
873 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
874 FENDNULL(new_params));
875 free(new_ctline);
876 mh_xfree(new_params);
877 }
878
879 free (vp);
880 free (np);
881
882 continue;
883
884 case BODY:
885 putc('\n', fpout);
886 /* buf will have a terminating NULL, skip it. */
887 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
888 advise (file, "fwrite");
889 }
890 continue;
891
892 case FILEEOF:
893 break;
894
895 case LENERR:
896 case FMTERR:
897 inform("message format error in component #%d", compnum);
898 status = NOTOK;
899 break;
900
901 default:
902 inform("getfld() returned %d", state);
903 status = NOTOK;
904 break;
905 }
906
907 break;
908 }
909
910 m_getfld_state_destroy (&gstate);
911 fclose (fpout);
912 fclose (fpin);
913
914 return status;
915 }
916
917
918 /*
919 * Fix Content-Type header to reflect the content of its part.
920 */
921 static int
fix_types(CT ct,svector_t fixtypes,int * message_mods)922 fix_types (CT ct, svector_t fixtypes, int *message_mods) {
923 int status = OK;
924
925 switch (ct->c_type) {
926 case CT_MULTIPART: {
927 struct multipart *m = (struct multipart *) ct->c_ctparams;
928 struct part *part;
929
930 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
931 status = fix_types (part->mp_part, fixtypes, message_mods);
932 }
933 break;
934 }
935
936 case CT_MESSAGE:
937 if (ct->c_subtype == MESSAGE_EXTERNAL) {
938 struct exbody *e = (struct exbody *) ct->c_ctparams;
939
940 status = fix_types (e->eb_content, fixtypes, message_mods);
941 }
942 break;
943
944 default: {
945 char **typep, *type;
946
947 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
948 for (typep = svector_strs (fixtypes);
949 typep && (type = *typep);
950 ++typep) {
951 char *type_subtype =
952 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
953 NULL);
954
955 if (! strcasecmp (type, type_subtype) &&
956 decode_part (ct) == OK &&
957 ct->c_cefile.ce_file != NULL) {
958 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
959 char *cp;
960
961 if ((cp = strchr (ct_type_subtype, ';'))) {
962 /* Truncate to remove any parameter list from
963 mime_type () result. */
964 *cp = '\0';
965 }
966
967 if (strcasecmp (type, ct_type_subtype)) {
968 char *ct_type, *ct_subtype;
969 HF hf;
970
971 /* The Content-Type header does not match the
972 content, so update these struct Content
973 fields to match:
974 * c_type, c_subtype
975 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
976 * c_ctline
977 */
978 /* Extract type and subtype from type/subtype. */
979 ct_type = mh_xstrdup(ct_type_subtype);
980 if ((cp = strchr (ct_type, '/'))) {
981 *cp = '\0';
982 ct_subtype = mh_xstrdup(++cp);
983 } else {
984 inform("missing / in MIME type of %s %s",
985 ct->c_file, ct->c_partno);
986 free (ct_type);
987 return NOTOK;
988 }
989
990 ct->c_type = ct_str_type (ct_type);
991 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
992
993 free (ct->c_ctinfo.ci_type);
994 ct->c_ctinfo.ci_type = ct_type;
995 free (ct->c_ctinfo.ci_subtype);
996 ct->c_ctinfo.ci_subtype = ct_subtype;
997 if (! replace_substring (&ct->c_ctline, type,
998 ct_type_subtype)) {
999 inform("did not find %s in %s",
1000 type, ct->c_ctline);
1001 }
1002
1003 /* Update Content-Type header field. */
1004 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1005 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1006 if (replace_substring (&hf->value, type,
1007 ct_type_subtype)) {
1008 ++*message_mods;
1009 if (verbosw) {
1010 report (NULL, ct->c_partno, ct->c_file,
1011 "change Content-Type in header "
1012 "from %s to %s",
1013 type, ct_type_subtype);
1014 }
1015 break;
1016 }
1017 inform("did not find %s in %s", type, hf->value);
1018 }
1019 }
1020 }
1021 free (ct_type_subtype);
1022 }
1023 free (type_subtype);
1024 }
1025 }
1026 }}
1027
1028 return status;
1029 }
1030
1031
/*
 * Replace a substring, allocating space to hold the new one.
 *
 * Only the first occurrence of `old' in *str is replaced.  On success the
 * old *str is freed, *str is set to the newly allocated result, and that
 * result is returned.  If `old' does not occur, *str is left alone and
 * NULL is returned.
 */
char *
replace_substring (char **str, const char *old, const char *new) {
    char *match = strstr (*str, old);
    char *tail, *result;

    if (match == NULL) {
        return NULL;
    }

    /* Everything after the matched text is carried over unchanged. */
    tail = match + strlen (old);

    if (match == *str) {
        /* Match at the very start: nothing to keep in front of it. */
        result = concat (new, tail, NULL);
    } else {
        /* Copy the string and cut the copy off where the match begins. */
        char *head = mh_xstrdup(*str);

        head[match - *str] = '\0';
        result = concat (head, new, tail, NULL);
        free (head);
    }

    free (*str);
    *str = result;

    return result;
}
1059
1060
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * str is edited in place and returned.  The first parameter whose name is
 * exactly `name' (compared case-sensitively, at the start of the string or
 * after whitespace or ';') is removed, together with the whitespace and
 * the ';' that introduce it.  A quoted value is removed through its
 * closing quote, or to the end of the string if the quote is never
 * closed.  An unquoted value ends at whitespace or ';'.  str is left
 * untouched if there is no such parameter or `name' is empty.
 */
char *
remove_parameter (char *str, const char *name) {
    /* It looks to me, based on the BNF in RFC 2045, that there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    char *cp = str;
    char *start, *end;
    int had_semicolon;

    if (name_len == 0) {
        return str;
    }

    /* Find `name' followed by '=' where a parameter name can begin, so
       that, e.g., "name" does not match the tail of "filename". */
    while ((cp = strstr (cp, name)) != NULL) {
        const int at_name_start = cp == str  ||  cp[-1] == ';'  ||
            isspace ((unsigned char) cp[-1]);

        if (at_name_start  &&  cp[name_len] == '=') {
            break;
        }
        ++cp;
    }
    if (cp == NULL) {
        return str;
    }

    /* Remove any leading spaces, before the parameter name. */
    for (start = cp;
         start > str  &&  isspace ((unsigned char) start[-1]);
         --start) {
        continue;
    }
    /* Remove a leading semicolon. */
    had_semicolon = start > str  &&  start[-1] == ';';
    if (had_semicolon) { --start; }

    end = cp + name_len + 1;
    if (*end == '"') {
        /* Skip past the quoted value, honoring backslash escapes, and
           then the final quote if there is one. */
        for (++end; *end  &&  *end != '"'; ++end) {
            if (*end == '\\'  &&  end[1]) { ++end; }
        }
        if (*end == '"') { ++end; }
    } else {
        /* Skip past the value, which may be empty. */
        while (*end  &&  *end != ';'  &&  ! isspace ((unsigned char) *end)) {
            ++end;
        }
    }

    /* The ';' after the value separates what follows from what precedes.
       Keep it if the parameter's own ';' was removed; drop it if the
       parameter was the first thing in the string. */
    if (! had_semicolon  &&  *end == ';') { ++end; }

    /* Close the gap, including the terminating null. */
    (void) memmove (start, end, strlen (end) + 1);

    return str;
}
1106
1107
1108 /*
1109 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1110 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1111 * 8 bit.
1112 */
1113 static int
fix_composite_cte(CT ct,int * message_mods)1114 fix_composite_cte (CT ct, int *message_mods) {
1115 int status = OK;
1116
1117 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1118 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1119 ct->c_encoding != CE_BINARY) {
1120 HF hf;
1121
1122 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1123 char *name = hf->name;
1124 for (; *name && isspace ((unsigned char) *name); ++name) {
1125 continue;
1126 }
1127
1128 if (! strncasecmp (name, ENCODING_FIELD,
1129 strlen (ENCODING_FIELD))) {
1130 char *prefix = "Nmh-REPLACED-INVALID-";
1131 HF h;
1132
1133 NEW(h);
1134 h->name = mh_xstrdup (hf->name);
1135 h->hf_encoding = hf->hf_encoding;
1136 h->next = hf->next;
1137 hf->next = h;
1138
1139 /* Retain old header but prefix its name. */
1140 free (hf->name);
1141 hf->name = concat (prefix, h->name, NULL);
1142
1143 ++*message_mods;
1144 if (verbosw) {
1145 char *encoding = cpytrim (hf->value);
1146 report (NULL, ct->c_partno, ct->c_file,
1147 "replace Content-Transfer-Encoding of %s "
1148 "with 8 bit", encoding);
1149 free (encoding);
1150 }
1151
1152 h->value = mh_xstrdup (" 8bit\n");
1153
1154 /* Don't need to warn for multiple C-T-E header
1155 fields, parse_mime() already does that. But
1156 if there are any, fix them all as necessary. */
1157 hf = h;
1158 }
1159 }
1160
1161 set_ce (ct, CE_8BIT);
1162 }
1163
1164 if (ct->c_type == CT_MULTIPART) {
1165 struct multipart *m;
1166 struct part *part;
1167
1168 m = (struct multipart *) ct->c_ctparams;
1169 for (part = m->mp_parts; part; part = part->mp_next) {
1170 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1171 status = NOTOK;
1172 break;
1173 }
1174 }
1175 }
1176 }
1177
1178 return status;
1179 }
1180
1181
1182 /*
1183 * Set content encoding.
1184 */
1185 static int
set_ce(CT ct,int encoding)1186 set_ce (CT ct, int encoding) {
1187 const char *ce = ce_str (encoding);
1188 const struct str2init *ctinit = get_ce_method (ce);
1189
1190 if (ctinit) {
1191 char *cte = concat (" ", ce, "\n", NULL);
1192 int found_cte = 0;
1193 HF hf;
1194 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1195 caller is decode_text_parts (). Save because we'll
1196 overwrite below. */
1197 struct cefile decoded_content_info = ct->c_cefile;
1198
1199 ct->c_encoding = encoding;
1200
1201 ct->c_ctinitfnx = ctinit->si_init;
1202 /* This will assign ct->c_cefile with an all-0 struct, which
1203 is what we want. */
1204 (*ctinit->si_init) (ct);
1205 /* After returning, the caller should set
1206 ct->c_cefile.ce_file to the name of the file containing
1207 the contents. */
1208
1209 if (ct->c_ceclosefnx) {
1210 (*ct->c_ceclosefnx) (ct);
1211 }
1212
1213 /* Restore the cefile. */
1214 ct->c_cefile = decoded_content_info;
1215
1216 /* Update/add Content-Transfer-Encoding header field. */
1217 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1218 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1219 found_cte = 1;
1220 free (hf->value);
1221 hf->value = cte;
1222 }
1223 }
1224 if (! found_cte) {
1225 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
1226 }
1227
1228 /* Update c_celine. It's used only by mhlist -debug. */
1229 free (ct->c_celine);
1230 ct->c_celine = mh_xstrdup (cte);
1231
1232 return OK;
1233 }
1234
1235 return NOTOK;
1236 }
1237
1238
1239 /*
1240 * Make sure each text part has a corresponding text/plain part.
1241 */
1242 static int
ensure_text_plain(CT * ct,CT parent,int * message_mods,int replacetextplain)1243 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
1244 int status = OK;
1245
1246 switch ((*ct)->c_type) {
1247 case CT_TEXT: {
1248 /* Nothing to do for text/plain. */
1249 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1250
1251 if (parent && parent->c_type == CT_MULTIPART &&
1252 parent->c_subtype == MULTI_ALTERNATE) {
1253 int new_subpart_number = 1;
1254 int has_text_plain =
1255 find_textplain_sibling (parent, replacetextplain,
1256 &new_subpart_number);
1257
1258 if (! has_text_plain) {
1259 /* Parent is a multipart/alternative. Insert a new
1260 text/plain subpart. */
1261 const int inserted =
1262 insert_new_text_plain_part (*ct, new_subpart_number,
1263 parent);
1264 if (inserted) {
1265 ++*message_mods;
1266 if (verbosw) {
1267 report (NULL, parent->c_partno, parent->c_file,
1268 "insert text/plain part");
1269 }
1270 } else {
1271 status = NOTOK;
1272 }
1273 }
1274 } else if (parent && parent->c_type == CT_MULTIPART &&
1275 parent->c_subtype == MULTI_RELATED) {
1276 char *type_subtype =
1277 concat ((*ct)->c_ctinfo.ci_type, "/",
1278 (*ct)->c_ctinfo.ci_subtype, NULL);
1279 const char *parent_type =
1280 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1281 int new_subpart_number = 1;
1282 int has_text_plain = 0;
1283
1284 /* Have to do string comparison on the subtype because we
1285 don't enumerate all of them in c_subtype values.
1286 parent_type will be NULL if the multipart/related part
1287 doesn't have a type parameter. The type parameter must
1288 be specified according to RFC 2387 Sec. 3.1 but not all
1289 messages comply. */
1290 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1291 /* The type of this part matches the root type of the
1292 parent multipart/related. Look to see if there's
1293 text/plain sibling. */
1294 has_text_plain =
1295 find_textplain_sibling (parent, replacetextplain,
1296 &new_subpart_number);
1297 }
1298
1299 free (type_subtype);
1300
1301 if (! has_text_plain) {
1302 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1303 struct part *part;
1304 int siblings = 0;
1305
1306 for (part = mp->mp_parts; part; part = part->mp_next) {
1307 if (*ct != part->mp_part) {
1308 ++siblings;
1309 }
1310 }
1311
1312 if (siblings) {
1313 /* Parent is a multipart/related. Insert a new
1314 text/plain subpart in a new multipart/alternative. */
1315 if (insert_into_new_mp_alt (ct, message_mods)) {
1316 /* Not an error if text/plain couldn't be added. */
1317 }
1318 } else {
1319 /* There are no siblings, so insert a new text/plain
1320 subpart, and change the parent type from
1321 multipart/related to multipart/alternative. */
1322 const int inserted =
1323 insert_new_text_plain_part (*ct, new_subpart_number,
1324 parent);
1325
1326 if (inserted) {
1327 HF hf;
1328
1329 parent->c_subtype = MULTI_ALTERNATE;
1330 free (parent->c_ctinfo.ci_subtype);
1331 parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative");
1332 if (! replace_substring (&parent->c_ctline, "/related",
1333 "/alternative")) {
1334 inform("did not find multipart/related in %s",
1335 parent->c_ctline);
1336 }
1337
1338 /* Update Content-Type header field. */
1339 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1340 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1341 if (replace_substring (&hf->value, "/related",
1342 "/alternative")) {
1343 ++*message_mods;
1344 if (verbosw) {
1345 report (NULL, parent->c_partno,
1346 parent->c_file,
1347 "insert text/plain part");
1348 }
1349
1350 /* Remove, e.g., type="text/html" from
1351 multipart/alternative. */
1352 remove_parameter (hf->value, "type");
1353 break;
1354 }
1355 inform("did not find multipart/"
1356 "related in header %s", hf->value);
1357 }
1358 }
1359 } else {
1360 /* Not an error if text/plain couldn't be inserted. */
1361 }
1362 }
1363 }
1364 } else {
1365 if (insert_into_new_mp_alt (ct, message_mods)) {
1366 status = NOTOK;
1367 }
1368 }
1369 break;
1370 }
1371
1372 case CT_MULTIPART: {
1373 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1374 struct part *part;
1375
1376 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1377 if ((*ct)->c_type == CT_MULTIPART) {
1378 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1379 replacetextplain);
1380 }
1381 }
1382 break;
1383 }
1384
1385 case CT_MESSAGE:
1386 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1387 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1388
1389 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1390 replacetextplain);
1391 }
1392 break;
1393 }
1394
1395 return status;
1396 }
1397
1398
1399 /*
1400 * See if there is a sibling text/plain, and return its subpart number.
1401 */
1402 static int
find_textplain_sibling(CT parent,int replacetextplain,int * new_subpart_number)1403 find_textplain_sibling (CT parent, int replacetextplain,
1404 int *new_subpart_number) {
1405 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1406 struct part *part, *prev;
1407 int has_text_plain = 0;
1408
1409 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1410 ++*new_subpart_number;
1411 if (part->mp_part->c_type == CT_TEXT &&
1412 part->mp_part->c_subtype == TEXT_PLAIN) {
1413 if (replacetextplain) {
1414 struct part *old_part;
1415 if (part == mp->mp_parts) {
1416 old_part = mp->mp_parts;
1417 mp->mp_parts = part->mp_next;
1418 } else {
1419 old_part = prev->mp_next;
1420 prev->mp_next = part->mp_next;
1421 }
1422 if (verbosw) {
1423 report (NULL, parent->c_partno, parent->c_file,
1424 "remove text/plain part %s",
1425 old_part->mp_part->c_partno);
1426 }
1427 free_content (old_part->mp_part);
1428 free (old_part);
1429 } else {
1430 has_text_plain = 1;
1431 }
1432 break;
1433 }
1434 prev = part;
1435 }
1436
1437 return has_text_plain;
1438 }
1439
1440
1441 /*
1442 * Insert a new text/plain part.
1443 */
1444 static int
insert_new_text_plain_part(CT ct,int new_subpart_number,CT parent)1445 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
1446 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1447 struct part *new_part;
1448
1449 NEW(new_part);
1450 if ((new_part->mp_part = build_text_plain_part (ct))) {
1451 char buffer[16];
1452 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1453
1454 new_part->mp_next = mp->mp_parts;
1455 mp->mp_parts = new_part;
1456 new_part->mp_part->c_partno =
1457 concat (parent->c_partno ? parent->c_partno : "1", ".",
1458 buffer, NULL);
1459
1460 return 1;
1461 }
1462
1463 free_content (new_part->mp_part);
1464 free (new_part);
1465
1466 return 0;
1467 }
1468
1469
1470 /*
1471 * Create a text/plain part to go along with non-plain sibling part.
1472 */
1473 static CT
build_text_plain_part(CT encoded_part)1474 build_text_plain_part (CT encoded_part) {
1475 CT tp_part = divide_part (encoded_part);
1476 char *tmp_plain_file = NULL;
1477
1478 if (decode_part (tp_part) == OK) {
1479 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1480 contains the decoded contents. And the decoding function, such
1481 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1482 be unlinked by free_content (). */
1483 char *tempfile;
1484
1485 /* This m_mktemp2() call closes the temp file. */
1486 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1487 inform("unable to create temporary file in %s",
1488 get_temp_dir());
1489 } else {
1490 tmp_plain_file = mh_xstrdup (tempfile);
1491 if (reformat_part (tp_part, tmp_plain_file,
1492 tp_part->c_ctinfo.ci_type,
1493 tp_part->c_ctinfo.ci_subtype,
1494 tp_part->c_type) == OK) {
1495 return tp_part;
1496 }
1497 }
1498 }
1499
1500 free_content (tp_part);
1501 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1502 free (tmp_plain_file);
1503
1504 return NULL;
1505 }
1506
1507
1508 /*
1509 * Slip new text/plain part into a new multipart/alternative.
1510 */
1511 static int
insert_into_new_mp_alt(CT * ct,int * message_mods)1512 insert_into_new_mp_alt (CT *ct, int *message_mods) {
1513 CT tp_part = build_text_plain_part (*ct);
1514 int status = OK;
1515
1516 if (tp_part) {
1517 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1518 MULTI_ALTERNATE);
1519 if (mp_alt) {
1520 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1521
1522 if (mp && mp->mp_parts) {
1523 mp->mp_parts->mp_part = tp_part;
1524 /* Make the new multipart/alternative the parent. */
1525 *ct = mp_alt;
1526
1527 ++*message_mods;
1528 if (verbosw) {
1529 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1530 "insert text/plain part");
1531 }
1532 } else {
1533 free_content (tp_part);
1534 free_content (mp_alt);
1535 status = NOTOK;
1536 }
1537 } else {
1538 status = NOTOK;
1539 }
1540 } else {
1541 /* Not an error if text/plain couldn't be built. */
1542 }
1543
1544 return status;
1545 }
1546
1547
1548 /*
1549 * Clone a MIME part.
1550 */
1551 static CT
divide_part(CT ct)1552 divide_part (CT ct) {
1553 CT new_part;
1554
1555 NEW0(new_part);
1556 /* Just copy over what is needed for decoding. c_vrsn and
1557 c_celine aren't necessary. */
1558 new_part->c_file = mh_xstrdup (ct->c_file);
1559 new_part->c_begin = ct->c_begin;
1560 new_part->c_end = ct->c_end;
1561 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1562 new_part->c_type = ct->c_type;
1563 new_part->c_cefile = ct->c_cefile;
1564 new_part->c_encoding = ct->c_encoding;
1565 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1566 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1567 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1568 new_part->c_cesizefnx = ct->c_cesizefnx;
1569
1570 /* c_ctline is used by reformat__part(), so it can preserve
1571 anything after the type/subtype. */
1572 new_part->c_ctline = mh_xstrdup (ct->c_ctline);
1573
1574 return new_part;
1575 }
1576
1577
1578 /*
1579 * Copy the content info from one part to another.
1580 */
1581 static void
copy_ctinfo(CI dest,CI src)1582 copy_ctinfo (CI dest, CI src) {
1583 PM s_pm, d_pm;
1584
1585 dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
1586 dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL;
1587
1588 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1589 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1590 s_pm->pm_value, 0);
1591 if (s_pm->pm_charset) {
1592 d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset);
1593 }
1594 if (s_pm->pm_lang) {
1595 d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang);
1596 }
1597 }
1598
1599 dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL;
1600 dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL;
1601 }
1602
1603
1604 /*
1605 * Decode content.
1606 */
1607 static int
decode_part(CT ct)1608 decode_part (CT ct) {
1609 char *tmp_decoded;
1610 int status;
1611 FILE *file;
1612 char *tempfile;
1613
1614 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) {
1615 adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1616 }
1617 tmp_decoded = mh_xstrdup (tempfile);
1618 /* The following call will load ct->c_cefile.ce_file with the tmp
1619 filename of the decoded content. tmp_decoded will contain the
1620 encoded output, get rid of that. */
1621 status = output_message_fp (ct, file, tmp_decoded);
1622 (void) m_unlink (tmp_decoded);
1623 free (tmp_decoded);
1624 if (fclose (file)) {
1625 inform("unable to close temporary file %s, continuing...", tempfile);
1626 }
1627
1628 return status;
1629 }
1630
1631
1632 /*
1633 * Reformat content as plain text.
1634 * Some of the arguments aren't really needed now, but maybe will
1635 * be in the future for other than text types.
1636 */
1637 static int
reformat_part(CT ct,char * file,char * type,char * subtype,int c_type)1638 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1639 int output_subtype, output_encoding;
1640 const char *reason = NULL;
1641 char *cp, *cf;
1642 int status;
1643
1644 /* Hacky: this redirects the output from whatever command is used
1645 to show the part to a file. So, the user can't have any output
1646 redirection in that command.
1647 Could show_multi() in mhshowsbr.c avoid this? */
1648
1649 /* Check for invo_name-format-type/subtype. */
1650 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1651 if (verbosw) {
1652 inform("Don't know how to convert %s, there is no "
1653 "%s-format-%s/%s profile entry",
1654 ct->c_file, invo_name, type, subtype);
1655 }
1656 return NOTOK;
1657 }
1658 if (strchr (cf, '>')) {
1659 inform("'>' prohibited in \"%s\",\nplease fix your "
1660 "%s-format-%s/%s profile entry", cf, invo_name, type,
1661 FENDNULL(subtype));
1662
1663 return NOTOK;
1664 }
1665
1666 cp = concat (cf, " >", file, NULL);
1667 status = show_content_aux (ct, 0, cp, NULL, NULL);
1668 free (cp);
1669
1670 /* Unlink decoded content tmp file and free its filename to avoid
1671 leaks. The file stream should already have been closed. */
1672 if (ct->c_cefile.ce_unlink) {
1673 (void) m_unlink (ct->c_cefile.ce_file);
1674 free (ct->c_cefile.ce_file);
1675 ct->c_cefile.ce_file = NULL;
1676 ct->c_cefile.ce_unlink = 0;
1677 }
1678
1679 if (c_type == CT_TEXT) {
1680 output_subtype = TEXT_PLAIN;
1681 } else {
1682 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1683 output_subtype = 0;
1684 }
1685
1686 output_encoding = content_encoding (ct, &reason);
1687 if (status == OK &&
1688 set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1689 ct->c_cefile.ce_file = file;
1690 ct->c_cefile.ce_unlink = 1;
1691 } else {
1692 ct->c_cefile.ce_unlink = 0;
1693 status = NOTOK;
1694 }
1695
1696 return status;
1697 }
1698
1699
1700 /*
1701 * Fill in a multipart/alternative part.
1702 */
1703 static CT
build_multipart_alt(CT first_alt,CT new_part,int type,int subtype)1704 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1705 char *boundary_prefix = "----=_nmh-multipart";
1706 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1707 char *boundary_indicator = "; boundary=";
1708 char *typename, *subtypename, *name;
1709 CT ct;
1710 struct part *p;
1711 struct multipart *m;
1712 const struct str2init *ctinit;
1713
1714 NEW0(ct);
1715
1716 /* Set up the multipart/alternative part. These fields of *ct were
1717 initialized to 0 by mh_xcalloc():
1718 c_fp, c_unlink, c_begin, c_end,
1719 c_vrsn, c_ctline, c_celine,
1720 c_id, c_descr, c_dispo, c_partno,
1721 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1722 c_cefile, c_encoding,
1723 c_digested, c_digest[16], c_ctexbody,
1724 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1725 c_umask, c_rfc934,
1726 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1727 */
1728
1729 ct->c_file = mh_xstrdup (first_alt->c_file);
1730 ct->c_type = type;
1731 ct->c_subtype = subtype;
1732
1733 ctinit = get_ct_init (ct->c_type);
1734
1735 typename = ct_type_str (type);
1736 subtypename = ct_subtype_str (type, subtype);
1737
1738 {
1739 int serial = 0;
1740 int found_boundary = 1;
1741
1742 while (found_boundary && serial < 1000000) {
1743 found_boundary = 0;
1744
1745 /* Ensure that the boundary doesn't appear in the decoded
1746 content. */
1747 if (new_part->c_cefile.ce_file) {
1748 if ((found_boundary =
1749 boundary_in_content (&new_part->c_cefile.ce_fp,
1750 new_part->c_cefile.ce_file,
1751 boundary)) == NOTOK) {
1752 free_content (ct);
1753 return NULL;
1754 }
1755 }
1756
1757 /* Ensure that the boundary doesn't appear in the encoded
1758 content. */
1759 if (! found_boundary && new_part->c_file) {
1760 if ((found_boundary =
1761 boundary_in_content (&new_part->c_fp,
1762 new_part->c_file,
1763 boundary)) == NOTOK) {
1764 free_content (ct);
1765 return NULL;
1766 }
1767 }
1768
1769 if (found_boundary) {
1770 /* Try a slightly different boundary. */
1771 char buffer2[16];
1772
1773 free (boundary);
1774 ++serial;
1775 snprintf (buffer2, sizeof buffer2, "%d", serial);
1776 boundary =
1777 concat (boundary_prefix,
1778 FENDNULL(first_alt->c_partno),
1779 "-", buffer2, NULL);
1780 }
1781 }
1782
1783 if (found_boundary) {
1784 inform("giving up trying to find a unique boundary");
1785 free_content (ct);
1786 return NULL;
1787 }
1788 }
1789
1790 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1791 boundary, "\"", NULL);
1792
1793 /* Load c_first_hf and c_last_hf. */
1794 transfer_noncontent_headers (first_alt, ct);
1795 add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL));
1796 free (name);
1797
1798 /* Load c_partno. */
1799 if (first_alt->c_partno) {
1800 ct->c_partno = mh_xstrdup (first_alt->c_partno);
1801 free (first_alt->c_partno);
1802 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1803 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1804 } else {
1805 first_alt->c_partno = mh_xstrdup ("1");
1806 new_part->c_partno = mh_xstrdup ("2");
1807 }
1808
1809 if (ctinit) {
1810 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1811 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1812 }
1813
1814 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1815 "boundary", boundary, 0);
1816
1817 NEW(p);
1818 NEW(p->mp_next);
1819 p->mp_next->mp_next = NULL;
1820 p->mp_next->mp_part = first_alt;
1821
1822 NEW0(m);
1823 m->mp_start = concat (boundary, "\n", NULL);
1824 m->mp_stop = concat (boundary, "--\n", NULL);
1825 m->mp_parts = p;
1826 ct->c_ctparams = m;
1827
1828 free (boundary);
1829
1830 return ct;
1831 }
1832
1833
1834 /*
1835 * Check that the boundary does not appear in the content.
1836 */
1837 static int
boundary_in_content(FILE ** fp,char * file,const char * boundary)1838 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1839 char buffer[NMH_BUFSIZ];
1840 size_t bytes_read;
1841 int found_boundary = 0;
1842
1843 /* free_content() will close *fp if we fopen it here. */
1844 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1845 advise (file, "unable to open %s for reading", file);
1846 return NOTOK;
1847 }
1848
1849 fseeko (*fp, 0L, SEEK_SET);
1850 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1851 if (find_str (buffer, bytes_read, boundary)) {
1852 found_boundary = 1;
1853 break;
1854 }
1855 }
1856
1857 return found_boundary;
1858 }
1859
1860
1861 /*
1862 * Remove all non-Content headers.
1863 */
1864 static void
transfer_noncontent_headers(CT old,CT new)1865 transfer_noncontent_headers (CT old, CT new) {
1866 HF hp, hp_prev;
1867
1868 hp_prev = hp = old->c_first_hf;
1869 while (hp) {
1870 HF next = hp->next;
1871
1872 if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1873 if (hp == old->c_last_hf) {
1874 if (hp == old->c_first_hf) {
1875 old->c_last_hf = old->c_first_hf = NULL;
1876 } else {
1877 hp_prev->next = NULL;
1878 old->c_last_hf = hp_prev;
1879 }
1880 } else {
1881 if (hp == old->c_first_hf) {
1882 old->c_first_hf = next;
1883 } else {
1884 hp_prev->next = next;
1885 }
1886 }
1887
1888 /* Put node hp in the new CT. */
1889 if (new->c_first_hf == NULL) {
1890 new->c_first_hf = hp;
1891 } else {
1892 new->c_last_hf->next = hp;
1893 }
1894 new->c_last_hf = hp;
1895 } else {
1896 /* A Content- header, leave in old. */
1897 hp_prev = hp;
1898 }
1899
1900 hp = next;
1901 }
1902 }
1903
1904
1905 /*
1906 * Set content type.
1907 */
1908 static int
set_ct_type(CT ct,int type,int subtype,int encoding)1909 set_ct_type (CT ct, int type, int subtype, int encoding) {
1910 char *typename = ct_type_str (type);
1911 char *subtypename = ct_subtype_str (type, subtype);
1912 /* E.g, " text/plain" */
1913 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1914 /* E.g, " text/plain\n" */
1915 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1916 int found_content_type = 0;
1917 HF hf;
1918 const char *cp = NULL;
1919 char *ctline;
1920 int status;
1921
1922 /* Update/add Content-Type header field. */
1923 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1924 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1925 found_content_type = 1;
1926 free (hf->value);
1927 hf->value = (cp = strchr (ct->c_ctline, ';'))
1928 ? concat (type_subtypename, cp, "\n", NULL)
1929 : mh_xstrdup (name_plus_nl);
1930 }
1931 }
1932 if (! found_content_type) {
1933 add_header (ct, mh_xstrdup (TYPE_FIELD),
1934 (cp = strchr (ct->c_ctline, ';'))
1935 ? concat (type_subtypename, cp, "\n", NULL)
1936 : mh_xstrdup (name_plus_nl));
1937 }
1938
1939 /* Some of these might not be used, but set them anyway. */
1940 ctline = cp
1941 ? concat (type_subtypename, cp, NULL)
1942 : concat (type_subtypename, NULL);
1943 free (ct->c_ctline);
1944 ct->c_ctline = ctline;
1945 /* Leave other ctinfo members as they were. */
1946 free (ct->c_ctinfo.ci_type);
1947 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1948 free (ct->c_ctinfo.ci_subtype);
1949 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1950 ct->c_type = type;
1951 ct->c_subtype = subtype;
1952
1953 free (name_plus_nl);
1954 free (type_subtypename);
1955
1956 status = set_ce (ct, encoding);
1957
1958 return status;
1959 }
1960
1961
1962 /*
1963 * It's not necessary to update the charset parameter of a Content-Type
1964 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1965 * (content) was originally in the specified charset, "and will be in
1966 * that character set again after decoding."
1967 */
1968 static int
decode_text_parts(CT ct,int encoding,const char * decodetypes,int * message_mods)1969 decode_text_parts (CT ct, int encoding, const char *decodetypes,
1970 int *message_mods) {
1971 int status = OK;
1972 int lf_line_endings = 0;
1973
1974 switch (ct->c_type) {
1975 case CT_MULTIPART: {
1976 struct multipart *m = (struct multipart *) ct->c_ctparams;
1977 struct part *part;
1978
1979 /* Should check to see if the body for this part is encoded?
1980 For now, it gets passed along as-is by InitMultiPart(). */
1981 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1982 status = decode_text_parts (part->mp_part, encoding, decodetypes,
1983 message_mods);
1984 }
1985 break;
1986 }
1987
1988 case CT_MESSAGE:
1989 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1990 struct exbody *e = (struct exbody *) ct->c_ctparams;
1991
1992 status = decode_text_parts (e->eb_content, encoding, decodetypes,
1993 message_mods);
1994 }
1995 break;
1996
1997 default:
1998 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
1999 break;
2000 }
2001
2002 lf_line_endings =
2003 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
2004
2005 switch (ct->c_encoding) {
2006 case CE_BASE64:
2007 case CE_QUOTED: {
2008 int ct_encoding;
2009
2010 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
2011 const char *reason = NULL;
2012
2013 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
2014 && encoding != CE_BINARY) {
2015 /* The decoding isn't acceptable so discard it.
2016 Leave status as OK to allow other transformations. */
2017 if (verbosw) {
2018 report (NULL, ct->c_partno, ct->c_file,
2019 "will not decode%s because it is binary (%s)",
2020 ct->c_partno ? ""
2021 : (FENDNULL(ct->c_ctline)),
2022 reason);
2023 }
2024 (void) m_unlink (ct->c_cefile.ce_file);
2025 free (ct->c_cefile.ce_file);
2026 ct->c_cefile.ce_file = NULL;
2027 } else if (ct->c_encoding == CE_QUOTED &&
2028 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2029 /* The decoding isn't acceptable so discard it.
2030 Leave status as OK to allow other transformations. */
2031 if (verbosw) {
2032 report (NULL, ct->c_partno, ct->c_file,
2033 "will not decode%s because it is 8bit",
2034 ct->c_partno ? ""
2035 : (FENDNULL(ct->c_ctline)));
2036 }
2037 (void) m_unlink (ct->c_cefile.ce_file);
2038 free (ct->c_cefile.ce_file);
2039 ct->c_cefile.ce_file = NULL;
2040 } else {
2041 int enc;
2042
2043 if (ct_encoding == CE_BINARY) {
2044 enc = CE_BINARY;
2045 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2046 enc = CE_QUOTED;
2047 } else {
2048 enc = ct_encoding;
2049 }
2050 if (set_ce (ct, enc) == OK) {
2051 ++*message_mods;
2052 if (verbosw) {
2053 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2054 FENDNULL(ct->c_ctline));
2055 }
2056 if (lf_line_endings) {
2057 strip_crs (ct, message_mods);
2058 }
2059 } else {
2060 status = NOTOK;
2061 }
2062 }
2063 } else {
2064 status = NOTOK;
2065 }
2066 break;
2067 }
2068 case CE_8BIT:
2069 case CE_7BIT:
2070 if (lf_line_endings) {
2071 strip_crs (ct, message_mods);
2072 }
2073 break;
2074 default:
2075 break;
2076 }
2077
2078 break;
2079 }
2080
2081 return status;
2082 }
2083
2084
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * decodetypes is a comma-separated list whose entries are either a type
 * or a type/subtype.  The comparison ignores case.
 *
 * Returns 1 if type or type/subtype is in the list, otherwise 0.  A NULL
 * decodetypes or type never matches.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    /* Quick search for matching type[/subtype] in decodetypes:  bracket
       decodetypes with commas, then search for ,type, and ,type/subtype, in
       it. */

    int found_match = 0;
    char *delimited_decodetypes;
    char *delimited_type;

    /* A NULL argument would end the concat() argument list early.  For
       type that leaves just ",", which is found in any bracketed list. */
    if (decodetypes == NULL || type == NULL) {
        return 0;
    }

    delimited_decodetypes = concat(",", decodetypes, ",", NULL);
    delimited_type = concat(",", type, ",", NULL);

    if (nmh_strcasestr(delimited_decodetypes, delimited_type)) {
        found_match = 1;
    } else if (subtype != NULL) {
        char *delimited_type_subtype =
            concat(",", type, "/", subtype, ",", NULL);

        if (nmh_strcasestr(delimited_decodetypes, delimited_type_subtype)) {
            found_match = 1;
        }
        free(delimited_type_subtype);
    }

    free(delimited_type);
    free(delimited_decodetypes);

    return found_match;
}
2116
2117
2118 /*
2119 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2120 * if it has any NUL characters, a CR not followed by a LF, or lines
2121 * greater than 998 characters in length. If binary, reason is set
2122 * to a string explaining why.
2123 */
2124 static int
content_encoding(CT ct,const char ** reason)2125 content_encoding (CT ct, const char **reason) {
2126 CE ce = &ct->c_cefile;
2127 int encoding = CE_7BIT;
2128
2129 if (ce->ce_file) {
2130 size_t line_len = 0;
2131 char buffer[NMH_BUFSIZ];
2132 size_t inbytes;
2133
2134 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2135 advise (ce->ce_file, "unable to open for reading");
2136 return CE_UNKNOWN;
2137 }
2138
2139 fseeko (ce->ce_fp, 0L, SEEK_SET);
2140 while (encoding != CE_BINARY &&
2141 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2142 char *cp;
2143 size_t i;
2144 int last_char_was_cr = 0;
2145
2146 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2147 if (*cp == '\0' || ++line_len > 998 ||
2148 (*cp != '\n' && last_char_was_cr)) {
2149 encoding = CE_BINARY;
2150 if (*cp == '\0') {
2151 *reason = "null character";
2152 } else if (line_len > 998) {
2153 *reason = "line length > 998";
2154 } else if (*cp != '\n' && last_char_was_cr) {
2155 *reason = "CR not followed by LF";
2156 } else {
2157 /* Should not reach this. */
2158 *reason = "";
2159 }
2160 break;
2161 }
2162 if (*cp == '\n') {
2163 line_len = 0;
2164 } else if (! isascii ((unsigned char) *cp)) {
2165 encoding = CE_8BIT;
2166 }
2167
2168 last_char_was_cr = *cp == '\r';
2169 }
2170 }
2171
2172 fclose (ce->ce_fp);
2173 ce->ce_fp = NULL;
2174 } /* else should never happen */
2175
2176 return encoding;
2177 }
2178
2179
2180 /*
2181 * Strip carriage returns from content.
2182 */
2183 static int
strip_crs(CT ct,int * message_mods)2184 strip_crs (CT ct, int *message_mods) {
2185 char *charset = content_charset (ct);
2186 int status = OK;
2187
2188 /* Only strip carriage returns if content is ASCII or another
2189 charset that has the same readily recognizable CR followed by a
2190 LF. We can include UTF-8 here because if the high-order bit of
2191 a UTF-8 byte is 0, then it must be a single-byte ASCII
2192 character. */
2193 if (! strcasecmp (charset, "US-ASCII") ||
2194 ! strcasecmp (charset, "UTF-8") ||
2195 ! strncasecmp (charset, "ISO-8859-", 9) ||
2196 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2197 char **file = NULL;
2198 FILE **fp = NULL;
2199 size_t begin;
2200 size_t end;
2201 int has_crs = 0;
2202 int opened_input_file = 0;
2203
2204 if (ct->c_cefile.ce_file) {
2205 file = &ct->c_cefile.ce_file;
2206 fp = &ct->c_cefile.ce_fp;
2207 begin = end = 0;
2208 } else if (ct->c_file) {
2209 file = &ct->c_file;
2210 fp = &ct->c_fp;
2211 begin = (size_t) ct->c_begin;
2212 end = (size_t) ct->c_end;
2213 } /* else don't know where the content is */
2214
2215 if (file && *file && fp) {
2216 if (! *fp) {
2217 if ((*fp = fopen (*file, "r")) == NULL) {
2218 advise (*file, "unable to open for reading");
2219 status = NOTOK;
2220 } else {
2221 opened_input_file = 1;
2222 }
2223 }
2224 }
2225
2226 if (fp && *fp) {
2227 char buffer[NMH_BUFSIZ];
2228 size_t bytes_read;
2229 size_t bytes_to_read =
2230 end > 0 && end > begin ? end - begin : sizeof buffer;
2231
2232 fseeko (*fp, begin, SEEK_SET);
2233 while ((bytes_read = fread (buffer, 1,
2234 min (bytes_to_read, sizeof buffer),
2235 *fp)) > 0) {
2236 /* Look for CR followed by a LF. This is supposed to
2237 be text so there should be LF's. If not, don't
2238 modify the content. */
2239 char *cp;
2240 size_t i;
2241 int last_char_was_cr = 0;
2242
2243 if (end > 0) { bytes_to_read -= bytes_read; }
2244
2245 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2246 if (*cp == '\n' && last_char_was_cr) {
2247 has_crs = 1;
2248 break;
2249 }
2250
2251 last_char_was_cr = *cp == '\r';
2252 }
2253 }
2254
2255 if (has_crs) {
2256 int fd;
2257 char *stripped_content_file;
2258 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2259
2260 if (tempfile == NULL) {
2261 adios (NULL, "unable to create temporary file in %s",
2262 get_temp_dir());
2263 }
2264 stripped_content_file = mh_xstrdup (tempfile);
2265
2266 /* Strip each CR before a LF from the content. */
2267 fseeko (*fp, begin, SEEK_SET);
2268 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2269 0) {
2270 char *cp;
2271 size_t i;
2272 int last_char_was_cr = 0;
2273
2274 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2275 if (*cp == '\r') {
2276 last_char_was_cr = 1;
2277 } else if (last_char_was_cr) {
2278 if (*cp != '\n') {
2279 if (write (fd, "\r", 1) < 0) {
2280 advise (tempfile, "CR write");
2281 }
2282 }
2283 if (write (fd, cp, 1) < 0) {
2284 advise (tempfile, "write");
2285 }
2286 last_char_was_cr = 0;
2287 } else {
2288 if (write (fd, cp, 1) < 0) {
2289 advise (tempfile, "write");
2290 }
2291 last_char_was_cr = 0;
2292 }
2293 }
2294 }
2295
2296 if (close (fd)) {
2297 inform("unable to write temporary file %s, continuing...",
2298 stripped_content_file);
2299 (void) m_unlink (stripped_content_file);
2300 status = NOTOK;
2301 } else {
2302 /* Replace the decoded file with the converted one. */
2303 if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink)
2304 (void) m_unlink (ct->c_cefile.ce_file);
2305
2306 mh_xfree(ct->c_cefile.ce_file);
2307 ct->c_cefile.ce_file = stripped_content_file;
2308 ct->c_cefile.ce_unlink = 1;
2309
2310 ++*message_mods;
2311 if (verbosw) {
2312 report (NULL, ct->c_partno,
2313 begin == 0 && end == 0 ? "" : *file,
2314 "stripped CRs");
2315 }
2316 }
2317 }
2318
2319 if (opened_input_file) {
2320 fclose (*fp);
2321 *fp = NULL;
2322 }
2323 }
2324 }
2325
2326 free (charset);
2327
2328 return status;
2329 }
2330
2331
2332 /*
2333 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2334 * of the part C-T-E's.
2335 */
2336 static void
update_cte(CT ct)2337 update_cte (CT ct) {
2338 const int least_restrictive_enc = least_restrictive_encoding (ct);
2339
2340 if (least_restrictive_enc != CE_UNKNOWN &&
2341 least_restrictive_enc != CE_7BIT) {
2342 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2343 HF hf;
2344 int found_cte = 0;
2345
2346 /* Update/add Content-Transfer-Encoding header field. */
2347 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2348 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2349 found_cte = 1;
2350 free (hf->value);
2351 hf->value = cte;
2352 }
2353 }
2354 if (! found_cte) {
2355 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
2356 }
2357 }
2358 }
2359
2360
2361 /*
2362 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2363 * within a message.
2364 */
2365 static int
least_restrictive_encoding(CT ct)2366 least_restrictive_encoding (CT ct) {
2367 int encoding = CE_UNKNOWN;
2368
2369 switch (ct->c_type) {
2370 case CT_MULTIPART: {
2371 struct multipart *m = (struct multipart *) ct->c_ctparams;
2372 struct part *part;
2373
2374 for (part = m->mp_parts; part; part = part->mp_next) {
2375 const int part_encoding =
2376 least_restrictive_encoding (part->mp_part);
2377
2378 if (less_restrictive (encoding, part_encoding)) {
2379 encoding = part_encoding;
2380 }
2381 }
2382 break;
2383 }
2384
2385 case CT_MESSAGE:
2386 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2387 struct exbody *e = (struct exbody *) ct->c_ctparams;
2388 const int part_encoding =
2389 least_restrictive_encoding (e->eb_content);
2390
2391 if (less_restrictive (encoding, part_encoding)) {
2392 encoding = part_encoding;
2393 }
2394 }
2395 break;
2396
2397 default: {
2398 if (less_restrictive (encoding, ct->c_encoding)) {
2399 encoding = ct->c_encoding;
2400 }
2401 }}
2402
2403 return encoding;
2404 }
2405
2406
2407 /*
2408 * Return whether the second encoding is less restrictive than the first, where
2409 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2410 * CE_BINARY is less restrictive than CE_8BIT and
2411 * CE_8BIT is less restrictive than CE_7BIT.
2412 */
2413 static int
less_restrictive(int encoding,int second_encoding)2414 less_restrictive (int encoding, int second_encoding) {
2415 switch (second_encoding) {
2416 case CE_BINARY:
2417 return encoding != CE_BINARY;
2418 case CE_8BIT:
2419 return encoding != CE_BINARY && encoding != CE_8BIT;
2420 case CE_7BIT:
2421 return encoding != CE_BINARY && encoding != CE_8BIT &&
2422 encoding != CE_7BIT;
2423 default :
2424 return 0;
2425 }
2426 }
2427
2428
2429 /*
2430 * Convert character set of each part.
2431 */
2432 static int
convert_charsets(CT ct,char * dest_charset,int * message_mods)2433 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
2434 int status = OK;
2435
2436 switch (ct->c_type) {
2437 case CT_TEXT:
2438 if (ct->c_subtype == TEXT_PLAIN) {
2439 status = convert_charset (ct, dest_charset, message_mods);
2440 if (status == OK) {
2441 if (verbosw) {
2442 char *ct_charset = content_charset (ct);
2443
2444 report (NULL, ct->c_partno, ct->c_file,
2445 "convert %s to %s", ct_charset, dest_charset);
2446 free (ct_charset);
2447 }
2448 } else {
2449 char *ct_charset = content_charset (ct);
2450
2451 report ("iconv", ct->c_partno, ct->c_file,
2452 "failed to convert %s to %s", ct_charset, dest_charset);
2453 free (ct_charset);
2454 }
2455 }
2456 break;
2457
2458 case CT_MULTIPART: {
2459 struct multipart *m = (struct multipart *) ct->c_ctparams;
2460 struct part *part;
2461
2462 /* Should check to see if the body for this part is encoded?
2463 For now, it gets passed along as-is by InitMultiPart(). */
2464 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2465 status =
2466 convert_charsets (part->mp_part, dest_charset, message_mods);
2467 }
2468 break;
2469 }
2470
2471 case CT_MESSAGE:
2472 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2473 struct exbody *e = (struct exbody *) ct->c_ctparams;
2474
2475 status =
2476 convert_charsets (e->eb_content, dest_charset, message_mods);
2477 }
2478 break;
2479
2480 default:
2481 break;
2482 }
2483
2484 return status;
2485 }
2486
2487
2488 /*
2489 * Fix various problems that aren't handled elsewhere. These
2490 * are fixed unconditionally: there are no switches to disable
2491 * them. Currently, "problems" are these:
2492 * 1) remove extraneous semicolon at the end of a header parameter list
2493 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2494 * filename parameters in Content-Type and Content-Disposition
2495 * headers, respectively.
2496 */
2497 static int
fix_always(CT ct,int * message_mods)2498 fix_always (CT ct, int *message_mods) {
2499 int status = OK;
2500
2501 switch (ct->c_type) {
2502 case CT_MULTIPART: {
2503 struct multipart *m = (struct multipart *) ct->c_ctparams;
2504 struct part *part;
2505
2506 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2507 status = fix_always (part->mp_part, message_mods);
2508 }
2509 break;
2510 }
2511
2512 case CT_MESSAGE:
2513 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2514 struct exbody *e = (struct exbody *) ct->c_ctparams;
2515
2516 status = fix_always (e->eb_content, message_mods);
2517 }
2518 break;
2519
2520 default: {
2521 HF hf;
2522
2523 if (ct->c_first_hf) {
2524 fix_filename_encoding (ct);
2525 }
2526
2527 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2528 size_t len = strlen (hf->value);
2529
2530 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2531 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2532 /* Only do this for Content-Type and
2533 Content-Disposition fields because those are the
2534 only headers that parse_mime() warns about. */
2535 continue;
2536 }
2537
2538 /* whitespace following a trailing ';' will be nuked as well */
2539 if (hf->value[len - 1] == '\n') {
2540 while (isspace((unsigned char)(hf->value[len - 2]))) {
2541 if (len-- == 0) { break; }
2542 }
2543 }
2544
2545 if (hf->value[len - 2] == ';') {
2546 /* Remove trailing ';' from parameter value. */
2547 hf->value[len - 2] = '\n';
2548 hf->value[len - 1] = '\0';
2549
2550 /* Also, if Content-Type parameter, remove trailing ';'
2551 from ct->c_ctline. This probably isn't necessary
2552 but can't hurt. */
2553 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2554 size_t l = strlen(ct->c_ctline) - 1;
2555 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2556 ct->c_ctline[l] == ';') {
2557 ct->c_ctline[l--] = '\0';
2558 if (l == 0) { break; }
2559 }
2560 }
2561
2562 ++*message_mods;
2563 if (verbosw) {
2564 report (NULL, ct->c_partno, ct->c_file,
2565 "remove trailing ; from %s parameter value",
2566 hf->name);
2567 }
2568 }
2569 }
2570 }}
2571
2572 return status;
2573 }
2574
2575
2576 /*
2577 * Factor out common code for loops in fix_filename_encoding().
2578 */
2579 static int
fix_filename_param(char * name,char * value,PM * first_pm,PM * last_pm)2580 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
2581 int fixed = 0;
2582
2583 if (has_prefix(value, "=?") && has_suffix(value, "?=")) {
2584 /* Looks like an RFC 2047 encoded parameter. */
2585 char decoded[PATH_MAX + 1];
2586
2587 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2588 /* Encode using RFC 2231. */
2589 replace_param (first_pm, last_pm, name, decoded, 0);
2590 fixed = 1;
2591 } else {
2592 inform("failed to decode %s parameter %s", name, value);
2593 }
2594 }
2595
2596 return fixed;
2597 }
2598
2599
2600 /*
2601 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2602 * filename parameters in Content-Type and Content-Disposition
2603 * headers, respectively.
2604 */
2605 static int
fix_filename_encoding(CT ct)2606 fix_filename_encoding (CT ct) {
2607 PM pm;
2608 HF hf;
2609 int fixed = 0;
2610
2611 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2612 if (pm->pm_name && pm->pm_value &&
2613 strcasecmp (pm->pm_name, "name") == 0) {
2614 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2615 &ct->c_ctinfo.ci_first_pm,
2616 &ct->c_ctinfo.ci_last_pm);
2617 }
2618 }
2619
2620 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2621 if (pm->pm_name && pm->pm_value &&
2622 strcasecmp (pm->pm_name, "filename") == 0) {
2623 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2624 &ct->c_dispo_first,
2625 &ct->c_dispo_last);
2626 }
2627 }
2628
2629 /* Fix hf values to correspond. */
2630 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2631 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2632
2633 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2634 field = TYPE_HEADER;
2635 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2636 field = DISPO_HEADER;
2637 }
2638
2639 if (field != OTHER) {
2640 const char *const semicolon_loc = strchr (hf->value, ';');
2641
2642 if (semicolon_loc) {
2643 const size_t len =
2644 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2645 const char *const params =
2646 output_params (len,
2647 field == TYPE_HEADER
2648 ? ct->c_ctinfo.ci_first_pm
2649 : ct->c_dispo_first,
2650 NULL, 0);
2651 const char *const new_params = concat (params, "\n", NULL);
2652
2653 replace_substring (&hf->value, semicolon_loc, new_params);
2654 free((void *)new_params); /* Cast away const. Sigh. */
2655 free((void *)params);
2656 } else {
2657 inform("did not find semicolon in %s:%s\n",
2658 hf->name, hf->value);
2659 }
2660 }
2661 }
2662
2663 return OK;
2664 }
2665
2666
2667 /*
2668 * Output content in input file to output file.
2669 */
2670 static int
write_content(CT ct,const char * input_filename,char * outfile,FILE * outfp,int modify_inplace,int message_mods)2671 write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
2672 int modify_inplace, int message_mods) {
2673 int status = OK;
2674
2675 if (modify_inplace) {
2676 if (message_mods > 0) {
2677 if ((status = output_message_fp (ct, outfp, outfile)) == OK) {
2678 char *infile = input_filename
2679 ? mh_xstrdup (input_filename)
2680 : mh_xstrdup (ct->c_file ? ct->c_file : "-");
2681
2682 if (remove_file (infile) == OK) {
2683 if (rename (outfile, infile)) {
2684 /* Rename didn't work, possibly because of an
2685 attempt to rename across filesystems. Try
2686 brute force copy. */
2687 int old = open (outfile, O_RDONLY);
2688 int new =
2689 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2690 int i = -1;
2691
2692 if (old != -1 && new != -1) {
2693 char buffer[NMH_BUFSIZ];
2694
2695 while ((i = read (old, buffer, sizeof buffer)) >
2696 0) {
2697 if (write (new, buffer, i) != i) {
2698 i = -1;
2699 break;
2700 }
2701 }
2702 }
2703 if (new != -1) { close (new); }
2704 if (old != -1) { close (old); }
2705 (void) m_unlink (outfile);
2706
2707 if (i < 0) {
2708 /* The -file argument processing used path() to
2709 expand filename to absolute path. */
2710 int file = ct->c_file && ct->c_file[0] == '/';
2711
2712 inform("unable to rename %s %s to %s, continuing...",
2713 file ? "file" : "message", outfile,
2714 infile);
2715 status = NOTOK;
2716 }
2717 }
2718 } else {
2719 inform("unable to remove input file %s, "
2720 "not modifying it, continuing...", infile);
2721 (void) m_unlink (outfile);
2722 status = NOTOK;
2723 }
2724
2725 free (infile);
2726 } else {
2727 status = NOTOK;
2728 }
2729 } else {
2730 /* No modifications and didn't need the tmp outfile. */
2731 (void) m_unlink (outfile);
2732 }
2733 } else {
2734 /* Output is going to some file. Produce it whether or not
2735 there were modifications. */
2736 status = output_message_fp (ct, outfp, outfile);
2737 }
2738
2739 flush_errors ();
2740 return status;
2741 }
2742
2743
2744 /*
2745 * parse_mime() does not set lf_line_endings in struct text, so use this
2746 * function to do it. It touches the parts the decodetypes identifies.
2747 */
2748 static void
set_text_ctparams(CT ct,char * decodetypes,int lf_line_endings)2749 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
2750 switch (ct->c_type) {
2751 case CT_MULTIPART: {
2752 struct multipart *m = (struct multipart *) ct->c_ctparams;
2753 struct part *part;
2754
2755 for (part = m->mp_parts; part; part = part->mp_next) {
2756 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2757 }
2758 break;
2759 }
2760
2761 case CT_MESSAGE:
2762 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2763 struct exbody *e = (struct exbody *) ct->c_ctparams;
2764
2765 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2766 }
2767 break;
2768
2769 default:
2770 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2771 if (ct->c_ctparams == NULL) {
2772 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2773 }
2774 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2775 }
2776 }
2777 }
2778
2779
2780 /*
2781 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2782 * use the standard MH backup file.
2783 */
2784 static int
remove_file(const char * file)2785 remove_file (const char *file) {
2786 if (rmmproc) {
2787 char *rmm_command = concat (rmmproc, " ", file, NULL);
2788 int status = system (rmm_command);
2789
2790 free (rmm_command);
2791 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2792 }
2793 /* This is OK for a non-message file, it still uses the
2794 BACKUP_PREFIX form. The backup file will be in the same
2795 directory as file. */
2796 return rename (file, m_backup (file));
2797 }
2798
2799
2800 /*
2801 * Output formatted message to user.
2802 */
2803 static void
report(char * what,char * partno,char * filename,char * message,...)2804 report (char *what, char *partno, char *filename, char *message, ...) {
2805 va_list args;
2806 char *fmt;
2807
2808 if (verbosw) {
2809 va_start (args, message);
2810 fmt = concat (filename, partno ? " part " : ", ",
2811 FENDNULL(partno), partno ? ", " : "", message, NULL);
2812
2813 advertise (what, NULL, fmt, args);
2814
2815 free (fmt);
2816 va_end (args);
2817 }
2818 }
2819
2820
/*
 * Terminate the program through done (1).  When i is SIGQUIT, stdout
 * is flushed and a newline is written to stderr first.
 * Presumably installed as a signal handler, with i being the signal
 * number -- the installation is not visible here.
 */
static void
pipeser (int i)
{
    const int got_sigquit = (i == SIGQUIT);

    if (got_sigquit) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}
2833