1 /* wvWare
2 * Copyright (C) Caolan McNamara, Dom Lachowicz, and others
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17 * 02111-1307, USA.
18 */
19
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <errno.h>
27 #include <string.h>
28
29 #ifdef HAVE_UNISTD_H
30 #include <unistd.h>
31 #endif
32
33 #include <time.h>
34
35 #include "getopt.h"
36 #include "wv.h"
37
38 /* By Dom Lachowicz (cinamod@hotmail.com) */
39
/* Output primitives.  Every piece of RTF text in this file is written through
 * these macros instead of calling printf directly
 * (original author's note: "i don't like appearing to use printf"). */

/* printf-style formatted write; expands to printf, so it writes to stdout */
#define rtf_output printf
/* write the single character c through rtf_output's "%c" conversion */
#define rtf_output_char(c) do {rtf_output("%c", (c));} while(0)
/* flush whatever is still buffered on stdout */
#define ENSURE_BUF() fflush(stdout)
44
/*
 * Converter state shared between the parser callbacks.  main() stores a
 * pointer to one instance in ps->userData and eleProc() reads it back.
 *
 * The one-bit flags are declared unsigned so that a set flag reads back
 * as 1.  A plain "int" bit-field of width 1 may be signed, in which case
 * it can only hold 0 and -1.
 */
typedef struct _rtfUserData {
    /* formatting variables */

    /* cached integer values */
    int cFont;                  /* font number, written as \fN */
    int cFontSize;              /* font size, written as \fsN */
    int cCol;                   /* index into rtfColors */

    /* boolean formats */
    unsigned int bIsBold:1;
    unsigned int bIsItalic:1;
    unsigned int bIsStrike:1;
    unsigned int bIsUl:1;
    unsigned int bIsSup:1;
    unsigned int bIsSub:1;

    /* paragraph related */
    unsigned int bInPara:1;     /* set between PARABEGIN and PARAEND */
    unsigned int bInSec:1;      /* set between SECTIONBEGIN and SECTIONEND */
} rtfUserData;
65
/*
 * Font name mapping.  The "rtf" names are what gets printed at the top of
 * each document in the {\fonttbl section; the "word" names are what
 * mapFont() compares incoming MSWord font names against.
 */
struct _fontMapping {
    const char *word;           /* font name as used by the Word document */
    const char *rtf;            /* font name written to the RTF font table */
};

/*
 * TODO: build me up appropriately
 *
 * Row order matters: a row's position is its RTF font number, and
 * DFL_FONT_INDEX (13) has to stay on the "Times New Roman" row.
 * The table is never modified, hence const.
 */
static const struct _fontMapping fontMap[] = {
    {"Arial", "Arial"},
    {"Bitstream Charter", "Bitstream Charter"},
    {"Bookman", "Bookman"},
    {"Courier", "Courier"},
    {"Courier New", "Courier New"},
    {"Century Schoolbook", "Century Schoolbook"},
    {"Dingbats", "Dingbats"},
    {"Goth", "Goth"},
    {"Nimbus Sans", "Nimbus Sans"},
    {"Palladio", "Palladio"},
    {"Standard Symbol", "Standard Symbol"},
    {"Symbol", "Symbol"},
    {"Times", "Times New Roman"},
    {"Times New Roman", "Times New Roman"},
};
90
91 #define FontTblSize (sizeof(fontMap)/sizeof(fontMap[0]))
92 #define DFL_FONT_INDEX 13 /* I map this to whatever "Times New Roman" is */
93 static void
output_fonttable(void)94 output_fonttable (void)
95 {
96 int i;
97
98 rtf_output ("{\\fonttbl\n");
99
100 for (i = 0; i < FontTblSize; i++)
101 rtf_output ("{\\f%d\\fnil\\fcharset0\\fprq0\\fttruetype %s;}\n", i,
102 fontMap[i].rtf);
103
104 rtf_output ("}\n");
105 }
106
107 /* map the MSWord name to the corresponding RTF name index */
108 static int
mapFont(const char * name)109 mapFont (const char *name)
110 {
111 int k;
112
113 for (k = 0; k < FontTblSize; k++)
114 if (!strcasecmp (fontMap[k].word, name))
115 return k;
116
117 return DFL_FONT_INDEX;
118 }
119
120 #undef DFL_FONT_INDEX
121 #undef FontTblSize
122
/*
 * RGB triples printed at the top of each document in the {\colortbl
 * section.  Row N is the colour selected by rtfColors[N].  The table is
 * read-only, hence const.
 */
static const int colorTable[][3] = {
    {0x00, 0x00, 0x00},         /* black */
    {0x00, 0x00, 0xff},         /* blue */
    {0x00, 0xff, 0xff},         /* cyan */
    {0x00, 0xff, 0x00},         /* green */
    {0xff, 0x00, 0xff},         /* magenta */
    {0xff, 0x00, 0x00},         /* red */
    {0xff, 0xff, 0x00},         /* yellow */
    {0xff, 0xff, 0xff},         /* white */
    {0x00, 0x00, 0x80},         /* dark blue */
    {0x00, 0x80, 0x80},         /* dark cyan */
    {0x00, 0x80, 0x00},         /* dark green */
    {0x80, 0x00, 0x80},         /* dark magenta */
    {0x80, 0x00, 0x00},         /* dark red */
    {0x80, 0x80, 0x00},         /* dark yellow */
    {0x80, 0x80, 0x80},         /* dark gray */
    {0xc0, 0xc0, 0xc0},         /* light gray */
};
142
/*
 * RTF control words selecting each colorTable row, in the same order.
 * Indexed by rtfUserData.cCol.  Neither the pointers nor the strings are
 * ever modified, hence const on both levels.
 */
static const char *const rtfColors[] = {
    "\\cf0",                    /* black */
    "\\cf1",                    /* blue */
    "\\cf2",                    /* cyan */
    "\\cf3",                    /* green */
    "\\cf4",                    /* magenta */
    "\\cf5",                    /* red */
    "\\cf6",                    /* yellow */
    "\\cf7",                    /* white */
    "\\cf8",                    /* dark blue */
    "\\cf9",                    /* dark cyan */
    "\\cf10",                   /* dark green */
    "\\cf11",                   /* dark magenta */
    "\\cf12",                   /* dark red */
    "\\cf13",                   /* dark yellow */
    "\\cf14",                   /* dark gray */
    "\\cf15",                   /* light gray */
};
162
163 #define RED(i) colorTable[(i)][0]
164 #define GREEN(i) colorTable[(i)][1]
165 #define BLUE(i) colorTable[(i)][2]
166 #define ClrTblSize (sizeof(colorTable)/sizeof(colorTable[0]))
167 static void
output_colortable(void)168 output_colortable (void)
169 {
170 int i;
171
172 rtf_output ("{\\colortbl\n");
173
174 for (i = 0; i < ClrTblSize; i++)
175 {
176 rtf_output ("\\red%d\\green%d\\blue%d;\n", RED (i), GREEN (i),
177 BLUE (i));
178 }
179
180 rtf_output ("}\n");
181 }
182
183 #undef RED
184 #undef GREEN
185 #undef BLUE
186 #undef ClrTblSize
187
188 static void
output_rtfUserData(rtfUserData * ud)189 output_rtfUserData (rtfUserData * ud)
190 {
191 /* add the initial bracket */
192 rtf_output_char ('{');
193
194 /* font color */
195 rtf_output (rtfColors[ud->cCol]);
196
197 /* font face */
198 rtf_output ("\\f%d", ud->cFont);
199
200 /* font size */
201 rtf_output ("\\fs%d", ud->cFontSize);
202
203 /* italic text */
204 if (ud->bIsItalic)
205 rtf_output ("\\i");
206
207 /* bold text */
208 if (ud->bIsBold)
209 rtf_output ("\\b");
210
211 /* underline and strike-through */
212 if (ud->bIsUl)
213 rtf_output ("\\ul");
214 if (ud->bIsStrike)
215 rtf_output ("\\strike");
216
217 /* sub/superscript */
218 if (ud->bIsSup)
219 {
220 rtf_output ("\\super");
221 }
222 else if (ud->bIsSub)
223 {
224 rtf_output ("\\sub");
225 }
226 /* add the final space */
227 rtf_output_char (' ');
228 }
229
230 static void
fill_rtfUserData(rtfUserData * ud,CHP * chp,wvParseStruct * ps)231 fill_rtfUserData (rtfUserData * ud, CHP * chp, wvParseStruct * ps)
232 {
233 char *fname = NULL;
234
235 if (!ps->fib.fFarEast)
236 {
237 fname = wvGetFontnameFromCode (&ps->fonts, chp->ftcAscii);
238 }
239 else
240 {
241 fname = wvGetFontnameFromCode (&ps->fonts, chp->ftcFE);
242 }
243
244 ud->cCol = 0;
245 if (chp->ico)
246 ud->cCol = chp->ico - 1;
247
248 ud->cFont = mapFont (fname);
249 ud->cFontSize = chp->hps;
250 ud->bIsBold = (chp->fBold);
251 ud->bIsItalic = (chp->fItalic);
252 ud->bIsUl = (chp->kul);
253 ud->bIsStrike = (chp->fStrike);
254 ud->bIsSup = (chp->iss == 1);
255 ud->bIsSub = (chp->iss == 2);
256 free(fname);
257 }
258
259 static void
handleImage(Blip * b,long width,long height)260 handleImage (Blip * b, long width, long height)
261 {
262
263 /* TODO: image support */
264 wvStream * pwv = NULL;
265 size_t size = 0;
266 int data = 0;
267 int cnt = 0;
268 int tag = time (NULL);
269
270 /* short-circuit this method if we don't support
271 the incoming format */
272 switch (b->type)
273 {
274 case msoblipPNG:
275 /* conveniently I know how to export to PNG */
276 rtf_output
277 ("{\\*\\shppict{\\pict\\pngblip\\picw%d\\pich%d\\picwgoal\\pichgoal\n",
278 width, height);
279 break;
280 case msoblipDIB:
281 case msoblipWMF:
282 case msoblipEMF:
283 case msoblipPICT:
284 case msoblipJPEG:
285 default:
286 /* TODO: support other image types */
287 return;
288 }
289
290 rtf_output ("\bliptag%d{\\*\\blipuid%032x}", tag, tag);
291
292 pwv = b->blip.bitmap.m_pvBits;
293 size = wvStream_size (pwv);
294 wvStream_rewind(pwv);
295 while (cnt < size)
296 {
297 if (cnt++ % 64 == 0)
298 rtf_output_char ('\n');
299 rtf_output ("%02x", read_8ubit(pwv));
300 }
301
302 rtf_output_char ('}');
303 }
304
305 static int
charProc(wvParseStruct * ps,U16 eachchar,U8 chartype,U16 lid)306 charProc (wvParseStruct * ps, U16 eachchar, U8 chartype, U16 lid)
307 {
308
309 /* convert incoming character to unicode */
310 if (chartype)
311 eachchar = wvHandleCodePage (eachchar, lid);
312
313 /* take care of any oddities in Microsoft's character "encoding" */
314 /* TODO: does the above code page handler take care of these? */
315 if (chartype == 1 && eachchar == 146)
316 eachchar = 39; /* apostrophe */
317
318 switch (eachchar)
319 {
320 case 13: /* paragraph end */
321 return 0;
322
323 case 11: /* hard line break */
324 break;
325
326 case 12: /* page breaks, section marks */
327 break;
328
329 case 14: /* column break */
330 break;
331
332 case 19: /* field begin */
333 /* flush current text buffer */
334 ps->fieldstate++;
335 ps->fieldmiddle = 0;
336 return 0;
337 case 20: /* field separator */
338 ps->fieldmiddle = 1;
339 return 0;
340 case 21: /* field end */
341 ps->fieldstate--;
342 ps->fieldmiddle = 0;
343 return 0;
344
345 default:
346 break;
347 }
348
349 /* todo: properly handle fields */
350 if (eachchar == 0x13 || eachchar == 0x14)
351 return 0;
352
353 /* properly escape this */
354 if (eachchar == '{' || eachchar == '}')
355 rtf_output_char ('\\');
356
357 rtf_output_char (eachchar);
358 return 0;
359 }
360
361 static int
specCharProc(wvParseStruct * ps,U16 eachchar,CHP * achp)362 specCharProc (wvParseStruct * ps, U16 eachchar, CHP * achp)
363 {
364 Blip blip;
365 wvStream *fil;
366 long pos;
367 FSPA *fspa;
368 PICF picf;
369 FDOA *fdoa;
370
371 switch (eachchar)
372 {
373 case 19: /* field begin */
374 ps->fieldstate++;
375 ps->fieldmiddle = 0;
376 return 0;
377 case 20: /* field separator */
378 if (achp->fOle2)
379 {
380 wvTrace (("Field has an embedded OLE2 object\n"));
381 }
382 ps->fieldmiddle = 1;
383 return 0;
384 case 21: /* field end */
385 ps->fieldstate--;
386 ps->fieldmiddle = 0;
387 return 0;
388 default:
389 break;
390 }
391
392 /* TODO: properly handle fields */
393 if (ps->fieldstate)
394 {
395 if (eachchar == 0x13 || eachchar == 0x14)
396 return 0;
397 }
398
399 /* image handling */
400 switch (eachchar)
401 {
402 case 0x01:
403
404 if (achp->fOle2)
405 {
406 wvTrace (("embedded OLE2 component. currently unsupported"));
407 return 0;
408 }
409
410 pos = wvStream_tell (ps->data);
411
412 wvStream_goto (ps->data, achp->fcPic_fcObj_lTagObj);
413
414 wvGetPICF (wvQuerySupported (&ps->fib, NULL), &picf, ps->data);
415
416 fil = picf.rgb;
417
418 if (wv0x01 (&blip, fil, picf.lcb - picf.cbHeader))
419 {
420 handleImage (&blip, picf.dxaGoal, picf.dyaGoal);
421 }
422 else
423 {
424 wvTrace (("Dom: strange no graphic data 1\n"));
425 }
426
427 wvStream_goto (ps->data, pos);
428
429 return 0;
430 break;
431
432 case 0x08:
433
434 if (wvQuerySupported (&ps->fib, NULL) == WORD8)
435 {
436 if (ps->nooffspa > 0)
437 {
438
439 fspa = wvGetFSPAFromCP (ps->currentcp, ps->fspa,
440 ps->fspapos, ps->nooffspa);
441
442 if (!fspa)
443 {
444 wvError (
445 ("No fspa! Panic and Insanity Abounds!\n"));
446 return 0;
447 }
448
449 if (wv0x08 (&blip, fspa->spid, ps))
450 {
451 handleImage (&blip, fspa->xaRight - fspa->xaLeft,
452 fspa->yaBottom - fspa->yaTop);
453 }
454 else
455 {
456 wvTrace (("Dom: strange no graphic data 2\n"));
457 return 0;
458 }
459 }
460 else
461 {
462 wvTrace (("nooffspa was <=0 -- ignoring"));
463 }
464 }
465 else
466 {
467 wvError (
468 ("pre Word8 0x08 graphic -- unsupported at the moment"));
469 fdoa =
470 wvGetFDOAFromCP (ps->currentcp, NULL, ps->fdoapos,
471 ps->nooffdoa);
472 }
473
474 }
475
476 return 0;
477 }
478
479 static int
eleProc(wvParseStruct * ps,wvTag tag,void * props,int dirty)480 eleProc (wvParseStruct * ps, wvTag tag, void *props, int dirty)
481 {
482 /* some word structures */
483 PAP *apap;
484 CHP *achp;
485 SEP *asep;
486 int iRes;
487
488 rtfUserData *ud = (rtfUserData *) ps->userData;
489
490 switch (tag)
491 {
492 case SECTIONBEGIN:
493
494 /* TODO: get smarter */
495 asep = (SEP *) props;
496 rtf_output ("\\sectd\\sbknone\\colsx360\n");
497
498 ud->bInSec = 1;
499 break;
500
501 case SECTIONEND:
502 ud->bInSec = 0;
503 break;
504
505 case PARABEGIN:
506 apap = (PAP *) props;
507
508 ud->bInPara = 1;
509
510 rtf_output ("\\pard");
511 switch (apap->jc)
512 {
513 case 0: /* left */
514 break;
515 case 1: /* center */
516 rtf_output ("\\qc");
517 break;
518 case 2: /* right */
519 rtf_output ("\\qr");
520 break;
521 default:
522 break;
523 }
524
525 break;
526
527 case PARAEND: /* pretty much nothing */
528 rtf_output_char ('\n');
529 ud->bInPara = 0;
530 break;
531
532 case CHARPROPBEGIN:
533 achp = (CHP *) props;
534 fill_rtfUserData (ud, achp, ps);
535 output_rtfUserData (ud);
536 break;
537
538 case CHARPROPEND:
539 achp = (CHP *) props;
540 fill_rtfUserData (ud, achp, ps);
541 if (ud->bInPara)
542 {
543 rtf_output_char ('}');
544 }
545 break;
546
547 default:
548 break;
549 }
550
551 return 0;
552 }
553
554 static int
docProc(wvParseStruct * ps,wvTag tag)555 docProc (wvParseStruct * ps, wvTag tag)
556 {
557 switch (tag)
558 {
559 case DOCBEGIN:
560 /* print out my rtf preamble */
561 rtf_output ("{\\rtf1\\ansi\\ansicpg1252\\deff0\n");
562
563 /* now print out a font table */
564 /* and a color table */
565 output_fonttable ();
566 output_colortable ();
567 rtf_output
568 ("\\kerning0\\cf0\\viewkind1\\paperw12240\\paperh15840\\margl1440\\margr1440\\widowctl\n");
569 break;
570
571 case DOCEND:
572 rtf_output ("}\n");
573 ENSURE_BUF ();
574 break;
575
576 default:
577 break;
578 }
579
580 return 0;
581 }
582
583 static void
do_version(void)584 do_version (void)
585 {
586 printf ("wvRTF version %s\n",VERSION);
587 }
588
/*
 * Print the version banner followed by the usage summary on stdout.
 * The summary now also lists -d / --dir, which main() accepts but the
 * help text used to omit.
 */
static void
do_help (void)
{
    do_version ();
    printf ("(c) Dom Lachowicz 2000\n");
    printf ("Usage:\n");
    printf ("\t-c --charset=set\n");
    printf ("\t-p --password=pass\n");
    printf ("\t-d --dir=dir\n");
    printf ("\t-v --version\n");
    printf ("\t-? --help\n");
    printf ("\nConverts MSWord documents to RTF\n");
}
601
602 static char *charset = NULL;
603
604 int
main(int argc,char * argv[])605 main (int argc, char *argv[])
606 {
607 FILE *input;
608 char *fname, *password;
609 int ret;
610
611 wvParseStruct ps;
612 char *dir = NULL;
613
614 rtfUserData ud;
615
616 static struct option long_options[] = {
617 {"charset", 1, 0, 'c'},
618 {"password", 1, 0, 'p'},
619 {"dir", 1, 0, 'd'},
620 {"version", 0, 0, 'v'},
621 {"help", 0, 0, '?'},
622 {0, 0, 0, 0}
623 };
624
625 int c, index = 0;
626
627 if (argc < 2)
628 {
629 do_help ();
630 return 1;
631 }
632
633 while (1)
634 {
635 c = getopt_long (argc, argv, "?vc:p:d:", long_options, &index);
636 if (c == -1)
637 break;
638 switch (c)
639 {
640 case '?':
641 do_help ();
642 return 0;
643 case 'v':
644 do_version ();
645 return 0;
646 case 'c':
647 if (optarg)
648 charset = optarg;
649 else
650 wvError (("No argument given to charset"));
651 break;
652 case 'p':
653 if (optarg)
654 password = optarg;
655 else
656 wvError (("No password given to password option"));
657 break;
658 case 'd':
659 if (optarg)
660 dir = optarg;
661 else
662 wvError (("No directory given to dir option"));
663 break;
664 default:
665 do_help ();
666 return -1;
667 }
668 }
669
670 if (optind >= argc)
671 {
672 fprintf (stderr, "No file name given to open\n");
673 return -1;
674 }
675
676 fname = argv[optind];
677
678 input = fopen (fname, "rb");
679 if (!input)
680 {
681 fprintf (stderr, "Failed to open %s\n", fname);
682 return -1;
683 }
684 fclose (input);
685
686 wvInit ();
687 ret = wvInitParser (&ps, fname);
688 ps.filename = fname;
689 ps.dir = dir;
690
691 /* set to 0 */
692 memset (&ud, 1, sizeof (rtfUserData));
693 ps.userData = &ud;
694
695 if (ret & 0x8000) /* Password protected? */
696 {
697 if ((ret & 0x7fff) == WORD8)
698 {
699 ret = 0;
700 if (password == NULL)
701 {
702 fprintf (stderr,
703 "Password required, this is an encrypted document\n");
704 return -1;
705 }
706 else
707 {
708 wvSetPassword (password, &ps);
709 if (wvDecrypt97 (&ps))
710 {
711 wvError (("Incorrect Password\n"));
712 return -1;
713 }
714 }
715 }
716 else if (((ret & 0x7fff) == WORD7) || ((ret & 0x7fff) == WORD6))
717 {
718 ret = 0;
719 if (password == NULL)
720 {
721 fprintf (stderr,
722 "Password required, this is an encrypted document\n");
723 return -1;
724 }
725 else
726 {
727 wvSetPassword (password, &ps);
728 if (wvDecrypt95 (&ps))
729 {
730 wvError (("Incorrect Password\n"));
731 return -1;
732 }
733 }
734 }
735 }
736
737 if (ret)
738 {
739 wvError (("startup error\n"));
740 wvOLEFree (&ps);
741 return -1;
742 }
743
744 wvSetElementHandler (&ps, eleProc);
745 wvSetDocumentHandler (&ps, docProc);
746 wvSetCharHandler (&ps, charProc);
747 wvSetSpecialCharHandler (&ps, specCharProc);
748
749 wvText (&ps);
750
751 /* free associated memory */
752 wvOLEFree (&ps);
753
754 return 0;
755 }
756