1 /* wvWare
2  * Copyright (C) Caolan McNamara, Dom Lachowicz, and others
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17  * 02111-1307, USA.
18  */
19 
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23 
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <errno.h>
27 #include <string.h>
28 
29 #ifdef HAVE_UNISTD_H
30 #include <unistd.h>
31 #endif
32 
33 #include <time.h>
34 
35 #include "getopt.h"
36 #include "wv.h"
37 
38 /* By Dom Lachowicz (cinamod@hotmail.com) */
39 
/*
 * Output primitives.  Every piece of RTF is written to stdout through
 * these wrappers so that call sites do not appear to use printf directly.
 */
/* printf-style formatted write to stdout */
#define rtf_output printf
/* write a single character; the argument goes through printf's %c conversion */
#define rtf_output_char(c)  do {rtf_output("%c", (c));} while(0)
/* flush whatever stdout still has buffered */
#define ENSURE_BUF()    fflush(stdout)
44 
/*
 * Formatting state shared between the parser callbacks.  main() hands a
 * pointer to one instance to the parser via ps.userData; eleProc() reads
 * and updates it.
 *
 * The flags are declared as unsigned 1-bit bit-fields: whether a plain
 * "int x:1" is signed is implementation-defined, and where it is signed
 * the field can only hold 0 and -1, so "flag == 1" would never be true.
 */
typedef struct _rtfUserData {
    /* cached integer values */
    int cFont;                  /* font number emitted as \fN */
    int cFontSize;              /* value emitted as \fsN (copied from CHP.hps) */
    int cCol;                   /* index into rtfColors[] */

    /* boolean character formats */
    unsigned int bIsBold:1;
    unsigned int bIsItalic:1;
    unsigned int bIsStrike:1;
    unsigned int bIsUl:1;
    unsigned int bIsSup:1;
    unsigned int bIsSub:1;

    /* paragraph/section tracking */
    unsigned int bInPara:1;     /* set on PARABEGIN, cleared on PARAEND */
    unsigned int bInSec:1;      /* set on SECTIONBEGIN, cleared on SECTIONEND */
} rtfUserData;
65 
/*
 * Font table.  Each entry pairs a font name as it appears in the Word
 * document (word) with the name written into the RTF {\fonttbl group (rtf).
 * An entry's position in the array is the RTF font number (\fN).
 *
 * The table is never modified and holds string literals, so both the
 * pointers and the array itself are const-qualified.
 */
struct _fontMapping {
    const char *word;           /* name used by the Word document */
    const char *rtf;            /* name emitted in the RTF font table */
};

/* TODO: build me up appropriately */
static const struct _fontMapping fontMap[] = {
    {"Arial", "Arial"},
    {"Bitstream Charter", "Bitstream Charter"},
    {"Bookman", "Bookman"},
    {"Courier", "Courier"},
    {"Courier New", "Courier New"},
    {"Century Schoolbook", "Century Schoolbook"},
    {"Dingbats", "Dingbats"},
    {"Goth", "Goth"},
    {"Nimbus Sans", "Nimbus Sans"},
    {"Palladio", "Palladio"},
    {"Standard Symbol", "Standard Symbol"},
    {"Symbol", "Symbol"},
    {"Times", "Times New Roman"},
    {"Times New Roman", "Times New Roman"},
};
90 
91 #define FontTblSize (sizeof(fontMap)/sizeof(fontMap[0]))
92 #define DFL_FONT_INDEX 13	/* I map this to whatever "Times New Roman" is */
93 static void
output_fonttable(void)94 output_fonttable (void)
95 {
96     int i;
97 
98     rtf_output ("{\\fonttbl\n");
99 
100     for (i = 0; i < FontTblSize; i++)
101 	rtf_output ("{\\f%d\\fnil\\fcharset0\\fprq0\\fttruetype %s;}\n", i,
102 		    fontMap[i].rtf);
103 
104     rtf_output ("}\n");
105 }
106 
107 /* map the MSWord name to the corresponding RTF name index */
108 static int
mapFont(const char * name)109 mapFont (const char *name)
110 {
111     int k;
112 
113     for (k = 0; k < FontTblSize; k++)
114 	if (!strcasecmp (fontMap[k].word, name))
115 	    return k;
116 
117     return DFL_FONT_INDEX;
118 }
119 
120 #undef DFL_FONT_INDEX
121 #undef FontTblSize
122 
/*
 * Colour palette printed at the top of each document in the {\colortbl
 * group.  Each row is {red, green, blue}; the row index is the colour
 * number referenced by the matching \cfN entry in rtfColors[].
 * The table is read-only, hence const.
 */
static const int colorTable[][3] = {
    {0x00, 0x00, 0x00},		/* black */
    {0x00, 0x00, 0xff},		/* blue */
    {0x00, 0xff, 0xff},		/* cyan */
    {0x00, 0xff, 0x00},		/* green */
    {0xff, 0x00, 0xff},		/* magenta */
    {0xff, 0x00, 0x00},		/* red */
    {0xff, 0xff, 0x00},		/* yellow */
    {0xff, 0xff, 0xff},		/* white */
    {0x00, 0x00, 0x80},		/* dark blue */
    {0x00, 0x80, 0x80},		/* dark cyan */
    {0x00, 0x80, 0x00},		/* dark green */
    {0x80, 0x00, 0x80},		/* dark magenta */
    {0x80, 0x00, 0x00},		/* dark red */
    {0x80, 0x80, 0x00},		/* dark yellow */
    {0x80, 0x80, 0x80},		/* dark gray */
    {0xc0, 0xc0, 0xc0},		/* light gray */
};
142 
/*
 * RTF foreground-colour control words, in the same order as colorTable:
 * entry i selects palette row i.  Read-only, so both the array and the
 * strings it points to are const.
 */
static const char *const rtfColors[] = {
    "\\cf0",			/* black */
    "\\cf1",			/* blue */
    "\\cf2",			/* cyan */
    "\\cf3",			/* green */
    "\\cf4",			/* magenta */
    "\\cf5",			/* red */
    "\\cf6",			/* yellow */
    "\\cf7",			/* white */
    "\\cf8",			/* dark blue */
    "\\cf9",			/* dark cyan */
    "\\cf10",			/* dark green */
    "\\cf11",			/* dark magenta */
    "\\cf12",			/* dark red */
    "\\cf13",			/* dark yellow */
    "\\cf14",			/* dark gray */
    "\\cf15",			/* light gray */
};
162 
163 #define RED(i)   colorTable[(i)][0]
164 #define GREEN(i) colorTable[(i)][1]
165 #define BLUE(i)  colorTable[(i)][2]
166 #define ClrTblSize (sizeof(colorTable)/sizeof(colorTable[0]))
167 static void
output_colortable(void)168 output_colortable (void)
169 {
170     int i;
171 
172     rtf_output ("{\\colortbl\n");
173 
174     for (i = 0; i < ClrTblSize; i++)
175       {
176 	  rtf_output ("\\red%d\\green%d\\blue%d;\n", RED (i), GREEN (i),
177 		      BLUE (i));
178       }
179 
180     rtf_output ("}\n");
181 }
182 
183 #undef RED
184 #undef GREEN
185 #undef BLUE
186 #undef ClrTblSize
187 
188 static void
output_rtfUserData(rtfUserData * ud)189 output_rtfUserData (rtfUserData * ud)
190 {
191     /* add the initial bracket */
192     rtf_output_char ('{');
193 
194     /* font color */
195     rtf_output (rtfColors[ud->cCol]);
196 
197     /* font face */
198     rtf_output ("\\f%d", ud->cFont);
199 
200     /* font size */
201     rtf_output ("\\fs%d", ud->cFontSize);
202 
203     /* italic text */
204     if (ud->bIsItalic)
205 	rtf_output ("\\i");
206 
207     /* bold text */
208     if (ud->bIsBold)
209 	rtf_output ("\\b");
210 
211     /* underline and strike-through */
212     if (ud->bIsUl)
213 	rtf_output ("\\ul");
214     if (ud->bIsStrike)
215 	rtf_output ("\\strike");
216 
217     /* sub/superscript */
218     if (ud->bIsSup)
219       {
220 	  rtf_output ("\\super");
221       }
222     else if (ud->bIsSub)
223       {
224 	  rtf_output ("\\sub");
225       }
226     /* add the final space */
227     rtf_output_char (' ');
228 }
229 
230 static void
fill_rtfUserData(rtfUserData * ud,CHP * chp,wvParseStruct * ps)231 fill_rtfUserData (rtfUserData * ud, CHP * chp, wvParseStruct * ps)
232 {
233     char *fname = NULL;
234 
235     if (!ps->fib.fFarEast)
236       {
237 	  fname = wvGetFontnameFromCode (&ps->fonts, chp->ftcAscii);
238       }
239     else
240       {
241 	  fname = wvGetFontnameFromCode (&ps->fonts, chp->ftcFE);
242       }
243 
244     ud->cCol = 0;
245     if (chp->ico)
246       ud->cCol = chp->ico - 1;
247 
248     ud->cFont = mapFont (fname);
249     ud->cFontSize = chp->hps;
250     ud->bIsBold = (chp->fBold);
251     ud->bIsItalic = (chp->fItalic);
252     ud->bIsUl = (chp->kul);
253     ud->bIsStrike = (chp->fStrike);
254     ud->bIsSup = (chp->iss == 1);
255     ud->bIsSub = (chp->iss == 2);
256     free(fname);
257 }
258 
259 static void
handleImage(Blip * b,long width,long height)260 handleImage (Blip * b, long width, long height)
261 {
262 
263     /* TODO: image support */
264     wvStream * pwv = NULL;
265     size_t size = 0;
266     int data = 0;
267     int cnt = 0;
268     int tag = time (NULL);
269 
270     /* short-circuit this method if we don't support
271        the incoming format */
272     switch (b->type)
273       {
274       case msoblipPNG:
275 	  /* conveniently I know how to export to PNG */
276 	  rtf_output
277 	      ("{\\*\\shppict{\\pict\\pngblip\\picw%d\\pich%d\\picwgoal\\pichgoal\n",
278 	       width, height);
279 	  break;
280       case msoblipDIB:
281       case msoblipWMF:
282       case msoblipEMF:
283       case msoblipPICT:
284       case msoblipJPEG:
285       default:
286 	  /* TODO: support other image types */
287 	  return;
288       }
289 
290     rtf_output ("\bliptag%d{\\*\\blipuid%032x}", tag, tag);
291 
292     pwv = b->blip.bitmap.m_pvBits;
293     size = wvStream_size (pwv);
294     wvStream_rewind(pwv);
295     while (cnt < size)
296       {
297 	  if (cnt++ % 64 == 0)
298 	      rtf_output_char ('\n');
299 	  rtf_output ("%02x", read_8ubit(pwv));
300       }
301 
302     rtf_output_char ('}');
303 }
304 
305 static int
charProc(wvParseStruct * ps,U16 eachchar,U8 chartype,U16 lid)306 charProc (wvParseStruct * ps, U16 eachchar, U8 chartype, U16 lid)
307 {
308 
309     /* convert incoming character to unicode */
310     if (chartype)
311 	eachchar = wvHandleCodePage (eachchar, lid);
312 
313     /* take care of any oddities in Microsoft's character "encoding" */
314     /* TODO: does the above code page handler take care of these? */
315     if (chartype == 1 && eachchar == 146)
316 	eachchar = 39;		/* apostrophe */
317 
318     switch (eachchar)
319       {
320       case 13:			/* paragraph end */
321 	  return 0;
322 
323       case 11:			/* hard line break */
324 	  break;
325 
326       case 12:			/* page breaks, section marks */
327 	  break;
328 
329       case 14:			/* column break */
330 	  break;
331 
332       case 19:			/* field begin */
333 	  /* flush current text buffer */
334 	  ps->fieldstate++;
335 	  ps->fieldmiddle = 0;
336 	  return 0;
337       case 20:			/* field separator */
338 	  ps->fieldmiddle = 1;
339 	  return 0;
340       case 21:			/* field end */
341 	  ps->fieldstate--;
342 	  ps->fieldmiddle = 0;
343 	  return 0;
344 
345       default:
346 	  break;
347       }
348 
349     /* todo: properly handle fields */
350     if (eachchar == 0x13 || eachchar == 0x14)
351 	return 0;
352 
353     /* properly escape this */
354     if (eachchar == '{' || eachchar == '}')
355 	rtf_output_char ('\\');
356 
357     rtf_output_char (eachchar);
358     return 0;
359 }
360 
361 static int
specCharProc(wvParseStruct * ps,U16 eachchar,CHP * achp)362 specCharProc (wvParseStruct * ps, U16 eachchar, CHP * achp)
363 {
364     Blip blip;
365     wvStream *fil;
366     long pos;
367     FSPA *fspa;
368     PICF picf;
369     FDOA *fdoa;
370 
371     switch (eachchar)
372       {
373       case 19:			/* field begin */
374 	  ps->fieldstate++;
375 	  ps->fieldmiddle = 0;
376 	  return 0;
377       case 20:			/* field separator */
378 	  if (achp->fOle2)
379 	    {
380 		wvTrace (("Field has an embedded OLE2 object\n"));
381 	    }
382 	  ps->fieldmiddle = 1;
383 	  return 0;
384       case 21:			/* field end */
385 	  ps->fieldstate--;
386 	  ps->fieldmiddle = 0;
387 	  return 0;
388       default:
389 	  break;
390       }
391 
392     /* TODO: properly handle fields */
393     if (ps->fieldstate)
394       {
395 	  if (eachchar == 0x13 || eachchar == 0x14)
396 	      return 0;
397       }
398 
399     /* image handling */
400     switch (eachchar)
401       {
402       case 0x01:
403 
404 	  if (achp->fOle2)
405 	    {
406 		wvTrace (("embedded OLE2 component. currently unsupported"));
407 		return 0;
408 	    }
409 
410 	  pos = wvStream_tell (ps->data);
411 
412 	  wvStream_goto (ps->data, achp->fcPic_fcObj_lTagObj);
413 
414 	  wvGetPICF (wvQuerySupported (&ps->fib, NULL), &picf, ps->data);
415 
416 	  fil = picf.rgb;
417 
418 	  if (wv0x01 (&blip, fil, picf.lcb - picf.cbHeader))
419 	    {
420 		handleImage (&blip, picf.dxaGoal, picf.dyaGoal);
421 	    }
422 	  else
423 	    {
424 		wvTrace (("Dom: strange no graphic data 1\n"));
425 	    }
426 
427 	  wvStream_goto (ps->data, pos);
428 
429 	  return 0;
430 	  break;
431 
432       case 0x08:
433 
434 	  if (wvQuerySupported (&ps->fib, NULL) == WORD8)
435 	    {
436 		if (ps->nooffspa > 0)
437 		  {
438 
439 		      fspa = wvGetFSPAFromCP (ps->currentcp, ps->fspa,
440 					      ps->fspapos, ps->nooffspa);
441 
442 		      if (!fspa)
443 			{
444 			    wvError (
445 				     ("No fspa! Panic and Insanity Abounds!\n"));
446 			    return 0;
447 			}
448 
449 		      if (wv0x08 (&blip, fspa->spid, ps))
450 			{
451 			    handleImage (&blip, fspa->xaRight - fspa->xaLeft,
452 					 fspa->yaBottom - fspa->yaTop);
453 			}
454 		      else
455 			{
456 			    wvTrace (("Dom: strange no graphic data 2\n"));
457 			    return 0;
458 			}
459 		  }
460 		else
461 		  {
462 		      wvTrace (("nooffspa was <=0 -- ignoring"));
463 		  }
464 	    }
465 	  else
466 	    {
467 		wvError (
468 			 ("pre Word8 0x08 graphic -- unsupported at the moment"));
469 		fdoa =
470 		    wvGetFDOAFromCP (ps->currentcp, NULL, ps->fdoapos,
471 				     ps->nooffdoa);
472 	    }
473 
474       }
475 
476     return 0;
477 }
478 
479 static int
eleProc(wvParseStruct * ps,wvTag tag,void * props,int dirty)480 eleProc (wvParseStruct * ps, wvTag tag, void *props, int dirty)
481 {
482     /* some word structures */
483     PAP *apap;
484     CHP *achp;
485     SEP *asep;
486     int iRes;
487 
488     rtfUserData *ud = (rtfUserData *) ps->userData;
489 
490     switch (tag)
491       {
492       case SECTIONBEGIN:
493 
494 	  /* TODO: get smarter */
495 	  asep = (SEP *) props;
496 	  rtf_output ("\\sectd\\sbknone\\colsx360\n");
497 
498 	  ud->bInSec = 1;
499 	  break;
500 
501       case SECTIONEND:
502 	  ud->bInSec = 0;
503 	  break;
504 
505       case PARABEGIN:
506 	  apap = (PAP *) props;
507 
508 	  ud->bInPara = 1;
509 
510 	  rtf_output ("\\pard");
511 	  switch (apap->jc)
512 	    {
513 	    case 0:		/* left */
514 		break;
515 	    case 1:		/* center */
516 		rtf_output ("\\qc");
517 		break;
518 	    case 2:		/* right */
519 		rtf_output ("\\qr");
520 		break;
521 	    default:
522 		break;
523 	    }
524 
525 	  break;
526 
527       case PARAEND:		/* pretty much nothing */
528 	  rtf_output_char ('\n');
529 	  ud->bInPara = 0;
530 	  break;
531 
532       case CHARPROPBEGIN:
533 	  achp = (CHP *) props;
534 	  fill_rtfUserData (ud, achp, ps);
535 	  output_rtfUserData (ud);
536 	  break;
537 
538       case CHARPROPEND:
539 	  achp = (CHP *) props;
540 	  fill_rtfUserData (ud, achp, ps);
541 	  if (ud->bInPara)
542 	    {
543 		rtf_output_char ('}');
544 	    }
545 	  break;
546 
547       default:
548 	  break;
549       }
550 
551     return 0;
552 }
553 
554 static int
docProc(wvParseStruct * ps,wvTag tag)555 docProc (wvParseStruct * ps, wvTag tag)
556 {
557     switch (tag)
558       {
559       case DOCBEGIN:
560 	  /* print out my rtf preamble */
561 	  rtf_output ("{\\rtf1\\ansi\\ansicpg1252\\deff0\n");
562 
563 	  /* now print out a font table */
564 	  /* and a color table */
565 	  output_fonttable ();
566 	  output_colortable ();
567 	  rtf_output
568 	      ("\\kerning0\\cf0\\viewkind1\\paperw12240\\paperh15840\\margl1440\\margr1440\\widowctl\n");
569 	  break;
570 
571       case DOCEND:
572 	  rtf_output ("}\n");
573 	  ENSURE_BUF ();
574 	  break;
575 
576       default:
577 	  break;
578       }
579 
580     return 0;
581 }
582 
583 static void
do_version(void)584 do_version (void)
585 {
586     printf ("wvRTF version %s\n",VERSION);
587 }
588 
/*
 * Print the version banner followed by the usage summary to stdout.
 * The list covers every option main() accepts, including -d/--dir.
 */
static void
do_help (void)
{
    do_version ();
    printf ("(c) Dom Lachowicz 2000\n");
    printf ("Usage:\n");
    printf ("\t-c --charset=set\n");
    printf ("\t-p --password=pass\n");
    printf ("\t-d --dir=dir\n");
    printf ("\t-v --version\n");
    printf ("\t-? --help\n");
    printf ("\nConverts MSWord documents to RTF\n");
}
601 
602 static char *charset = NULL;
603 
604 int
main(int argc,char * argv[])605 main (int argc, char *argv[])
606 {
607     FILE *input;
608     char *fname, *password;
609     int ret;
610 
611     wvParseStruct ps;
612     char *dir = NULL;
613 
614     rtfUserData ud;
615 
616     static struct option long_options[] = {
617 	{"charset", 1, 0, 'c'},
618 	{"password", 1, 0, 'p'},
619 	{"dir", 1, 0, 'd'},
620 	{"version", 0, 0, 'v'},
621 	{"help", 0, 0, '?'},
622 	{0, 0, 0, 0}
623     };
624 
625     int c, index = 0;
626 
627     if (argc < 2)
628       {
629 	  do_help ();
630 	  return 1;
631       }
632 
633     while (1)
634       {
635 	  c = getopt_long (argc, argv, "?vc:p:d:", long_options, &index);
636 	  if (c == -1)
637 	      break;
638 	  switch (c)
639 	    {
640 	    case '?':
641 		do_help ();
642 		return 0;
643 	    case 'v':
644 		do_version ();
645 		return 0;
646 	    case 'c':
647 		if (optarg)
648 		    charset = optarg;
649 		else
650 		    wvError (("No argument given to charset"));
651 		break;
652 	    case 'p':
653 		if (optarg)
654 		    password = optarg;
655 		else
656 		    wvError (("No password given to password option"));
657 		break;
658 	    case 'd':
659 		if (optarg)
660 		    dir = optarg;
661 		else
662 		    wvError (("No directory given to dir option"));
663 		break;
664 	    default:
665 		do_help ();
666 		return -1;
667 	    }
668       }
669 
670     if (optind >= argc)
671       {
672 	  fprintf (stderr, "No file name given to open\n");
673 	  return -1;
674       }
675 
676     fname = argv[optind];
677 
678     input = fopen (fname, "rb");
679     if (!input)
680       {
681 	fprintf (stderr, "Failed to open %s\n", fname);
682 	  return -1;
683       }
684     fclose (input);
685 
686     wvInit ();
687     ret = wvInitParser (&ps, fname);
688     ps.filename = fname;
689     ps.dir = dir;
690 
691     /* set to 0 */
692     memset (&ud, 1, sizeof (rtfUserData));
693     ps.userData = &ud;
694 
695     if (ret & 0x8000)		/* Password protected? */
696       {
697 	  if ((ret & 0x7fff) == WORD8)
698 	    {
699 		ret = 0;
700 		if (password == NULL)
701 		  {
702 		      fprintf (stderr,
703 			       "Password required, this is an encrypted document\n");
704 		      return -1;
705 		  }
706 		else
707 		  {
708 		      wvSetPassword (password, &ps);
709 		      if (wvDecrypt97 (&ps))
710 			{
711 			    wvError (("Incorrect Password\n"));
712 			    return -1;
713 			}
714 		  }
715 	    }
716 	  else if (((ret & 0x7fff) == WORD7) || ((ret & 0x7fff) == WORD6))
717 	    {
718 		ret = 0;
719 		if (password == NULL)
720 		  {
721 		      fprintf (stderr,
722 			       "Password required, this is an encrypted document\n");
723 		      return -1;
724 		  }
725 		else
726 		  {
727 		      wvSetPassword (password, &ps);
728 		      if (wvDecrypt95 (&ps))
729 			{
730 			    wvError (("Incorrect Password\n"));
731 			    return -1;
732 			}
733 		  }
734 	    }
735       }
736 
737     if (ret)
738       {
739 	  wvError (("startup error\n"));
740 	  wvOLEFree (&ps);
741 	  return -1;
742       }
743 
744     wvSetElementHandler (&ps, eleProc);
745     wvSetDocumentHandler (&ps, docProc);
746     wvSetCharHandler (&ps, charProc);
747     wvSetSpecialCharHandler (&ps, specCharProc);
748 
749     wvText (&ps);
750 
751     /* free associated memory */
752     wvOLEFree (&ps);
753 
754     return 0;
755 }
756