1 /* -*- c -*- */
2 
3 /* Butchered version of html2html.l to post-process RTF files
4  *
5  * Steve Tynor (tynor@atlanta.twr.com). I'm not proud of this code. It's a hack
6  * foisted upon a hack, nested within another hack or two.  It sort of works
7  * well enough for my purposes (generating WinHelp .RTF files for my documents),
8  * but it could definitely use a redesign/rewrite. I shamelessly blame the RTF
9  * format for most of the hackery here -- RTF is not a _language_ like LaTeX or
10  * ROFF -- it's just a file format. So, we can't rely on RTF to do even simple
 * things like "insert a paragraph break here only if the previous token was not
 * also a paragraph break". Since the SGML front end has no conditional processing
 * capabilities, multiple blank lines in the SGML get translated to multiple para
 * breaks in the RTF (actually, that's why I use the <@@par> crap....).
15  *
16  * XREF: BROKEN_DELIMITERS:  We say "{}K{\footnote .." instead of just
17  * "\nK{\footnote .." since the MicroS$ft RTF parser does not (always?) treat
 * white space as a delimiter before a hyperlink markup.
19  *
20  * ESR reordered the @@indent rules to eliminate a generation-time warning.
21  */
22 
23 %{
24 
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
28 
29 /* #define DEBUG */
30 
31   /* NOTE: must be kept in sync with the <p></p> defs in mapping */
32 #define LEFTMARGIN 500
33 #define NEWPAR "\\par\\pard\\plain\\li500\\sb200\\fi0"
34 #define INDENT_AMT 500 /* twips for each level of indentation */
35 
36 const static char* header =
37     "{\\rtf1\\ansi\n"
38     "{\\fonttbl\n"
39     "{\\f0\\fswiss Helvetica;}\n"
40     "{\\f1\\fmodern Courier;}\n"
41     "{\\f2\\froman Times;}\n"
42     "{\\f3\\froman Symbol;}\n"
43     "}\n"
44     "{\\info{\\comment Translated from SGML to RTF w/ Linuxdoc-SGML}}\n"
45     ;
46 
47 int big_page_mode = 1; /* default is backwards compatible */
48 
49 /* <report> style uses <chapt> as the top-level section; <article> uses
50    <sect>.  We need to know whether a <sect> should be numbered as "n"
51    or "n.m".  We assume <article> more unless we see a <chapt> */
52 int chapter_mode = 0;
53 int section_id = 0; /* used for TOC hyperlinks */
54 int in_appendix = 0;
55 
56 FILE *out;
57 FILE *idx_fp;
58 
59 char buf[2048], sectname[256], labeltxt[256];
60 char firstname[256], outname[262];
61 char ssectname[256], Tmp1[4096], Tmp2[256], headname[256];
62 char urlname[256], Headbuf[4096], refname[256];
63 
64 int indent = 0;
65 int nofill = 0;
66 int filenum = 1;
67 int filecount = 1;
68 int tmpfn;
69 int secnr = 0;
70 int ssecnr;
71 int current;
72 int skipnewline = 0;
73 
74 extern char *yytext;
75 
76 #if (YY_FLEX_MAJOR_VERSION == 2 && YY_FLEX_MINOR_VERSION == 5 && YY_FLEX_SUBMINOR_VERSION > 35) \
77  || (YY_FLEX_MAJOR_VERSION == 2 && YY_FLEX_MINOR_VERSION == 6 && YY_FLEX_SUBMINOR_VERSION == 0)
78 extern yy_size_t yyleng;
79 #else
80 extern int yyleng;
81 #endif
82 
83 #define CHAPTER 1
84 #define SECTION 2
85 #define SUBSECT 3
86 
fatal_perror(char * fmt,char * arg)87 void fatal_perror ( char *fmt, char *arg )
88   {
89     fprintf( stderr, "rtf2rtf: Fatal: " );
90     fprintf( stderr, fmt, arg);
91     perror( "\nReason");
92     exit(1);
93   }
94 
getstr(char * str,int n)95 int getstr( char *str, int n )
96   {
97 #ifdef DEBUG
98     fprintf( stderr, "DEBUG: yytext: <<%s>>\n", yytext );
99 #endif
100     if( strlen( yytext ) == n )
101       return 0;
102     strncpy( str, yytext+n, yyleng-n);
103     str[ yyleng-n ] = 0;
104     return 1;
105   }
106 
107 void
put_reference(FILE * fp,char * str)108 put_reference (FILE *fp, char *str)
109   {
110 
111     char *p;
112     char buf[2] = {0, 0};
113     /* trim trailing spaces: */
114     while (str[strlen(str)-1] == ' ') {
115       str[strlen(str)-1] = '\0';
116     }
117     /* replace non-legal topic-id characters with underscores */
118     p = str;
119     while (*p) {
120       buf[0] = *p;
121       if (! strpbrk(buf,
122 		    "abcdegfhijklmnopqrstuvwxyz"
123 		    "ABCDEGFHIJKLMNOPQRSTUVWXYZ"
124 		    "0123456789")) {
125 	fprintf (fp, "_");
126       } else {
127 	fprintf (fp, "%c", *p);
128       }
129       p++;
130     }
131   }
132 
133 void
put_secnum(FILE * fp,int s1,int s2)134 put_secnum (FILE *fp, int s1, int s2)
135   {
136     if (in_appendix) {
137       if (s2 == 0) {
138 	fprintf (fp, "%c", 'A'-1+s1);
139       } else {
140 	fprintf (fp, "%c.%d", 'A'-1+s1, s2);
141       }
142     } else {
143       if (s2 == 0) {
144 	fprintf (fp, "%d", s1);
145       } else {
146 	fprintf (fp, "%d.%d", s1, s2);
147       }
148     }
149   }
150 
151 void
heading(char * str,int s1,int s2,int newpage)152 heading (char *str, int s1, int s2, int newpage)
153    {
154      if (newpage && (out == stdout))
155        {
156 	 sprintf (outname, "%s-0.rtf", firstname);
157 	 out = fopen (outname, "w");
158 	 if (! out )
159 	   {
160 	     fatal_perror ("Could not open file \"%s\"", outname);
161 	   }
162     	 fputs (header, out);
163        }
164 
165      /*     fprintf (out, "\n\\page{\\pard\\plain\\s2\\f0\\fs%d\\b\\fi0\\li576\\sb160\\sa160\\tx576 { }",
166 	    (s2==0 ? 36 : 24) ); */
167 
168      if (newpage)
169        {
170 	 fprintf (out, "\n\\page{\\pard\\plain\\keepn\\f0\\fs%d\\b\\tx576{",
171 		  (s2==0 ? 36 : 24) );
172        }
173      else
174        {
175 	 fprintf (out, "\n{\\pard\\sb200\\plain\\f0\\fs%d\\b\\tx576{",
176 		  (s2==0 ? 36 : 24) );
177        }
178      put_secnum (out, s1, s2);
179      fprintf (out, " %s}}", str);
180 
181      /* Used as a target for the link on the TOC page. Use a SECT_ prefix
182 	just in case the document has any explicit <label>'s with the same
183 	`id' as a section header */
184      /* XREF: BROKEN_DELIMITERS: See above for why we say "{}#{\footnote .."
185       * instead of just "\n#{\footnote ..".
186       */
187      fprintf (out, "\n{}#{\\footnote TOC_%d}\n", section_id);
188      if (newpage)
189        {
190 	 fprintf (out, "\n${\\footnote ");
191 	 put_reference (out, str);
192 	 fprintf (out, "}\n");
193 	 fprintf (out, "+{\\footnote browse}\n");
194 	 fprintf (out, "%s\n", NEWPAR);
195        }
196      /* XREF: BROKEN_DELIMITERS: See above for why we say "{}K{\footnote .."
197       * instead of just "\nK{\footnote ..".
198       */
199      fprintf (out, "{}K{\\footnote %s}\n", str);
200 /*     fprintf (out, "\\par\\pard\n");*/
201 
202      fprintf (idx_fp, "\\li%d\\fi-200{\\f3\\\'B7} ",
203 	      (s2==0 ? 1 : 2) * 500);
204      put_secnum (idx_fp, s1, s2);
205      fprintf (idx_fp, " {\\uldb ");
206      fprintf (idx_fp, "%s}{\\v TOC_%d}\\par\n", str, section_id);
207      section_id++;
208    }
209 
210 void
subheading(char * str,int chapter_size)211 subheading (char *str, int chapter_size)
212   {
213     /*     fprintf (fp, "\n{\\pard\\plain\\s2\\f0\\fs%d\\b \\fi0\\li576\\sb160\\sa160\\tx576 { }%s}\n",
214 	   (chapter_size ? 36 : 24), str);*/
215 
216     fprintf (out, "\n{\\pard\\plain\\f0\\sb200\\fs%d\\b\\tx576{%s}}\n",
217 	     (chapter_size ? 36 : 24), str);
218      /* XREF: BROKEN_DELIMITERS: See above for why we say "{}K{\footnote .."
219       * instead of just "\nK{\footnote ..".
220       */
221     fprintf (out, "{}K{\\footnote %s}%s\n", str, NEWPAR);
222   }
223 
224 %}
225 
226 %s SECT
227 
228 %%
229 
230 "<@@indent+>\n"		{ indent++; }
231 "<@@indent->\n"		{ indent--; }
232 "<@@indent>\n"		{ fprintf( out, "\\li%d{}", LEFTMARGIN+indent*INDENT_AMT); }
233 
234 (\\par\\pard\\plain\n"<@@indent>"\n\\sb200\\fi0\n)+ {
235   /* HACK: Filter out redundant paragraph marks. I'd rather not put them
236    * in in the first place, but it's just TOO HARD.
237    */
238   fprintf (out, "\\par\\pard\\plain\n\\li%d{}\n\\sb200\\fi0\n",
239 	   LEFTMARGIN+indent*INDENT_AMT);
240 }
241 
242 (("<@@par>"\n)|("<@@indent"[\+\-]">"\n))+ {
243   /* Unfortunately, <@@par>'s and <@@indent+>'s get mixed together at the start
244    * of <itemize>'s and <descrip>'s.  This hack allows us to treat them as a
245    * single <@@par>.
246    *
247    * Adjust the indentation based on the number of <@@indent+/->'s we see
248    */
249   char *p = yytext;
250   while (*p) {
251     if (*p == '+') {
252       indent++;
253     } else if (*p == '-') {
254       indent--;
255     }
256     p++;
257   }
258   fprintf (out, "\\par\\pard\\plain\n\\li%d{}\n\\sb200\\fi0\n",
259 	   LEFTMARGIN+indent*INDENT_AMT);
260 }
261 
262 ^"<@@chapt><@@head>".*$		{
263   chapter_mode = 1;
264   /*    skipnewline=1; */
265   getstr( sectname, 17 ) ;
266   current=SECTION;
267   secnr++;
268   ssecnr=0;
269   heading( sectname, secnr, 0, 1);
270 
271 }
272 
273 ^"<@@sect><@@head>".*$		{
274 
275   /*    skipnewline=1; */
276   if (chapter_mode)
277     {
278       getstr( ssectname, 16 ) ;
279       current=SECTION;
280       ssecnr++;
281       heading( ssectname, secnr, ssecnr, !big_page_mode);
282     }
283   else
284     {
285       getstr( sectname, 16 ) ;
286       current=CHAPTER;
287       secnr++;
288       ssecnr=0;
289       heading( sectname, secnr, 0, 1);
290     }
291 }
292 
293 ^"<@@ssect><@@head>".*$		{
294   /*    skipnewline=1;*/
295   getstr( ssectname, 17 ) ;
296   current=SUBSECT;
297   if (! chapter_mode)
298     {
299       ssecnr++;
300       heading( ssectname, secnr, ssecnr, !big_page_mode );
301     }
302   else
303     {
304       current=SUBSECT;
305       subheading( ssectname, 0 );
306     }
307 }
308 
309 
310 ^"<@@appendix>".*$		{
311   in_appendix = 1; secnr=0; ssecnr=0;
312   fprintf (idx_fp, "\\par\\pard\\qc{\\b Appendix}\\par\\pard\n");
313 }
314 
315 ^"<@@part><@@head>".*$		{
316   fprintf (idx_fp, "\\par\\pard\\qc{\\b %s}\\par\\pard\n", yytext+16);
317 }
318 
319 ^"<@@head>".*$		{
320 
321   /*  skipnewline=1; */
322   getstr( ssectname, 8 ) ;
323   current=SUBSECT;
324   subheading( ssectname, 0 );
325 }
326 
327 ^"<@@label>".*$		{
328 
329      /* XREF: BROKEN_DELIMITERS: See above for why we say "{}K{\footnote .."
330       * instead of just "\nK{\footnote ..".
331       */
332   fprintf (out, "{}#{\\footnote ");
333   put_reference (out, yytext+9 );
334   fprintf (out, "}\n");
335 /* Grrr.  WinHelp uses K footnotes for references from without the .HLP file */
336 /* (e.g. via the WinHelp API) */
337   fprintf (out, "{}K{\\footnote ");
338   put_reference (out, yytext+9 );
339   fprintf (out, "}\n");
340 }
341 
342 ^"<@@title>".*$		{
343 
344   /*    skipnewline=1;*/
345   getstr( ssectname, 9 ) ;
346   subheading( ssectname, 1 );
347 }
348 
349 "<@@nofill>\n"		{ indent++; nofill = 1; fprintf( out, "\\par\\pard\\sb200\\keep\\li%d{}", LEFTMARGIN+indent*INDENT_AMT); }
350 "<@@endnofill>\n"	{ indent--; nofill = 0; fprintf( out, "\\par\\pard\\sb200\\li%d{}", LEFTMARGIN+indent*INDENT_AMT); }
351 
352 ..*			{
353 
354   if (skipnewline) {
355     /*    strcat(Headbuf, yytext ); */
356   } else if( out != stdout ) {
357     fprintf( out, "%s", yytext );
358   } else {
359     ECHO;
360   }
361 }
362 
363 \n			{
364 
365 /*  fprintf( out, " ");*/		/* Add a space at the end of each line since RTF does
366 				   not treat EOL as whitespace and will concatenate
367 				   tokents spanning lines */
368   if (nofill)
369     {
370       fprintf( out, "\\line");
371     }
372   if(!skipnewline)
373     {
374       if( out != stdout )
375 	{
376 	  fprintf( out, "%s", yytext );
377 	}
378       else
379 	{
380 	  ECHO;
381 	}
382     }
383 }
384 
385 
386 %%
387 
388 int
389 main( int argc, char **argv )
390 {
391   int i;
392 
393   out = stdout;
394   idx_fp = stdout;
395 
396   strcpy( firstname, "RTF" );
397 
398   for (i = 1; i < argc; i++)
399     {
400       if (0 == strcmp("-2", argv[i]))
401 	{
402 	  big_page_mode = 0;
403 	}
404       else if (argv[i][0] != '-')
405 	{
406 	  strncpy( firstname, argv[i], 256 );
407 	}
408       else
409 	{
410 	  fprintf (stderr,
411 		   "%s: Warning: unhandled command line option \"%s\"\n",
412 		   argv[0], argv[i]);
413 	}
414     }
415 
416   fputs (header, out);
417   fprintf (out, "+{\\footnote browse}\n");
418 
419 
420   secnr=0;
421   yylex();
422 
423   if( out != stdout )
424     {
425       fclose( out );
426     }
427 
428   fputs ("\n}\n", idx_fp);
429   fclose( idx_fp );
430 
431   exit( 0 );
432 }
433 
434 /* Use the existing indentation style:
435  Local variables:
436  c-indent-level: 2
437  c-continued-statement-offset: 2
438  c-brace-offset: 0
439  c-label-offset: -2
440  End:
441  */
442