1 %{
2 /*-
3 * Copyright (c) 2005, 2006 intron <intron@intron.ac>. All rights reserved.
4 * Copyright (c) 2005, 2006 The FreeBSD Simplified Chinese Project.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The FreeBSD Simplified
8 * Chinese Project by intron.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * From CNPROJ: doc/zh_CN.GB2312/share/mk/fixrtf.lex,v 1.1.1000.20 2006/02/19 10:21:40 intron Exp
32 */
33
34 #include <sys/cdefs.h>
35
36 #include <err.h>
37 #include <assert.h>
38 #include <stdio.h>
39 #include <string.h>
40 #include <sys/param.h>
41 #include <stdlib.h>
42 #include <unistd.h>
43 #include <time.h>
44 #include <png.h>
45
/*
 * This program is used to fix RTF:
 * 1. Embed PNGs into RTF. (Option: -p)
 * 2. Embed FreeBSD-specific information into RTF, such as the organization
 *    name and the build time. Unfortunately, so far only Microsoft Word can
 *    read it. Microsoft Word Viewer and OpenOffice cannot read this kind of
 *    information, not even from RTF created by Microsoft Word or OpenOffice.
 *    (Option: -i)
 * 3. Do some locale-specific fixing. (Option: -e <encoding>)
 *
 * See also the Rich Text Format (RTF) Specification:
 * 1. Version 1.8 (Microsoft Word 2003)
 *    http://www.microsoft.com/downloads/details.aspx?familyid=ac57de32-17f0-4b46-9e4e-467ef9bc5540&displaylang=en
 * 2. Version 1.7 (Microsoft Word 2002)
 *    http://support.microsoft.com/kb/q86999/
 * 3. Version 1.6 (Microsoft Word 2000)
 *    http://msdn.microsoft.com/library/en-us/dnrtfspec/html/rtfspec.asp
 */
64
65
/* Non-zero when linked PNG files are to be embedded (set by option -p). */
int embedpng_enable = 0;

/*
 * Encoding identifiers selectable with option -e.
 * The values look like Windows code page numbers; see also
 * http://msdn.microsoft.com/library/en-us/intl/unicode_81rn.asp
 */
#define ENCODING_UNKNOWN	0
#define ENCODING_GB2312		936
#define ENCODING_GB18030	54936
#define ENCODING_BIG5		950

/* Encoding chosen on the command line; ENCODING_UNKNOWN disables fixing. */
int encoding = ENCODING_UNKNOWN;

/* Non-zero until book/article information has been fetched once (-i). */
int fetchinfo_enable = 0;	/* FALSE */

/*
 * MY_BUFFER_SIZE is the real capacity of the buffers below, while
 * MY_BUFFER_LIMIT is the number of bytes the code allows itself to use.
 * The limit is deliberately smaller than the size to leave some slack.
 */
#define MY_BUFFER_SIZE		3072
#define MY_BUFFER_LIMIT		2048

/*
 * "mybuffer" caches the RTF stream while book/article information is
 * being fetched; "mybufferlength" is the number of bytes cached so far.
 */
size_t mybufferlength = 0;
char mybuffer[MY_BUFFER_SIZE];

/* Indices into "infobuf". */
#define INFO_TITLE		0
#define INFO_AUTHOR		1

/*
 * Fetched book/article information, one NUL-terminated item per index.
 * "pinfobuf" points at the item currently being collected, or is NULL
 * when nothing is being collected.
 */
struct
{
	size_t length;
	char text[MY_BUFFER_SIZE];
} infobuf[] =
{
	{ 0, "" },
	{ 0, "" }
}, *pinfobuf = NULL;
106
107 /*
108 * See also the section "Pictures" in RTF specification.
109 */
110 void
embedpng(char * field)111 embedpng(char *field)
112 {
113 char *p1,*p2,fn[PATH_MAX];
114 unsigned char buf[256];
115 FILE *fp;
116 int l,i,nret;
117 png_structp png_ptr;
118 png_infop info_ptr,end_info;
119 png_uint_32 width,height;
120
121 p1=strcasestr(field,"INCLUDEPICTURE");
122 p1=strchr(p1+14,'"'); /* String after "INCLUDEPICTURE" */
123 p2=strchr(p1+1,'"');
124 l=p2-(p1+1); /* Substantial length of file name */
125 if(l>sizeof(fn)-1)
126 {
127 warnx("*** Buffer Overflow Attack Detected !!! ***");
128 exit(1);
129 }
130 memcpy(fn,p1+1,l);
131 fn[l]=0;
132
133 if(l<4) /* It should be longer than ".png". */
134 {
135 warnx("File name '%s' is too short!",fn);
136 goto embedpng_exit_1;
137 }
138
139 if(strcasecmp(fn+(l-4),".png")!=0)
140 {
141 warnx("File name '%s' has not a suffix '.png'. Keep untouched.",fn);
142 goto embedpng_exit_1;
143 }
144
145 if((fp=fopen(fn,"rb"))==NULL)
146 {
147 warnx("Failed to open '%s'!",fn);
148 goto embedpng_exit_1;
149 }
150
151 fread(buf,1,8,fp);
152 if (png_sig_cmp(buf,0,8))
153 {
154 warnx("The file '%s' is NOT in PNG format!",fn);
155 goto embedpng_exit_2;
156 }
157 png_ptr=png_create_read_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
158 if (!png_ptr)
159 {
160 warnx("Unable to create PNG read struct(*png_ptr)!");
161 goto embedpng_exit_2;
162 }
163 info_ptr=png_create_info_struct(png_ptr);
164 if (!info_ptr)
165 {
166 warnx("Unable to create PNG info struct(*info_ptr)!");
167 png_destroy_read_struct(&png_ptr,(png_infopp)NULL,(png_infopp)NULL);
168 goto embedpng_exit_2;
169 }
170 end_info=png_create_info_struct(png_ptr);
171 if(!end_info)
172 {
173 warnx("Unable to create PNG info struct(*end_info)!");
174 png_destroy_read_struct(&png_ptr,&info_ptr,(png_infopp)NULL);
175 goto embedpng_exit_2;
176 }
177 if (setjmp(png_jmpbuf(png_ptr)))
178 {
179 warnx("LibPNG crashed!");
180 png_destroy_read_struct(&png_ptr,&info_ptr,&end_info);
181 goto embedpng_exit_2;
182 }
183 rewind(fp);
184 png_init_io(png_ptr,fp);
185 png_read_info(png_ptr,info_ptr);
186 width=png_get_image_width(png_ptr,info_ptr);
187 height=png_get_image_height(png_ptr,info_ptr);
188
189 if(width>1024 || height>768) warnx("Picture is too large!");
190
191 /*
192 * According to Microsoft's RTF specification, \picwN and \pichN is
193 * mandatory for \pict group. Actually, in both Microsoft Word Viewer
194 * and OpenOffice, these two control words take no effect for PNG.
195 */
196 printf("{\\pict\\pngblip\\picscalex100\\picscaley100\\picw%u\\pich%u",
197 (unsigned int)width,(unsigned int)height);
198
199 rewind(fp);
200 while((nret=fread(buf,1,64,fp))>0)
201 {
202 printf("\n");
203 for(i=0;i<nret;i++)
204 printf("%02x",(unsigned int)((unsigned char)buf[i]));
205 }
206
207 printf("}");
208
209 warnx("'%s' (%ux%u) embedded.",fn,(unsigned int)width,(unsigned int)height);
210
211 png_destroy_read_struct(&png_ptr,&info_ptr,&end_info);
212 fclose(fp);
213 goto embedpng_exit_0;
214
215 embedpng_exit_2:;
216 fclose(fp);
217 embedpng_exit_1:;
218 printf("%s",field); /* Keep link in RTF untouched */
219 embedpng_exit_0:;
220 return;
221 }
222
223 /*
224 * See also the section "Font Table" in RTF specification.
225 */
226 void
modifycharset(char * fcharset)227 modifycharset(char *fcharset)
228 {
229 char *s;
230
231 switch(encoding)
232 {
233 case ENCODING_GB2312:
234 case ENCODING_GB18030: /* GB18030 is not supported in RTF so far */
235 s="\\fcharset134";
236 break;
237 case ENCODING_BIG5:
238 s="\\fcharset136";
239 break;
240 default:
241 s="\\fcharset1"; /* "Default" */
242 break;
243 }
244
245 printf("%s",s);
246
247 warnx("Charset control word modified: %s -> %s",fcharset,s);
248
249 return;
250 }
251
252 /*
253 * (init|addto|flush)mybuffer maintain buffer to cache RTF stream
254 * while fetching book/article information.
255 */
initmybuffer()256 void initmybuffer()
257 {
258 int i;
259
260 mybufferlength=0;
261 for(i=0;i<sizeof(infobuf)/sizeof(infobuf[0]);i++)
262 {
263 infobuf[i].length=0;
264 infobuf[i].text[0]=0;
265 }
266 }
267
addtomybuffer(char * text,size_t leng)268 int addtomybuffer(char *text, size_t leng)
269 {
270 if(mybufferlength+leng>MY_BUFFER_LIMIT) return -1;
271 /* warnx("_%s_",yytext); */
272 memcpy(mybuffer+mybufferlength,text,leng);
273 mybufferlength+=leng; /* No terminator '\0' */
274 return 0;
275 }
276
flushmybuffer()277 void flushmybuffer()
278 {
279 fwrite(mybuffer,1,mybufferlength,yyout);
280 mybufferlength=0;
281 }
282
/*
 * Scanner-action helper: cache the current token (yytext/yyleng).
 * If the cache is full, give up fetching: emit what has been gathered,
 * echo the current token, return to the initial start condition and
 * leave the action.
 *
 * This is intentionally a bare brace block and not do { } while (0):
 * YY_BREAK normally expands to a "break" that has to leave the scanner's
 * action, and an enclosing loop would capture it.
 */
#define ADDTOBUF							\
{									\
	if (addtomybuffer(yytext, yyleng) != 0) {			\
		haltfetch();						\
		ECHO;							\
		BEGIN(0);						\
		warnx("Had been fetching book/article information until buffer was full!"); \
		YY_BREAK;						\
	}								\
}
293
294
295 /* Collect book/article information RTF sequence */
collectinfo(char * text,size_t leng)296 void collectinfo(char *text, size_t leng)
297 {
298 assert(pinfobuf!=NULL);
299 if(pinfobuf->length+leng>=MY_BUFFER_LIMIT) /* Consider terminator '\0' */
300 {
301 warnx("*** Too long text for title or author !!! ***");
302 warnx("*** Buffer Overflow Attack To Be Considered !!! ***");
303 return; /* Information item buffer is full. */
304 }
305 memcpy(pinfobuf->text+pinfobuf->length,text,leng);
306 pinfobuf->length+=leng;
307 pinfobuf->text[pinfobuf->length]=0;
308 }
309
/*
 * Identify a RTF control word.
 *
 * text: the scanned token; only its first "leng" bytes are examined.
 * leng: length of the token in bytes; may be 0.
 * key:  NUL-terminated control word to compare with, e.g. "\\page".
 *
 * A single trailing space in the token is treated as the delimiter of
 * the control word and is not part of the comparison.
 *
 * Returns 1 if the token is exactly "key", 0 otherwise.
 */
int
identifyctrlword(const char *text, size_t leng, const char *key)
{
	/* Guard leng == 0 before looking at the last byte. */
	if (leng > 0 && text[leng - 1] == ' ')
		leng--;	/* Tailed by a space as delimiter */

	return strlen(key) == leng && memcmp(text, key, leng) == 0;
}
321
322 /*
323 * Output fetch book/article information.
324 * See also the section "Information Group" in RTF specification.
325 */
outputinfo()326 void outputinfo()
327 {
328 time_t t;
329 char buf[128];
330
331 printf("{\\info\\uc0");
332
333 printf("{\\title %s}{\\author %s}",
334 infobuf[INFO_TITLE].text,infobuf[INFO_AUTHOR].text);
335
336 time(&t);
337 strftime(buf,sizeof(buf),"\\yr%Y\\mo%m\\dy%d\\hr%H\\min%M\\sec%S",localtime(&t));
338 printf("{\\creatim%s}",buf);
339
340 printf("}");
341 }
342
haltfetch()343 void haltfetch()
344 {
345 warnx("Title: %s",infobuf[INFO_TITLE].text);
346 warnx("Author: %s",infobuf[INFO_AUTHOR].text);
347 outputinfo();
348 flushmybuffer();
349 }
350
351 %}
352
%option noyywrap

/*
 * "fetchinfo" is an inclusive start condition: rules without a start
 * condition prefix stay active while it is in effect.
 */
%s fetchinfo

/*
 * pnglink:       a whole field group holding INCLUDEPICTURE "file".
 *                The quoted name stops at the first closing quote, as
 *                embedpng() does; a greedy ".+" could run on to a later
 *                quote on the same line and swallow several groups.
 * sjischarset:   font charset control word for Shift-JIS.
 * stylesheet:    opening of the style sheet group.
 * titlebegin:    paragraph start followed by \fs49 (title font size).
 * authorbegin:   paragraph start followed by \fs34 (author font size).
 * rtfhexvalue:   \'hh hexadecimal character.
 * rtfctrlword:   control word with optional numeric parameter and
 *                optional delimiting space.
 * rtfctrlsymbol: backslash followed by a non-lowercase-letter.
 */
pnglink		\{\\field[^{}]*\{[^{}]*INCLUDEPICTURE[^{}]*\"[^\"\n]+\"[^{}]*\}\{[^{}]*\}[^{}]*\}
sjischarset	\\fcharset128
stylesheet	\{\\stylesheet[ ]?
titlebegin	\\pard.{1,25}\\fs49[ ]?
authorbegin	\\pard.{1,25}\\fs34[ ]?
rtfhexvalue	\\\'[0-9A-Fa-f]{2}
rtfctrlword	\\[a-z]+([-]?[0-9]+)?[ ]?
rtfctrlsymbol	\\[^a-z]
365
366 %%
367
{pnglink}	{
			/*
			 * Substitute RTF \pict group for RTF field group.
			 * An example generated by Jade/OpenJade:
			 * {\field\flddirty{\*\fldinst INCLUDEPICTURE "sockets/layers.png" }{\fldrslt }}
			 */
			if (!embedpng_enable) {
				ECHO;
			} else {
				embedpng(yytext);
			}
		}

{sjischarset}	{
			/*
			 * Jade/OpenJade mis-mark Chinese as Shift-JIS encoded Japanese.
			 * This may cause RTF viewer to display Chinese with Japanese font.
			 */
			if (encoding == ENCODING_UNKNOWN) {
				ECHO;
			} else {
				modifycharset(yytext);
			}
		}

{stylesheet}	{
			/* Insert book/article information just before style sheet. */
			if (!fetchinfo_enable) {
				ECHO;
			} else {
				/* Begin fetching book/article information. */
				initmybuffer();
				BEGIN(fetchinfo);
				fetchinfo_enable = 0;	/* FALSE, one-off */
				ADDTOBUF;
			}
		}
399
<fetchinfo>{titlebegin}	{
			/* Beginning of title, hacked by font size. */
			ADDTOBUF;
			pinfobuf = &(infobuf[INFO_TITLE]);
			/* A repeated title is appended, separated by ", ". */
			if (pinfobuf->length > 0)
				collectinfo(", ", 2);
		}

<fetchinfo>{authorbegin}	{
			/* Beginning of author, hacked by font size. */
			ADDTOBUF;
			pinfobuf = &(infobuf[INFO_AUTHOR]);
			/* A repeated author is appended, separated by ", ". */
			if (pinfobuf->length > 0)
				collectinfo(", ", 2);
		}

<fetchinfo>{rtfhexvalue}	{
			/* A hexadecimal value, ignore. */
			ADDTOBUF;
		}

<fetchinfo>\\~	{
			/* Nonbreaking space, a control symbol, collect */
			ADDTOBUF;
			if (pinfobuf != NULL)
				collectinfo(" ", 1);
		}

<fetchinfo>\\[-_]	{
			/* Optional/nonbreaking hyphen, a control symbol, collect */
			ADDTOBUF;
			if (pinfobuf != NULL)
				collectinfo("-", 1);
		}

<fetchinfo>{rtfctrlsymbol}	{
			/* Other control symbols, ignore */
			ADDTOBUF;
		}

<fetchinfo>{rtfctrlword}	{
			/* Control word */
			ADDTOBUF;

			if (identifyctrlword(yytext, yyleng, "\\keepn")) {
				/* End of title or author, actually a hack */
				pinfobuf = NULL;
			} else if (yytext[0] == '\\' && yytext[1] == 'u' &&
			    ((yytext[2] >= '0' && yytext[2] <= '9') || yytext[2] == '-')) {
				/* Unicode Character, collect */
				if (pinfobuf != NULL) {
					collectinfo(yytext, yyleng);
					/* Keep a delimiter after the control word. */
					if (yytext[yyleng - 1] != ' ')
						collectinfo(" ", 1);
				}
			} else if (identifyctrlword(yytext, yyleng, "\\page")) {
				/* Accomplished !!! */
				haltfetch();
				BEGIN(0);
			}
		}

<fetchinfo>[\n{}]	{
			/* Ignore */
			ADDTOBUF;
		}

<fetchinfo>.	{
			/* Collect */
			ADDTOBUF;
			if (pinfobuf != NULL)
				collectinfo(yytext, yyleng);
		}

<fetchinfo><<EOF>>	{
			/*
			 * Input ended before "\page" was seen: emit the
			 * information and the cached stream, which would
			 * otherwise be lost.
			 */
			haltfetch();
			BEGIN(0);
			yyterminate();
		}
461
462 %%
463
/* Print the command line synopsis and option summary to standard error. */
void printusage()
{
	/* The text contains no conversion specifications. */
	static const char usage[] =
	    "Usage: fixrtf [-e encoding] [-i] [-p] < inputfile > outputfile\n"
	    " Fix RTF file generated by Jade/OpenJade.\n"
	    "Options:\n"
	    " -e encoding\n"
	    " Specify encoding to do specific fixing. (GB2312|BIG5)\n"
	    " -i\n"
	    " Fill RTF file information, such as title and author,\n"
	    " hacked from RTF file generated by Jade/OpenJade.\n"
	    " -p\n"
	    " Embed linked PNG images into RTF file.\n";

	fputs(usage, stderr);
}
478
479 int
480 main(int argc, char *argv[])
481 {
482 int ch;
483
484 if(argc<=1)
485 {
486 warnx("You should indicate at least one kind of fixing.");
487 printusage();
488 return 1;
489 }
490
491 while ((ch = getopt(argc, argv, "e:ip")) != -1)
492 {
493 switch (ch)
494 {
495 case 'e':
496 if(strcasecmp(optarg,"GB2312")==0 ||
497 strcasecmp(optarg,"GBK")==0)
498 {
499 encoding=ENCODING_GB2312;
500 }
501 else if(strcasecmp(optarg,"GB18030")==0)
502 {
503 encoding=ENCODING_GB18030;
504 }
505 else if(strcasecmp(optarg,"BIG5")==0)
506 {
507 encoding=ENCODING_BIG5;
508 }
509 break;
510 case 'i':
511 fetchinfo_enable=1; /* One-off */
512 break;
513 case 'p':
514 embedpng_enable=1;
515 break;
516 default:
517 printusage();
518 return 1;
519 break;
520 }
521 }
522
523 yylex();
524
525 return 0;
526 }
527