1 %{
2 /*-
3  * Copyright (c) 2005, 2006 intron <intron@intron.ac>.  All rights reserved.
4  * Copyright (c) 2005, 2006 The FreeBSD Simplified Chinese Project.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The FreeBSD Simplified
8  * Chinese Project by intron.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  * From CNPROJ: doc/zh_CN.GB2312/share/mk/fixrtf.lex,v 1.1.1000.20 2006/02/19 10:21:40 intron Exp
32  */
33 
34 #include <sys/cdefs.h>
35 
36 #include <err.h>
37 #include <assert.h>
38 #include <stdio.h>
39 #include <string.h>
40 #include <sys/param.h>
41 #include <stdlib.h>
42 #include <unistd.h>
43 #include <time.h>
44 #include <png.h>
45 
46 /*
47  * This program is used to fix RTF:
48  * 1. Embed PNGs into RTF.
 * 2. Embed FreeBSD-specific information into RTF, such as organization name
 *    and build time. Unfortunately, so far only Microsoft Word can read it.
 *    Microsoft Word Viewer and OpenOffice cannot even read this kind of
 *    information from RTF files created by Microsoft Word or OpenOffice
 *    themselves. (Option: -i)
54  * 3. Do some locale-specific fixing. (Option: -e <encoding>)
55  *
56  * See also Rich Text Format (RTF) Specification:
57  * 1. Version 1.8 (Microsoft Word 2003)
58  *    http://www.microsoft.com/downloads/details.aspx?familyid=ac57de32-17f0-4b46-9e4e-467ef9bc5540&displaylang=en
59  * 2. Version 1.7 (Microsoft Word 2002)
60  *    http://support.microsoft.com/kb/q86999/
61  * 3. Version 1.6 (Microsoft Word 2000)
62  *    http://msdn.microsoft.com/library/en-us/dnrtfspec/html/rtfspec.asp
63  */
64 
65 
/* Non-zero when linked PNG images are to be embedded (set by option -p). */
int embedpng_enable = 0;

/* See also http://msdn.microsoft.com/library/en-us/intl/unicode_81rn.asp */
#define	ENCODING_UNKNOWN	0
#define	ENCODING_GB2312		936
#define	ENCODING_GB18030	54936
#define	ENCODING_BIG5		950

/* Encoding selected by option -e; ENCODING_UNKNOWN when none was given. */
int encoding = ENCODING_UNKNOWN;

/*
 * Non-zero when book/article information is to be fetched (option -i).
 * The scanner clears it once fetching has started, so it is one-off.
 */
int fetchinfo_enable = 0; /* FALSE */

#define MY_BUFFER_SIZE		3072
#define MY_BUFFER_LIMIT		2048

/*
 * MY_BUFFER_LIMIT is smaller than MY_BUFFER_SIZE, which leaves some
 * headroom between the fill limit and the real size of the arrays.
 */

/*
 * "mybuffer" caches the RTF stream while book/article information is
 * being fetched. It holds "mybufferlength" bytes and is not terminated
 * by '\0'.
 */
size_t mybufferlength = 0;
char mybuffer[MY_BUFFER_SIZE];

/* Indexes into "infobuf". */
#define	INFO_TITLE	0
#define	INFO_AUTHOR	1

/*
 * Fetched book/article information, one entry per INFO_* index.
 * "pinfobuf" points at the entry currently being collected, or is NULL
 * when no entry is being collected.
 */
struct
{
	size_t length;
	char text[MY_BUFFER_SIZE];
} infobuf[] =
{
	{ 0, "" },
	{ 0, "" }
}, *pinfobuf = NULL;
106 
107 /*
108  * See also the section "Pictures" in RTF specification.
109  */
110 void
embedpng(char * field)111 embedpng(char *field)
112 {
113 	char *p1,*p2,fn[PATH_MAX];
114 	unsigned char buf[256];
115 	FILE *fp;
116 	int l,i,nret;
117 	png_structp png_ptr;
118 	png_infop info_ptr,end_info;
119 	png_uint_32 width,height;
120 
121 	p1=strcasestr(field,"INCLUDEPICTURE");
122 	p1=strchr(p1+14,'"'); /* String after "INCLUDEPICTURE" */
123 	p2=strchr(p1+1,'"');
124 	l=p2-(p1+1); /* Substantial length of file name */
125 	if(l>sizeof(fn)-1)
126 	{
127 		warnx("*** Buffer Overflow Attack Detected !!! ***");
128 		exit(1);
129 	}
130 	memcpy(fn,p1+1,l);
131 	fn[l]=0;
132 
133 	if(l<4) /* It should be longer than ".png". */
134 	{
135 		warnx("File name '%s' is too short!",fn);
136 		goto embedpng_exit_1;
137 	}
138 
139 	if(strcasecmp(fn+(l-4),".png")!=0)
140 	{
141 		warnx("File name '%s' has not a suffix '.png'. Keep untouched.",fn);
142 		goto embedpng_exit_1;
143 	}
144 
145 	if((fp=fopen(fn,"rb"))==NULL)
146 	{
147 		warnx("Failed to open '%s'!",fn);
148 		goto embedpng_exit_1;
149 	}
150 
151 	fread(buf,1,8,fp);
152 	if (png_sig_cmp(buf,0,8))
153 	{
154 		warnx("The file '%s' is NOT in PNG format!",fn);
155 		goto embedpng_exit_2;
156 	}
157 	png_ptr=png_create_read_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
158 	if (!png_ptr)
159 	{
160 		warnx("Unable to create PNG read struct(*png_ptr)!");
161 		goto embedpng_exit_2;
162 	}
163 	info_ptr=png_create_info_struct(png_ptr);
164 	if (!info_ptr)
165 	{
166 		warnx("Unable to create PNG info struct(*info_ptr)!");
167 		png_destroy_read_struct(&png_ptr,(png_infopp)NULL,(png_infopp)NULL);
168 		goto embedpng_exit_2;
169 	}
170 	end_info=png_create_info_struct(png_ptr);
171 	if(!end_info)
172 	{
173 		warnx("Unable to create PNG info struct(*end_info)!");
174         	png_destroy_read_struct(&png_ptr,&info_ptr,(png_infopp)NULL);
175 		goto embedpng_exit_2;
176 	}
177 	if (setjmp(png_jmpbuf(png_ptr)))
178 	{
179 		warnx("LibPNG crashed!");
180 		png_destroy_read_struct(&png_ptr,&info_ptr,&end_info);
181 		goto embedpng_exit_2;
182 	}
183 	rewind(fp);
184 	png_init_io(png_ptr,fp);
185 	png_read_info(png_ptr,info_ptr);
186 	width=png_get_image_width(png_ptr,info_ptr);
187 	height=png_get_image_height(png_ptr,info_ptr);
188 
189 	if(width>1024 || height>768) warnx("Picture is too large!");
190 
191 	/*
192 	 * According to Microsoft's RTF specification, \picwN and \pichN is
193 	 * mandatory for \pict group. Actually, in both Microsoft Word Viewer
194 	 * and OpenOffice, these two control words take no effect for PNG.
195 	 */
196 	printf("{\\pict\\pngblip\\picscalex100\\picscaley100\\picw%u\\pich%u",
197 		(unsigned int)width,(unsigned int)height);
198 
199 	rewind(fp);
200 	while((nret=fread(buf,1,64,fp))>0)
201 	{
202 		printf("\n");
203 		for(i=0;i<nret;i++)
204 			printf("%02x",(unsigned int)((unsigned char)buf[i]));
205 	}
206 
207 	printf("}");
208 
209 	warnx("'%s' (%ux%u) embedded.",fn,(unsigned int)width,(unsigned int)height);
210 
211 	png_destroy_read_struct(&png_ptr,&info_ptr,&end_info);
212 	fclose(fp);
213 	goto embedpng_exit_0;
214 
215 embedpng_exit_2:;
216 	fclose(fp);
217 embedpng_exit_1:;
218 	printf("%s",field); /* Keep link in RTF untouched */
219 embedpng_exit_0:;
220 	return;
221 }
222 
223 /*
224  * See also the section "Font Table" in RTF specification.
225  */
226 void
modifycharset(char * fcharset)227 modifycharset(char *fcharset)
228 {
229 	char *s;
230 
231 	switch(encoding)
232 	{
233 	case ENCODING_GB2312:
234 	case ENCODING_GB18030: /* GB18030 is not supported in RTF so far */
235 		s="\\fcharset134";
236 		break;
237 	case ENCODING_BIG5:
238 		s="\\fcharset136";
239 		break;
240 	default:
241 		s="\\fcharset1"; /* "Default" */
242 		break;
243 	}
244 
245 	printf("%s",s);
246 
247 	warnx("Charset control word modified: %s -> %s",fcharset,s);
248 
249 	return;
250 }
251 
252 /*
253  * (init|addto|flush)mybuffer maintain buffer to cache RTF stream
254  * while fetching book/article information.
255  */
initmybuffer()256 void initmybuffer()
257 {
258 	int i;
259 
260 	mybufferlength=0;
261 	for(i=0;i<sizeof(infobuf)/sizeof(infobuf[0]);i++)
262 	{
263 		infobuf[i].length=0;
264 		infobuf[i].text[0]=0;
265 	}
266 }
267 
addtomybuffer(char * text,size_t leng)268 int addtomybuffer(char *text, size_t leng)
269 {
270 	if(mybufferlength+leng>MY_BUFFER_LIMIT) return -1;
271 	/* warnx("_%s_",yytext); */
272 	memcpy(mybuffer+mybufferlength,text,leng);
273 	mybufferlength+=leng; /* No terminator '\0' */
274 	return 0;
275 }
276 
flushmybuffer()277 void flushmybuffer()
278 {
279 	fwrite(mybuffer,1,mybufferlength,yyout);
280 	mybufferlength=0;
281 }
282 
/*
 * Cache the current token (yytext, yyleng) in "mybuffer". Only usable
 * inside a scanner action.
 *
 * When the cache is full, fetching is abandoned: the information
 * gathered so far and the cache are written out, the current token is
 * echoed, the scanner returns to its initial start condition, and
 * YY_BREAK ends the action so that the statements following ADDTOBUF
 * are skipped.
 *
 * This is deliberately a plain brace block and not do { } while (0):
 * YY_BREAK (by default a "break" in flex-generated scanners) has to
 * leave the action, not a loop local to the macro.
 */
#define	ADDTOBUF { \
	if (addtomybuffer(yytext, yyleng) != 0) \
	{ \
		haltfetch(); \
		ECHO; \
		BEGIN(0); \
		warnx("Had been fetching book/article information until buffer was full!"); \
		YY_BREAK; \
	} \
}
293 
294 
295 /* Collect book/article information RTF sequence */
collectinfo(char * text,size_t leng)296 void collectinfo(char *text, size_t leng)
297 {
298 	assert(pinfobuf!=NULL);
299 	if(pinfobuf->length+leng>=MY_BUFFER_LIMIT) /* Consider terminator '\0' */
300 	{
301 		warnx("*** Too long text for title or author !!! ***");
302 		warnx("*** Buffer Overflow Attack To Be Considered !!! ***");
303 		return; /* Information item buffer is full. */
304 	}
305 	memcpy(pinfobuf->text+pinfobuf->length,text,leng);
306 	pinfobuf->length+=leng;
307 	pinfobuf->text[pinfobuf->length]=0;
308 }
309 
/*
 * Identify a RTF control word.
 *
 * "text" holds "leng" bytes of a scanned control word, which may be
 * tailed by one space as delimiter. "key" is the NUL-terminated control
 * word to compare with, without any delimiter.
 *
 * Returns 1 when "text", less the optional tailing space, is exactly
 * "key", and 0 otherwise. An empty "text" never matches.
 */
int identifyctrlword(const char *text, size_t leng, const char *key)
{
	if (leng == 0)
		return 0; /* Nothing to look at; text[leng-1] would be out of bounds. */

	if (text[leng - 1] == ' ')
		leng--; /* Tailed by a space as delimiter */

	/* Compare exactly "leng" bytes instead of relying on a terminator. */
	return strlen(key) == leng && memcmp(text, key, leng) == 0;
}
321 
322 /*
323  * Output fetch book/article information.
324  * See also the section "Information Group" in RTF specification.
325  */
outputinfo()326 void outputinfo()
327 {
328 	time_t t;
329 	char buf[128];
330 
331 	printf("{\\info\\uc0");
332 
333 	printf("{\\title %s}{\\author %s}",
334 		infobuf[INFO_TITLE].text,infobuf[INFO_AUTHOR].text);
335 
336 	time(&t);
337 	strftime(buf,sizeof(buf),"\\yr%Y\\mo%m\\dy%d\\hr%H\\min%M\\sec%S",localtime(&t));
338 	printf("{\\creatim%s}",buf);
339 
340 	printf("}");
341 }
342 
haltfetch()343 void haltfetch()
344 {
345 	warnx("Title: %s",infobuf[INFO_TITLE].text);
346 	warnx("Author: %s",infobuf[INFO_AUTHOR].text);
347 	outputinfo();
348 	flushmybuffer();
349 }
350 
351 %}
352 
%option noyywrap

%s	fetchinfo

/*
 * pnglink        A whole field group holding an INCLUDEPICTURE
 *                instruction with a quoted file name. The file name may
 *                not contain a double quote, so that one match cannot
 *                run from the opening quote of one field to the closing
 *                quote of a later field on the same line.
 * sjischarset    The font charset control word \fcharset128.
 * stylesheet     Beginning of the style sheet group.
 * titlebegin     \pard, 1 to 25 characters, then font size \fs49.
 * authorbegin    \pard, 1 to 25 characters, then font size \fs34.
 * rtfhexvalue    A hexadecimal value: \' and two hexadecimal digits.
 * rtfctrlword    A control word: lower-case letters, an optional
 *                (possibly negative) number and an optional tailing
 *                space.
 * rtfctrlsymbol  A control symbol: a backslash and one character other
 *                than a lower-case letter.
 */
pnglink		\{\\field[^{}]*\{[^{}]*INCLUDEPICTURE[^{}]*\"[^\"\n]+\"[^{}]*\}\{[^{}]*\}[^{}]*\}
sjischarset	\\fcharset128
stylesheet	\{\\stylesheet[ ]?
titlebegin	\\pard.{1,25}\\fs49[ ]?
authorbegin	\\pard.{1,25}\\fs34[ ]?
rtfhexvalue	\\\'[0-9A-Fa-f]{2}
rtfctrlword	\\[a-z]+([-]?[0-9]+)?[ ]?
rtfctrlsymbol	\\[^a-z]
365 
366 %%
367 
{pnglink}  {
		/*
		 * Substitute RTF \pict group for RTF field group.
		 * An example generated by Jade/OpenJade:
		 * {\field\flddirty{\*\fldinst INCLUDEPICTURE "sockets/layers.png" }{\fldrslt }}
		 */
		if (!embedpng_enable)
		{
			ECHO;
		}
		else
		{
			embedpng(yytext);
		}
	   }

{sjischarset}  {
		/*
		 * Jade/OpenJade mis-mark Chinese as Shift-JIS encoded Japanese.
		 * This may cause RTF viewer to display Chinese with Japanese font.
		 */
		if (encoding == ENCODING_UNKNOWN)
		{
			ECHO;
		}
		else
		{
			modifycharset(yytext);
		}
	   }

{stylesheet}  {
		/* Insert book/article information just before style sheet. */
		if (!fetchinfo_enable)
		{
			ECHO;
		}
		else
		{
			/* Begin fetching book/article information. */
			initmybuffer();
			BEGIN(fetchinfo);
			fetchinfo_enable = 0; /* FALSE, one-off */
			ADDTOBUF;
		}
	   }

<fetchinfo>{titlebegin}  {
		/* Beginning of title, hacked by font size. */
		ADDTOBUF;
		pinfobuf = &infobuf[INFO_TITLE];
		if (pinfobuf->length > 0)
		{
			/* Not the first title found: separate it from the previous one. */
			collectinfo(", ", 2);
		}
	   }

<fetchinfo>{authorbegin}  {
		/* Beginning of author, hacked by font size. */
		ADDTOBUF;
		pinfobuf = &infobuf[INFO_AUTHOR];
		if (pinfobuf->length > 0)
		{
			/* Not the first author found: separate it from the previous one. */
			collectinfo(", ", 2);
		}
	   }

<fetchinfo>{rtfhexvalue}  {
		/* A hexadecimal value: cached, but not collected. */
		ADDTOBUF;
	   }

<fetchinfo>\\~  {
		/* Nonbreaking space, a control symbol: collected as a space. */
		ADDTOBUF;
		if (pinfobuf != NULL)
		{
			collectinfo(" ", 1);
		}
	   }

<fetchinfo>\\[-_]  {
		/* Optional/nonbreaking hyphen, a control symbol: collected as "-". */
		ADDTOBUF;
		if (pinfobuf != NULL)
		{
			collectinfo("-", 1);
		}
	   }

<fetchinfo>{rtfctrlsymbol}  {
		/* Other control symbols: cached, but not collected. */
		ADDTOBUF;
	   }

<fetchinfo>{rtfctrlword}  {
		/* Control word */
		ADDTOBUF;

		if (identifyctrlword(yytext, yyleng, "\\keepn"))
		{
			/* End of title or author, actually a hack */
			pinfobuf = NULL;
		}
		else if (yytext[0] == '\\' && yytext[1] == 'u' &&
			(yytext[2] == '-' || (yytext[2] >= '0' && yytext[2] <= '9')))
		{
			/* Unicode character: collected, always tailed by a space. */
			if (pinfobuf != NULL)
			{
				collectinfo(yytext, yyleng);
				if (yytext[yyleng - 1] != ' ')
				{
					collectinfo(" ", 1);
				}
			}
		}
		else if (identifyctrlword(yytext, yyleng, "\\page"))
		{
			/* Accomplished: stop fetching at the first \page. */
			haltfetch();
			BEGIN(0);
		}
	   }

<fetchinfo>[\n{}]  {
		/* Newlines and braces: cached, but not collected. */
		ADDTOBUF;
	   }

<fetchinfo>.  {
		/* Any other character: collected while a title or author is open. */
		ADDTOBUF;
		if (pinfobuf != NULL)
		{
			collectinfo(yytext, yyleng);
		}
	   }
461 
462 %%
463 
/*
 * Print the command line synopsis to standard error. The list of
 * encodings names every value that option -e accepts.
 */
void printusage()
{
	fputs(	"Usage: fixrtf [-e encoding] [-i] [-p] < inputfile > outputfile\n"
		"     Fix RTF file generated by Jade/OpenJade.\n"
		"Options:\n"
		"       -e encoding\n"
		"             Specify encoding to do specific fixing. (GB2312|GBK|GB18030|BIG5)\n"
		"       -i\n"
		"             Fill RTF file information, such as title and author,\n"
		"             hacked from RTF file generated by Jade/OpenJade.\n"
		"       -p\n"
		"             Embed linked PNG images into RTF file.\n",
		stderr);
}
478 
479 int
480 main(int argc, char *argv[])
481 {
482 	int ch;
483 
484 	if(argc<=1)
485 	{
486 		warnx("You should indicate at least one kind of fixing.");
487 		printusage();
488 		return 1;
489 	}
490 
491 	while ((ch = getopt(argc, argv, "e:ip")) != -1)
492 	{
493 		switch (ch)
494 		{
495 		case 'e':
496 			if(strcasecmp(optarg,"GB2312")==0 ||
497 				strcasecmp(optarg,"GBK")==0)
498 			{
499 				encoding=ENCODING_GB2312;
500 			}
501 			else if(strcasecmp(optarg,"GB18030")==0)
502 			{
503 				encoding=ENCODING_GB18030;
504 			}
505 			else if(strcasecmp(optarg,"BIG5")==0)
506 			{
507 				encoding=ENCODING_BIG5;
508 			}
509 			break;
510 		case 'i':
511 			fetchinfo_enable=1; /* One-off */
512 			break;
513 		case 'p':
514 			embedpng_enable=1;
515 			break;
516 		default:
517 			printusage();
518 			return 1;
519 			break;
520 		}
521 	}
522 
523 	yylex();
524 
525 	return 0;
526 }
527