1 /*
2  *   Common text filter routines for CUPS.
3  *
4  *   Copyright 2007-2011 by Apple Inc.
5  *   Copyright 1997-2007 by Easy Software Products.
6  *
7  *   These coded instructions, statements, and computer programs are the
8  *   property of Apple Inc. and are protected by Federal copyright
9  *   law.  Distribution and use rights are outlined in the file "COPYING"
10  *   which should have been included with this file.
11  *
12  * Contents:
13  *
14  *   TextMain()         - Standard main entry for text filters.
15  *   compare_keywords() - Compare two C/C++ keywords.
16  *   getutf8()          - Get a UTF-8 encoded wide character...
17  */
18 
19 /*
20  * Include necessary headers...
21  */
22 
23 #include "textcommon.h"
24 #include <limits.h>
25 
26 
27 /*
28  * Globals...
29  */
30 
31 int	WrapLines = 1,		/* Wrap text in lines */
32 	SizeLines = 60,		/* Number of lines on a page */
33 	SizeColumns = 80,	/* Number of columns on a line */
34 	PageColumns = 1,	/* Number of columns on a page */
35 	ColumnGutter = 0,	/* Number of characters between text columns */
36 	ColumnWidth = 80,	/* Width of each column */
37 	PrettyPrint = 0,	/* Do pretty code formatting */
38 	Copies = 1;		/* Number of copies */
39 lchar_t	**Page = NULL;		/* Page characters */
40 int	NumPages = 0;		/* Number of pages in document */
41 float	CharsPerInch = 10;	/* Number of character columns per inch */
42 float	LinesPerInch = 6;	/* Number of lines per inch */
43 int	NumKeywords = 0;	/* Number of known keywords */
44 char	**Keywords = NULL;	/* List of known keywords */
45 
46 
47 /*
48  * Local globals...
49  */
50 
/*
 * Keyword tables used for pretty printing.
 *
 * Each table is searched with bsearch() using strcmp() ordering, so the
 * entries MUST stay sorted in strcmp() order and every entry needs its own
 * trailing comma - two adjacent string literals without a comma are silently
 * concatenated into a single (useless) entry.
 */

static char *code_keywords[] =	/* List of known C/C++ keywords... */
	{
	  "and",
	  "and_eq",
	  "asm",
	  "auto",
	  "bitand",
	  "bitor",
	  "bool",
	  "break",
	  "case",
	  "catch",
	  "char",
	  "class",
	  "compl",
	  "const",
	  "const_cast",
	  "continue",
	  "default",
	  "delete",
	  "do",
	  "double",
	  "dynamic_cast",
	  "else",
	  "enum",
	  "explicit",
	  "extern",
	  "false",
	  "float",
	  "for",
	  "friend",
	  "goto",
	  "if",
	  "inline",
	  "int",
	  "long",
	  "mutable",
	  "namespace",
	  "new",
	  "not",
	  "not_eq",
	  "operator",
	  "or",
	  "or_eq",
	  "private",
	  "protected",
	  "public",
	  "register",
	  "reinterpret_cast",
	  "return",
	  "short",
	  "signed",
	  "sizeof",
	  "static",
	  "static_cast",
	  "struct",
	  "switch",
	  "template",
	  "this",
	  "throw",
	  "true",
	  "try",
	  "typedef",
	  "typename",
	  "union",
	  "unsigned",
	  "virtual",
	  "void",
	  "volatile",
	  "while",
	  "xor",
	  "xor_eq"
	},
	*sh_keywords[] =	/* List of known Bourne/Korn/zsh/bash keywords... */
	{
	  "alias",
	  "bg",
	  "break",
	  "case",
	  "cd",
	  "command",
	  "continue",
	  "do",
	  "done",
	  "echo",
	  "elif",
	  "else",
	  "esac",
	  "eval",
	  "exec",
	  "exit",
	  "export",
	  "fc",
	  "fg",
	  "fi",
	  "for",
	  "function",
	  "getopts",
	  "if",
	  "in",
	  "jobs",
	  "kill",
	  "let",
	  "limit",
	  "newgrp",
	  "print",
	  "pwd",
	  "read",
	  "readonly",
	  "return",
	  "select",
	  "set",
	  "shift",
	  "test",
	  "then",
	  "time",
	  "times",
	  "trap",
	  "typeset",
	  "ulimit",
	  "umask",
	  "unalias",
	  "unlimit",
	  "unset",
	  "until",
	  "wait",
	  "whence",
	  "while"
	},
	*csh_keywords[] =	/* List of known csh/tcsh keywords... */
	{
	  "alias",
	  "aliases",
	  "bg",
	  "bindkey",
	  "break",
	  "breaksw",
	  "builtins",
	  "case",
	  "cd",
	  "chdir",
	  "complete",
	  "continue",
	  "default",
	  "dirs",
	  "echo",
	  "echotc",
	  "else",
	  "end",
	  "endif",
	  "eval",
	  "exec",
	  "exit",
	  "fg",
	  "foreach",
	  "glob",
	  "goto",
	  "history",
	  "if",
	  "jobs",
	  "kill",
	  "limit",
	  "login",
	  "logout",
	  "ls",
	  "nice",
	  "nohup",
	  "notify",
	  "onintr",
	  "popd",
	  "pushd",
	  "pwd",
	  "rehash",
	  "repeat",
	  "set",
	  "setenv",
	  "settc",
	  "shift",
	  "source",
	  "stop",
	  "suspend",
	  "switch",
	  "telltc",
	  "then",
	  "time",
	  "umask",
	  "unalias",
	  "unbindkey",
	  "unhash",
	  "unlimit",
	  "unset",
	  "unsetenv",
	  "wait",
	  "where",
	  "which",
	  "while"
	},
	*perl_keywords[] =	/* List of known perl keywords... */
	{
	  "abs",
	  "accept",
	  "alarm",
	  "and",
	  "atan2",
	  "bind",
	  "binmode",
	  "bless",
	  "caller",
	  "chdir",
	  "chmod",
	  "chomp",
	  "chop",
	  "chown",
	  "chr",
	  "chroot",
	  "close",
	  "closedir",
	  "connect",
	  "continue",
	  "cos",
	  "crypt",
	  "dbmclose",
	  "dbmopen",
	  "defined",
	  "delete",
	  "die",
	  "do",
	  "dump",
	  "each",
	  "else",
	  "elsif",
	  "endgrent",
	  "endhostent",
	  "endnetent",
	  "endprotoent",
	  "endpwent",
	  "endservent",
	  "eof",
	  "eval",
	  "exec",
	  "exists",
	  "exit",
	  "exp",
	  "fcntl",
	  "fileno",
	  "flock",
	  "for",
	  "foreach",
	  "fork",
	  "format",
	  "formline",
	  "getc",
	  "getgrent",
	  "getgrgid",
	  "getgrnam",
	  "gethostbyaddr",
	  "gethostbyname",
	  "gethostent",
	  "getlogin",
	  "getnetbyaddr",
	  "getnetbyname",
	  "getnetent",
	  "getpeername",
	  "getpgrp",
	  "getppid",
	  "getpriority",
	  "getprotobyname",
	  "getprotobynumber",
	  "getprotoent",
	  "getpwent",
	  "getpwnam",
	  "getpwuid",
	  "getservbyname",
	  "getservbyport",
	  "getservent",
	  "getsockname",
	  "getsockopt",
	  "glob",
	  "gmtime",
	  "goto",
	  "grep",
	  "hex",
	  "if",
	  "import",
	  "index",
	  "int",
	  "ioctl",
	  "join",
	  "keys",
	  "kill",
	  "last",
	  "lc",
	  "lcfirst",
	  "length",
	  "link",
	  "listen",
	  "local",
	  "localtime",
	  "log",
	  "lstat",
	  "map",
	  "mkdir",
	  "msgctl",
	  "msgget",
	  "msgrcv",
	  "msgsnd",
	  "my",
	  "next",
	  "no",
	  "not",
	  "oct",
	  "open",
	  "opendir",
	  "or",
	  "ord",
	  "pack",
	  "package",
	  "pipe",
	  "pop",
	  "pos",
	  "print",
	  "printf",
	  "push",
	  "quotemeta",
	  "rand",
	  "read",
	  "readdir",
	  "readlink",
	  "recv",
	  "redo",
	  "ref",
	  "rename",
	  "require",
	  "reset",
	  "return",
	  "reverse",
	  "rewinddir",
	  "rindex",
	  "rmdir",
	  "scalar",
	  "seek",
	  "seekdir",
	  "select",
	  "semctl",
	  "semget",
	  "semop",
	  "send",
	  "setgrent",
	  "sethostent",
	  "setnetent",
	  "setpgrp",
	  "setpriority",
	  "setprotoent",
	  "setpwent",
	  "setservent",
	  "setsockopt",
	  "shift",
	  "shmctl",
	  "shmget",
	  "shmread",
	  "shmwrite",
	  "shutdown",
	  "sin",
	  "sleep",
	  "socket",
	  "socketpair",
	  "sort",
	  "splice",
	  "split",
	  "sprintf",
	  "sqrt",
	  "srand",
	  "stat",
	  "study",
	  "sub",
	  "substr",
	  "symlink",
	  "syscall",
	  "sysread",
	  "sysseek",
	  "system",
	  "syswrite",
	  "tell",
	  "telldir",
	  "tie",
	  "tied",
	  "time",
	  "times",
	  "truncate",
	  "uc",
	  "ucfirst",
	  "umask",
	  "undef",
	  "unless",
	  "unlink",
	  "unpack",
	  "unshift",
	  "untie",
	  "until",
	  "use",
	  "utime",
	  "values",
	  "vec",
	  "wait",
	  "waitpid",
	  "wantarray",
	  "warn",
	  "while",
	  "write"
	};
462 
463 
464 /*
465  * Local functions...
466  */
467 
static int	compare_keywords(const void *k1, const void *k2);
					/* bsearch() comparison callback */
static int	getutf8(FILE *fp);	/* Read one UTF-8 character */
470 
471 
472 /*
473  * 'TextMain()' - Standard main entry for text filters.
474  */
475 
476 int				/* O - Exit status */
TextMain(const char * name,int argc,char * argv[])477 TextMain(const char *name,	/* I - Name of filter */
478          int        argc,	/* I - Number of command-line arguments */
479          char       *argv[])	/* I - Command-line arguments */
480 {
481   FILE		*fp;		/* Print file */
482   ppd_file_t	*ppd;		/* PPD file */
483   int		i,		/* Looping var */
484 		empty,		/* Is the input empty? */
485 		ch,		/* Current char from file */
486 		lastch,		/* Previous char from file */
487 		attr,		/* Current attribute */
488 		line,		/* Current line */
489   		column,		/* Current column */
490   		page_column;	/* Current page column */
491   int		num_options;	/* Number of print options */
492   cups_option_t	*options;	/* Print options */
493   const char	*val;		/* Option value */
494   char		keyword[64],	/* Keyword string */
495 		*keyptr;	/* Pointer into string */
496   int		keycol;		/* Column where keyword starts */
497   enum	{NLstyl=-1, NoCmnt, SNTXstyl}
498   		cmntState;	/* Inside a comment */
499   enum	{StrBeg=-1, NoStr, StrEnd}
500   		strState;	/* Inside a dbl-quoted string */
501 
502 
503  /*
504   * Make sure status messages are not buffered...
505   */
506 
507   setbuf(stderr, NULL);
508 
509  /*
510   * Check command-line...
511   */
512 
513   if (argc < 6 || argc > 7)
514   {
515     fprintf(stderr, "Usage: %s job-id user title copies options [file]\n",
516             name);
517     return (1);
518   }
519 
520  /*
521   * If we have 7 arguments, print the file named on the command-line.
522   * Otherwise, send stdin instead...
523   */
524 
525   if (argc == 6)
526     fp = stdin;
527   else
528   {
529    /*
530     * Try to open the print file...
531     */
532 
533     if ((fp = fopen(argv[6], "rb")) == NULL)
534     {
535       perror("DEBUG: unable to open print file - ");
536       return (1);
537     }
538   }
539 
540  /*
541   * Process command-line options and write the prolog...
542   */
543 
544   options     = NULL;
545   num_options = cupsParseOptions(argv[5], 0, &options);
546 
547   if ((val = cupsGetOption("prettyprint", num_options, options)) != NULL &&
548       strcasecmp(val, "no") && strcasecmp(val, "off") &&
549       strcasecmp(val, "false"))
550   {
551     PageLeft     = 72.0f;
552     PageRight    = PageWidth - 36.0f;
553     PageBottom   = PageBottom > 36.0f ? PageBottom : 36.0f;
554     PageTop      = PageLength - 36.0f;
555     CharsPerInch = 12;
556     LinesPerInch = 8;
557 
558     if ((val = getenv("CONTENT_TYPE")) == NULL)
559     {
560       PrettyPrint = PRETTY_PLAIN;
561       NumKeywords = 0;
562       Keywords    = NULL;
563     }
564     else if (strcasecmp(val, "application/x-cshell") == 0)
565     {
566       PrettyPrint = PRETTY_SHELL;
567       NumKeywords = sizeof(csh_keywords) / sizeof(csh_keywords[0]);
568       Keywords    = csh_keywords;
569     }
570     else if (strcasecmp(val, "application/x-csource") == 0)
571     {
572       PrettyPrint = PRETTY_CODE;
573       NumKeywords = sizeof(code_keywords) / sizeof(code_keywords[0]);
574       Keywords    = code_keywords;
575     }
576     else if (strcasecmp(val, "application/x-perl") == 0)
577     {
578       PrettyPrint = PRETTY_PERL;
579       NumKeywords = sizeof(perl_keywords) / sizeof(perl_keywords[0]);
580       Keywords    = perl_keywords;
581     }
582     else if (strcasecmp(val, "application/x-shell") == 0)
583     {
584       PrettyPrint = PRETTY_SHELL;
585       NumKeywords = sizeof(sh_keywords) / sizeof(sh_keywords[0]);
586       Keywords    = sh_keywords;
587     }
588     else
589     {
590       PrettyPrint = PRETTY_PLAIN;
591       NumKeywords = 0;
592       Keywords    = NULL;
593     }
594   }
595 
596   ppd = SetCommonOptions(num_options, options, 1);
597 
598   if ((val = cupsGetOption("wrap", num_options, options)) == NULL)
599     WrapLines = 1;
600   else
601     WrapLines = !strcasecmp(val, "true") || !strcasecmp(val, "on") ||
602                 !strcasecmp(val, "yes");
603 
604   if ((val = cupsGetOption("columns", num_options, options)) != NULL)
605   {
606     PageColumns = atoi(val);
607 
608     if (PageColumns < 1)
609     {
610       if (fp != stdin)
611         fclose(fp);
612       fprintf(stderr, "ERROR: Bad columns value %d.\n", PageColumns);
613       return (1);
614     }
615   }
616 
617   if ((val = cupsGetOption("cpi", num_options, options)) != NULL)
618   {
619     CharsPerInch = atof(val);
620 
621     if (CharsPerInch <= 0.0)
622     {
623       if (fp != stdin)
624         fclose(fp);
625       fprintf(stderr, "ERROR: Bad cpi value %f.\n", CharsPerInch);
626       return (1);
627     }
628   }
629 
630   if ((val = cupsGetOption("lpi", num_options, options)) != NULL)
631   {
632     LinesPerInch = atof(val);
633 
634     if (LinesPerInch <= 0.0)
635     {
636       if (fp != stdin)
637         fclose(fp);
638       fprintf(stderr, "ERROR: Bad lpi value %f.", LinesPerInch);
639       return (1);
640     }
641   }
642 
643   if (PrettyPrint)
644     PageTop -= 216.0f / LinesPerInch;
645 
646  /*
647   * Allocate memory for the page...
648   */
649 
650   SizeColumns = (PageRight - PageLeft) / 72.0 * CharsPerInch;
651   SizeLines   = (PageTop - PageBottom) / 72.0 * LinesPerInch;
652 
653  /*
654   * Enforce minimum size...
655   */
656   if (SizeColumns < 1)
657     SizeColumns = 1;
658   if (SizeLines < 1)
659     SizeLines = 1;
660 
661   if (SizeLines >= INT_MAX / SizeColumns / sizeof(lchar_t))
662   {
663     fprintf(stderr, "ERROR: bad page size\n");
664     exit(1);
665   }
666 
667   Page    = calloc(sizeof(lchar_t *), SizeLines);
668   if (!Page)
669   {
670     fprintf(stderr, "ERROR: cannot allocate memory for page\n");
671     exit(1);
672   }
673 
674   Page[0] = calloc(sizeof(lchar_t), SizeColumns * SizeLines);
675   if (!Page[0])
676   {
677     free(Page);
678     fprintf(stderr, "ERROR: cannot allocate memory for page\n");
679     exit(1);
680   }
681 
682   for (i = 1; i < SizeLines; i ++)
683     Page[i] = Page[0] + i * SizeColumns;
684 
685   Copies = atoi(argv[4]);
686 
687  /*
688   * Read text from the specified source and print it...
689   */
690 
691   empty        = 1;
692   lastch       = 0;
693   column       = 0;
694   line         = 0;
695   page_column  = 0;
696   attr         = 0;
697   keyptr       = keyword;
698   keycol       = 0;
699   cmntState     = NoCmnt;
700   strState      = NoStr;
701 
702   while ((ch = getutf8(fp)) >= 0)
703   {
704     if (empty)
705     {
706       /* Found the first valid character, write file header */
707       empty = 0;
708       WriteProlog(argv[3], argv[2], getenv("CLASSIFICATION"),
709 		  cupsGetOption("page-label", num_options, options), ppd);
710     }
711 
712    /*
713     * Control codes:
714     *
715     *   BS	Backspace (0x08)
716     *   HT	Horizontal tab; next 8th column (0x09)
717     *   LF	Line feed; forward full line (0x0a)
718     *   VT	Vertical tab; reverse full line (0x0b)
719     *   FF	Form feed (0x0c)
720     *   CR	Carriage return (0x0d)
721     *   ESC 7	Reverse full line (0x1b 0x37)
722     *   ESC 8	Reverse half line (0x1b 0x38)
723     *   ESC 9	Forward half line (0x1b 0x39)
724     */
725 
726     switch (ch)
727     {
728       case 0x08 :		/* BS - backspace for boldface & underline */
729           if (column > 0)
730             column --;
731 
732           keyptr = keyword;
733 	  keycol = column;
734           break;
735 
736       case 0x09 :		/* HT - tab to next 8th column */
737           if (PrettyPrint && keyptr > keyword)
738 	  {
739 	    *keyptr = '\0';
740 	    keyptr  = keyword;
741 
742 	    if (bsearch(&keyptr, Keywords, NumKeywords, sizeof(char *),
743 	                compare_keywords))
744             {
745 	     /*
746 	      * Put keywords in boldface...
747 	      */
748 
749 	      i = page_column * (ColumnWidth + ColumnGutter);
750 
751 	      while (keycol < column)
752 	      {
753 	        Page[line][keycol + i].attr |= ATTR_BOLD;
754 		keycol ++;
755 	      }
756 	    }
757 	  }
758 
759           column = (column + 8) & ~7;
760 
761           if (column >= ColumnWidth && WrapLines)
762           {			/* Wrap text to margins */
763             line ++;
764             column = 0;
765 
766             if (line >= SizeLines)
767             {
768               page_column ++;
769               line = 0;
770 
771               if (page_column >= PageColumns)
772               {
773                 WritePage();
774 		page_column = 0;
775               }
776             }
777           }
778 
779 	  keycol = column;
780 
781           attr &= ~ATTR_BOLD;
782           break;
783 
784       case 0x0d :		/* CR */
785 #ifndef __APPLE__
786          /*
787 	  * All but MacOS/Darwin treat CR as was intended by ANSI
788 	  * folks, namely to move to column 0/1.  Some programs still
789 	  * use this to do boldfacing and underlining...
790 	  */
791 
792           column = 0;
793           break;
794 #else
795          /*
796 	  * MacOS/Darwin still need to treat CR as a line ending.
797 	  */
798 
799           {
800 	    int nextch;
801             if ((nextch = getc(fp)) != 0x0a)
802 	      ungetc(nextch, fp);
803 	    else
804 	      ch = nextch;
805 	  }
806 #endif /* !__APPLE__ */
807 
808       case 0x0a :		/* LF - output current line */
809           if (PrettyPrint && keyptr > keyword)
810 	  {
811 	    *keyptr = '\0';
812 	    keyptr  = keyword;
813 
814 	    if (bsearch(&keyptr, Keywords, NumKeywords, sizeof(char *),
815 	                compare_keywords))
816             {
817 	     /*
818 	      * Put keywords in boldface...
819 	      */
820 
821 	      i = page_column * (ColumnWidth + ColumnGutter);
822 
823 	      while (keycol < column)
824 	      {
825 	        Page[line][keycol + i].attr |= ATTR_BOLD;
826 		keycol ++;
827 	      }
828 	    }
829 	  }
830 
831           line ++;
832           column = 0;
833 	  keycol = 0;
834 
835 	  if (cmntState == NLstyl)
836 	  	cmntState = NoCmnt;
837 
838           if (!cmntState && !strState)
839 	    attr &= ~(ATTR_ITALIC | ATTR_BOLD | ATTR_RED | ATTR_GREEN | ATTR_BLUE);
840 
841           if (line >= SizeLines)
842           {
843             page_column ++;
844             line = 0;
845 
846             if (page_column >= PageColumns)
847             {
848               WritePage();
849 	      page_column = 0;
850             }
851           }
852           break;
853 
854       case 0x0b :		/* VT - move up 1 line */
855           if (line > 0)
856 	    line --;
857 
858           keyptr = keyword;
859 	  keycol = column;
860 
861 	  if (cmntState == NLstyl)
862 	  	cmntState = NoCmnt;
863 
864           if (!cmntState && !strState)
865 	    attr &= ~(ATTR_ITALIC | ATTR_BOLD | ATTR_RED | ATTR_GREEN | ATTR_BLUE);
866           break;
867 
868       case 0x0c :		/* FF - eject current page... */
869           if (PrettyPrint && keyptr > keyword)
870 	  {
871 	    *keyptr = '\0';
872 	    keyptr  = keyword;
873 
874 	    if (bsearch(&keyptr, Keywords, NumKeywords, sizeof(char *),
875 	                compare_keywords))
876             {
877 	     /*
878 	      * Put keywords in boldface...
879 	      */
880 
881 	      i = page_column * (ColumnWidth + ColumnGutter);
882 
883 	      while (keycol < column)
884 	      {
885 	        Page[line][keycol + i].attr |= ATTR_BOLD;
886 		keycol ++;
887 	      }
888 	    }
889 	  }
890 
891           page_column ++;
892 	  column = 0;
893 	  keycol = 0;
894           line   = 0;
895 
896 	  if (cmntState == NLstyl)
897 	  	cmntState = NoCmnt;
898 
899           if (!cmntState && !strState)
900 	    attr &= ~(ATTR_ITALIC | ATTR_BOLD | ATTR_RED | ATTR_GREEN | ATTR_BLUE);
901 
902           if (page_column >= PageColumns)
903           {
904             WritePage();
905             page_column = 0;
906           }
907           break;
908 
909       case 0x1b :		/* Escape sequence */
910           ch = getutf8(fp);
911 	  if (ch == '7')
912 	  {
913 	   /*
914 	    * ESC 7	Reverse full line (0x1b 0x37)
915 	    */
916 
917             if (line > 0)
918 	      line --;
919 	  }
920 	  else if (ch == '8')
921 	  {
922            /*
923 	    *   ESC 8	Reverse half line (0x1b 0x38)
924 	    */
925 
926             if ((attr & ATTR_RAISED) && line > 0)
927 	    {
928 	      attr &= ~ATTR_RAISED;
929               line --;
930 	    }
931 	    else if (attr & ATTR_LOWERED)
932 	      attr &= ~ATTR_LOWERED;
933 	    else
934 	      attr |= ATTR_RAISED;
935 	  }
936 	  else if (ch == '9')
937 	  {
938            /*
939 	    *   ESC 9	Forward half line (0x1b 0x39)
940 	    */
941 
942             if ((attr & ATTR_LOWERED) && line < (SizeLines - 1))
943 	    {
944 	      attr &= ~ATTR_LOWERED;
945               line ++;
946 	    }
947 	    else if (attr & ATTR_RAISED)
948 	      attr &= ~ATTR_RAISED;
949 	    else
950 	      attr |= ATTR_LOWERED;
951 	  }
952 	  break;
953 
954       default :			/* All others... */
955           if (ch < ' ')
956             break;		/* Ignore other control chars */
957 
958           if (PrettyPrint > PRETTY_PLAIN)
959 	  {
960 	   /*
961 	    * Do highlighting of C/C++ keywords, preprocessor commands,
962 	    * and comments...
963 	    */
964 
965 	    if (ch == ' ' && (attr & ATTR_BOLD))
966 	    {
967 	     /*
968 	      * Stop bolding preprocessor command...
969 	      */
970 
971 	      attr &= ~ATTR_BOLD;
972 	    }
973 	    else if (!(isalnum(ch & 255) || ch == '_') && keyptr > keyword)
974 	    {
975 	     /*
976 	      * Look for a keyword...
977 	      */
978 
979 	      *keyptr = '\0';
980 	      keyptr  = keyword;
981 
982 	      if (bsearch(&keyptr, Keywords, NumKeywords, sizeof(char *),
983 	                  compare_keywords))
984               {
985 	       /*
986 	        * Put keywords in boldface...
987 		*/
988 
989 	        i = page_column * (ColumnWidth + ColumnGutter);
990 
991 		while (keycol < column)
992 		{
993 	          Page[line][keycol + i].attr |= ATTR_BOLD;
994 		  keycol ++;
995 		}
996 	      }
997 	    }
998 
999 	   /*
1000 	    * Look for Syntax-transition Starts...
1001 	    */
1002 	    if (!cmntState && !strState)
1003 	    {
1004 	      if ((isalnum(ch & 255) || ch == '_'))
1005 	      {
1006 	       /*
1007 	        * Add characters to the current keyword (if they'll fit).
1008 	        */
1009 
1010 	        if (keyptr == keyword)
1011 	          keycol = column;
1012 
1013 	        if (keyptr < (keyword + sizeof(keyword) - 1))
1014 	          *keyptr++ = ch;
1015 	      }
1016 	      else if (ch == '\"' && lastch != '\\')
1017 	      {
1018 	       /*
1019 	        * Start a dbl-quote string constant...
1020 	        */
1021 
1022 	        strState = StrBeg;
1023 		attr    = ATTR_BLUE;
1024 	      }
1025 	      else if (ch == '*' && lastch == '/' &&
1026 	               PrettyPrint != PRETTY_SHELL)
1027 	      {
1028 	       /*
1029 	        * Start a C-style comment...
1030 	        */
1031 
1032 	        cmntState = SNTXstyl;
1033 	        attr     = ATTR_ITALIC | ATTR_GREEN;
1034 	      }
1035 	      else if (ch == '/' && lastch == '/' &&
1036 	               PrettyPrint == PRETTY_CODE)
1037 	      {
1038 	       /*
1039 	        * Start a C++-style comment...
1040 	        */
1041 
1042 	        cmntState = NLstyl;
1043 	        attr = ATTR_ITALIC | ATTR_GREEN;
1044 	      }
1045 	      else if (ch == '#' && PrettyPrint != PRETTY_CODE)
1046 	      {
1047 	       /*
1048 	        * Start a shell-style comment...
1049 	        */
1050 
1051 	        cmntState = NLstyl;
1052 	        attr = ATTR_ITALIC | ATTR_GREEN;
1053 	      }
1054 	      else if (ch == '#' && column == 0 &&
1055 	               PrettyPrint == PRETTY_CODE)
1056 	      {
1057 	       /*
1058 	        * Start a preprocessor command...
1059 	        */
1060 
1061 	        attr = ATTR_BOLD | ATTR_RED;
1062 	      }
1063 	    }
1064           }
1065 
1066           if (column >= ColumnWidth && WrapLines)
1067           {			/* Wrap text to margins */
1068             column = 0;
1069 	    line ++;
1070 
1071             if (line >= SizeLines)
1072             {
1073               page_column ++;
1074               line = 0;
1075 
1076               if (page_column >= PageColumns)
1077               {
1078         	WritePage();
1079         	page_column = 0;
1080               }
1081             }
1082           }
1083 
1084          /*
1085 	  * Add text to the current column & line...
1086 	  */
1087 
1088           if (column < ColumnWidth)
1089 	  {
1090 	    i = column + page_column * (ColumnWidth + ColumnGutter);
1091 
1092             if (PrettyPrint)
1093               Page[line][i].attr = attr;
1094 
1095 	    if (ch == ' ' && Page[line][i].ch)
1096 	      ch = Page[line][i].ch;
1097             else if (ch == Page[line][i].ch)
1098               Page[line][i].attr |= ATTR_BOLD;
1099             else if (Page[line][i].ch == '_')
1100               Page[line][i].attr |= ATTR_UNDERLINE;
1101             else if (ch == '_')
1102 	    {
1103               Page[line][i].attr |= ATTR_UNDERLINE;
1104 
1105               if (Page[line][i].ch)
1106 	        ch = Page[line][i].ch;
1107 	    }
1108 	    else
1109               Page[line][i].attr = attr;
1110 
1111             Page[line][i].ch = ch;
1112 	  }
1113 
1114           if (PrettyPrint)
1115 	  {
1116 	    if ((ch == '{' || ch == '}') && !cmntState && !strState &&
1117 	        column < ColumnWidth)
1118 	    {
1119 	     /*
1120 	      * Highlight curley braces...
1121 	      */
1122 
1123 	      Page[line][column].attr |= ATTR_BOLD;
1124 	    }
1125 	    else if ((ch == '/' || ch == '*') && lastch == '/' &&
1126 	             column < ColumnWidth && PrettyPrint != PRETTY_SHELL)
1127 	    {
1128 	     /*
1129 	      * Highlight first comment character...
1130 	      */
1131 
1132 	      Page[line][column - 1].attr = attr;
1133 	    }
1134 	    else if (ch == '\"' && lastch != '\\' && !cmntState && strState == StrEnd)
1135 	    {
1136 	     /*
1137 	      * End a dbl-quote string constant...
1138 	      */
1139 
1140 	      strState = NoStr;
1141 	      attr    &= ~ATTR_BLUE;
1142             }
1143 	    else if (ch == '/' && lastch == '*' && cmntState)
1144 	    {
1145 	     /*
1146 	      * End a C-style comment...
1147 	      */
1148 
1149 	      cmntState = NoCmnt;
1150 	      attr     &= ~(ATTR_ITALIC | ATTR_GREEN);
1151 	    }
1152 
1153             if (strState == StrBeg)
1154 	      strState = StrEnd;
1155 	  }
1156 
1157           column ++;
1158           break;
1159     }
1160 
1161    /*
1162     * Save this character for the next cycle.
1163     */
1164 
1165     lastch = ch;
1166   }
1167 
1168   /* Do not write anything if the input file is empty */
1169   if (empty)
1170   {
1171     fprintf(stderr, "DEBUG: Input is empty, outputting empty file.\n");
1172     if (fp != stdin)
1173       fclose(fp);
1174     return 0;
1175   }
1176 
1177  /*
1178   * Write any remaining page data...
1179   */
1180 
1181   if (line > 0 || page_column > 0 || column > 0)
1182     WritePage();
1183 
1184  /*
1185   * Write the epilog and return...
1186   */
1187 
1188   WriteEpilogue();
1189 
1190   if (fp != stdin)
1191     fclose(fp);
1192 
1193   if (ppd != NULL)
1194     ppdClose(ppd);
1195 
1196   free(Page[0]);
1197   free(Page);
1198   return (0);
1199 }
1200 
1201 
/*
 * 'compare_keywords()' - Comparison callback for keyword table lookups.
 *
 * Both arguments point at "char *" elements (a pointer to the search key
 * and a pointer to a table entry); the strings they refer to are compared
 * with strcmp().
 */

static int				/* O - Result of strcmp */
compare_keywords(const void *k1,	/* I - First keyword */
                 const void *k2)	/* I - Second keyword */
{
  const char * const	*first  = (const char * const *)k1;
					/* First string pointer */
  const char * const	*second = (const char * const *)k2;
					/* Second string pointer */


  return (strcmp(*first, *second));
}
1212 
1213 
/*
 * 'getutf8()' - Get a UTF-8 encoded wide character...
 *
 * Only characters of up to 16 bits are decoded:
 *
 *        0 to 127 = 0xxxxxxx
 *     128 to 2047 = 110xxxxx 10yyyyyy
 *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 *
 * A stray continuation byte (10xxxxxx, 128 to 191) is returned as-is.
 * Continuation bytes are not validated; only their low 6 bits are used.
 * A lead byte announcing more than three bytes is reported as EOF without
 * reading any further bytes.
 */

static int		/* O - Character or -1 on error */
getutf8(FILE *fp)	/* I - File to read from */
{
  int	lead;		/* First byte of the sequence */
  int	value;		/* Character value decoded so far */
  int	remaining;	/* Continuation bytes still to read */
  int	byte;		/* Current continuation byte */


  lead = getc(fp);

  if (lead == EOF)
    return (EOF);

  if (lead < 0xc0)
    return (lead);		/* One byte character */

 /*
  * Work out how many continuation bytes follow and which bits of the
  * lead byte carry data...
  */

  if ((lead & 0xe0) == 0xc0)
  {
    remaining = 1;		/* Two byte character */
    value     = lead & 0x1f;
  }
  else if ((lead & 0xf0) == 0xe0)
  {
    remaining = 2;		/* Three byte character */
    value     = lead & 0x0f;
  }
  else
    return (EOF);		/* More than three bytes - not supported */

 /*
  * Append 6 bits from each continuation byte...
  */

  while (remaining > 0)
  {
    byte = getc(fp);

    if (byte == EOF)
      return (EOF);

    value = (value << 6) | (byte & 0x3f);
    remaining --;
  }

  return (value);
}
1287 
1288