1 /*
 * unpipe - takes the output of pipe and converts it to HTML/XML form
3  *
4  * Copyright © 1994-2000 World Wide Web Consortium
5  * See http://www.w3.org/Consortium/Legal/copyright-software
6  *
7  * Author: Bert Bos <bert@w3.org>
8  * Created: 23 May 1999
9  * Version: $Id: hxunpipe.c,v 1.11 2017/11/24 09:50:25 bbos Exp $
10  */
11 #include "config.h"
12 #include <stdio.h>
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdbool.h>
16 #ifdef HAVE_UNISTD_H
17 #  include <unistd.h>
18 #endif
19 #include <assert.h>
20 #ifdef HAVE_STRING_H
21 #  include <string.h>
22 #elif HAVE_STRINGS_H
23 #  include <strings.h>
24 #endif
25 #include "export.h"
26 #include "types.e"
27 #include "heap.e"
28 #include "errexit.e"
29 #include "dict.e"
30 #include "openurl.e"
31 
/* Set by the -b option: write <, >, ", & and ' in text as entities */
static bool escape = false;

/* Attribute ("A") lines that have been read, but not yet written:
   attrs[0..nrattrs-1] are heap-allocated, NUL-terminated strings */
static char **attrs = NULL;
static int nrattrs = 0;
35 
36 /* put_text -- replace newlines and print text */
put_text(FILE * in)37 static void put_text(FILE *in)
38 {
39   int c, c1, c2;
40 
41   while ((c = getc(in)) != EOF && c != '\n')
42     if (c != '\\') {
43       if (!escape) putchar(c);
44       else if (c == '<') printf("&lt;");
45       else if (c == '>') printf("&gt;");
46       else if (c == '"') printf("&quot;");
47       else if (c == '&') printf("&amp;");
48       else if (c == '\'') printf("&apos;");
49       else putchar(c);
50     }
51     else if ((c = getc(in)) == EOF) return; /* Error */
52     else if (c == '\n') return;		    /* Error */
53     else if (c == 'n') putchar('\n');
54     else if (c == 'r') putchar('\r');
55     else if (c == 't') putchar('\t');
56     else if (c == '#') printf("&#");
57     else if (c < '0' || '7' < c) putchar(c);
58     else if ((c1 = getc(in)) < '0' || '7' < c1) printf("%c%c", c, c1);
59     else if ((c2 = getc(in)) < '0' || '7' < c2) printf("%c%c%c", c, c1, c2);
60     else printf("&#%d;", 64 * (c - '0') + 8 * (c1 - '0') + (c2 - '0'));
61 }
62 
63 /* store_attr -- store attributes temporarily */
store_attr(FILE * in)64 static void store_attr(FILE *in)
65 {
66 # define INC 1014
67   int c, n = 0;
68 
69   renewarray(attrs, nrattrs + 1);
70   attrs[nrattrs] = NULL;
71   renewarray(attrs[nrattrs], INC);
72   while ((c = getc(in)) != EOF && c != '\n') {
73     attrs[nrattrs][n++] = c;
74     if (n % INC == 0) {renewarray(attrs[nrattrs], INC * (n/INC + 1));}
75   }
76   attrs[nrattrs][n] = '\0';
77   nrattrs++;
78 }
79 
80 /* put_attr -- write out attributes */
put_attr(void)81 static void put_attr(void)
82 {
83   int i, j;
84 
85   for (j = 0; j < nrattrs; j++) {
86     for (i = 0; attrs[j][i] && attrs[j][i] != ' '; i++);
87     if (attrs[j][i] != ' ') errexit("Incorrect A (attribute) line\n");
88     if (! eq(attrs[j] + i + 1, "IMPLIED")) {
89       putchar(' ');
90       for (i = 0; attrs[j][i] && attrs[j][i] != ' '; i++) putchar(attrs[j][i]);
91       if (attrs[j][i] != ' ') errexit("Incorrect A (attribute) line\n");
92       putchar('=');
93       for (i++; attrs[j][i] && attrs[j][i] != ' '; i++) ; /* skip type */
94       if (attrs[j][i] != ' ') errexit("Incorrect A (attribute) line\n");
95       putchar('"');
96       for (i++; attrs[j][i]; i++) {
97 	if (attrs[j][i] != '\\') putchar(attrs[j][i]);
98 	else if (attrs[j][i+1]) {
99 	  i++;
100 	  if (attrs[j][i] == 'n') putchar('\n');
101 	  else if (attrs[j][i] == 'r') putchar('\r');
102 	  else if (attrs[j][i] == 't') putchar('\t');
103 	  else if (attrs[j][i] == '#') printf("&#");
104 	  else if ('0' <= attrs[j][i] && attrs[j][i] <= '7' &&
105 		   '0' <= attrs[j][i+1] && attrs[j][i+1] <= '7' &&
106 		   '0' <= attrs[j][i+2] && attrs[j][i+2] <= '7') {
107 	    printf("&#%d;", 64 * (attrs[j][i] - '0') +
108 		   8 * (attrs[j][i+1] - '0') + (attrs[j][i+2] - '0'));
109 	    i += 2;
110 	  } else putchar(attrs[j][i]);
111 	}
112       }
113       putchar('"');
114     }
115     dispose(attrs[j]);
116   }
117   nrattrs = 0;
118 }
119 
/* put_decl -- write a DOCTYPE declaration
 *
 * Reads the rest of the current line from `in`, which consists of the
 * name of the root element, optionally followed by a double-quoted FPI
 * and optionally by a URL, separated by spaces. Writes to stdout one of
 *
 *   <!DOCTYPE name>
 *   <!DOCTYPE name PUBLIC "fpi">
 *   <!DOCTYPE name PUBLIC "fpi" "url">
 *   <!DOCTYPE name SYSTEM "url">
 *
 * An empty FPI ("") counts as absent. Calls errexit() if the line or
 * the input ends inside the quoted FPI.
 */
static void put_decl(FILE *in)
{
  bool public_id = false;
  int ch;

  fputs("<!DOCTYPE ", stdout);

  /* The root element's name ends at a space or at the end of the line */
  for (ch = getc(in); ch != EOF && ch != '\n' && ch != ' '; ch = getc(in))
    putchar(ch);

  /* The FPI, if any, is enclosed in double quotes */
  while (ch == ' ') ch = getc(in);
  if (ch == '"') {
    ch = getc(in);
    if (ch == EOF || ch == '\n') errexit("Incorrect DOCTYPE declaration\n");
    if (ch != '"') {
      public_id = true;
      fputs(" PUBLIC \"", stdout);
      putchar(ch);
      for (ch = getc(in); ch != EOF && ch != '\n' && ch != '"'; ch = getc(in))
        putchar(ch);
      if (ch != '"') errexit("Incorrect DOCTYPE declaration\n");
      putchar('"');
    }
    ch = getc(in);              /* First character after closing quote */
  }

  /* The URL, if any, is whatever remains on the line */
  while (ch == ' ') ch = getc(in);
  if (ch != EOF && ch != '\n') {
    fputs(public_id ? " \"" : " SYSTEM \"", stdout);
    do {
      putchar(ch);
      ch = getc(in);
    } while (ch != EOF && ch != '\n');
    putchar('"');
  }

  putchar('>');
}
156 
157 /* usage -- print usage message and exit */
usage(string prog)158 static void usage(string prog)
159 {
160   fprintf(stderr, "Version %s\nUsage: %s [file_or_url]\n", VERSION, prog);
161   exit(1);
162 }
163 
main(int argc,char * argv[])164 int main(int argc, char *argv[])
165 {
166   int c, status = 200;
167   FILE *in = NULL;
168   bool empty = false;
169 
170   while ((c = getopt(argc, argv, "b")) != -1)
171     switch (c) {
172       case 'b': escape = true; break;
173       default: usage(argv[0]);
174     }
175   if (optind == argc) in = stdin;
176   else if (optind == argc - 1) in = fopenurl(argv[optind], "r", &status);
177   else usage(argv[0]);
178 
179   if (in == NULL) { perror(argv[optind]); exit(2); }
180   if (status != 200) errexit("%s : %s\n", argv[optind], http_strerror(status));
181 
182   while ((c = getc(in)) != EOF) {
183     switch (c) {
184     case '-': put_text(in); break;
185     case '?': printf("<?"); put_text(in); printf(">"); break;
186     case '_': case '*': printf("<!--"); put_text(in); printf("-->"); break;
187     case 'L': break;
188     case 'A': store_attr(in); break;
189     case '(': putchar('<'); put_text(in); put_attr(); putchar('>'); break;
190     case ')':
191       if (!empty) {printf("</"); put_text(in); putchar('>');}
192       else empty = false;
193       break;
194     case '|': putchar('<'); put_text(in); put_attr(); printf(" />"); break;
195     case '!': put_decl(in); break;
196     case 'e': empty = true; break; /* Generated by onsgmls */
197     case 'i': case 'o': break;	   /* Generated by onsgmls */
198     case 'C': break;
199     }
200   }
201   if (! feof(in)) { perror(argv[0]); exit(1); }
202   fclose(in);
203   return 0;
204 }
205