/*
 * unpipe - takes output of pipe and converts it to HTML/XML form
 *
 * Copyright © 1994-2000 World Wide Web Consortium
 * See http://www.w3.org/Consortium/Legal/copyright-software
 *
 * Author: Bert Bos <bert@w3.org>
 * Created: 23 May 1999
 * Version: $Id: hxunpipe.c,v 1.11 2017/11/24 09:50:25 bbos Exp $
 */
11 #include "config.h"
12 #include <stdio.h>
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdbool.h>
16 #ifdef HAVE_UNISTD_H
17 # include <unistd.h>
18 #endif
19 #include <assert.h>
20 #ifdef HAVE_STRING_H
21 # include <string.h>
22 #elif HAVE_STRINGS_H
23 # include <strings.h>
24 #endif
25 #include "export.h"
26 #include "types.e"
27 #include "heap.e"
28 #include "errexit.e"
29 #include "dict.e"
30 #include "openurl.e"
31
/* Attribute ("A") lines are collected by store_attr() and written out and
 * released again by put_attr(). */
static int nrattrs = 0;		/* Number of strings currently held in attrs */
static char **attrs = NULL;	/* The stored attribute lines, one string each */

/* Set by the -b command line option; consulted by put_text() when it copies
 * text to the output. */
static bool escape = false;
35
36 /* put_text -- replace newlines and print text */
put_text(FILE * in)37 static void put_text(FILE *in)
38 {
39 int c, c1, c2;
40
41 while ((c = getc(in)) != EOF && c != '\n')
42 if (c != '\\') {
43 if (!escape) putchar(c);
44 else if (c == '<') printf("<");
45 else if (c == '>') printf(">");
46 else if (c == '"') printf(""");
47 else if (c == '&') printf("&");
48 else if (c == '\'') printf("'");
49 else putchar(c);
50 }
51 else if ((c = getc(in)) == EOF) return; /* Error */
52 else if (c == '\n') return; /* Error */
53 else if (c == 'n') putchar('\n');
54 else if (c == 'r') putchar('\r');
55 else if (c == 't') putchar('\t');
56 else if (c == '#') printf("&#");
57 else if (c < '0' || '7' < c) putchar(c);
58 else if ((c1 = getc(in)) < '0' || '7' < c1) printf("%c%c", c, c1);
59 else if ((c2 = getc(in)) < '0' || '7' < c2) printf("%c%c%c", c, c1, c2);
60 else printf("&#%d;", 64 * (c - '0') + 8 * (c1 - '0') + (c2 - '0'));
61 }
62
63 /* store_attr -- store attributes temporarily */
store_attr(FILE * in)64 static void store_attr(FILE *in)
65 {
66 # define INC 1014
67 int c, n = 0;
68
69 renewarray(attrs, nrattrs + 1);
70 attrs[nrattrs] = NULL;
71 renewarray(attrs[nrattrs], INC);
72 while ((c = getc(in)) != EOF && c != '\n') {
73 attrs[nrattrs][n++] = c;
74 if (n % INC == 0) {renewarray(attrs[nrattrs], INC * (n/INC + 1));}
75 }
76 attrs[nrattrs][n] = '\0';
77 nrattrs++;
78 }
79
80 /* put_attr -- write out attributes */
put_attr(void)81 static void put_attr(void)
82 {
83 int i, j;
84
85 for (j = 0; j < nrattrs; j++) {
86 for (i = 0; attrs[j][i] && attrs[j][i] != ' '; i++);
87 if (attrs[j][i] != ' ') errexit("Incorrect A (attribute) line\n");
88 if (! eq(attrs[j] + i + 1, "IMPLIED")) {
89 putchar(' ');
90 for (i = 0; attrs[j][i] && attrs[j][i] != ' '; i++) putchar(attrs[j][i]);
91 if (attrs[j][i] != ' ') errexit("Incorrect A (attribute) line\n");
92 putchar('=');
93 for (i++; attrs[j][i] && attrs[j][i] != ' '; i++) ; /* skip type */
94 if (attrs[j][i] != ' ') errexit("Incorrect A (attribute) line\n");
95 putchar('"');
96 for (i++; attrs[j][i]; i++) {
97 if (attrs[j][i] != '\\') putchar(attrs[j][i]);
98 else if (attrs[j][i+1]) {
99 i++;
100 if (attrs[j][i] == 'n') putchar('\n');
101 else if (attrs[j][i] == 'r') putchar('\r');
102 else if (attrs[j][i] == 't') putchar('\t');
103 else if (attrs[j][i] == '#') printf("&#");
104 else if ('0' <= attrs[j][i] && attrs[j][i] <= '7' &&
105 '0' <= attrs[j][i+1] && attrs[j][i+1] <= '7' &&
106 '0' <= attrs[j][i+2] && attrs[j][i+2] <= '7') {
107 printf("&#%d;", 64 * (attrs[j][i] - '0') +
108 8 * (attrs[j][i+1] - '0') + (attrs[j][i+2] - '0'));
109 i += 2;
110 } else putchar(attrs[j][i]);
111 }
112 }
113 putchar('"');
114 }
115 dispose(attrs[j]);
116 }
117 nrattrs = 0;
118 }
119
/* put_decl -- write a DOCTYPE declaration
 *
 * Reads the rest of the current line from "in". The line holds the name of
 * the root element, optionally followed by a public identifier (FPI)
 * between double quotes and by a URL that runs to the end of the line.
 * An empty FPI ("") counts as no FPI. Writes
 *   <!DOCTYPE name PUBLIC "fpi" "url">, <!DOCTYPE name PUBLIC "fpi">,
 *   <!DOCTYPE name SYSTEM "url"> or <!DOCTYPE name>
 * Calls errexit() if the line ends inside the FPI.
 */
static void put_decl(FILE *in)
{
  int c;
  bool hasfpi = false;

  printf("<!DOCTYPE ");

  /* Write name of root element */
  while ((c = getc(in)) != EOF && c != '\n' && c != ' ') putchar(c);

  /* Write FPI if present */
  while (c == ' ') c = getc(in);
  if (c == '"') {
    c = getc(in);
    if (c == EOF || c == '\n') errexit("Incorrect DOCTYPE declaration\n");
    if (c != '"') {		/* Not the closing quote of an empty FPI */
      hasfpi = true;
      printf(" PUBLIC \"%c", c);
      while ((c = getc(in)) != EOF && c != '\n' && c != '"') putchar(c);
      if (c != '"') errexit("Incorrect DOCTYPE declaration\n");
      putchar('"');
    }
    c = getc(in);		/* First character after the closing quote */
  }

  /* Write URL if present */
  while (c == ' ') c = getc(in);
  if (c != EOF && c != '\n') {
    /* The keyword SYSTEM is only needed when there is no PUBLIC part */
    if (hasfpi) printf(" \"%c", c);
    else printf(" SYSTEM \"%c", c);
    while ((c = getc(in)) != EOF && c != '\n') putchar(c);
    putchar('"');
  }

  putchar('>');
}
156
157 /* usage -- print usage message and exit */
usage(string prog)158 static void usage(string prog)
159 {
160 fprintf(stderr, "Version %s\nUsage: %s [file_or_url]\n", VERSION, prog);
161 exit(1);
162 }
163
main(int argc,char * argv[])164 int main(int argc, char *argv[])
165 {
166 int c, status = 200;
167 FILE *in = NULL;
168 bool empty = false;
169
170 while ((c = getopt(argc, argv, "b")) != -1)
171 switch (c) {
172 case 'b': escape = true; break;
173 default: usage(argv[0]);
174 }
175 if (optind == argc) in = stdin;
176 else if (optind == argc - 1) in = fopenurl(argv[optind], "r", &status);
177 else usage(argv[0]);
178
179 if (in == NULL) { perror(argv[optind]); exit(2); }
180 if (status != 200) errexit("%s : %s\n", argv[optind], http_strerror(status));
181
182 while ((c = getc(in)) != EOF) {
183 switch (c) {
184 case '-': put_text(in); break;
185 case '?': printf("<?"); put_text(in); printf(">"); break;
186 case '_': case '*': printf("<!--"); put_text(in); printf("-->"); break;
187 case 'L': break;
188 case 'A': store_attr(in); break;
189 case '(': putchar('<'); put_text(in); put_attr(); putchar('>'); break;
190 case ')':
191 if (!empty) {printf("</"); put_text(in); putchar('>');}
192 else empty = false;
193 break;
194 case '|': putchar('<'); put_text(in); put_attr(); printf(" />"); break;
195 case '!': put_decl(in); break;
196 case 'e': empty = true; break; /* Generated by onsgmls */
197 case 'i': case 'o': break; /* Generated by onsgmls */
198 case 'C': break;
199 }
200 }
201 if (! feof(in)) { perror(argv[0]); exit(1); }
202 fclose(in);
203 return 0;
204 }
205