1 /*
2 * hxnsxml - convert output of hxxmlns back to normal XML
3 *
4 * To do: handle quotes in Namespace URLs.
5 * To do: handle XML's own Namespace.
6 *
7 * Part of HTML-XML-utils, see:
8 * http://www.w3.org/Tools/HTML-XML-utils/
9 *
10 * Copyright © 1994-2010 World Wide Web Consortium
11 * See http://www.w3.org/Consortium/Legal/copyright-software
12 *
13 * Author: Bert Bos
14 * Created: 12 July 2010
15 *
16 **/
17 #include "config.h"
18 #include <stdio.h>
19 #ifdef HAVE_UNISTD_H
20 # include <unistd.h>
21 #endif
22 #include <ctype.h>
23 #if STDC_HEADERS
24 # include <string.h>
25 #else
26 # ifndef HAVE_STRCHR
27 # define strchr index
28 # define strrchr rindex
29 # endif
30 #endif
31 #include <stdlib.h>
32 #include <assert.h>
33 #include <stdbool.h>
34 #include "export.h"
35 #include "types.e"
36 #include "html.e"
37 #include "scan.e"
38 #include "dict.e"
39 #include "openurl.e"
40 #include "errexit.e"
41
/* The Namespace of XML itself, in the "{URL}" notation that this
 * program expects in front of element and attribute names */
#define XML "{http://www.w3.org/XML/1998/namespace}"

static bool has_error = false;	/* true once handle_error() has been called */
static bool has_ns = false;	/* true if Namespaces occur anywhere in document */
46
47
48 /* --------------- implements interface api.h -------------------------- */
49
50 /* handle_error -- called when a parse error occurred */
handle_error(void * clientdata,const string s,int lineno)51 void handle_error(void *clientdata, const string s, int lineno)
52 {
53 fprintf(stderr, "%d: %s\n", lineno, s);
54 has_error = true;
55 }
56
/* start -- called before the first event is reported
 *
 * This program keeps no per-document state, so it always returns NULL.
 */
void *start(void)
{
  return NULL;
}
62
/* end -- called after the last event is reported
 *
 * Nothing to finish or clean up: all output was already written by
 * the other handlers.
 */
void end(void *clientdata)
{
  (void) clientdata;		/* skip */
}
68
69 /* handle_comment -- called after a comment is parsed */
handle_comment(void * clientdata,string commenttext)70 void handle_comment(void *clientdata, string commenttext)
71 {
72 printf("<!--%s-->", commenttext);
73 }
74
75 /* handle_text -- called after a text chunk is parsed */
handle_text(void * clientdata,string text)76 void handle_text(void *clientdata, string text)
77 {
78 printf("%s", text);
79 }
80
81 /* handle_decl -- called after a declaration is parsed */
handle_decl(void * clientdata,string gi,string fpi,string url)82 void handle_decl(void *clientdata, string gi, string fpi,
83 string url)
84 {
85 printf("<!DOCTYPE %s", gi);
86 if (fpi) printf(" PUBLIC \"%s\"", fpi);
87 if (url) printf(" %s\"%s\"", fpi ? "" : "SYSTEM ", url);
88 printf(">\n");
89 }
90
91 /* handle_pi -- called after a PI is parsed */
handle_pi(void * clientdata,string pi_text)92 void handle_pi(void *clientdata, string pi_text)
93 {
94 printf("<?%s>", pi_text);
95 }
96
97 /* print_attrs -- print attributes */
print_attrs(const pairlist attribs)98 void print_attrs(const pairlist attribs)
99 {
100 pairlist p;
101 int i, j;
102 char c = 'a';
103
104 for (p = attribs; p; p = p->next) {
105
106 if (p->name[0] != '{') {
107 i = 0;
108 } else {
109 for (i = 1; p->name[i] && p->name[i] != '}'; i++);
110 if (p->name[i]) i++;
111 }
112 if (i > 2) {
113 if (c > 'z') {
114 fprintf(stderr, "Bug: hxnsxml cannot handle > 26 namespaces per element.\n");
115 exit(2);
116 }
117 printf(" xmlns:%c=\"", c);
118 for (j = 1; j < i - 1; j++) putchar(p->name[j]);
119 putchar('\"');
120 printf(" %c:", c);
121 c++;
122 } else {
123 printf(" ");
124 }
125 printf("%s=\"%s\"", p->name + i, p->value);
126 }
127 }
128
129 /* print_tag -- print "<" and the element name, optionally with a namespace */
print_tag(const conststring name)130 static void print_tag(const conststring name)
131 {
132 int i, j;
133
134 if (name[0] != '{') {
135 i = 0;
136 } else {
137 for (i = 1; name[i] && name[i] != '}'; i++);
138 if (name[i]) i++;
139 }
140 printf("<%s", name + i);
141 if (i > 2) { /* Element has a Namespace */
142 printf(" xmlns=\"");
143 for (j = 1; j < i - 1; j++) putchar(name[j]);
144 putchar('"');
145 has_ns = true;
146 } else if (has_ns) { /* Document has Namespaces, this element not */
147 printf(" xmlns=\"\"");
148 }
149 }
150
151 /* handle_starttag -- called after a start tag is parsed */
handle_starttag(void * clientdata,string name,pairlist attribs)152 void handle_starttag(void *clientdata, string name, pairlist attribs)
153 {
154 print_tag(name);
155 print_attrs(attribs);
156 putchar('>');
157 }
158
159 /* handle_emptytag -- called after an empty tag is parsed */
handle_emptytag(void * clientdata,string name,pairlist attribs)160 void handle_emptytag(void *clientdata, string name, pairlist attribs)
161 {
162 print_tag(name);
163 print_attrs(attribs);
164 printf(" />");
165 }
166
167 /* handle_endtag -- called after an endtag is parsed (name may be "") */
handle_endtag(void * clientdata,string name)168 void handle_endtag(void *clientdata, string name)
169 {
170 int i;
171
172 if (name[0] != '{') {
173 i = 0;
174 } else {
175 for (i = 1; name[i] && name[i] != '}'; i++);
176 if (name[i]) i++;
177 }
178 printf("</%s>", name + i);
179 }
180
181 /* --------------------------------------------------------------------- */
182
183 /* usage -- print usage message and exit */
usage(string prog)184 static void usage(string prog)
185 {
186 fprintf(stderr, "Version %s\nUsage: %s [file-or-url]\n", VERSION, prog);
187 exit(2);
188 }
189
main(int argc,char * argv[])190 int main(int argc, char *argv[])
191 {
192 int status = 200;
193
194 /* Bind the parser callback routines to our handlers */
195 set_error_handler(handle_error);
196 set_start_handler(start);
197 set_end_handler(end);
198 set_comment_handler(handle_comment);
199 set_text_handler(handle_text);
200 set_decl_handler(handle_decl);
201 set_pi_handler(handle_pi);
202 set_starttag_handler(handle_starttag);
203 set_emptytag_handler(handle_emptytag);
204 set_endtag_handler(handle_endtag);
205
206 if (argc > 2) usage(argv[0]);
207 else if (argc == 2) yyin = fopenurl(argv[1], "r", &status);
208 else yyin = stdin;
209
210 if (!yyin) {perror(argv[1]); exit(1);}
211 if (status != 200) errexit("%s : %s\n", argv[1], http_strerror(status));
212
213 if (yyparse() != 0) exit(3);
214
215 return has_error ? 1 : 0;
216 }
217