1 /*
2  * hxnsxml - convert output of hxxmlns back to normal XML
3  *
4  * To do: handle quotes in Namespace URLs.
5  * To do: handle XML's own Namespace.
6  *
7  * Part of HTML-XML-utils, see:
8  * http://www.w3.org/Tools/HTML-XML-utils/
9  *
10  * Copyright © 1994-2010 World Wide Web Consortium
11  * See http://www.w3.org/Consortium/Legal/copyright-software
12  *
13  * Author: Bert Bos
14  * Created: 12 July 2010
15  *
16  **/
17 #include "config.h"
18 #include <stdio.h>
19 #ifdef HAVE_UNISTD_H
20 # include <unistd.h>
21 #endif
22 #include <ctype.h>
23 #if STDC_HEADERS
24 # include <string.h>
25 #else
26 # ifndef HAVE_STRCHR
27 #  define strchr index
28 #  define strrchr rindex
29 # endif
30 #endif
31 #include <stdlib.h>
32 #include <assert.h>
33 #include <stdbool.h>
34 #include "export.h"
35 #include "types.e"
36 #include "html.e"
37 #include "scan.e"
38 #include "dict.e"
39 #include "openurl.e"
40 #include "errexit.e"
41 
42 #define XML "{http://www.w3.org/XML/1998/namespace}"
43 
44 static bool has_error = false;
45 static bool has_ns = false;	/* true if Namespaces occur anywhere in document */
46 
47 
48 /* --------------- implements interface api.h -------------------------- */
49 
50 /* handle_error -- called when a parse error occurred */
handle_error(void * clientdata,const string s,int lineno)51 void handle_error(void *clientdata, const string s, int lineno)
52 {
53   fprintf(stderr, "%d: %s\n", lineno, s);
54   has_error = true;
55 }
56 
/* start -- called before the first event is reported
 *
 * This program keeps no per-parse state, so it always returns NULL.
 */
void *start(void)
{
  void *no_clientdata = NULL;

  return no_clientdata;
}
62 
/* end -- called after the last event is reported
 *
 * Does nothing; clientdata is ignored.
 */
void end(void *clientdata)
{
  (void) clientdata;
}
68 
69 /* handle_comment -- called after a comment is parsed */
handle_comment(void * clientdata,string commenttext)70 void handle_comment(void *clientdata, string commenttext)
71 {
72   printf("<!--%s-->", commenttext);
73 }
74 
75 /* handle_text -- called after a text chunk is parsed */
handle_text(void * clientdata,string text)76 void handle_text(void *clientdata, string text)
77 {
78   printf("%s", text);
79 }
80 
81 /* handle_decl -- called after a declaration is parsed */
handle_decl(void * clientdata,string gi,string fpi,string url)82 void handle_decl(void *clientdata, string gi, string fpi,
83 		 string url)
84 {
85   printf("<!DOCTYPE %s", gi);
86   if (fpi) printf(" PUBLIC \"%s\"", fpi);
87   if (url) printf(" %s\"%s\"", fpi ? "" : "SYSTEM ", url);
88   printf(">\n");
89 }
90 
91 /* handle_pi -- called after a PI is parsed */
handle_pi(void * clientdata,string pi_text)92 void handle_pi(void *clientdata, string pi_text)
93 {
94   printf("<?%s>", pi_text);
95 }
96 
97 /* print_attrs -- print attributes */
print_attrs(const pairlist attribs)98 void print_attrs(const pairlist attribs)
99 {
100   pairlist p;
101   int i, j;
102   char c = 'a';
103 
104   for (p = attribs; p; p = p->next) {
105 
106     if (p->name[0] != '{') {
107       i = 0;
108     } else {
109       for (i = 1; p->name[i] && p->name[i] != '}'; i++);
110       if (p->name[i]) i++;
111     }
112     if (i > 2) {
113       if (c > 'z') {
114 	fprintf(stderr, "Bug: hxnsxml cannot handle > 26 namespaces per element.\n");
115 	exit(2);
116       }
117       printf(" xmlns:%c=\"", c);
118       for (j = 1; j < i - 1; j++) putchar(p->name[j]);
119       putchar('\"');
120       printf(" %c:", c);
121       c++;
122     } else {
123       printf(" ");
124     }
125     printf("%s=\"%s\"", p->name + i, p->value);
126   }
127 }
128 
129 /* print_tag -- print "<" and the element name, optionally with a namespace */
print_tag(const conststring name)130 static void print_tag(const conststring name)
131 {
132   int i, j;
133 
134   if (name[0] != '{') {
135     i = 0;
136   } else {
137     for (i = 1; name[i] && name[i] != '}'; i++);
138     if (name[i]) i++;
139   }
140   printf("<%s", name + i);
141   if (i > 2) {			/* Element has a Namespace */
142     printf(" xmlns=\"");
143     for (j = 1; j < i - 1; j++) putchar(name[j]);
144     putchar('"');
145     has_ns = true;
146   } else if (has_ns) {		/* Document has Namespaces, this element not */
147     printf(" xmlns=\"\"");
148   }
149 }
150 
151 /* handle_starttag -- called after a start tag is parsed */
handle_starttag(void * clientdata,string name,pairlist attribs)152 void handle_starttag(void *clientdata, string name, pairlist attribs)
153 {
154   print_tag(name);
155   print_attrs(attribs);
156   putchar('>');
157 }
158 
159 /* handle_emptytag -- called after an empty tag is parsed */
handle_emptytag(void * clientdata,string name,pairlist attribs)160 void handle_emptytag(void *clientdata, string name, pairlist attribs)
161 {
162   print_tag(name);
163   print_attrs(attribs);
164   printf(" />");
165 }
166 
167 /* handle_endtag -- called after an endtag is parsed (name may be "") */
handle_endtag(void * clientdata,string name)168 void handle_endtag(void *clientdata, string name)
169 {
170   int i;
171 
172   if (name[0] != '{') {
173     i = 0;
174   } else {
175     for (i = 1; name[i] && name[i] != '}'; i++);
176     if (name[i]) i++;
177   }
178   printf("</%s>", name + i);
179 }
180 
181 /* --------------------------------------------------------------------- */
182 
183 /* usage -- print usage message and exit */
usage(string prog)184 static void usage(string prog)
185 {
186   fprintf(stderr, "Version %s\nUsage: %s [file-or-url]\n", VERSION, prog);
187   exit(2);
188 }
189 
main(int argc,char * argv[])190 int main(int argc, char *argv[])
191 {
192   int status = 200;
193 
194   /* Bind the parser callback routines to our handlers */
195   set_error_handler(handle_error);
196   set_start_handler(start);
197   set_end_handler(end);
198   set_comment_handler(handle_comment);
199   set_text_handler(handle_text);
200   set_decl_handler(handle_decl);
201   set_pi_handler(handle_pi);
202   set_starttag_handler(handle_starttag);
203   set_emptytag_handler(handle_emptytag);
204   set_endtag_handler(handle_endtag);
205 
206   if (argc > 2) usage(argv[0]);
207   else if (argc == 2) yyin = fopenurl(argv[1], "r", &status);
208   else yyin = stdin;
209 
210   if (!yyin) {perror(argv[1]); exit(1);}
211   if (status != 200) errexit("%s : %s\n", argv[1], http_strerror(status));
212 
213   if (yyparse() != 0) exit(3);
214 
215   return has_error ? 1 : 0;
216 }
217