1 /*
2 * hxxmlns - expand XML Namespace prefixes
3 *
4 * Expand all element and attribute names to "global names" by
5 * expanding the prefix. All names will be printed as "{URL}name".
 * Attribute names without a prefix are not expanded: they are printed
 * unchanged, as just "name".
8 *
9 * Copyright © 1994-2000 World Wide Web Consortium
10 * See http://www.w3.org/Consortium/Legal/copyright-software
11 *
12 * Author: Bert Bos
13 * Created: 22 Mar 2000
14 * Version: $Id: hxxmlns.c,v 1.8 2017/11/24 09:50:25 bbos Exp $
15 *
16 **/
17 #include "config.h"
18 #include <stdio.h>
19 #ifdef HAVE_UNISTD_H
20 # include <unistd.h>
21 #endif
22 #include <ctype.h>
23 #if STDC_HEADERS
24 # include <string.h>
25 #else
26 # ifndef HAVE_STRCHR
27 # define strchr index
28 # define strrchr rindex
29 # endif
30 #endif
31 #include <stdlib.h>
32 #include <assert.h>
33 #include <stdbool.h>
34 #include "export.h"
35 #include "types.e"
36 #include "heap.e"
37 #include "html.e"
38 #include "scan.e"
39 #include "dict.e"
40 #include "openurl.e"
41 #include "errexit.e"
42
/* Used below as the line-number prefix of the diagnostics that this
 * program itself writes to stderr. */
extern int yylineno;				/* From scan.l */
44
45 /* The symbol table is a chain of prefix/uri pairs. Every time an
46 * element starts, the prefixes defined by it are added at the end. To
47 * expand a prefix, the most recently added prefix/uri pair is used.
 * When an element ends, the chain is reduced to what it was when the
49 * element started. The stack keeps track of where the chain ended at
50 * the start of the element.
51 *
52 * ToDo: should we hash the prefixes? or is linear search good enough?
53 **/
/* Symbol -- one prefix/URI binding in the chain of namespace declarations.
 * New bindings are linked in at the head of the chain (see do_tag), so a
 * search that starts at the head finds the most recent binding first. */
typedef struct _Symbol {
  string prefix;		/* Namespace prefix; "" for the default namespace */
  string uri;			/* Namespace URI bound to the prefix */
  struct _Symbol *next;		/* Older binding, or NULL at the end of the chain */
} Symbol, *SymbolTable;
59
/* Stack -- one entry per start/empty tag that has been processed but not
 * yet closed; it remembers the head of the symbol chain at that moment. */
typedef struct _StackElt {
  Symbol *frame;		/* Head of the symbol chain before the tag's own declarations */
  struct _StackElt *next;	/* Entry pushed before this one, or NULL */
} *Stack;
64
/* Built-in binding for the "xml" prefix; it is the initial (and therefore
 * last) entry of the symbol chain. */
static Symbol xml = {"xml", "http://www.w3.org/XML/1998/namespace", NULL};
static bool has_error = false;		/* Set once an error is reported; makes main return 1 */
static SymbolTable symtable = &xml;	/* Head of the chain of bindings currently in scope */
static Stack stack = NULL;		/* Marks into symtable, one per open tag */
static bool do_decls = true;		/* Print decl, comment, PI? Cleared by option -d */
70
71
72 /* print_globalname -- print a name with expanded prefix */
print_globalname(string name,bool use_default)73 static void print_globalname(string name, bool use_default)
74 {
75 string h, prefix, local;
76 Symbol *s;
77
78 /* Split the name */
79 h = strchr(name, ':');
80 if (!h && !use_default) { /* No prefix & no default ns */
81 printf("%s", name);
82 return;
83 }
84 if (h) {
85 *h = '\0';
86 prefix = name;
87 local = h + 1;
88 } else {
89 prefix = "";
90 local = name;
91 }
92 /* Find the prefix in the symbol table */
93 for (s = symtable; s && !eq(prefix, s->prefix); s = s->next) ;
94
95 if (!s && !eq(prefix, "")) {
96 fprintf(stderr, "%d: prefix \"%s\" not defined\n", yylineno, prefix);
97 has_error = true;
98 /* To do: do we report anything if the default prefix is undefined? */
99 }
100 /* ToDo: check that any '}' in uri is escaped */
101 printf("{%s}%s", s ? s->uri : (string)"", local);
102 }
103
104 /* do_tag -- print a start or empty tag expanded */
do_tag(string name,pairlist attribs,bool empty)105 static void do_tag(string name, pairlist attribs, bool empty)
106 {
107 Stack h;
108 pairlist p;
109 Symbol *sym;
110
111 /* Mark the current end of the symbol table */
112 new(h);
113 h->next = stack;
114 h->frame = symtable;
115 stack = h;
116
117 /* Scan the attributes for namespace definitions and store them */
118 for (p = attribs; p; p = p->next) {
119 if (strncmp(p->name, "xmlns", 5) == 0) {
120 new(sym);
121 sym->prefix = newstring(p->name + (p->name[5] ? 6 : 5));
122 sym->uri = newstring(p->value);
123 sym->next = symtable;
124 symtable = sym;
125 }
126 }
127 /* Print the tag with prefixes expanded */
128 putchar('<');
129 print_globalname(name, true);
130 for (p = attribs; p; p = p->next) {
131 if (strncmp(p->name, "xmlns", 5) != 0) {
132 putchar(' ');
133 print_globalname(p->name, false);
134 printf("=\"%s\"", p->value);
135 }
136 }
137 printf(empty ? "/>" : ">");
138 }
139
140 /* pop_symboltable -- unwind the symbol table to previous mark */
pop_symboltable(string name)141 static void pop_symboltable(string name)
142 {
143 Symbol *h;
144 Stack p;
145
146 if (!stack) {
147 if (! has_error) fprintf(stderr, "%d: too many end tags\n", yylineno);
148 has_error = true;
149 return;
150 }
151 /* Remove entries from symbol table chain until last mark */
152 while (symtable != stack->frame) {
153 h = symtable;
154 symtable = symtable->next;
155 dispose(h->prefix);
156 dispose(h->uri);
157 dispose(h);
158 }
159 /* Pop stack itself */
160 p = stack;
161 stack = stack->next;
162 dispose(p);
163 }
164
165 /* handle_error -- called when a parse error occurred */
handle_error(void * clientdata,const string s,int lineno)166 void handle_error(void *clientdata, const string s, int lineno)
167 {
168 fprintf(stderr, "%d: %s\n", lineno, s);
169 has_error = true;
170 }
171
/* start -- called before the first event is reported
 *
 * This program keeps no per-parse client data, so the value returned is
 * always NULL.
 */
void *start(void)
{
  void *no_clientdata = NULL;

  return no_clientdata;
}
177
/* end -- called after the last event is reported
 *
 * clientdata: not used.
 *
 * Does nothing.
 */
void end(void *clientdata)
{
  (void) clientdata;			/* Nothing to clean up */
}
183
184 /* handle_comment -- called after a comment is parsed */
handle_comment(void * clientdata,string commenttext)185 void handle_comment(void *clientdata, string commenttext)
186 {
187 if (do_decls) printf("<!--%s-->", commenttext);
188 free(commenttext);
189 }
190
191 /* handle_text -- called after a text chunk is parsed */
handle_text(void * clientdata,string text)192 void handle_text(void *clientdata, string text)
193 {
194 printf("%s", text);
195 free(text);
196 }
197
198 /* handle_decl -- called after a declaration is parsed */
handle_decl(void * clientdata,string gi,string fpi,string url)199 void handle_decl(void *clientdata, string gi, string fpi, string url)
200 {
201 if (do_decls) {
202 printf("<!DOCTYPE %s", gi);
203 if (fpi) printf(" PUBLIC \"%s\">", fpi);
204 if (url) printf(" %s\"%s\">", fpi ? "" : "SYSTEM ", url);
205 printf(">");
206 }
207 free(gi);
208 if (fpi) free(fpi);
209 if (url) free(url);
210 }
211
212 /* handle_pi -- called after a PI is parsed */
handle_pi(void * clientdata,string pi_text)213 void handle_pi(void *clientdata, string pi_text)
214 {
215 if (do_decls) printf("<?%s>", pi_text);
216 free(pi_text);
217 }
218
219 /* handle_starttag -- called after a start tag is parsed */
handle_starttag(void * clientdata,string name,pairlist attribs)220 void handle_starttag(void *clientdata, string name, pairlist attribs)
221 {
222 do_tag(name, attribs, false);
223 free(name);
224 pairlist_delete(attribs);
225 }
226
227 /* handle_emptytag -- called after an empty tag is parsed */
handle_emptytag(void * clientdata,string name,pairlist attribs)228 void handle_emptytag(void *clientdata, string name, pairlist attribs)
229 {
230 do_tag(name, attribs, true);
231 pop_symboltable(name);
232 free(name);
233 pairlist_delete(attribs);
234 }
235
236 /* handle_endtag -- called after an endtag is parsed (name may be "") */
handle_endtag(void * clientdata,string name)237 void handle_endtag(void *clientdata, string name)
238 {
239 /* Printf the end tag */
240 printf("</");
241 print_globalname(name, true);
242 putchar('>');
243
244 /* Unwind the symbol table */
245 pop_symboltable(name);
246 free(name);
247 }
248
249 /* usage -- print usage message and exit */
usage(string prog)250 static void usage(string prog)
251 {
252 fprintf(stderr, "Version %s\nUsage: %s [-d] [xml-file-or-url]\n", VERSION, prog);
253 exit(2);
254 }
255
main(int argc,char * argv[])256 int main(int argc, char *argv[])
257 {
258 int i, status = 200;
259
260 /* Bind the parser callback routines to our handlers */
261 set_error_handler(handle_error);
262 set_start_handler(start);
263 set_end_handler(end);
264 set_comment_handler(handle_comment);
265 set_text_handler(handle_text);
266 set_decl_handler(handle_decl);
267 set_pi_handler(handle_pi);
268 set_starttag_handler(handle_starttag);
269 set_emptytag_handler(handle_emptytag);
270 set_endtag_handler(handle_endtag);
271
272 /* Parse command line arguments */
273 for (i = 1; i < argc && argv[i][0] == '-' && !eq(argv[i], "--"); i++) {
274 switch (argv[i][1]) {
275 case 'd': do_decls = false; break;
276 default: usage(argv[0]);
277 }
278 }
279 if (i < argc && eq(argv[i], "--")) i++;
280
281 if (i == argc) yyin = stdin;
282 else if (i == argc - 1) yyin = fopenurl(argv[i], "r", &status);
283 else usage(argv[0]);
284
285 if (yyin == NULL) {perror(argv[i]); exit(1);}
286 if (status != 200) errexit("%s : %s\n", argv[i], http_strerror(status));
287
288 if (yyparse() != 0) exit(3);
289
290 return has_error ? 1 : 0;
291 }
292