1 /*
2  * hxxmlns - expand XML Namespace prefixes
3  *
4  * Expand all element and attribute names to "global names" by
5  * expanding the prefix. All names will be printed as "{URL}name".
 * Attribute names without a prefix are not in any namespace and are
 * printed unchanged, without a "{...}" part.
8  *
9  * Copyright © 1994-2000 World Wide Web Consortium
10  * See http://www.w3.org/Consortium/Legal/copyright-software
11  *
12  * Author: Bert Bos
13  * Created: 22 Mar 2000
14  * Version: $Id: hxxmlns.c,v 1.8 2017/11/24 09:50:25 bbos Exp $
15  *
16  **/
17 #include "config.h"
18 #include <stdio.h>
19 #ifdef HAVE_UNISTD_H
20 #  include <unistd.h>
21 #endif
22 #include <ctype.h>
23 #if STDC_HEADERS
24 # include <string.h>
25 #else
26 # ifndef HAVE_STRCHR
27 #  define strchr index
28 #  define strrchr rindex
29 # endif
30 #endif
31 #include <stdlib.h>
32 #include <assert.h>
33 #include <stdbool.h>
34 #include "export.h"
35 #include "types.e"
36 #include "heap.e"
37 #include "html.e"
38 #include "scan.e"
39 #include "dict.e"
40 #include "openurl.e"
41 #include "errexit.e"
42 
/* Current input line number, maintained by the scanner (scan.l).
 * Used in this file to prefix the error messages about undefined
 * prefixes and surplus end tags. */
extern int yylineno;
44 
45 /* The symbol table is a chain of prefix/uri pairs. Every time an
46  * element starts, the prefixes defined by it are added at the end. To
47  * expand a prefix, the most recently added prefix/uri pair is used.
 * When an element ends, the chain is reduced to what it was when the
49  * element started. The stack keeps track of where the chain ended at
50  * the start of the element.
51  *
52  * ToDo: should we hash the prefixes? or is linear search good enough?
53  **/
/* Symbol -- one prefix/uri binding in the symbol table chain.
 *
 * New bindings are linked in at the head of the chain, so a search
 * from the head finds the most recently declared binding for a prefix
 * first. The field order matters: the predefined "xml" entry is
 * initialized positionally.
 */
typedef struct _Symbol {
  string prefix;		/* Namespace prefix; "" = default namespace */
  string uri;			/* Namespace URI bound to the prefix */
  struct _Symbol *next;		/* Older binding, or NULL at end of chain */
} Symbol, *SymbolTable;
59 
/* Stack -- one mark per open element.
 *
 * Each entry remembers what the head of the symbol table was when the
 * element started, so that the bindings added by that element can be
 * removed again when it ends.
 */
typedef struct _StackElt {
  Symbol *frame;		/* Head of symbol table at element start */
  struct _StackElt *next;	/* Mark of the enclosing element, or NULL */
} *Stack;
64 
/* Predefined binding for the "xml" prefix; the initial and oldest
 * entry of the symbol table chain. */
static Symbol xml = {"xml", "http://www.w3.org/XML/1998/namespace", NULL};

/* Set as soon as any error is reported; makes main() return 1. */
static bool has_error = false;

/* Head of the chain of prefix/uri bindings, most recent first. */
static SymbolTable symtable = &xml;

/* Marks into the symbol table, one per currently open element. */
static Stack stack = NULL;

/* Print DOCTYPE declarations, comments and PIs? Cleared by option -d. */
static bool do_decls = true;
70 
71 
72 /* print_globalname -- print a name with expanded prefix */
print_globalname(string name,bool use_default)73 static void print_globalname(string name, bool use_default)
74 {
75   string h, prefix, local;
76   Symbol *s;
77 
78   /* Split the name */
79   h = strchr(name, ':');
80   if (!h && !use_default) {			/* No prefix & no default ns */
81     printf("%s", name);
82     return;
83   }
84   if (h) {
85     *h = '\0';
86     prefix = name;
87     local = h + 1;
88   } else {
89     prefix = "";
90     local = name;
91   }
92   /* Find the prefix in the symbol table */
93   for (s = symtable; s && !eq(prefix, s->prefix); s = s->next) ;
94 
95   if (!s && !eq(prefix, "")) {
96     fprintf(stderr, "%d: prefix \"%s\" not defined\n", yylineno, prefix);
97     has_error = true;
98     /* To do: do we report anything if the default prefix is undefined? */
99   }
100   /* ToDo: check that any '}' in uri is escaped */
101   printf("{%s}%s", s ? s->uri : (string)"", local);
102 }
103 
104 /* do_tag -- print a start or empty tag expanded */
do_tag(string name,pairlist attribs,bool empty)105 static void do_tag(string name, pairlist attribs, bool empty)
106 {
107   Stack h;
108   pairlist p;
109   Symbol *sym;
110 
111   /* Mark the current end of the symbol table */
112   new(h);
113   h->next = stack;
114   h->frame = symtable;
115   stack = h;
116 
117   /* Scan the attributes for namespace definitions and store them */
118   for (p = attribs; p; p = p->next) {
119     if (strncmp(p->name, "xmlns", 5) == 0) {
120       new(sym);
121       sym->prefix = newstring(p->name + (p->name[5] ? 6 : 5));
122       sym->uri = newstring(p->value);
123       sym->next = symtable;
124       symtable = sym;
125     }
126   }
127   /* Print the tag with prefixes expanded */
128   putchar('<');
129   print_globalname(name, true);
130   for (p = attribs; p; p = p->next) {
131     if (strncmp(p->name, "xmlns", 5) != 0) {
132       putchar(' ');
133       print_globalname(p->name, false);
134       printf("=\"%s\"", p->value);
135     }
136   }
137   printf(empty ? "/>" : ">");
138 }
139 
140 /* pop_symboltable -- unwind the symbol table to previous mark */
pop_symboltable(string name)141 static void pop_symboltable(string name)
142 {
143   Symbol *h;
144   Stack p;
145 
146   if (!stack) {
147     if (! has_error) fprintf(stderr, "%d: too many end tags\n", yylineno);
148     has_error = true;
149     return;
150   }
151   /* Remove entries from symbol table chain until last mark */
152   while (symtable != stack->frame) {
153     h = symtable;
154     symtable = symtable->next;
155     dispose(h->prefix);
156     dispose(h->uri);
157     dispose(h);
158   }
159   /* Pop stack itself */
160   p = stack;
161   stack = stack->next;
162   dispose(p);
163 }
164 
165 /* handle_error -- called when a parse error occurred */
handle_error(void * clientdata,const string s,int lineno)166 void handle_error(void *clientdata, const string s, int lineno)
167 {
168   fprintf(stderr, "%d: %s\n", lineno, s);
169   has_error = true;
170 }
171 
/* start -- called before the first event is reported
 *
 * This program keeps no per-parse client data, so the value returned
 * is always NULL.
 */
void *start(void)
{
  void *no_clientdata = NULL;

  return no_clientdata;
}
177 
/* end -- called after the last event is reported
 *
 * Nothing needs to be finished; clientdata is ignored.
 */
void end(void *clientdata)
{
  (void) clientdata;
}
183 
184 /* handle_comment -- called after a comment is parsed */
handle_comment(void * clientdata,string commenttext)185 void handle_comment(void *clientdata, string commenttext)
186 {
187   if (do_decls) printf("<!--%s-->", commenttext);
188   free(commenttext);
189 }
190 
191 /* handle_text -- called after a text chunk is parsed */
handle_text(void * clientdata,string text)192 void handle_text(void *clientdata, string text)
193 {
194   printf("%s", text);
195   free(text);
196 }
197 
198 /* handle_decl -- called after a declaration is parsed */
handle_decl(void * clientdata,string gi,string fpi,string url)199 void handle_decl(void *clientdata, string gi, string fpi, string url)
200 {
201   if (do_decls) {
202     printf("<!DOCTYPE %s", gi);
203     if (fpi) printf(" PUBLIC \"%s\">", fpi);
204     if (url) printf(" %s\"%s\">", fpi ? "" : "SYSTEM ", url);
205     printf(">");
206   }
207   free(gi);
208   if (fpi) free(fpi);
209   if (url) free(url);
210 }
211 
212 /* handle_pi -- called after a PI is parsed */
handle_pi(void * clientdata,string pi_text)213 void handle_pi(void *clientdata, string pi_text)
214 {
215   if (do_decls) printf("<?%s>", pi_text);
216   free(pi_text);
217 }
218 
219 /* handle_starttag -- called after a start tag is parsed */
handle_starttag(void * clientdata,string name,pairlist attribs)220 void handle_starttag(void *clientdata, string name, pairlist attribs)
221 {
222   do_tag(name, attribs, false);
223   free(name);
224   pairlist_delete(attribs);
225 }
226 
227 /* handle_emptytag -- called after an empty tag is parsed */
handle_emptytag(void * clientdata,string name,pairlist attribs)228 void handle_emptytag(void *clientdata, string name, pairlist attribs)
229 {
230   do_tag(name, attribs, true);
231   pop_symboltable(name);
232   free(name);
233   pairlist_delete(attribs);
234 }
235 
236 /* handle_endtag -- called after an endtag is parsed (name may be "") */
handle_endtag(void * clientdata,string name)237 void handle_endtag(void *clientdata, string name)
238 {
239   /* Printf the end tag */
240   printf("</");
241   print_globalname(name, true);
242   putchar('>');
243 
244   /* Unwind the symbol table */
245   pop_symboltable(name);
246   free(name);
247 }
248 
249 /* usage -- print usage message and exit */
usage(string prog)250 static void usage(string prog)
251 {
252   fprintf(stderr, "Version %s\nUsage: %s [-d] [xml-file-or-url]\n", VERSION, prog);
253   exit(2);
254 }
255 
main(int argc,char * argv[])256 int main(int argc, char *argv[])
257 {
258   int i, status = 200;
259 
260   /* Bind the parser callback routines to our handlers */
261   set_error_handler(handle_error);
262   set_start_handler(start);
263   set_end_handler(end);
264   set_comment_handler(handle_comment);
265   set_text_handler(handle_text);
266   set_decl_handler(handle_decl);
267   set_pi_handler(handle_pi);
268   set_starttag_handler(handle_starttag);
269   set_emptytag_handler(handle_emptytag);
270   set_endtag_handler(handle_endtag);
271 
272   /* Parse command line arguments */
273   for (i = 1; i < argc && argv[i][0] == '-' && !eq(argv[i], "--"); i++) {
274     switch (argv[i][1]) {
275       case 'd': do_decls = false; break;
276       default: usage(argv[0]);
277     }
278   }
279   if (i < argc && eq(argv[i], "--")) i++;
280 
281   if (i == argc) yyin = stdin;
282   else if (i == argc - 1) yyin = fopenurl(argv[i], "r", &status);
283   else usage(argv[0]);
284 
285   if (yyin == NULL) {perror(argv[i]); exit(1);}
286   if (status != 200) errexit("%s : %s\n", argv[i], http_strerror(status));
287 
288   if (yyparse() != 0) exit(3);
289 
290   return has_error ? 1 : 0;
291 }
292