1 /*
2  * Add an ID to selected elements
3  *
4  * Copyright © 2000-2012 World Wide Web Consortium
5  * See http://www.w3.org/Consortium/Legal/copyright-software
6  *
7  * Author: Bert Bos <bert@w3.org>
8  * Created: 20 Aug 2000
9  * Version: $Id: hxaddid.c,v 1.8 2017/11/24 09:50:25 bbos Exp $
10  *
11  **/
12 #include "config.h"
13 #include <assert.h>
14 #include <ctype.h>
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <time.h>
18 #include <stdbool.h>
19 #if STDC_HEADERS
20 # include <string.h>
21 #else
22 # ifndef HAVE_STRCHR
23 #  define strchr index
24 #  define strrchr rindex
25 # endif
26 # ifndef HAVE_STRSTR
27 #  include "strstr.e"
28 # endif
29 #endif
30 
31 #ifdef HAVE_ERRNO_H
32 #  include <errno.h>
33 #endif
34 #include "export.h"
35 #include "types.e"
36 #include "heap.e"
37 #include "tree.e"
38 #include "html.e"
39 #include "scan.e"
40 #include "dict.e"
41 #include "openurl.e"
42 #include "errexit.e"
43 #include "genid.e"
44 #include "class.e"
45 
46 static Tree tree;
47 static bool xml = false;			/* Use <empty /> convention */
48 static string targetelement = NULL;		/* Element to extract */
49 static string targetclass = NULL;		/* Class to extract */
50 
51 
52 /* is_match check whether the element matches the target element and class */
is_match(const string name,pairlist attribs)53 static bool is_match(const string name, pairlist attribs)
54 {
55   if (xml)
56     return ((!targetelement || strcasecmp(name, targetelement) == 0)
57 	    && (!targetclass || has_class(attribs, targetclass)));
58   else
59     return ((!targetelement || strcmp(name, targetelement) == 0)
60 	    && (!targetclass || has_class(attribs, targetclass)));
61 }
62 
63 /* handle_error -- called when a parse error occurred */
handle_error(void * clientdata,const string s,int lineno)64 static void handle_error(void *clientdata, const string s, int lineno)
65 {
66   fprintf(stderr, "%d: %s\n", lineno, s);
67 }
68 
69 /* start -- called before the first event is reported */
start(void)70 static void* start(void)
71 {
72   tree = create();
73   return NULL;
74 }
75 
/* end -- called after the last event is reported
 *
 * Nothing to do here: main() writes the tree after parsing has finished.
 */
static void end(void *clientdata)
{
  (void) clientdata;
}
81 
82 /* handle_comment -- called after a comment is parsed */
handle_comment(void * clientdata,string commenttext)83 static void handle_comment(void *clientdata, string commenttext)
84 {
85   tree = append_comment(tree, commenttext);
86 }
87 
88 /* handle_text -- called after a tex chunk is parsed */
handle_text(void * clientdata,string text)89 static void handle_text(void *clientdata, string text)
90 {
91   tree = append_text(tree, text);
92 }
93 
94 /* handle_declaration -- called after a declaration is parsed */
handle_decl(void * clientdata,string gi,string fpi,string url)95 static void handle_decl(void *clientdata, string gi,
96 			string fpi, string url)
97 {
98   tree = append_declaration(tree, gi, fpi, url);
99 }
100 
101 /* handle_proc_instr -- called after a PI is parsed */
handle_pi(void * clientdata,string pi_text)102 static void handle_pi(void *clientdata, string pi_text)
103 {
104   tree = append_procins(tree, pi_text);
105 }
106 
107 /* handle_starttag -- called after a start tag is parsed */
handle_starttag(void * clientdata,string name,pairlist attribs)108 static void handle_starttag(void *clientdata, string name, pairlist attribs)
109 {
110   conststring id;
111 
112   tree = html_push(tree, name, attribs);
113 
114   /* If it has an ID, store it (so we don't accidentally generate it) */
115   if ((id = pairlist_get(attribs, "id"))) storeID(id);
116 }
117 
118 /* handle_emptytag -- called after an empty tag is parsed */
handle_emptytag(void * clientdata,string name,pairlist attribs)119 static void handle_emptytag(void *clientdata, string name, pairlist attribs)
120 {
121   handle_starttag(clientdata, name, attribs);
122 }
123 
124 /* handle_endtag -- called after an endtag is parsed (name may be "") */
handle_endtag(void * clientdata,string name)125 static void handle_endtag(void *clientdata, string name)
126 {
127   tree = html_pop(tree, name);
128 }
129 
130 /* expand -- write the tree, inserting ID's at matching elements */
expand(Tree t)131 static void expand(Tree t)
132 {
133   Tree h;
134   pairlist a;
135 
136   for (h = t->children; h != NULL; h = h->sister) {
137     switch (h->tp) {
138       case Text: printf("%s", h->text); break;
139       case Comment: printf("<!--%s-->", h->text); break;
140       case Declaration:
141 	printf("<!DOCTYPE %s", h->name);
142 	if (h->text) printf(" PUBLIC \"%s\"", h->text);
143 	if (h->url) printf(" %s\"%s\"", h->text ? "" : "SYSTEM ", h->url);
144 	printf(">");
145 	break;
146       case Procins: printf("<?%s>", h->text); break;
147       case Element:
148 	if (is_match(h->name, h->attribs) && !get_attrib(h, "id"))
149 	  set_attrib(h, "id", gen_id(h));
150 	printf("<%s", h->name);
151 	for (a = h->attribs; a != NULL; a = a->next) {
152 	  printf(" %s", a->name);
153 	  if (a->value != NULL) printf("=\"%s\"", a->value);
154 	}
155 	if (is_empty(h->name)) {
156 	  printf(xml ? " />" : ">");
157 	} else {
158 	  printf(">");
159 	  expand(h);
160 	  printf("</%s>", h->name);
161 	}
162 	break;
163       case Root: assert(! "Cannot happen"); break;
164       default: assert(! "Cannot happen");
165     }
166   }
167 }
168 
169 /* usage -- print usage message and exit */
usage(string name)170 static void usage(string name)
171 {
172   errexit("Usage: %s [-x] [-v] [--] elem|.class|elem.class [html-file]\n",
173 	  name);
174 }
175 
176 
main(int argc,char * argv[])177 int main(int argc, char *argv[])
178 {
179   char *p;
180   int i, status = 200;
181 
182   /* Bind the parser callback routines to our handlers */
183   set_error_handler(handle_error);
184   set_start_handler(start);
185   set_end_handler(end);
186   set_comment_handler(handle_comment);
187   set_text_handler(handle_text);
188   set_decl_handler(handle_decl);
189   set_pi_handler(handle_pi);
190   set_starttag_handler(handle_starttag);
191   set_emptytag_handler(handle_emptytag);
192   set_endtag_handler(handle_endtag);
193 
194   /* Parse command line options */
195   for (i = 1; i < argc && argv[i][0] == '-' && !eq(argv[i], "--"); i++) {
196     switch (argv[i][1]) {
197       case 'x': xml = true; break;
198       case 'v': printf("Version: %s %s\n", PACKAGE, VERSION); return 0;
199       default: usage(argv[0]);
200     }
201   }
202   if (i < argc && eq(argv[i], "--")) i++;
203 
204   if (i == argc) usage(argv[0]);
205   if (argv[i][0] == '.') {			/* Class name */
206     targetclass = argv[i] + 1;
207   } else {					/* Element name */
208     targetelement = argv[i];
209     if ((p = strchr(targetelement, '.'))) {
210       *p = '\0';
211       targetclass = p + 1;
212     }
213   }
214   i++;
215   if (i == argc) yyin = stdin;
216   else if (i == argc - 1 && eq(argv[i], "-")) yyin = stdin;
217   else if (i == argc - 1) yyin = fopenurl(argv[i], "r", &status);
218   else usage(argv[0]);
219 
220   if (yyin == NULL) {perror(argv[i]); exit(1);}
221   if (status != 200) errexit("%s : %s\n", argv[i], http_strerror(status));
222 
223   if (yyparse() != 0) exit(3);
224 
225   tree = get_root(tree);
226   expand(tree);
227   tree_delete(tree);				/* Just to test memory mgmt */
228   return 0;
229 }
230