1 /*
2 * Add an ID to selected elements
3 *
4 * Copyright © 2000-2012 World Wide Web Consortium
5 * See http://www.w3.org/Consortium/Legal/copyright-software
6 *
7 * Author: Bert Bos <bert@w3.org>
8 * Created: 20 Aug 2000
9 * Version: $Id: hxaddid.c,v 1.8 2017/11/24 09:50:25 bbos Exp $
10 *
11 **/
12 #include "config.h"
13 #include <assert.h>
14 #include <ctype.h>
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <time.h>
18 #include <stdbool.h>
19 #if STDC_HEADERS
20 # include <string.h>
21 #else
22 # ifndef HAVE_STRCHR
23 # define strchr index
24 # define strrchr rindex
25 # endif
26 # ifndef HAVE_STRSTR
27 # include "strstr.e"
28 # endif
29 #endif
30
31 #ifdef HAVE_ERRNO_H
32 # include <errno.h>
33 #endif
34 #include "export.h"
35 #include "types.e"
36 #include "heap.e"
37 #include "tree.e"
38 #include "html.e"
39 #include "scan.e"
40 #include "dict.e"
41 #include "openurl.e"
42 #include "errexit.e"
43 #include "genid.e"
44 #include "class.e"
45
46 static Tree tree;
47 static bool xml = false; /* Use <empty /> convention */
48 static string targetelement = NULL; /* Element to extract */
49 static string targetclass = NULL; /* Class to extract */
50
51
52 /* is_match check whether the element matches the target element and class */
is_match(const string name,pairlist attribs)53 static bool is_match(const string name, pairlist attribs)
54 {
55 if (xml)
56 return ((!targetelement || strcasecmp(name, targetelement) == 0)
57 && (!targetclass || has_class(attribs, targetclass)));
58 else
59 return ((!targetelement || strcmp(name, targetelement) == 0)
60 && (!targetclass || has_class(attribs, targetclass)));
61 }
62
63 /* handle_error -- called when a parse error occurred */
handle_error(void * clientdata,const string s,int lineno)64 static void handle_error(void *clientdata, const string s, int lineno)
65 {
66 fprintf(stderr, "%d: %s\n", lineno, s);
67 }
68
69 /* start -- called before the first event is reported */
start(void)70 static void* start(void)
71 {
72 tree = create();
73 return NULL;
74 }
75
/*
 * end -- parser callback, called after the last event is reported
 *
 * Nothing needs to be done here; the tree is written out by main().
 */
static void end(void *clientdata)
{
  (void) clientdata;		/* Unused */
}
81
82 /* handle_comment -- called after a comment is parsed */
handle_comment(void * clientdata,string commenttext)83 static void handle_comment(void *clientdata, string commenttext)
84 {
85 tree = append_comment(tree, commenttext);
86 }
87
88 /* handle_text -- called after a tex chunk is parsed */
handle_text(void * clientdata,string text)89 static void handle_text(void *clientdata, string text)
90 {
91 tree = append_text(tree, text);
92 }
93
94 /* handle_declaration -- called after a declaration is parsed */
handle_decl(void * clientdata,string gi,string fpi,string url)95 static void handle_decl(void *clientdata, string gi,
96 string fpi, string url)
97 {
98 tree = append_declaration(tree, gi, fpi, url);
99 }
100
101 /* handle_proc_instr -- called after a PI is parsed */
handle_pi(void * clientdata,string pi_text)102 static void handle_pi(void *clientdata, string pi_text)
103 {
104 tree = append_procins(tree, pi_text);
105 }
106
107 /* handle_starttag -- called after a start tag is parsed */
handle_starttag(void * clientdata,string name,pairlist attribs)108 static void handle_starttag(void *clientdata, string name, pairlist attribs)
109 {
110 conststring id;
111
112 tree = html_push(tree, name, attribs);
113
114 /* If it has an ID, store it (so we don't accidentally generate it) */
115 if ((id = pairlist_get(attribs, "id"))) storeID(id);
116 }
117
118 /* handle_emptytag -- called after an empty tag is parsed */
handle_emptytag(void * clientdata,string name,pairlist attribs)119 static void handle_emptytag(void *clientdata, string name, pairlist attribs)
120 {
121 handle_starttag(clientdata, name, attribs);
122 }
123
124 /* handle_endtag -- called after an endtag is parsed (name may be "") */
handle_endtag(void * clientdata,string name)125 static void handle_endtag(void *clientdata, string name)
126 {
127 tree = html_pop(tree, name);
128 }
129
130 /* expand -- write the tree, inserting ID's at matching elements */
expand(Tree t)131 static void expand(Tree t)
132 {
133 Tree h;
134 pairlist a;
135
136 for (h = t->children; h != NULL; h = h->sister) {
137 switch (h->tp) {
138 case Text: printf("%s", h->text); break;
139 case Comment: printf("<!--%s-->", h->text); break;
140 case Declaration:
141 printf("<!DOCTYPE %s", h->name);
142 if (h->text) printf(" PUBLIC \"%s\"", h->text);
143 if (h->url) printf(" %s\"%s\"", h->text ? "" : "SYSTEM ", h->url);
144 printf(">");
145 break;
146 case Procins: printf("<?%s>", h->text); break;
147 case Element:
148 if (is_match(h->name, h->attribs) && !get_attrib(h, "id"))
149 set_attrib(h, "id", gen_id(h));
150 printf("<%s", h->name);
151 for (a = h->attribs; a != NULL; a = a->next) {
152 printf(" %s", a->name);
153 if (a->value != NULL) printf("=\"%s\"", a->value);
154 }
155 if (is_empty(h->name)) {
156 printf(xml ? " />" : ">");
157 } else {
158 printf(">");
159 expand(h);
160 printf("</%s>", h->name);
161 }
162 break;
163 case Root: assert(! "Cannot happen"); break;
164 default: assert(! "Cannot happen");
165 }
166 }
167 }
168
169 /* usage -- print usage message and exit */
usage(string name)170 static void usage(string name)
171 {
172 errexit("Usage: %s [-x] [-v] [--] elem|.class|elem.class [html-file]\n",
173 name);
174 }
175
176
main(int argc,char * argv[])177 int main(int argc, char *argv[])
178 {
179 char *p;
180 int i, status = 200;
181
182 /* Bind the parser callback routines to our handlers */
183 set_error_handler(handle_error);
184 set_start_handler(start);
185 set_end_handler(end);
186 set_comment_handler(handle_comment);
187 set_text_handler(handle_text);
188 set_decl_handler(handle_decl);
189 set_pi_handler(handle_pi);
190 set_starttag_handler(handle_starttag);
191 set_emptytag_handler(handle_emptytag);
192 set_endtag_handler(handle_endtag);
193
194 /* Parse command line options */
195 for (i = 1; i < argc && argv[i][0] == '-' && !eq(argv[i], "--"); i++) {
196 switch (argv[i][1]) {
197 case 'x': xml = true; break;
198 case 'v': printf("Version: %s %s\n", PACKAGE, VERSION); return 0;
199 default: usage(argv[0]);
200 }
201 }
202 if (i < argc && eq(argv[i], "--")) i++;
203
204 if (i == argc) usage(argv[0]);
205 if (argv[i][0] == '.') { /* Class name */
206 targetclass = argv[i] + 1;
207 } else { /* Element name */
208 targetelement = argv[i];
209 if ((p = strchr(targetelement, '.'))) {
210 *p = '\0';
211 targetclass = p + 1;
212 }
213 }
214 i++;
215 if (i == argc) yyin = stdin;
216 else if (i == argc - 1 && eq(argv[i], "-")) yyin = stdin;
217 else if (i == argc - 1) yyin = fopenurl(argv[i], "r", &status);
218 else usage(argv[0]);
219
220 if (yyin == NULL) {perror(argv[i]); exit(1);}
221 if (status != 200) errexit("%s : %s\n", argv[i], http_strerror(status));
222
223 if (yyparse() != 0) exit(3);
224
225 tree = get_root(tree);
226 expand(tree);
227 tree_delete(tree); /* Just to test memory mgmt */
228 return 0;
229 }
230