1 /*
2 * Remove subtrees which have a certain class attribute.
3 *
4 * Copyright © 1994-2000 World Wide Web Consortium
5 * See http://www.w3.org/Consortium/Legal/copyright-software
6 *
7 * Bert Bos <bert@w3.org>
8 * Created Feb 2000
9 * $Id: hxprune.c,v 1.8 2017/11/24 09:50:25 bbos Exp $
10 *
11 **/
12 /* #include <mcheck.h> */
13 #include "config.h"
14 #include <assert.h>
15 #include <ctype.h>
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <time.h>
19 #include <stdbool.h>
20 #if STDC_HEADERS
21 # include <string.h>
22 #else
23 # ifndef HAVE_STRCHR
24 # define strchr index
25 # define strrchr rindex
26 # endif
27 # ifndef HAVE_STRSTR
28 # include "strstr.e"
29 # endif
30 #endif
31 #include "export.h"
32 #include "types.e"
33 #include "tree.e"
34 #include "html.e"
35 #include "scan.e"
36 #include "dict.e"
37 #include "openurl.e"
38 #include "class.e"
39 #include "errexit.e"
40
#define EXCLUDE_CLASS "exclude"		/* Default value for class */

static Tree tree;			/* Document tree, built up by the parser callbacks below */
static bool xml = false;		/* Use <empty /> convention */
45
46
47 /* handle_error -- called when a parse error occurred */
handle_error(void * clientdata,const string s,int lineno)48 static void handle_error(void *clientdata, const string s, int lineno)
49 {
50 fprintf(stderr, "%d: %s\n", lineno, s);
51 }
52
53 /* start -- called before the first event is reported */
start(void)54 static void* start(void)
55 {
56 tree = create();
57 return NULL;
58 }
59
/* end -- called after the last event is reported
 *
 * Intentionally does nothing: the tree is written out by main() after
 * parsing has finished.
 */
static void end(void *clientdata)
{
  (void) clientdata;			/* Nothing to clean up here */
}
65
66 /* handle_comment -- called after a comment is parsed */
handle_comment(void * clientdata,string commenttext)67 static void handle_comment(void *clientdata, string commenttext)
68 {
69 tree = append_comment(tree, commenttext);
70 }
71
72 /* handle_text -- called after a tex chunk is parsed */
handle_text(void * clientdata,string text)73 static void handle_text(void *clientdata, string text)
74 {
75 tree = append_text(tree, text);
76 }
77
78 /* handle_declaration -- called after a declaration is parsed */
handle_decl(void * clientdata,string gi,string fpi,string url)79 static void handle_decl(void *clientdata, string gi,
80 string fpi, string url)
81 {
82 tree = append_declaration(tree, gi, fpi, url);
83 }
84
85 /* handle_proc_instr -- called after a PI is parsed */
handle_pi(void * clientdata,string pi_text)86 static void handle_pi(void *clientdata, string pi_text)
87 {
88 tree = append_procins(tree, pi_text);
89 }
90
91 /* handle_starttag -- called after a start tag is parsed */
handle_starttag(void * clientdata,string name,pairlist attribs)92 static void handle_starttag(void *clientdata, string name, pairlist attribs)
93 {
94 tree = html_push(tree, name, attribs);
95 }
96
97 /* handle_emptytag -- called after an empty tag is parsed */
handle_emptytag(void * clientdata,string name,pairlist attribs)98 static void handle_emptytag(void *clientdata, string name, pairlist attribs)
99 {
100 tree = html_push(tree, name, attribs);
101 }
102
103 /* handle_pop -- called after an endtag is parsed (name may be "") */
handle_endtag(void * clientdata,string name)104 static void handle_endtag(void *clientdata, string name)
105 {
106 tree = html_pop(tree, name);
107 }
108
109 /* prune -- write the tree, suppressing elements with a certain class */
prune(Tree t,const string class)110 static void prune(Tree t, const string class)
111 {
112 Tree h;
113 pairlist a;
114
115 for (h = t->children; h != NULL; h = h->sister) {
116 switch (h->tp) {
117 case Text:
118 printf("%s", h->text);
119 break;
120 case Comment:
121 printf("<!--%s-->", h->text);
122 break;
123 case Declaration:
124 printf("<!DOCTYPE %s", h->name);
125 if (h->text) printf(" PUBLIC \"%s\"", h->text);
126 if (h->url) printf(" %s\"%s\"", h->text ? "" : "SYSTEM ", h->url);
127 printf(">");
128 break;
129 case Procins:
130 printf("<?%s>", h->text);
131 break;
132 case Element:
133 if (! has_class(h->attribs, class)) {
134 printf("<%s", h->name);
135 for (a = h->attribs; a != NULL; a = a->next) {
136 printf(" %s", a->name);
137 if (a->value != NULL) printf("=\"%s\"", a->value);
138 else if (xml) printf("=\"%s\"", a->name);
139 }
140 if (is_empty(h->name)) {
141 assert(h->children == NULL);
142 printf(xml ? " />" : ">");
143 } else {
144 printf(">");
145 prune(h, class);
146 printf("</%s>", h->name);
147 }
148 }
149 break;
150 case Root:
151 assert(! "Cannot happen");
152 break;
153 default:
154 assert(! "Cannot happen");
155 }
156 }
157 }
158
159 /* usage -- print usage message and exit */
usage(string name)160 static void usage(string name)
161 {
162 fprintf(stderr, "Usage: %s [-c class] [-x] [html-file]\n", name);
163 exit(1);
164 }
165
166
main(int argc,char * argv[])167 int main(int argc, char *argv[])
168 {
169 int i, status;
170 string class = EXCLUDE_CLASS;
171
172 /* mtrace(); */
173
174 /* Bind the parser callback routines to our handlers */
175 set_error_handler(handle_error);
176 set_start_handler(start);
177 set_end_handler(end);
178 set_comment_handler(handle_comment);
179 set_text_handler(handle_text);
180 set_decl_handler(handle_decl);
181 set_pi_handler(handle_pi);
182 set_starttag_handler(handle_starttag);
183 set_emptytag_handler(handle_emptytag);
184 set_endtag_handler(handle_endtag);
185
186 yyin = stdin;
187 for (i = 1; i < argc; i++) {
188 if (eq(argv[i], "-c")) {
189 if (i >= argc - 1) usage(argv[0]);
190 class = argv[++i];
191 } else if (eq(argv[i], "-x")) {
192 xml = true;
193 } else {
194 yyin = fopenurl(argv[i], "r", &status);
195 if (yyin == NULL) {perror(argv[1]); exit(2);}
196 if (status != 200) errexit("%s : %s\n", argv[i], http_strerror(status));
197 }
198 }
199
200 if (yyparse() != 0) {
201 exit(3);
202 }
203 tree = get_root(tree);
204 prune(tree, class);
205 tree_delete(tree); /* Just to test memory mgmt */
206 return 0;
207 }
208