1 /*
2  * Remove subtrees which have a certain class attribute.
3  *
4  * Copyright © 1994-2000 World Wide Web Consortium
5  * See http://www.w3.org/Consortium/Legal/copyright-software
6  *
7  * Bert Bos <bert@w3.org>
8  * Created Feb 2000
9  * $Id: hxprune.c,v 1.8 2017/11/24 09:50:25 bbos Exp $
10  *
11  **/
12 /* #include <mcheck.h> */
13 #include "config.h"
14 #include <assert.h>
15 #include <ctype.h>
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <time.h>
19 #include <stdbool.h>
20 #if STDC_HEADERS
21 # include <string.h>
22 #else
23 # ifndef HAVE_STRCHR
24 #  define strchr index
25 #  define strrchr rindex
26 # endif
27 # ifndef HAVE_STRSTR
28 #  include "strstr.e"
29 # endif
30 #endif
31 #include "export.h"
32 #include "types.e"
33 #include "tree.e"
34 #include "html.e"
35 #include "scan.e"
36 #include "dict.e"
37 #include "openurl.e"
38 #include "class.e"
39 #include "errexit.e"
40 
41 #define EXCLUDE_CLASS "exclude"			/* Default value for class */
42 
43 static Tree tree;
44 static bool xml = false;			/* Use <empty /> convention */
45 
46 
47 /* handle_error -- called when a parse error occurred */
handle_error(void * clientdata,const string s,int lineno)48 static void handle_error(void *clientdata, const string s, int lineno)
49 {
50   fprintf(stderr, "%d: %s\n", lineno, s);
51 }
52 
53 /* start -- called before the first event is reported */
start(void)54 static void* start(void)
55 {
56   tree = create();
57   return NULL;
58 }
59 
/* end -- parser callback run after the last event; nothing to do here */
static void end(void *clientdata)
{
  (void) clientdata;				/* Not used */
}
65 
66 /* handle_comment -- called after a comment is parsed */
handle_comment(void * clientdata,string commenttext)67 static void handle_comment(void *clientdata, string commenttext)
68 {
69   tree = append_comment(tree, commenttext);
70 }
71 
72 /* handle_text -- called after a tex chunk is parsed */
handle_text(void * clientdata,string text)73 static void handle_text(void *clientdata, string text)
74 {
75   tree = append_text(tree, text);
76 }
77 
78 /* handle_declaration -- called after a declaration is parsed */
handle_decl(void * clientdata,string gi,string fpi,string url)79 static void handle_decl(void *clientdata, string gi,
80 			string fpi, string url)
81 {
82   tree = append_declaration(tree, gi, fpi, url);
83 }
84 
85 /* handle_proc_instr -- called after a PI is parsed */
handle_pi(void * clientdata,string pi_text)86 static void handle_pi(void *clientdata, string pi_text)
87 {
88   tree = append_procins(tree, pi_text);
89 }
90 
91 /* handle_starttag -- called after a start tag is parsed */
handle_starttag(void * clientdata,string name,pairlist attribs)92 static void handle_starttag(void *clientdata, string name, pairlist attribs)
93 {
94   tree = html_push(tree, name, attribs);
95 }
96 
97 /* handle_emptytag -- called after an empty tag is parsed */
handle_emptytag(void * clientdata,string name,pairlist attribs)98 static void handle_emptytag(void *clientdata, string name, pairlist attribs)
99 {
100   tree = html_push(tree, name, attribs);
101 }
102 
103 /* handle_pop -- called after an endtag is parsed (name may be "") */
handle_endtag(void * clientdata,string name)104 static void handle_endtag(void *clientdata, string name)
105 {
106   tree = html_pop(tree, name);
107 }
108 
109 /* prune -- write the tree, suppressing elements with a certain class */
prune(Tree t,const string class)110 static void prune(Tree t, const string class)
111 {
112   Tree h;
113   pairlist a;
114 
115   for (h = t->children; h != NULL; h = h->sister) {
116     switch (h->tp) {
117       case Text:
118 	printf("%s", h->text);
119 	break;
120       case Comment:
121 	printf("<!--%s-->", h->text);
122 	break;
123       case Declaration:
124 	printf("<!DOCTYPE %s", h->name);
125 	if (h->text) printf(" PUBLIC \"%s\"", h->text);
126 	if (h->url) printf(" %s\"%s\"", h->text ? "" : "SYSTEM ", h->url);
127 	printf(">");
128 	break;
129       case Procins:
130 	printf("<?%s>", h->text);
131 	break;
132       case Element:
133 	if (! has_class(h->attribs, class)) {
134 	  printf("<%s", h->name);
135 	  for (a = h->attribs; a != NULL; a = a->next) {
136 	    printf(" %s", a->name);
137 	    if (a->value != NULL) printf("=\"%s\"", a->value);
138 	    else if (xml) printf("=\"%s\"", a->name);
139 	  }
140 	  if (is_empty(h->name)) {
141 	    assert(h->children == NULL);
142 	    printf(xml ? " />" : ">");
143 	  } else {
144 	    printf(">");
145 	    prune(h, class);
146 	    printf("</%s>", h->name);
147 	  }
148 	}
149 	break;
150       case Root:
151 	assert(! "Cannot happen");
152 	break;
153       default:
154 	assert(! "Cannot happen");
155     }
156   }
157 }
158 
159 /* usage -- print usage message and exit */
usage(string name)160 static void usage(string name)
161 {
162   fprintf(stderr, "Usage: %s [-c class] [-x] [html-file]\n", name);
163   exit(1);
164 }
165 
166 
main(int argc,char * argv[])167 int main(int argc, char *argv[])
168 {
169   int i, status;
170   string class = EXCLUDE_CLASS;
171 
172   /*  mtrace(); */
173 
174   /* Bind the parser callback routines to our handlers */
175   set_error_handler(handle_error);
176   set_start_handler(start);
177   set_end_handler(end);
178   set_comment_handler(handle_comment);
179   set_text_handler(handle_text);
180   set_decl_handler(handle_decl);
181   set_pi_handler(handle_pi);
182   set_starttag_handler(handle_starttag);
183   set_emptytag_handler(handle_emptytag);
184   set_endtag_handler(handle_endtag);
185 
186   yyin = stdin;
187   for (i = 1; i < argc; i++) {
188     if (eq(argv[i], "-c")) {
189       if (i >= argc - 1) usage(argv[0]);
190       class = argv[++i];
191     } else if (eq(argv[i], "-x")) {
192       xml = true;
193     } else {
194       yyin = fopenurl(argv[i], "r", &status);
195       if (yyin == NULL) {perror(argv[1]); exit(2);}
196       if (status != 200) errexit("%s : %s\n", argv[i], http_strerror(status));
197     }
198   }
199 
200   if (yyparse() != 0) {
201     exit(3);
202   }
203   tree = get_root(tree);
204   prune(tree, class);
205   tree_delete(tree);				/* Just to test memory mgmt */
206   return 0;
207 }
208