1 /*
2 * Add a numbered list of links at the end of an HTML file
3 *
4 * Copyright © 2001-2015 World Wide Web Consortium
5 * See http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
6 *
7 * Created 23 Jan 2015 (based on a Perl version from 1 Feb 2001)
8 * Bert Bos <bert@w3.org>
9 */
10 #include "config.h"
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <stdbool.h>
14 #ifdef HAVE_STRING_H
15 # include <string.h>
16 #elif HAVE_STRINGS_H
17 # include <strings.h>
18 #endif
19 #ifdef HAVE_UNISTD_H
20 # include <unistd.h>
21 #endif
22 #include "types.e"
23 #include "dict.e"
24 #include "openurl.e"
25 #include "errexit.e"
26 #include "heap.e"
27 #include "html.e"
28 #include "scan.e"
29 #include "url.e"
30
31
32 static conststring attname[] = { /* Attributes that contain URLs: */
33 "src", "href", "data", "longdesc", "cite", "action", "profile",
34 "background", "usemap", "classid", "codebase"};
35 static pairlist list = NULL; /* Stored list of URLs */
36 static bool has_error = false; /* Parsing errors occurred */
37 static conststring base = NULL; /* Make URLs relative to this base */
38
39
40 /* pairlist_push -- insert a name/value pair at the start of a list */
pairlist_push(pairlist * p,const conststring name,const conststring val)41 static void pairlist_push(pairlist *p, const conststring name, const conststring val)
42 {
43 pairlist h;
44
45 new(h);
46 h->name = newstring(name);
47 h->value = newstring(val);
48 h->next = *p;
49 *p = h;
50 }
51
52
53 /* print_list_recursive -- print LI items for all entries in list */
print_list_recursive(const pairlist list)54 static void print_list_recursive(const pairlist list)
55 {
56 conststring url;
57
58 /* ToDo: Escape double quotes */
59 if (list) {
60 print_list_recursive(list->next);
61 url = base ? URL_s_absolutize(base, list->name) : list->name;
62 printf("<li><a class=\"%s\" href=\"%s\">%s</a></li>\n",
63 list->value, url, url);
64 }
65 }
66
67
68 /* print_list -- print an OL with the entries of list */
print_list(const pairlist list)69 static void print_list(const pairlist list)
70 {
71 if (list) {
72 printf("<ol class=\"urllist\">\n");
73 print_list_recursive(list);
74 printf("</ol>\n");
75 }
76 }
77
78
79 /* handle_error -- called when a parse error occurred */
handle_error(void * clientdata,const string s,int lineno)80 void handle_error(void *clientdata, const string s, int lineno)
81 {
82 fprintf(stderr, "%d: %s\n", lineno, s);
83 has_error = true;
84 }
85
86
87 /* start -- called before the first event is reported */
start(void)88 void* start(void)
89 {
90 return NULL;
91 }
92
93
94 /* end -- called after the last event is reported */
end(void * clientdata)95 void end(void *clientdata)
96 {
97 /* If we still have a list, print it here */
98 if (list) {
99 print_list(list);
100 pairlist_delete(list);
101 list = NULL;
102 }
103 }
104
105
106 /* handle_comment -- called after a comment is parsed */
handle_comment(void * clientdata,string commenttext)107 void handle_comment(void *clientdata, string commenttext)
108 {
109 printf("<!--%s-->", commenttext);
110 }
111
112
113 /* handle_text -- called after a text chunk is parsed */
handle_text(void * clientdata,string text)114 void handle_text(void *clientdata, string text)
115 {
116 printf("%s", text);
117 }
118
119
120 /* handle_decl -- called after a declaration is parsed */
handle_decl(void * clientdata,string gi,string fpi,string url)121 void handle_decl(void *clientdata, string gi, string fpi,
122 string url)
123 {
124 if (fpi && url)
125 printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", gi, fpi, url);
126 else if (fpi)
127 printf("<!DOCTYPE %s PUBLIC \"%s\">\n", gi, fpi);
128 else if (url)
129 printf("<!DOCTYPE %s SYSTEM \"%s\">\n", gi, url);
130 else
131 printf("<!DOCTYPE %s>\n", gi);
132 }
133
134
135 /* handle_pi -- called after a PI is parsed */
handle_pi(void * clientdata,string pi_text)136 void handle_pi(void *clientdata, string pi_text)
137 {
138 printf("<?%s>", pi_text);
139 }
140
141
142 /* print_attrs -- print attributes */
print_attrs(const pairlist attribs)143 static void print_attrs(const pairlist attribs)
144 {
145 pairlist p;
146
147 /* ToDo: Distinguish SGML (a NULL value means that the name is the
148 value and the actual attribute name is implicit) and XML? */
149 for (p = attribs; p; p = p->next)
150 printf(" %s=\"%s\"", p->name, p->value ? p->value : p->name);
151 }
152
153
154 /* handle_starttag -- called after a start tag is parsed */
handle_starttag(void * clientdata,string name,pairlist attribs)155 void handle_starttag(void *clientdata, string name, pairlist attribs)
156 {
157 int i;
158 conststring url;
159
160 /* Store any URLs from attributes */
161 for (i = 0; i < sizeof(attname)/sizeof(*attname); i++)
162 if ((url = pairlist_get(attribs, attname[i])))
163 pairlist_push(&list, url, attname[i]);
164
165 printf("<%s", name);
166 print_attrs(attribs);
167 printf(">");
168 }
169
170
171 /* handle_emptytag -- called after an empty element is parsed */
handle_emptytag(void * clientdata,string name,pairlist attribs)172 void handle_emptytag(void *clientdata, string name, pairlist attribs)
173 {
174 int i;
175 conststring url;
176
177 /* Store any URLs from attributes */
178 for (i = 0; i < sizeof(attname)/sizeof(*attname); i++)
179 if ((url = pairlist_get(attribs, attname[i])))
180 pairlist_push(&list, url, attname[i]);
181
182 printf("<%s", name);
183 print_attrs(attribs);
184 printf(" />");
185 }
186
187
188 /* handle_endtag -- called after an endtag is parsed (name may be "") */
handle_endtag(void * clientdata,string name)189 void handle_endtag(void *clientdata, string name)
190 {
191 /* Just before </body>, print the list. or if we see </html> and
192 haven't printed the list yet, print it there */
193 if (list && (strcasecmp(name,"body") == 0 || strcasecmp(name,"html") == 0)) {
194 print_list(list);
195 pairlist_delete(list);
196 list = NULL;
197 }
198 printf("</%s>", name);
199 }
200
201
202 /* usage -- print usage message and exit */
usage(string prog)203 static void usage(string prog)
204 {
205 fprintf(stderr, "Usage: %s [html-file-or-url]\n", prog);
206 exit(2);
207 }
208
209
210 /* main -- main body */
main(int argc,char * argv[])211 int main(int argc, char *argv[])
212 {
213 int c, status = 200;
214
215 while ((c = getopt(argc, argv, "b:")) != -1)
216 switch (c) {
217 case 'b': base = optarg; break;
218 default: usage(argv[0]);
219 }
220
221 if (optind < argc - 1) usage(argv[0]);
222 else if (optind > argc - 1 || eq(argv[optind], "-")) yyin = stdin;
223 else yyin = fopenurl(argv[optind], "r", &status);
224
225 if (!yyin) {perror(argv[optind]); exit(2);}
226 if (status != 200) errexit("%s : %s\n", argv[optind], http_strerror(status));
227
228 /* Bind the parser callback routines to our handlers */
229 set_error_handler(handle_error);
230 set_start_handler(start);
231 set_end_handler(end);
232 set_comment_handler(handle_comment);
233 set_text_handler(handle_text);
234 set_decl_handler(handle_decl);
235 set_pi_handler(handle_pi);
236 set_starttag_handler(handle_starttag);
237 set_emptytag_handler(handle_emptytag);
238 set_endtag_handler(handle_endtag);
239
240 if (yyparse() != 0) {exit(3);}
241 return has_error ? 1 : 0;
242 }
243