1 /*
2  * Add a numbered list of links at the end of an HTML file
3  *
4  * Copyright © 2001-2015 World Wide Web Consortium
5  * See http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
6  *
7  * Created 23 Jan 2015 (based on a Perl version from 1 Feb 2001)
8  * Bert Bos <bert@w3.org>
9  */
10 #include "config.h"
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <stdbool.h>
14 #ifdef HAVE_STRING_H
15 #  include <string.h>
16 #elif HAVE_STRINGS_H
17 #  include <strings.h>
18 #endif
19 #ifdef HAVE_UNISTD_H
20 #  include <unistd.h>
21 #endif
22 #include "types.e"
23 #include "dict.e"
24 #include "openurl.e"
25 #include "errexit.e"
26 #include "heap.e"
27 #include "html.e"
28 #include "scan.e"
29 #include "url.e"
30 
31 
32 static conststring attname[] = {	/* Attributes that contain URLs: */
33   "src", "href", "data", "longdesc", "cite", "action", "profile",
34   "background", "usemap", "classid", "codebase"};
35 static pairlist list = NULL;		/* Stored list of URLs */
36 static bool has_error = false;		/* Parsing errors occurred */
37 static conststring base = NULL;		/* Make URLs relative to this base */
38 
39 
40 /* pairlist_push -- insert a name/value pair at the start of a list */
pairlist_push(pairlist * p,const conststring name,const conststring val)41 static void pairlist_push(pairlist *p, const conststring name, const conststring val)
42 {
43   pairlist h;
44 
45   new(h);
46   h->name = newstring(name);
47   h->value = newstring(val);
48   h->next = *p;
49   *p = h;
50 }
51 
52 
53 /* print_list_recursive -- print LI items for all entries in list */
print_list_recursive(const pairlist list)54 static void print_list_recursive(const pairlist list)
55 {
56   conststring url;
57 
58   /* ToDo: Escape double quotes */
59   if (list) {
60     print_list_recursive(list->next);
61     url = base ? URL_s_absolutize(base, list->name) : list->name;
62     printf("<li><a class=\"%s\" href=\"%s\">%s</a></li>\n",
63 	   list->value, url, url);
64   }
65 }
66 
67 
68 /* print_list -- print an OL with the entries of list */
print_list(const pairlist list)69 static void print_list(const pairlist list)
70 {
71   if (list) {
72     printf("<ol class=\"urllist\">\n");
73     print_list_recursive(list);
74     printf("</ol>\n");
75   }
76 }
77 
78 
79 /* handle_error -- called when a parse error occurred */
handle_error(void * clientdata,const string s,int lineno)80 void handle_error(void *clientdata, const string s, int lineno)
81 {
82   fprintf(stderr, "%d: %s\n", lineno, s);
83   has_error = true;
84 }
85 
86 
/* start -- parser callback: called before the first event is reported
 *
 * Returns the client data pointer for this parse. None of the
 * handlers in this file use it, so it is simply NULL.
 */
void *start(void)
{
  void *clientdata = NULL;

  return clientdata;
}
92 
93 
94 /* end -- called after the last event is reported */
end(void * clientdata)95 void end(void *clientdata)
96 {
97   /* If we still have a list, print it here */
98   if (list) {
99     print_list(list);
100     pairlist_delete(list);
101     list = NULL;
102   }
103 }
104 
105 
106 /* handle_comment -- called after a comment is parsed */
handle_comment(void * clientdata,string commenttext)107 void handle_comment(void *clientdata, string commenttext)
108 {
109   printf("<!--%s-->", commenttext);
110 }
111 
112 
113 /* handle_text -- called after a text chunk is parsed */
handle_text(void * clientdata,string text)114 void handle_text(void *clientdata, string text)
115 {
116   printf("%s", text);
117 }
118 
119 
120 /* handle_decl -- called after a declaration is parsed */
handle_decl(void * clientdata,string gi,string fpi,string url)121 void handle_decl(void *clientdata, string gi, string fpi,
122 		 string url)
123 {
124   if (fpi && url)
125     printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", gi, fpi, url);
126   else if (fpi)
127     printf("<!DOCTYPE %s PUBLIC \"%s\">\n", gi, fpi);
128   else if (url)
129     printf("<!DOCTYPE %s SYSTEM \"%s\">\n", gi, url);
130   else
131     printf("<!DOCTYPE %s>\n", gi);
132 }
133 
134 
135 /* handle_pi -- called after a PI is parsed */
handle_pi(void * clientdata,string pi_text)136 void handle_pi(void *clientdata, string pi_text)
137 {
138   printf("<?%s>", pi_text);
139 }
140 
141 
142 /* print_attrs -- print attributes */
print_attrs(const pairlist attribs)143 static void print_attrs(const pairlist attribs)
144 {
145   pairlist p;
146 
147   /* ToDo: Distinguish SGML (a NULL value means that the name is the
148      value and the actual attribute name is implicit) and XML? */
149   for (p = attribs; p; p = p->next)
150     printf(" %s=\"%s\"", p->name, p->value ? p->value : p->name);
151 }
152 
153 
154 /* handle_starttag -- called after a start tag is parsed */
handle_starttag(void * clientdata,string name,pairlist attribs)155 void handle_starttag(void *clientdata, string name, pairlist attribs)
156 {
157   int i;
158   conststring url;
159 
160   /* Store any URLs from attributes */
161   for (i = 0; i < sizeof(attname)/sizeof(*attname); i++)
162     if ((url = pairlist_get(attribs, attname[i])))
163       pairlist_push(&list, url, attname[i]);
164 
165   printf("<%s", name);
166   print_attrs(attribs);
167   printf(">");
168 }
169 
170 
171 /* handle_emptytag -- called after an empty element is parsed */
handle_emptytag(void * clientdata,string name,pairlist attribs)172 void handle_emptytag(void *clientdata, string name, pairlist attribs)
173 {
174   int i;
175   conststring url;
176 
177   /* Store any URLs from attributes */
178   for (i = 0; i < sizeof(attname)/sizeof(*attname); i++)
179     if ((url = pairlist_get(attribs, attname[i])))
180       pairlist_push(&list, url, attname[i]);
181 
182   printf("<%s", name);
183   print_attrs(attribs);
184   printf(" />");
185 }
186 
187 
188 /* handle_endtag -- called after an endtag is parsed (name may be "") */
handle_endtag(void * clientdata,string name)189 void handle_endtag(void *clientdata, string name)
190 {
191   /* Just before </body>, print the list. or if we see </html> and
192      haven't printed the list yet, print it there */
193   if (list && (strcasecmp(name,"body") == 0 || strcasecmp(name,"html") == 0)) {
194     print_list(list);
195     pairlist_delete(list);
196     list = NULL;
197   }
198   printf("</%s>", name);
199 }
200 
201 
202 /* usage -- print usage message and exit */
usage(string prog)203 static void usage(string prog)
204 {
205   fprintf(stderr, "Usage: %s [html-file-or-url]\n", prog);
206   exit(2);
207 }
208 
209 
210 /* main -- main body */
main(int argc,char * argv[])211 int main(int argc, char *argv[])
212 {
213   int c, status = 200;
214 
215   while ((c = getopt(argc, argv, "b:")) != -1)
216     switch (c) {
217     case 'b': base = optarg; break;
218     default: usage(argv[0]);
219     }
220 
221   if (optind < argc - 1) usage(argv[0]);
222   else if (optind > argc - 1 || eq(argv[optind], "-")) yyin = stdin;
223   else yyin = fopenurl(argv[optind], "r", &status);
224 
225   if (!yyin) {perror(argv[optind]); exit(2);}
226   if (status != 200) errexit("%s : %s\n", argv[optind], http_strerror(status));
227 
228   /* Bind the parser callback routines to our handlers */
229   set_error_handler(handle_error);
230   set_start_handler(start);
231   set_end_handler(end);
232   set_comment_handler(handle_comment);
233   set_text_handler(handle_text);
234   set_decl_handler(handle_decl);
235   set_pi_handler(handle_pi);
236   set_starttag_handler(handle_starttag);
237   set_emptytag_handler(handle_emptytag);
238   set_endtag_handler(handle_endtag);
239 
240   if (yyparse() != 0) {exit(3);}
241   return has_error ? 1 : 0;
242 }
243