1 /*
2 * Count elements and attributes.
3 *
4 * This counts occurrences of elements and element/attribute pairs.
5 * This is just an example of how to use the parser.
6 * No attempt is made to count efficiently.
7 *
8 * Copyright © 1994-2000 World Wide Web Consortium
9 * See http://www.w3.org/Consortium/Legal/copyright-software
10 *
11 * Bert Bos
12 * Created Nov 1998
13 * $Id: hxcount.c,v 1.5 2017/11/24 09:50:25 bbos Exp $
14 */
15 #include "config.h"
16 #include <stdio.h>
17 #ifdef HAVE_UNISTD_H
18 # include <unistd.h>
19 #endif
20 #include <ctype.h>
21 #if STDC_HEADERS
22 # include <string.h>
23 #else
24 # ifndef HAVE_STRCHR
25 # define strchr index
26 # define strrchr rindex
27 # endif
28 # ifndef HAVE_STRdup
29 # include "strdup.e"
30 # endif
31 #endif
32 #include <stdlib.h>
33 #include <assert.h>
34 #include <stdbool.h>
35 #include "export.h"
36 #include "types.e"
37 #include "html.e"
38 #include "scan.e"
39 #include "dict.e"
40 #include "openurl.e"
41 #include "errexit.e"
42
/* pair -- one entry of the frequency table: a string and how often it was seen */
struct _pair {
  char *name;                           /* the counted string */
  int count;                            /* number of occurrences so far */
};
typedef struct _pair pair;

static pair *freq = NULL;               /* table of counted strings */
static int nrelems = 0;                 /* number of entries in freq */
static bool has_errors = false;         /* set by handle_error */
51
52
53 /* countstring -- add 1 to number of occurences for s (case-insensitively) */
countstring(char * s)54 static void countstring(char *s)
55 {
56 int i;
57
58 i = 0;
59 while (i < nrelems && strcasecmp(freq[i].name, s) != 0) i++;
60 if (i == nrelems) {
61 nrelems++;
62 freq = realloc(freq, nrelems * sizeof(freq[0]));
63 if (freq == NULL) {fprintf(stderr, "Out of memory\n"); exit(4);}
64 freq[i].name = strdup(s);
65 freq[i].count = 0;
66 }
67 freq[i].count++;
68 }
69
70 /* count -- count element types and their attributes */
count(char * name,pairlist attribs)71 static void count(char *name, pairlist attribs)
72 {
73 /* Count element name */
74 countstring(name);
75
76 /* Count attribute names (or rather, the strings "elem/attrib") */
77 for (; attribs != NULL; attribs = attribs->next) {
78 char *s = malloc(strlen(name) + strlen(attribs->name) + 2);
79 if (s == NULL) {fprintf(stderr, "Out of memory\n"); exit(4);}
80 strcat(strcat(strcpy(s, name), "/"), attribs->name);
81 countstring(s);
82 free(s);
83 }
84 }
85
86 /* handle_error -- called when a parse error occurred */
handle_error(void * clientdata,const string s,int lineno)87 void handle_error(void *clientdata, const string s, int lineno)
88 {
89 fprintf(stderr, "%d: %s\n", lineno, s);
90 has_errors = true;
91 }
92
/* start -- called before the first event is reported; always returns NULL */
void *start(void)
{
  return NULL;
}
95
/* end -- called after the last event is reported; does nothing */
void end(void *clientdata)
{
  (void) clientdata;                    /* not used */
}
98
99 /* handle_comment -- called after a comment is parsed */
handle_comment(void * clientdata,string commenttext)100 void handle_comment(void *clientdata, string commenttext) {}
101
102 /* handle_text -- called after a tex chunk is parsed */
handle_text(void * clientdata,string text)103 void handle_text(void *clientdata, string text) {}
104
105 /* handle_declaration -- called after a declaration is parsed */
handle_decl(void * clientdata,string gi,string fpi,string url)106 void handle_decl(void *clientdata, string gi,
107 string fpi, string url) {}
108
109 /* handle_proc_instr -- called after a PI is parsed */
handle_pi(void * clientdata,string pi_text)110 void handle_pi(void *clientdata, string pi_text) {}
111
112 /* handle_starttag -- called after a start tag is parsed */
handle_starttag(void * clientdata,string name,pairlist attribs)113 void handle_starttag(void *clientdata, string name, pairlist attribs)
114 {
115 count(name, attribs);
116 }
117
118 /* handle_emptytag -- called after am empty tag is parsed */
handle_emptytag(void * clientdata,string name,pairlist attribs)119 extern void handle_emptytag(void *clientdata, string name, pairlist attribs)
120 {
121 count(name, attribs);
122 }
123
124 /* handle_pop -- called after an endtag is parsed (name may be "") */
handle_endtag(void * clientdata,string name)125 extern void handle_endtag(void *clientdata, string name) {}
126
127 /* usage -- print usage message and exit */
usage(string prog)128 static void usage(string prog)
129 {
130 fprintf(stderr, "Version %s\n", VERSION);
131 fprintf(stderr, "Usage: %s [html-file]\n", prog);
132 exit(2);
133 }
134
135 /* main -- parse input, count elements and attributes of each type */
main(int argc,char * argv[])136 int main(int argc, char *argv[])
137 {
138 int i, status = 200;
139
140 /* Bind the parser callback routines to our handlers */
141 set_error_handler(handle_error);
142 set_start_handler(start);
143 set_end_handler(end);
144 set_comment_handler(handle_comment);
145 set_text_handler(handle_text);
146 set_decl_handler(handle_decl);
147 set_pi_handler(handle_pi);
148 set_starttag_handler(handle_starttag);
149 set_emptytag_handler(handle_emptytag);
150 set_endtag_handler(handle_endtag);
151
152 if (argc == 1) yyin = stdin;
153 else if (argc == 2) yyin = fopenurl(argv[1], "r", &status);
154 else usage(argv[0]);
155
156 if (yyin == NULL) {perror(argv[1]); exit(1);}
157 if (status != 200) errexit("%s : %s\n", argv[1], http_strerror(status));
158
159 /* Parse input */
160 if (yyparse() != 0) exit(3);
161
162 /* Print results */
163 for (i = 0; i < nrelems; i++)
164 printf("%6d\t%s\n", freq[i].count, freq[i].name);
165
166 return has_errors ? 1 : 0;
167 }
168