/*
 * Count elements and attributes.
 *
 * This counts occurrences of elements and element/attribute pairs.
 * This is just an example of how to use the parser.
 * No attempt is made to count efficiently.
 *
 * Copyright © 1994-2000 World Wide Web Consortium
 * See http://www.w3.org/Consortium/Legal/copyright-software
 *
 * Bert Bos
 * Created Nov 1998
 * $Id: hxcount.c,v 1.5 2017/11/24 09:50:25 bbos Exp $
 */
#include "config.h"
#include <stdio.h>
#ifdef HAVE_UNISTD_H
#  include <unistd.h>
#endif
#include <ctype.h>
#if STDC_HEADERS
# include <string.h>
#else
# ifndef HAVE_STRCHR
#  define strchr index
#  define strrchr rindex
# endif
# ifndef HAVE_STRDUP
#  include "strdup.e"
# endif
#endif
#ifdef HAVE_STRINGS_H
# include <strings.h>		/* strcasecmp() */
#endif
#include <stdlib.h>
#include <assert.h>
#include <stdbool.h>
#include "export.h"
#include "types.e"
#include "html.e"
#include "scan.e"
#include "dict.e"
#include "openurl.e"
#include "errexit.e"

/* pair -- one row of the frequency table: a name and how often it was seen */
struct _pair {
  char *name;			/* Element name or "element/attribute" */
  int count;			/* Number of occurrences so far */
};
typedef struct _pair pair;

/* State shared by the parser callbacks below */
static pair *freq = NULL;	/* Frequency table, holds nrelems entries */
static int nrelems = 0;		/* Number of entries in freq */
static bool has_errors = false;	/* Set once handle_error() has been called */


53 /* countstring -- add 1 to number of occurences for s (case-insensitively) */
countstring(char * s)54 static void countstring(char *s)
55 {
56   int i;
57 
58   i = 0;
59   while (i < nrelems && strcasecmp(freq[i].name, s) != 0) i++;
60   if (i == nrelems) {
61     nrelems++;
62     freq = realloc(freq, nrelems * sizeof(freq[0]));
63     if (freq == NULL) {fprintf(stderr, "Out of memory\n"); exit(4);}
64     freq[i].name = strdup(s);
65     freq[i].count = 0;
66   }
67   freq[i].count++;
68 }
69 
70 /* count -- count element types and their attributes */
count(char * name,pairlist attribs)71 static void count(char *name, pairlist attribs)
72 {
73   /* Count element name */
74   countstring(name);
75 
76   /* Count attribute names (or rather, the strings "elem/attrib") */
77   for (; attribs != NULL; attribs = attribs->next) {
78     char *s = malloc(strlen(name) + strlen(attribs->name) + 2);
79     if (s == NULL) {fprintf(stderr, "Out of memory\n"); exit(4);}
80     strcat(strcat(strcpy(s, name), "/"), attribs->name);
81     countstring(s);
82     free(s);
83   }
84 }
85 
86 /* handle_error -- called when a parse error occurred */
handle_error(void * clientdata,const string s,int lineno)87 void handle_error(void *clientdata, const string s, int lineno)
88 {
89   fprintf(stderr, "%d: %s\n", lineno, s);
90   has_errors = true;
91 }
92 
/* start -- called before the first event is reported; always returns NULL */
void* start(void)
{
  return NULL;
}

/* end -- called after the last event is reported; does nothing */
void end(void *clientdata)
{
  (void) clientdata;		/* Not used */
}

99 /* handle_comment -- called after a comment is parsed */
handle_comment(void * clientdata,string commenttext)100 void handle_comment(void *clientdata, string commenttext) {}
101 
102 /* handle_text -- called after a tex chunk is parsed */
handle_text(void * clientdata,string text)103 void handle_text(void *clientdata, string text) {}
104 
105 /* handle_declaration -- called after a declaration is parsed */
handle_decl(void * clientdata,string gi,string fpi,string url)106 void handle_decl(void *clientdata, string gi,
107 		 string fpi, string url) {}
108 
109 /* handle_proc_instr -- called after a PI is parsed */
handle_pi(void * clientdata,string pi_text)110 void handle_pi(void *clientdata, string pi_text) {}
111 
112 /* handle_starttag -- called after a start tag is parsed */
handle_starttag(void * clientdata,string name,pairlist attribs)113 void handle_starttag(void *clientdata, string name, pairlist attribs)
114 {
115   count(name, attribs);
116 }
117 
118 /* handle_emptytag -- called after am empty tag is parsed */
handle_emptytag(void * clientdata,string name,pairlist attribs)119 extern void handle_emptytag(void *clientdata, string name, pairlist attribs)
120 {
121   count(name, attribs);
122 }
123 
124 /* handle_pop -- called after an endtag is parsed (name may be "") */
handle_endtag(void * clientdata,string name)125 extern void handle_endtag(void *clientdata, string name) {}
126 
127 /* usage -- print usage message and exit */
usage(string prog)128 static void usage(string prog)
129 {
130   fprintf(stderr, "Version %s\n", VERSION);
131   fprintf(stderr, "Usage: %s [html-file]\n", prog);
132   exit(2);
133 }
134 
135 /* main -- parse input, count elements and attributes of each type */
main(int argc,char * argv[])136 int main(int argc, char *argv[])
137 {
138   int i, status = 200;
139 
140   /* Bind the parser callback routines to our handlers */
141   set_error_handler(handle_error);
142   set_start_handler(start);
143   set_end_handler(end);
144   set_comment_handler(handle_comment);
145   set_text_handler(handle_text);
146   set_decl_handler(handle_decl);
147   set_pi_handler(handle_pi);
148   set_starttag_handler(handle_starttag);
149   set_emptytag_handler(handle_emptytag);
150   set_endtag_handler(handle_endtag);
151 
152   if (argc == 1) yyin = stdin;
153   else if (argc == 2) yyin = fopenurl(argv[1], "r", &status);
154   else usage(argv[0]);
155 
156   if (yyin == NULL) {perror(argv[1]); exit(1);}
157   if (status != 200) errexit("%s : %s\n", argv[1], http_strerror(status));
158 
159   /* Parse input */
160   if (yyparse() != 0) exit(3);
161 
162   /* Print results */
163   for (i = 0; i < nrelems; i++)
164     printf("%6d\t%s\n", freq[i].count, freq[i].name);
165 
166   return has_errors ? 1 : 0;
167 }
168