1 %option 8bit nodefault noyywrap nounput
2 /* %option yylineno */
3
4 %{
5 /*
6 * Copyright © 1997-2017 World Wide Web Consortium
7 * See http://www.w3.org/Consortium/Legal/copyright-software
8 *
9 * Author: Bert Bos <bert@w3.org>
10 * Created: 1997
11 **/
12 #include "config.h"
13 #include <assert.h>
14
15 #if HAVE_STRING_H
16 # include <string.h>
17 #elif HAVE_STRINGS_H
18 # include <strings.h>
19 #endif
20 #if !HAVE_STRDUP
21 # include "strdup.e"
22 #endif
23 #include <stdlib.h>
24 #include <ctype.h>
25 #include <stdbool.h>
26 #include "export.h"
27 #include "types.e"
28 #include "heap.e"
29 #include "html.h"
30 #include "html.e"
31 #include "errexit.e"
32
33
34 EXPORT extern FILE *yyin;
35 string yyin_name = NULL;
36
37 string cur_cdata_element = NULL;
38
39 typedef struct _Stack {
40 YY_BUFFER_STATE buf;
41 FILE *f;
42 string name;
43 struct _Stack *next;
44 } *Stack;
45
46 static Stack stack = NULL;
47
48
49 /* set_yyin -- routine to set yyin and store its file name */
set_yyin(FILE * f,const conststring name)50 EXPORT void set_yyin(FILE *f, const conststring name)
51 {
52 yyin = f;
53 free(yyin_name);
54 yyin_name = newstring(name);
55 }
56
57 /* get_yyin_name -- return the name of the current input, if known */
get_yyin_name(void)58 EXPORT conststring get_yyin_name(void)
59 {
60 return yyin_name;
61 }
62
63 /* include_file -- stack current file and switch to another one */
include_file(FILE * f,const conststring name)64 EXPORT void include_file(FILE *f, const conststring name)
65 {
66 Stack h;
67
68 new(h);
69 h->buf = YY_CURRENT_BUFFER;
70 h->f = f;
71 h->name = yyin_name;
72 h->next = stack;
73 stack = h;
74 yyin_name = newstring(name);
75 yy_switch_to_buffer(yy_create_buffer(f, YY_BUF_SIZE));
76 }
77
78 /* pop_file -- back to previous input file */
pop_file(void)79 static bool pop_file(void)
80 {
81 Stack h;
82
83 if (!stack) {
84 return false;
85 } else {
86 h = stack;
87 yy_delete_buffer(YY_CURRENT_BUFFER);
88 fclose(h->f);
89 free(yyin_name);
90 yyin_name = h->name;
91 yy_switch_to_buffer(h->buf);
92 stack = h->next;
93 dispose(h);
94 return true;
95 }
96 }
97
98 /* esc -- remove outer quotes, escape ", remove \n, return malloc'ed string */
esc(string s)99 static string esc(string s)
100 {
101 int i, j;
102 string u;
103
104 /* Find new length */
105 for (i = 0, j = 1; s[j] != s[0]; i++, j++) {
106 if (s[j] == '"' || s[j] == '<' || s[j] == '>') i+= 4;
107 }
108 /* Copy and expand */
109 u = malloc(i + 1);
110 if (!u) errexit("Out of memory\n");
111 for (i = 0, j = 1; s[j] != s[0]; i++, j++) {
112 if (s[j] == '"') {strcpy(u + i, """); i += 4;}
113 else if (s[j] == '<') {strcpy(u + i, "<"); i += 4;}
114 else if (s[j] == '>') {strcpy(u + i, ">"); i += 4;}
115 else if (s[j] == '\n') u[i] = ' '; /* \n */
116 else if (s[j] == '\r' && s[j+1] == '\n') {u[i] = ' '; j++;} /* \r\n */
117 else if (s[j] == '\r') {u[i] = ' ';} /* \r */
118 else u[i] = s[j];
119 }
120 u[i] = '\0';
121 return u;
122 }
123
124 #ifndef HAVE_STRNDUP
125
126 /* strndup -- allocate a string, copy n characters into it and add \0 */
strndup(const string s,size_t n)127 static string strndup(const string s, size_t n)
128 {
129 string t = malloc(n + 1);
130 if (!t) errexit("Out of memory\n");
131 strncpy(t, s, n);
132 t[n] = '\0';
133 return t;
134 }
135
136 #else
137 # ifndef strndup
138
139 /* We know strndup() exists (HAVE_STRNDUP) and it is not defined as a
140 macro (!strndup), but older versions of string.h do not provide the
141 declaration, so let's declare it here to be sure. */
142
143 extern char *strndup(const char *s, size_t n);
144
145 # endif
146 #endif
147
148 /* lns -- count newlines */
lns(const string t)149 static void lns(const string t)
150 {
151 string s = t;
152
153 while (*s) {
154 if (*s == '\n') lineno++;
155 else if (*s != '\r') ;
156 else if (*(s+1) == '\n') {lineno++; s++;}
157 else lineno++;
158 s++;
159 }
160 }
161
162 %}
163
164 /* thing is rather too permissive, but it will accept <img src=/path>... */
165
/*
 * Named patterns used by the rules below:
 *   nondelim  one character that is not white space, a quote mark or an
 *             angle bracket
 *   name      an optional part in braces followed by one or more letters,
 *             digits, colons, dots, underscores, hyphens or bytes in the
 *             range \200-\377
 *   thing     one or more nondelim characters (used for unquoted values)
 *   comment   a complete comment, from its opening to its closing delimiter
 *   data      a run of characters without a less-than sign or line end
 *   doctype   the opening of a doctype declaration, in any letter case,
 *             including the one white space character that follows it
 *   nl        one line end: LF, CR LF or CR
 *   cdata     a complete marked CDATA section, in any letter case
 * The %s line declares the start conditions MARKUP, VALUE, DECL, INIT and
 * CDATA.
 */
166 nondelim [^ \t\r\n\f"'<>]
167 name (\{[^} \t\r\n\f]*\})?[a-zA-Z0-9:._\200-\377-]+
168 thing {nondelim}+
169 comment "<!--"([^-]|-[^-]|--[^>])*"-->"
170 data [^<\r\n]+
171 doctype <![Dd][Oo][Cc][Tt][Yy][Pp][Ee][ \t\r\n\f]
172 nl \n|\r\n|\r
173 cdata <!\[[Cc][Dd][Aa][Tt][Aa]\[([^]]|\][^]]|\]\][^>])*\]\]>
174
175 %s MARKUP VALUE DECL INIT CDATA
176
177 %%
178
179
  /*
   * Scanner rules, grouped by start condition. Token codes such as START,
   * END, TEXT and NAME are not defined in this file (presumably they come
   * from the parser header; confirm). Where a rule sets yylval.s, it is set
   * to a newly allocated string.
   *
   * INITIAL       A byte order mark (bytes 357 273 277 octal) is skipped and
   *               the scanner moves to INIT.
   * INITIAL, INIT Content outside tags. A less-than sign followed by a name
   *               returns START and an end tag opener returns END, both
   *               entering MARKUP. Character data, CDATA sections and single
   *               line ends are returned as TEXT, comments as COMMENT (text
   *               between the delimiters) and processing instructions as
   *               PROCINS. A doctype opener returns DOCTYPE and enters DECL;
   *               lns() is given the text from offset 9, which is the final
   *               white space character. A lone less-than sign is TEXT.
   * MARKUP        Inside a tag. Names return NAME, an equals sign enters
   *               VALUE, white space is skipped, and the tag end returns to
   *               INIT. A less-than sign is pushed back with yyless(0) and a
   *               greater-than token is returned in its place, so that an
   *               unclosed tag is ended implicitly.
   * VALUE         After an equals sign. An unquoted value is returned as
   *               NAME, a quoted one as STRING after processing by esc();
   *               both return to MARKUP.
   * DECL          Inside a doctype declaration. Names and quoted strings
   *               are returned until a greater-than sign returns to INIT.
   * CDATA         Entered by set_cdata_element(). Everything is TEXT except
   *               an end tag whose name equals cur_cdata_element, compared
   *               without regard to case; that returns END and enters MARKUP.
   *
   * The last two rules carry no start condition. Any character not matched
   * by another rule is returned as its own character code. At end of input
   * pop_file() is tried: ENDINCL is returned when an included file was
   * popped, otherwise the scanner terminates.
   *
   * lineno is advanced in every rule whose text can contain a line end,
   * either directly or through lns().
   */
180 <INITIAL>\357\273\277 {BEGIN(INIT); /* Byte Order Mark is ignored */}
181
182 <INITIAL,INIT>"<"{name} {BEGIN(MARKUP); yylval.s=strdup(yytext+1); return START;}
183 <INITIAL,INIT>"</"({name})? {BEGIN(MARKUP); yylval.s=strdup(yytext+2); return END;}
184 <INITIAL,INIT>{data} {yylval.s=strdup(yytext); return TEXT;}
185 <INITIAL,INIT>{cdata} {yylval.s=strdup(yytext); lns(yytext); return TEXT;}
186 <INITIAL,INIT>{nl} {yylval.s=strdup(yytext); lineno++; return TEXT;}
187 <INITIAL,INIT>{comment} {yylval.s=strndup(yytext+4,yyleng-7); lns(yytext); return COMMENT;}
188 <INITIAL,INIT>{doctype} {BEGIN(DECL); lns(yytext+9); return DOCTYPE;}
189 <INITIAL,INIT>"<?"[^>]*">" {yylval.s=strndup(yytext+2,yyleng-3); lns(yytext); return PROCINS;}
190 <INITIAL,INIT>"<" {yylval.s=strdup("<"); return TEXT;}
191
192 <MARKUP>{name} {yylval.s = strdup(yytext); return NAME;}
193 <MARKUP>"=" {BEGIN(VALUE); return '=';}
194 <MARKUP>[ \t\f]+ {; /* skip */}
195 <MARKUP>{nl} {lineno++; /* skip */}
196 <MARKUP>">" {BEGIN(INIT); return '>';}
197 <MARKUP>"/>" {BEGIN(INIT); return EMPTYEND;}
198 <MARKUP>"<" {BEGIN(INIT); yyless(0); return '>'; /* Implicit ">" */}
199
200 <VALUE>[ \t\f]+ {; /* skip */}
201 <VALUE>{nl} {lineno++; /* skip */}
202 <VALUE>{thing} {BEGIN(MARKUP); yylval.s=strdup(yytext); return NAME;}
203 <VALUE>\"[^"]*\" |
204 <VALUE>\'[^']*\' {BEGIN(MARKUP); yylval.s=esc(yytext); lns(yytext); return STRING;}
205
206 <DECL>{name} {yylval.s = strdup(yytext); return NAME;}
207 <DECL>[ \t\f]+ {; /* skip */}
208 <DECL>{nl} {lineno++; /* skip */}
209 <DECL>\"[^"]*\" |
210 <DECL>\'[^']*\' {lns(yytext); yylval.s = esc(yytext); return STRING;}
211 <DECL>">" {BEGIN(INIT); return '>';}
212
213 <CDATA>([^<]|\<[^/]|\<\/[^{a-zA-Z:._-])* {lns(yytext); yylval.s = strdup(yytext); return TEXT;}
214 <CDATA>"</"{name} {lns(yytext);
215 if (strcasecmp(yytext+2, cur_cdata_element) == 0) {
216 BEGIN(MARKUP);
217 yylval.s = strdup(yytext+2);
218 return END;
219 } else {
220 yylval.s = strdup(yytext);
221 return TEXT;
222 }
223 }
224
225 . {return *yytext; /* illegal char, in fact */}
226
227 <<EOF>> {if (pop_file()) return ENDINCL; else yyterminate();}
228
229 %%
230
231 /* set_cdata_element -- set parsing rule for an element with CDATA content */
232 EXPORT void set_cdata_element(const conststring e)
233 {
234 dispose(cur_cdata_element);
235 cur_cdata_element = newstring(e);
236 BEGIN(CDATA);
237 }
238
239 /*
240 * Local variables:
241 * mode: indented-text
242 * End:
243 */
244