1 %option 8bit nodefault noyywrap nounput
2 /* %option yylineno */
3 
4 %{
5 /*
6  * Copyright © 1997-2017 World Wide Web Consortium
7  * See http://www.w3.org/Consortium/Legal/copyright-software
8  *
9  * Author: Bert Bos <bert@w3.org>
10  * Created: 1997
11  **/
12 #include "config.h"
13 #include <assert.h>
14 
15 #if HAVE_STRING_H
16 #  include <string.h>
17 #elif HAVE_STRINGS_H
18 #  include <strings.h>
19 #endif
20 #if !HAVE_STRDUP
21 #  include "strdup.e"
22 #endif
23 #include <stdlib.h>
24 #include <ctype.h>
25 #include <stdbool.h>
26 #include "export.h"
27 #include "types.e"
28 #include "heap.e"
29 #include "html.h"
30 #include "html.e"
31 #include "errexit.e"
32 
33 
/* yyin -- the scanner's input stream; assigned by set_yyin() */
EXPORT extern FILE *yyin;
/* yyin_name -- name stored for the current input (released with free()),
 * or NULL as long as no name has been stored */
string yyin_name = NULL;

/* cur_cdata_element -- element name stored by set_cdata_element(); the
 * CDATA rules compare end tags against it */
string cur_cdata_element = NULL;
38 
/* Stack -- linked list of inputs that were suspended by include_file();
 * one entry is pushed per include and removed again by pop_file() */
typedef struct _Stack {
  YY_BUFFER_STATE buf;		/* buffer that was current before the include */
  FILE *f;			/* the included file; pop_file() closes it */
  string name;			/* yyin_name as it was before the include */
  struct _Stack *next;		/* entry pushed before this one, or NULL */
} *Stack;

/* Most recently pushed entry; NULL when no include is in progress */
static Stack stack = NULL;
47 
48 
49 /* set_yyin -- routine to set yyin and store its file name */
set_yyin(FILE * f,const conststring name)50 EXPORT void set_yyin(FILE *f, const conststring name)
51 {
52   yyin = f;
53   free(yyin_name);
54   yyin_name = newstring(name);
55 }
56 
57 /* get_yyin_name -- return the name of the current input, if known */
get_yyin_name(void)58 EXPORT conststring get_yyin_name(void)
59 {
60   return yyin_name;
61 }
62 
63 /* include_file -- stack current file and switch to another one */
include_file(FILE * f,const conststring name)64 EXPORT void include_file(FILE *f, const conststring name)
65 {
66   Stack h;
67 
68   new(h);
69   h->buf = YY_CURRENT_BUFFER;
70   h->f = f;
71   h->name = yyin_name;
72   h->next = stack;
73   stack = h;
74   yyin_name = newstring(name);
75   yy_switch_to_buffer(yy_create_buffer(f, YY_BUF_SIZE));
76 }
77 
78 /* pop_file -- back to previous input file */
pop_file(void)79 static bool pop_file(void)
80 {
81   Stack h;
82 
83   if (!stack) {
84     return false;
85   } else {
86     h = stack;
87     yy_delete_buffer(YY_CURRENT_BUFFER);
88     fclose(h->f);
89     free(yyin_name);
90     yyin_name = h->name;
91     yy_switch_to_buffer(h->buf);
92     stack = h->next;
93     dispose(h);
94     return true;
95   }
96 }
97 
98 /* esc -- remove outer quotes, escape ", remove \n, return malloc'ed string */
esc(string s)99 static string esc(string s)
100 {
101   int i, j;
102   string u;
103 
104   /* Find new length */
105   for (i = 0, j = 1; s[j] != s[0]; i++, j++) {
106     if (s[j] == '"' || s[j] == '<' || s[j] == '>') i+= 4;
107   }
108   /* Copy and expand */
109   u = malloc(i + 1);
110   if (!u) errexit("Out of memory\n");
111   for (i = 0, j = 1; s[j] != s[0]; i++, j++) {
112     if (s[j] == '"')  {strcpy(u + i, "&#34;"); i += 4;}
113     else if (s[j] == '<')  {strcpy(u + i, "&#60;"); i += 4;}
114     else if (s[j] == '>')  {strcpy(u + i, "&#62;"); i += 4;}
115     else if (s[j] == '\n') u[i] = ' ';		/* \n */
116     else if (s[j] == '\r' && s[j+1] == '\n') {u[i] = ' '; j++;}	/* \r\n */
117     else if (s[j] == '\r') {u[i] = ' ';}	/* \r */
118     else u[i] = s[j];
119   }
120   u[i] = '\0';
121   return u;
122 }
123 
124 #ifndef HAVE_STRNDUP
125 
126 /* strndup -- allocate a string, copy n characters into it and add \0 */
strndup(const string s,size_t n)127 static string strndup(const string s, size_t n)
128 {
129   string t = malloc(n + 1);
130   if (!t) errexit("Out of memory\n");
131   strncpy(t, s, n);
132   t[n] = '\0';
133   return t;
134 }
135 
136 #else
137 # ifndef strndup
138 
139 /* We know strndup() exists (HAVE_STRNDUP) and it is not defined as a
140 macro (!strndup), but older versions of string.h do not provide the
141 declaration, so let's declare it here to be sure. */
142 
143 extern char *strndup(const char *s, size_t n);
144 
145 # endif
146 #endif
147 
148 /* lns -- count newlines */
lns(const string t)149 static void lns(const string t)
150 {
151   string s = t;
152 
153   while (*s) {
154     if (*s == '\n') lineno++;
155     else if (*s != '\r') ;
156     else if (*(s+1) == '\n') {lineno++; s++;}
157     else lineno++;
158     s++;
159   }
160 }
161 
162 %}
163 
/* thing is rather too permissive, but it will accept <img src=/path>... */

/* comment and cdata end at the first "-->" resp. "]]>". Extra dashes or
   brackets directly in front of the ">" (as in "--->" or "]]]>") are
   allowed for explicitly: if they were consumed as content, the final
   ">" would be consumed as content as well and the match would either
   fail or run on to some later terminator. */

nondelim	[^ \t\r\n\f"'<>]
name		(\{[^} \t\r\n\f]*\})?[a-zA-Z0-9:._\200-\377-]+
thing		{nondelim}+
comment		"<!--"([^-]|-[^-]|--+[^>-])*--+">"
data		[^<\r\n]+
doctype		<![Dd][Oo][Cc][Tt][Yy][Pp][Ee][ \t\r\n\f]
nl		\n|\r\n|\r
cdata		<!\[[Cc][Dd][Aa][Tt][Aa]\[([^]]|\][^]]|\]\]+[^]>])*\]\]+>

%s MARKUP VALUE DECL INIT CDATA
176 
177 %%
178 
179 
<INITIAL>\357\273\277		{BEGIN(INIT); /* Byte Order Mark is ignored */}

	/* The rule above is the only one restricted to INITIAL, and the
	   rules below that end a tag or declaration switch to INIT, so
	   the byte order mark is not recognized after that point. */

	/* Outside tags: START and END carry the text after "<" resp. "</"
	   (for END that text may be empty); COMMENT carries the text
	   between "<!--" and the last three characters of the match;
	   PROCINS the text between "<?" and ">"; a "<" that starts none
	   of these is returned as the TEXT "&lt;". */
<INITIAL,INIT>"<"{name}		{BEGIN(MARKUP); yylval.s=strdup(yytext+1); return START;}
<INITIAL,INIT>"</"({name})?	{BEGIN(MARKUP); yylval.s=strdup(yytext+2); return END;}
<INITIAL,INIT>{data}		{yylval.s=strdup(yytext); return TEXT;}
<INITIAL,INIT>{cdata}		{yylval.s=strdup(yytext); lns(yytext); return TEXT;}
<INITIAL,INIT>{nl}		{yylval.s=strdup(yytext); lineno++; return TEXT;}
<INITIAL,INIT>{comment}	{yylval.s=strndup(yytext+4,yyleng-7); lns(yytext); return COMMENT;}
<INITIAL,INIT>{doctype}	{BEGIN(DECL); lns(yytext+9); return DOCTYPE;}
<INITIAL,INIT>"<?"[^>]*">"	{yylval.s=strndup(yytext+2,yyleng-3); lns(yytext); return PROCINS;}
<INITIAL,INIT>"<"		{yylval.s=strdup("&lt;"); return TEXT;}

	/* Inside a tag: names, "=", and the end of the tag. White space
	   is skipped; line ends are only counted. */
<MARKUP>{name}		{yylval.s = strdup(yytext); return NAME;}
<MARKUP>"="		{BEGIN(VALUE); return '=';}
<MARKUP>[ \t\f]+	{; /* skip */}
<MARKUP>{nl}		{lineno++; /* skip */}
<MARKUP>">"		{BEGIN(INIT); return '>';}
<MARKUP>"/>"		{BEGIN(INIT); return EMPTYEND;}
<MARKUP>"<"		{BEGIN(INIT); yyless(0); return '>'; /* Implicit ">" */}

	/* After "=": an unquoted value is returned as NAME, a quoted one
	   as STRING after conversion by esc(); both return to MARKUP. */
<VALUE>[ \t\f]+		{; /* skip */}
<VALUE>{nl}		{lineno++; /* skip */}
<VALUE>{thing}		{BEGIN(MARKUP); yylval.s=strdup(yytext); return NAME;}
<VALUE>\"[^"]*\"	|
<VALUE>\'[^']*\'	{BEGIN(MARKUP); yylval.s=esc(yytext); lns(yytext); return STRING;}

	/* Inside a doctype declaration: names and quoted strings up to
	   the closing ">". */
<DECL>{name}		{yylval.s = strdup(yytext); return NAME;}
<DECL>[ \t\f]+		{; /* skip */}
<DECL>{nl}		{lineno++; /* skip */}
<DECL>\"[^"]*\"		|
<DECL>\'[^']*\'		{lns(yytext); yylval.s = esc(yytext); return STRING;}
<DECL>">"		{BEGIN(INIT); return '>';}

	/* CDATA is entered by set_cdata_element(). Everything is TEXT,
	   except an end tag whose name equals cur_cdata_element when
	   compared without regard to case: that one is returned as END
	   and scanning continues in MARKUP. */
<CDATA>([^<]|\<[^/]|\<\/[^{a-zA-Z:._-])* {lns(yytext); yylval.s = strdup(yytext); return TEXT;}
<CDATA>"</"{name}	{lns(yytext);
			 if (strcasecmp(yytext+2, cur_cdata_element) == 0) {
			   BEGIN(MARKUP);
			   yylval.s = strdup(yytext+2);
			   return END;
			 } else {
			   yylval.s = strdup(yytext);
			   return TEXT;
			 }
			}

	/* Any character not matched by the rules above is returned as a
	   token whose value is the character itself. */
.			{return *yytext; /* illegal char, in fact */}

	/* At end of input: return ENDINCL if pop_file() resumed a
	   suspended input, otherwise terminate the scanner. */
<<EOF>>			{if (pop_file()) return ENDINCL; else yyterminate();}
228 
229 %%
230 
231 /* set_cdata_element -- set parsing rule for an element with CDATA content */
232 EXPORT void set_cdata_element(const conststring e)
233 {
234   dispose(cur_cdata_element);
235   cur_cdata_element = newstring(e);
236   BEGIN(CDATA);
237 }
238 
239 /*
240  * Local variables:
241  * mode: indented-text
242  * End:
243  */
244