1 /* inputstack.h
2 
3    Copyright 2009 Taco Hoekwater <taco@luatex.org>
4 
5    This file is part of LuaTeX.
6 
7    LuaTeX is free software; you can redistribute it and/or modify it under
8    the terms of the GNU General Public License as published by the Free
9    Software Foundation; either version 2 of the License, or (at your
10    option) any later version.
11 
12    LuaTeX is distributed in the hope that it will be useful, but WITHOUT
13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15    License for more details.
16 
17    You should have received a copy of the GNU General Public License along
18    with LuaTeX; if not, see <http://www.gnu.org/licenses/>. */
19 
20 
21 #ifndef INPUTSTACK_H
22 #  define INPUTSTACK_H 1
23 
24 /*
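  The state of \TeX's input mechanism appears in the input stack, whose
  entries are records with six basic fields, called |state|, |index|, |start|,
  |loc|, |limit|, and |name|. The record declared below carries four more
  fields: |synctex_tag|, |cattable|, |partial|, and |nofilter|.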
28 */
29 
30 typedef struct in_state_record {
31     halfword start_field;
32     halfword loc_field;
33     halfword limit_field;
34     halfword name_field;
35     int synctex_tag_field;      /* stack the tag of the current file */
36     signed int cattable_field:16;       /* category table used by the current line (see textoken.c) */
37     quarterword state_field:8;
38     quarterword index_field:8;
39     boolean partial_field:8;    /* is the current line partial? (see textoken.c) */
40     boolean nofilter_field:8;   /* used by token filtering */
41 } in_state_record;
42 
43 extern in_state_record *input_stack;
44 extern int input_ptr;
45 extern int max_in_stack;
46 extern in_state_record cur_input;       /* the ``top'' input state */
47 
/* Shorthands for the members of |cur_input|, listed in member order. */
#  define  istart cur_input.start_field /* where the current line starts in |buffer| */
#  define  iloc cur_input.loc_field     /* first character of |buffer| not yet read */
#  define  ilimit cur_input.limit_field /* where the current line ends in |buffer| */
#  define  iname cur_input.name_field   /* the current file's name */
#  define  synctex_tag cur_input.synctex_tag_field      /* the current file's tag */
#  define  line_catcode_table cur_input.cattable_field  /* category table of the current line */
#  define  istate cur_input.state_field /* scanner state right now */
#  define  iindex cur_input.index_field /* which buffer information applies */
#  define  line_partial cur_input.partial_field /* is the current line partial? */
#  define  nofilter cur_input.nofilter_field    /* has token filtering been explicitly disallowed? */
58 
59 /*
60 Let's look more closely now at the control variables
61 (|state|,~|index|,~|start|,~|loc|,~|limit|,~|name|),
62 assuming that \TeX\ is reading a line of characters that have been input
63 from some file or from the user's terminal. There is an array called
64 |buffer| that acts as a stack of all lines of characters that are
65 currently being read from files, including all lines on subsidiary
66 levels of the input stack that are not yet completed. \TeX\ will return to
67 the other lines when it is finished with the present input file.
68 
69 (Incidentally, on a machine with byte-oriented addressing, it might be
70 appropriate to combine |buffer| with the |str_pool| array,
71 letting the buffer entries grow downward from the top of the string pool
72 and checking that these two tables don't bump into each other.)
73 
74 The line we are currently working on begins in position |start| of the
75 buffer; the next character we are about to read is |buffer[loc]|; and
76 |limit| is the location of the last character present.  If |loc>limit|,
77 the line has been completely read. Usually |buffer[limit]| is the
78 |end_line_char|, denoting the end of a line, but this is not
79 true if the current line is an insertion that was entered on the user's
80 terminal in response to an error message.
81 
82 The |name| variable is a string number that designates the name of
83 the current file, if we are reading a text file. It is zero if we
84 are reading from the terminal; it is |n+1| if we are reading from
85 input stream |n|, where |0<=n<=16|. (Input stream 16 stands for
86 an invalid stream number; in such cases the input is actually from
87 the terminal, under control of the procedure |read_toks|.)
88 Finally |18<=name<=20| indicates that we are reading a pseudo file
89 created by the \.{\\scantokens} or \.{\\scantextokens} command.
90 
91 The |state| variable has one of three values, when we are scanning such
92 files:
93 $$\baselineskip 15pt\vbox{\halign{#\hfil\cr
94 1) |state=mid_line| is the normal state.\cr
95 2) |state=skip_blanks| is like |mid_line|, but blanks are ignored.\cr
96 3) |state=new_line| is the state at the beginning of a line.\cr}}$$
97 These state values are assigned numeric codes so that if we add the state
98 code to the next character's command code, we get distinct values. For
99 example, `|mid_line+spacer|' stands for the case that a blank
100 space character occurs in the middle of a line when it is not being
101 ignored; after this case is processed, the next value of |state| will
102 be |skip_blanks|.
103 */
104 
/* largest catcode for individual characters */
#  define max_char_code 15

/*
  Scanner |state| codes used while reading lines of characters. Successive
  codes lie |max_char_code+1| apart, so adding a state code to the next
  character's command code gives distinct values.
*/
typedef enum {
    mid_line = 1,                                       /* scanning a line of characters */
    skip_blanks = mid_line + (max_char_code + 1),       /* ignoring blanks */
    new_line = skip_blanks + (max_char_code + 1)        /* at the start of a line */
} state_codes;
112 
113 /*
114 Additional information about the current line is available via the
115 |index| variable, which counts how many lines of characters are present
116 in the buffer below the current level. We have |index=0| when reading
117 from the terminal and prompting the user for each line; then if the user types,
118 e.g., `\.{\\input paper}', we will have |index=1| while reading
119 the file \.{paper.tex}. However, it does not follow that |index| is the
120 same as the input stack pointer, since many of the levels on the input
121 stack may come from token lists. For example, the instruction `\.{\\input
122 paper}' might occur in a token list.
123 
124 The global variable |in_open| is equal to the |index|
125 value of the highest non-token-list level. Thus, the number of partially read
126 lines in the buffer is |in_open+1|, and we have |in_open=index|
127 when we are not reading a token list.
128 
129 If we are not currently reading from the terminal, or from an input
130 stream, we are reading from the file variable |input_file[index]|. We use
131 the notation |terminal_input| as a convenient abbreviation for |name=0|,
132 and |cur_file| as an abbreviation for |input_file[index]|.
133 
134 The global variable |line| contains the line number in the topmost
135 open file, for use in error messages. If we are not reading from
136 the terminal, |line_stack[index]| holds the line number for the
137 enclosing level, so that |line| can be restored when the current
138 file has been read. Line numbers should never be negative, since the
139 negative of the current line number is used to identify the user's output
140 routine in the |mode_line| field of the semantic nest entries.
141 
142 If more information about the input state is needed, it can be
143 included in small arrays like those shown here. For example,
144 the current page or segment number in the input file might be
145 put into a variable |@!page|, maintained for enclosing levels in
146 `\ignorespaces|@!page_stack:array[1..max_in_open] of integer|\unskip'
147 by analogy with |line_stack|.
148 @^system dependencies@>
149 */
150 
/* Abbreviations for |input_file[index]| and for the test |name=0|. */
#  define cur_file input_file[iindex]   /* the |alpha_file| variable of the current level */
#  define terminal_input (iname==0)     /* true when input comes from the terminal */
153 
154 extern int in_open;
155 extern int open_parens;
156 extern alpha_file *input_file;
157 extern int line;
158 extern int *line_stack;
159 extern str_number *source_filename_stack;
160 extern char **full_source_filename_stack;
161 
162 /*
163 Users of \TeX\ sometimes forget to balance left and right braces properly,
164 and one of the ways \TeX\ tries to spot such errors is by considering an
165 input file as broken into subfiles by control sequences that
166 are declared to be \.{\\outer}.
167 
168 A variable called |scanner_status| tells \TeX\ whether or not to complain
169 when a subfile ends. This variable has six possible values:
170 
171 \yskip\hang|normal|, means that a subfile can safely end here without incident.
172 
173 \yskip\hang|skipping|, means that a subfile can safely end here, but not a file,
174 because we're reading past some conditional text that was not selected.
175 
176 \yskip\hang|defining|, means that a subfile shouldn't end now because a
177 macro is being defined.
178 
179 \yskip\hang|matching|, means that a subfile shouldn't end now because a
180 macro is being used and we are searching for the end of its arguments.
181 
182 \yskip\hang|aligning|, means that a subfile shouldn't end now because we are
183 not finished with the preamble of an \.{\\halign} or \.{\\valign}.
184 
185 \yskip\hang|absorbing|, means that a subfile shouldn't end now because we are
186 reading a balanced token list for \.{\\message}, \.{\\write}, etc.
187 
188 \yskip\noindent
189 If the |scanner_status| is not |normal|, the variable |warning_index| points
190 to the |eqtb| location for the relevant control sequence name to print
191 in an error message.
192 */
193 
/*
  Values of |scanner_status| other than |normal|, which is not declared in
  this enumeration. The codes run consecutively from 1.
*/
typedef enum {
    skipping = 1,               /* passing over conditional text */
    defining,                   /* reading a macro definition */
    matching,                   /* reading macro arguments */
    aligning,                   /* reading an alignment preamble */
    absorbing                   /* reading a balanced text */
} scanner_states;
201 
202 extern int scanner_status;
203 extern pointer warning_index;
204 extern pointer def_ref;
205 
206 extern void runaway(void);
207 
208 /*
209 However, the discussion about input state really applies only to the
210 case that we are inputting from a file. There is another important case,
211 namely when we are currently getting input from a token list. In this case
212 |state=token_list|, and the conventions about the other state variables
213 are different:
214 
215 \yskip\hang|loc| is a pointer to the current node in the token list, i.e.,
216 the node that will be read next. If |loc=null|, the token list has been
217 fully read.
218 
219 \yskip\hang|start| points to the first node of the token list; this node
220 may or may not contain a reference count, depending on the type of token
221 list involved.
222 
223 \yskip\hang|token_type|, which takes the place of |index| in the
224 discussion above, is a code number that explains what kind of token list
225 is being scanned.
226 
227 \yskip\hang|name| points to the |eqtb| address of the control sequence
228 being expanded, if the current token list is a macro.
229 
230 \yskip\hang|param_start|, which takes the place of |limit|, tells where
231 the parameters of the current macro begin in the |param_stack|, if the
232 current token list is a macro.
233 
234 \yskip\noindent The |token_type| can take several values, depending on
235 where the current token list came from:
236 
237 \yskip\hang|parameter|, if a parameter is being scanned;
238 
239 \hang|u_template|, if the \<u_j> part of an alignment
240 template is being scanned;
241 
242 \hang|v_template|, if the \<v_j> part of an alignment
243 template is being scanned;
244 
\hang|backed_up|, if the token list being scanned has been inserted as
`to be read again';
247 
248 \hang|inserted|, if the token list being scanned has been inserted as
249 the text expansion of a \.{\\count} or similar variable;
250 
251 \hang|macro|, if a user-defined control sequence is being scanned;
252 
253 \hang|output_text|, if an \.{\\output} routine is being scanned;
254 
255 \hang|every_par_text|, if the text of \.{\\everypar} is being scanned;
256 
257 \hang|every_math_text|, if the text of \.{\\everymath} is being scanned;
258 
259 \hang|every_display_text|, if the text of \.{\\everydisplay} is being scanned;
260 
261 \hang|every_hbox_text|, if the text of \.{\\everyhbox} is being scanned;
262 
263 \hang|every_vbox_text|, if the text of \.{\\everyvbox} is being scanned;
264 
265 \hang|every_job_text|, if the text of \.{\\everyjob} is being scanned;
266 
267 \hang|every_cr_text|, if the text of \.{\\everycr} is being scanned;
268 
269 \hang|mark_text|, if the text of a \.{\\mark} is being scanned;
270 
\hang|every_eof_text|, if the text of \.{\\everyeof} is being scanned;

\hang|write_text|, if the text of a \.{\\write} is being scanned.
272 
273 \yskip\noindent
274 The codes for |output_text|, |every_par_text|, etc., are equal to a constant
275 plus the corresponding codes for token list parameters |output_routine_loc|,
276 |every_par_loc|, etc.
277 
278 The token list begins with a reference count if and
279 only if |token_type>=macro|.
280 @^reference counts@>
281 
282 Since \eTeX's additional token list parameters precede |toks_base|, the
283 corresponding token types must precede |write_text|.
284 */
285 
/*
  While a token list is being scanned, |state| is |token_list|, and two of
  the record's fields are read under different names.
*/
#  define token_list 0          /* the |state| code for token lists */
#  define param_start ilimit    /* where the macro's parameters begin in |param_stack| */
#  define token_type iindex     /* what kind of token list this is */
289 
290 
/*
  The |token_type| codes. They are consecutive, starting from 0 for
  |parameter| and ending at 16 for |write_text|.
*/
typedef enum {
    parameter = 0,              /* a parameter */
    u_template,                 /* 1: a \<u_j> template */
    v_template,                 /* 2: a \<v_j> template */
    backed_up,                  /* 3: text to be reread */
    inserted,                   /* 4: inserted texts */
    macro,                      /* 5: defined control sequences */
    output_text,                /* 6: output routines */
    every_par_text,             /* 7: \.{\\everypar} */
    every_math_text,            /* 8: \.{\\everymath} */
    every_display_text,         /* 9: \.{\\everydisplay} */
    every_hbox_text,            /* 10: \.{\\everyhbox} */
    every_vbox_text,            /* 11: \.{\\everyvbox} */
    every_job_text,             /* 12: \.{\\everyjob} */
    every_cr_text,              /* 13: \.{\\everycr} */
    mark_text,                  /* 14: \.{\\topmark}, etc. */
    every_eof_text,             /* 15: \.{\\everyeof} */
    write_text                  /* 16: \.{\\write} */
} token_types;
310 
311 extern pointer *param_stack;
312 extern int param_ptr;
313 extern int max_param_stack;
314 
315 extern int align_state;
316 
317 extern int base_ptr;
318 
319 extern void show_context(void);
320 extern void set_trick_count(void);
321 
/*
  Leave an input level and re-enter the old one: |input_ptr| is decremented
  and the record found at that position of |input_stack| is copied into
  |cur_input|. The expansion is parenthesized so that it stays a single
  operand wherever the macro is used in a larger expression; used as a
  plain statement, |pop_input();| behaves exactly as before.
*/
#  define pop_input() (cur_input=input_stack[--input_ptr])
324 
325 extern void push_input(void);
326 
327 #  define back_list(A) begin_token_list(A,backed_up)    /* backs up a simple token list */
328 #  define ins_list(A) begin_token_list(A,inserted)      /* inserts a simple token list */
329 
330 extern void begin_token_list(halfword p, quarterword t);
331 extern void end_token_list(void);
332 extern void back_input(void);
333 extern int reinsert_token(boolean a, halfword pp);
334 extern void begin_file_reading(void);
335 extern void end_file_reading(void);
336 extern void clear_for_error_prompt(void);
337 
338 extern void initialize_inputstack(void);
339 
340 extern halfword pseudo_files;
341 extern void pseudo_from_string(void);
342 extern void pseudo_start(void);
343 extern void lua_string_start(void);
344 extern boolean pseudo_input(void);
345 extern void pseudo_close(void);
346 
347 
348 #endif
349