1 /* inputstack.h 2 3 Copyright 2009 Taco Hoekwater <taco@luatex.org> 4 5 This file is part of LuaTeX. 6 7 LuaTeX is free software; you can redistribute it and/or modify it under 8 the terms of the GNU General Public License as published by the Free 9 Software Foundation; either version 2 of the License, or (at your 10 option) any later version. 11 12 LuaTeX is distributed in the hope that it will be useful, but WITHOUT 13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 15 License for more details. 16 17 You should have received a copy of the GNU General Public License along 18 with LuaTeX; if not, see <http://www.gnu.org/licenses/>. */ 19 20 21 #ifndef INPUTSTACK_H 22 # define INPUTSTACK_H 1 23 24 /* 25 The state of \TeX's input mechanism appears in the input stack, whose 26 entries are records with six fields, called |state|, |index|, |start|, |loc|, 27 |limit|, and |name|. 28 */ 29 30 typedef struct in_state_record { 31 halfword start_field; 32 halfword loc_field; 33 halfword limit_field; 34 halfword name_field; 35 int synctex_tag_field; /* stack the tag of the current file */ 36 signed int cattable_field:16; /* category table used by the current line (see textoken.c) */ 37 quarterword state_field:8; 38 quarterword index_field:8; 39 boolean partial_field:8; /* is the current line partial? (see textoken.c) */ 40 boolean nofilter_field:8; /* used by token filtering */ 41 } in_state_record; 42 43 extern in_state_record *input_stack; 44 extern int input_ptr; 45 extern int max_in_stack; 46 extern in_state_record cur_input; /* the ``top'' input state */ 47 48 # define iloc cur_input.loc_field /* location of first unread character in |buffer| */ 49 # define istate cur_input.state_field /* current scanner state */ 50 # define iindex cur_input.index_field /* reference for buffer information */ 51 # define istart cur_input.start_field /* starting position in |buffer| */ 52 # define ilimit cur_input.limit_field /* end of current line in |buffer| */ 53 # define iname cur_input.name_field /* name of the current file */ 54 # define nofilter cur_input.nofilter_field /* is token filtering explicitly disallowed? */ 55 # define synctex_tag cur_input.synctex_tag_field /* tag of the current file */ 56 # define line_catcode_table cur_input.cattable_field 57 # define line_partial cur_input.partial_field 58 59 /* 60 Let's look more closely now at the control variables 61 (|state|,~|index|,~|start|,~|loc|,~|limit|,~|name|), 62 assuming that \TeX\ is reading a line of characters that have been input 63 from some file or from the user's terminal. There is an array called 64 |buffer| that acts as a stack of all lines of characters that are 65 currently being read from files, including all lines on subsidiary 66 levels of the input stack that are not yet completed. \TeX\ will return to 67 the other lines when it is finished with the present input file. 68 69 (Incidentally, on a machine with byte-oriented addressing, it might be 70 appropriate to combine |buffer| with the |str_pool| array, 71 letting the buffer entries grow downward from the top of the string pool 72 and checking that these two tables don't bump into each other.) 73 74 The line we are currently working on begins in position |start| of the 75 buffer; the next character we are about to read is |buffer[loc]|; and 76 |limit| is the location of the last character present. If |loc>limit|, 77 the line has been completely read. Usually |buffer[limit]| is the 78 |end_line_char|, denoting the end of a line, but this is not 79 true if the current line is an insertion that was entered on the user's 80 terminal in response to an error message. 81 82 The |name| variable is a string number that designates the name of 83 the current file, if we are reading a text file. It is zero if we 84 are reading from the terminal; it is |n+1| if we are reading from 85 input stream |n|, where |0<=n<=16|. (Input stream 16 stands for 86 an invalid stream number; in such cases the input is actually from 87 the terminal, under control of the procedure |read_toks|.) 88 Finally |18<=name<=20| indicates that we are reading a pseudo file 89 created by the \.{\\scantokens} or \.{\\scantextokens} command. 90 91 The |state| variable has one of three values, when we are scanning such 92 files: 93 $$\baselineskip 15pt\vbox{\halign{#\hfil\cr 94 1) |state=mid_line| is the normal state.\cr 95 2) |state=skip_blanks| is like |mid_line|, but blanks are ignored.\cr 96 3) |state=new_line| is the state at the beginning of a line.\cr}}$$ 97 These state values are assigned numeric codes so that if we add the state 98 code to the next character's command code, we get distinct values. For 99 example, `|mid_line+spacer|' stands for the case that a blank 100 space character occurs in the middle of a line when it is not being 101 ignored; after this case is processed, the next value of |state| will 102 be |skip_blanks|. 103 */ 104 105 # define max_char_code 15 /* largest catcode for individual characters */ 106 107 typedef enum { 108 mid_line = 1, /* |state| code when scanning a line of characters */ 109 skip_blanks = 2 + max_char_code, /* |state| code when ignoring blanks */ 110 new_line = 3 + max_char_code + max_char_code, /* |state| code at start of line */ 111 } state_codes; 112 113 /* 114 Additional information about the current line is available via the 115 |index| variable, which counts how many lines of characters are present 116 in the buffer below the current level. We have |index=0| when reading 117 from the terminal and prompting the user for each line; then if the user types, 118 e.g., `\.{\\input paper}', we will have |index=1| while reading 119 the file \.{paper.tex}. However, it does not follow that |index| is the 120 same as the input stack pointer, since many of the levels on the input 121 stack may come from token lists. For example, the instruction `\.{\\input 122 paper}' might occur in a token list. 123 124 The global variable |in_open| is equal to the |index| 125 value of the highest non-token-list level. Thus, the number of partially read 126 lines in the buffer is |in_open+1|, and we have |in_open=index| 127 when we are not reading a token list. 128 129 If we are not currently reading from the terminal, or from an input 130 stream, we are reading from the file variable |input_file[index]|. We use 131 the notation |terminal_input| as a convenient abbreviation for |name=0|, 132 and |cur_file| as an abbreviation for |input_file[index]|. 133 134 The global variable |line| contains the line number in the topmost 135 open file, for use in error messages. If we are not reading from 136 the terminal, |line_stack[index]| holds the line number for the 137 enclosing level, so that |line| can be restored when the current 138 file has been read. Line numbers should never be negative, since the 139 negative of the current line number is used to identify the user's output 140 routine in the |mode_line| field of the semantic nest entries. 141 142 If more information about the input state is needed, it can be 143 included in small arrays like those shown here. For example, 144 the current page or segment number in the input file might be 145 put into a variable |@!page|, maintained for enclosing levels in 146 `\ignorespaces|@!page_stack:array[1..max_in_open] of integer|\unskip' 147 by analogy with |line_stack|. 148 @^system dependencies@> 149 */ 150 151 # define terminal_input (iname==0) /* are we reading from the terminal? */ 152 # define cur_file input_file[iindex] /* the current |alpha_file| variable */ 153 154 extern int in_open; 155 extern int open_parens; 156 extern alpha_file *input_file; 157 extern int line; 158 extern int *line_stack; 159 extern str_number *source_filename_stack; 160 extern char **full_source_filename_stack; 161 162 /* 163 Users of \TeX\ sometimes forget to balance left and right braces properly, 164 and one of the ways \TeX\ tries to spot such errors is by considering an 165 input file as broken into subfiles by control sequences that 166 are declared to be \.{\\outer}. 167 168 A variable called |scanner_status| tells \TeX\ whether or not to complain 169 when a subfile ends. This variable has six possible values: 170 171 \yskip\hang|normal|, means that a subfile can safely end here without incident. 172 173 \yskip\hang|skipping|, means that a subfile can safely end here, but not a file, 174 because we're reading past some conditional text that was not selected. 175 176 \yskip\hang|defining|, means that a subfile shouldn't end now because a 177 macro is being defined. 178 179 \yskip\hang|matching|, means that a subfile shouldn't end now because a 180 macro is being used and we are searching for the end of its arguments. 181 182 \yskip\hang|aligning|, means that a subfile shouldn't end now because we are 183 not finished with the preamble of an \.{\\halign} or \.{\\valign}. 184 185 \yskip\hang|absorbing|, means that a subfile shouldn't end now because we are 186 reading a balanced token list for \.{\\message}, \.{\\write}, etc. 187 188 \yskip\noindent 189 If the |scanner_status| is not |normal|, the variable |warning_index| points 190 to the |eqtb| location for the relevant control sequence name to print 191 in an error message. 192 */ 193 194 typedef enum { 195 skipping = 1, /* |scanner_status| when passing conditional text */ 196 defining = 2, /* |scanner_status| when reading a macro definition */ 197 matching = 3, /* |scanner_status| when reading macro arguments */ 198 aligning = 4, /* |scanner_status| when reading an alignment preamble */ 199 absorbing = 5, /* |scanner_status| when reading a balanced text */ 200 } scanner_states; 201 202 extern int scanner_status; 203 extern pointer warning_index; 204 extern pointer def_ref; 205 206 extern void runaway(void); 207 208 /* 209 However, the discussion about input state really applies only to the 210 case that we are inputting from a file. There is another important case, 211 namely when we are currently getting input from a token list. In this case 212 |state=token_list|, and the conventions about the other state variables 213 are different: 214 215 \yskip\hang|loc| is a pointer to the current node in the token list, i.e., 216 the node that will be read next. If |loc=null|, the token list has been 217 fully read. 218 219 \yskip\hang|start| points to the first node of the token list; this node 220 may or may not contain a reference count, depending on the type of token 221 list involved. 222 223 \yskip\hang|token_type|, which takes the place of |index| in the 224 discussion above, is a code number that explains what kind of token list 225 is being scanned. 226 227 \yskip\hang|name| points to the |eqtb| address of the control sequence 228 being expanded, if the current token list is a macro. 229 230 \yskip\hang|param_start|, which takes the place of |limit|, tells where 231 the parameters of the current macro begin in the |param_stack|, if the 232 current token list is a macro. 233 234 \yskip\noindent The |token_type| can take several values, depending on 235 where the current token list came from: 236 237 \yskip\hang|parameter|, if a parameter is being scanned; 238 239 \hang|u_template|, if the \<u_j> part of an alignment 240 template is being scanned; 241 242 \hang|v_template|, if the \<v_j> part of an alignment 243 template is being scanned; 244 245 \hang|backed_up|, if the token list being scanned has been inserted as 246 `to be read again'. 247 248 \hang|inserted|, if the token list being scanned has been inserted as 249 the text expansion of a \.{\\count} or similar variable; 250 251 \hang|macro|, if a user-defined control sequence is being scanned; 252 253 \hang|output_text|, if an \.{\\output} routine is being scanned; 254 255 \hang|every_par_text|, if the text of \.{\\everypar} is being scanned; 256 257 \hang|every_math_text|, if the text of \.{\\everymath} is being scanned; 258 259 \hang|every_display_text|, if the text of \.{\\everydisplay} is being scanned; 260 261 \hang|every_hbox_text|, if the text of \.{\\everyhbox} is being scanned; 262 263 \hang|every_vbox_text|, if the text of \.{\\everyvbox} is being scanned; 264 265 \hang|every_job_text|, if the text of \.{\\everyjob} is being scanned; 266 267 \hang|every_cr_text|, if the text of \.{\\everycr} is being scanned; 268 269 \hang|mark_text|, if the text of a \.{\\mark} is being scanned; 270 271 \hang|write_text|, if the text of a \.{\\write} is being scanned. 272 273 \yskip\noindent 274 The codes for |output_text|, |every_par_text|, etc., are equal to a constant 275 plus the corresponding codes for token list parameters |output_routine_loc|, 276 |every_par_loc|, etc. 277 278 The token list begins with a reference count if and 279 only if |token_type>=macro|. 280 @^reference counts@> 281 282 Since \eTeX's additional token list parameters precede |toks_base|, the 283 corresponding token types must precede |write_text|. 284 */ 285 286 # define token_list 0 /* |state| code when scanning a token list */ 287 # define token_type iindex /* type of current token list */ 288 # define param_start ilimit /* base of macro parameters in |param_stack| */ 289 290 291 typedef enum { 292 parameter = 0, /* |token_type| code for parameter */ 293 u_template = 1, /* |token_type| code for \<u_j> template */ 294 v_template = 2, /* |token_type| code for \<v_j> template */ 295 backed_up = 3, /* |token_type| code for text to be reread */ 296 inserted = 4, /* |token_type| code for inserted texts */ 297 macro = 5, /* |token_type| code for defined control sequences */ 298 output_text = 6, /* |token_type| code for output routines */ 299 every_par_text = 7, /* |token_type| code for \.{\\everypar} */ 300 every_math_text = 8, /* |token_type| code for \.{\\everymath} */ 301 every_display_text = 9, /* |token_type| code for \.{\\everydisplay} */ 302 every_hbox_text = 10, /* |token_type| code for \.{\\everyhbox} */ 303 every_vbox_text = 11, /* |token_type| code for \.{\\everyvbox} */ 304 every_job_text = 12, /* |token_type| code for \.{\\everyjob} */ 305 every_cr_text = 13, /* |token_type| code for \.{\\everycr} */ 306 mark_text = 14, /* |token_type| code for \.{\\topmark}, etc. */ 307 every_eof_text = 15, /* |token_type| code for \.{\\everyeof} */ 308 write_text = 16, /* |token_type| code for \.{\\write} */ 309 } token_types; 310 311 extern pointer *param_stack; 312 extern int param_ptr; 313 extern int max_param_stack; 314 315 extern int align_state; 316 317 extern int base_ptr; 318 319 extern void show_context(void); 320 extern void set_trick_count(void); 321 322 /* leave an input level, re-enter the old */ 323 # define pop_input() cur_input=input_stack[--input_ptr] 324 325 extern void push_input(void); 326 327 # define back_list(A) begin_token_list(A,backed_up) /* backs up a simple token list */ 328 # define ins_list(A) begin_token_list(A,inserted) /* inserts a simple token list */ 329 330 extern void begin_token_list(halfword p, quarterword t); 331 extern void end_token_list(void); 332 extern void back_input(void); 333 extern int reinsert_token(boolean a, halfword pp); 334 extern void begin_file_reading(void); 335 extern void end_file_reading(void); 336 extern void clear_for_error_prompt(void); 337 338 extern void initialize_inputstack(void); 339 340 extern halfword pseudo_files; 341 extern void pseudo_from_string(void); 342 extern void pseudo_start(void); 343 extern void lua_string_start(void); 344 extern boolean pseudo_input(void); 345 extern void pseudo_close(void); 346 347 348 #endif 349