% textoken.w
%
% Copyright 2006-2011 Taco Hoekwater <taco@@luatex.org>
%
% This file is part of LuaTeX.
%
% LuaTeX is free software; you can redistribute it and/or modify it under
% the terms of the GNU General Public License as published by the Free
% Software Foundation; either version 2 of the License, or (at your
% option) any later version.
%
% LuaTeX is distributed in the hope that it will be useful, but WITHOUT
% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
% FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
% License for more details.
%
% You should have received a copy of the GNU General Public License along
% with LuaTeX; if not, see <http://www.gnu.org/licenses/>.

@ @c


#include "ptexlib.h"

@ @c
/* shorthands for integer parameters and |eqtb| entries used in this file */
#define pausing int_par(pausing_code)
#define cat_code_table int_par(cat_code_table_code)
#define tracing_nesting int_par(tracing_nesting_code)
#define suppress_outer_error int_par(suppress_outer_error_code)
#define suppress_mathpar_error int_par(suppress_mathpar_error_code)


#define every_eof equiv(every_eof_loc)
#define box(A) equiv(box_base+(A))

/* true when the current line carries the |NO_CAT_TABLE| marker */
#define detokenized_line() (line_catcode_table==NO_CAT_TABLE)

/* Store in |a| the category code of character |b|. The table attached to
   the current line is consulted unless it is |DEFAULT_CAT_TABLE|, in which
   case the |cat_code_table| parameter is used. Both arguments are
   parenthesized so that arbitrary expressions may be passed safely. */
#define do_get_cat_code(a,b) do {                                   \
    if (line_catcode_table!=DEFAULT_CAT_TABLE)                      \
      (a)=get_cat_code(line_catcode_table,(b));                     \
    else                                                            \
      (a)=get_cat_code(cat_code_table,(b));                         \
  } while (0)


@ The \TeX\ system does nearly all of its own memory allocation, so that it
can readily be transported into environments that do not have automatic
facilities for strings, garbage collection, etc., and so that it can be in
control of what error messages the user receives. The dynamic storage
requirements of \TeX\ are handled by providing two large arrays called
|fixmem| and |varmem| in which consecutive blocks of words are used as
nodes by the \TeX\ routines.

Pointer variables are indices into this array, or into another array
called |eqtb| that will be explained later. A pointer variable might
also be a special flag that lies outside the bounds of |mem|, so we
allow pointers to assume any |halfword| value. The minimum halfword
value represents a null pointer. \TeX\ does not assume that |mem[null]| exists.



@ Locations in |fixmem| are used for storing one-word records; a conventional
\.{AVAIL} stack is used for allocation in this array.

@c
smemory_word *fixmem;           /* the big dynamic storage area */
unsigned fix_mem_min;           /* the smallest location of one-word memory in use */
unsigned fix_mem_max;           /* the largest location of one-word memory in use */


@ In order to study the memory requirements of particular applications, it
is possible to prepare a version of \TeX\ that keeps track of current and
maximum memory usage. When code between the delimiters |@!stat| $\ldots$
|tats| is not ``commented out,'' \TeX\ will run a bit slower but it will
report these statistics when |tracing_stats| is sufficiently large.
@c
int dyn_used;                   /* how much memory is in use */

halfword avail;                 /* head of the list of available one-word nodes */
unsigned fix_mem_end;           /* the last one-word node used in |mem| */

halfword garbage;               /* head of a junk list, write only */
halfword temp_token_head;       /* head of a temporary list of some kind */
halfword hold_token_head;       /* head of a temporary list of another kind */
halfword omit_template;         /* a constant token list */
halfword null_list;             /* permanently empty list */
halfword backup_head;           /* head of token list built by |scan_keyword| */

@ @c
/* Reset the one-word allocator and allocate the six permanent list heads.
   Each head is obtained from |get_avail| and its info field is cleared.
   The usage counter is zeroed afterwards, so these permanent nodes are
   not included in |dyn_used|. */
void initialize_tokens(void)
{
    avail = null;
    fix_mem_end = 0;

    temp_token_head = get_avail();
    set_token_info(temp_token_head, 0);

    hold_token_head = get_avail();
    set_token_info(hold_token_head, 0);

    omit_template = get_avail();
    set_token_info(omit_template, 0);

    null_list = get_avail();
    set_token_info(null_list, 0);

    backup_head = get_avail();
    set_token_info(backup_head, 0);

    garbage = get_avail();
    set_token_info(garbage, 0);

    dyn_used = 0;               /* initialize statistics */
}

@ The function |get_avail| returns a pointer to a new one-word node whose
|link| field is null. However, \TeX\ will halt if there is no more room left.
@^inner loop@>

If the available-space list is empty, i.e., if |avail=null|,
we try first to increase |fix_mem_end|. If that cannot be done, i.e., if
|fix_mem_end=fix_mem_max|, we try to reallocate array |fixmem|.
If that doesn't work, we have to quit.
@c
/* Single-word node allocation. Returns the index of a node whose link
   field is null. The node is taken from the |avail| stack when that is
   non-empty, otherwise from the unused tail of |fixmem|, otherwise
   |fixmem| is enlarged by one fifth of its size (but by at least one
   word, so that the node handed out below always lies inside the array).
   When |realloc| fails, |runaway| and |overflow| are called. */
halfword get_avail(void)
{
    unsigned p;                 /* the new node being got */
    p = (unsigned) avail;       /* get top location in the |avail| stack */
    if (p != null) {
        avail = token_link(avail);      /* and pop it off */
    } else if (fix_mem_end < fix_mem_max) {     /* or go into virgin territory */
        incr(fix_mem_end);
        p = fix_mem_end;
    } else {
        smemory_word *new_fixmem;       /* the enlarged dynamic storage area */
        unsigned t = (fix_mem_max / 5); /* number of words to add */
        if (t == 0)
            t = 1;              /* a tiny array must still grow, else |p| would be out of bounds */
        new_fixmem =
            fixmemcast(realloc
                       (fixmem, sizeof(smemory_word) * (fix_mem_max + t + 1)));
        if (new_fixmem == NULL) {
            runaway();          /* if memory is exhausted, display possible runaway text */
            overflow("token memory size", fix_mem_max);
        } else {
            fixmem = new_fixmem;
        }
        /* clear the freshly added words */
        memset(voidcast(fixmem + fix_mem_max + 1), 0, t * sizeof(smemory_word));
        fix_mem_max += t;
        p = ++fix_mem_end;
    }
    token_link(p) = null;       /* provide an oft-desired initialization of the new node */
    incr(dyn_used);             /* maintain statistics */
    return (halfword) p;
}


@ The procedure |flush_list(p)| frees an entire linked list of
one-word nodes that starts at position |p|.
@^inner loop@>

@c
/* Makes a list of single-word nodes available again: the whole list is
   pushed onto the |avail| stack and |dyn_used| is decreased by its length.
   A null |p| is a no-op. */
void flush_list(halfword p)
{
    halfword tail, next;        /* list traversers */
    if (p == null)
        return;
    next = p;
    do {
        tail = next;
        next = token_link(next);
        decr(dyn_used);
    } while (next != null);     /* now |tail| is the last node on the list */
    token_link(tail) = avail;
    avail = p;
}

@ A \TeX\ token is either a character or a control sequence, and it is
@^token@>
represented internally in one of two ways: (1)~A character whose ASCII
code number is |c| and whose command code is |m| is represented as the
number $2^{21}m+c$; the command code is in the range |1<=m<=14|. (2)~A control
sequence whose |eqtb| address is |p| is represented as the number
|cs_token_flag+p|.
Here |cs_token_flag=@t$2^{25}-1$@>| is larger than 186$2^{21}m+c$, yet it is small enough that |cs_token_flag+p< max_halfword|; 187thus, a token fits comfortably in a halfword. 188 189A token |t| represents a |left_brace| command if and only if 190|t<left_brace_limit|; it represents a |right_brace| command if and only if 191we have |left_brace_limit<=t<right_brace_limit|; and it represents a |match| or 192|end_match| command if and only if |match_token<=t<=end_match_token|. 193The following definitions take care of these token-oriented constants 194and a few others. 195 196@ A token list is a singly linked list of one-word nodes in |mem|, where 197each word contains a token and a link. Macro definitions, output-routine 198definitions, marks, \.{\\write} texts, and a few other things 199are remembered by \TeX\ in the form 200of token lists, usually preceded by a node with a reference count in its 201|token_ref_count| field. The token stored in location |p| is called 202|info(p)|. 203 204Three special commands appear in the token lists of macro definitions. 205When |m=match|, it means that \TeX\ should scan a parameter 206for the current macro; when |m=end_match|, it means that parameter 207matching should end and \TeX\ should start reading the macro text; and 208when |m=out_param|, it means that \TeX\ should insert parameter 209number |c| into the text at this point. 210 211The enclosing \.{\char'173} and \.{\char'175} characters of a macro 212definition are omitted, but the final right brace of an output routine 213is included at the end of its token list. 214 215Here is an example macro definition that illustrates these conventions. 
216After \TeX\ processes the text 217$$\.{\\def\\mac a\#1\#2 \\b \{\#1\\-a \#\#1\#2 \#2\}}$$ 218the definition of \.{\\mac} is represented as a token list containing 219$$\def\,{\hskip2pt} 220\vbox{\halign{\hfil#\hfil\cr 221(reference count), |letter|\,\.a, |match|\,\#, |match|\,\#, |spacer|\,\.\ , 222\.{\\b}, |end_match|,\cr 223|out_param|\,1, \.{\\-}, |letter|\,\.a, |spacer|\,\.\ , |mac_param|\,\#, 224|other_char|\,\.1,\cr 225|out_param|\,2, |spacer|\,\.\ , |out_param|\,2.\cr}}$$ 226The procedure |scan_toks| builds such token lists, and |macro_call| 227does the parameter matching. 228@^reference counts@> 229 230Examples such as 231$$\.{\\def\\m\{\\def\\m\{a\}\ b\}}$$ 232explain why reference counts would be needed even if \TeX\ had no \.{\\let} 233operation: When the token list for \.{\\m} is being read, the redefinition of 234\.{\\m} changes the |eqtb| entry before the token list has been fully 235consumed, so we dare not simply destroy a token list when its 236control sequence is being redefined. 237 238If the parameter-matching part of a definition ends with `\.{\#\{}', 239the corresponding token list will have `\.\{' just before the `|end_match|' 240and also at the very end. The first `\.\{' is used to delimit the parameter; the 241second one keeps the first from disappearing. 242 243The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in 244symbolic form, including the expansion of a macro or mark. 
@c
/* Prints |cur_cmd| and |cur_chr| symbolically. For commands at or above
   |call_cmd| the token list |cur_chr| is shown after a colon; for the
   mark commands below |marks_code| the corresponding class-0 mark text
   is shown instead. */
void print_meaning(void)
{
    print_cmd_chr((quarterword) cur_cmd, cur_chr);

    if (cur_cmd >= call_cmd) {
        print_char(':');
        print_ln();
        token_show(cur_chr);
        return;
    }

    /* Show the meaning of a mark node */
    if ((cur_cmd != top_bot_mark_cmd) || (cur_chr >= marks_code))
        return;

    print_char(':');
    print_ln();
    if (cur_chr == first_mark_code)
        token_show(first_mark(0));
    else if (cur_chr == bot_mark_code)
        token_show(bot_mark(0));
    else if (cur_chr == split_first_mark_code)
        token_show(split_first_mark(0));
    else if (cur_chr == split_bot_mark_code)
        token_show(split_bot_mark(0));
    else
        token_show(top_mark(0));
}


@ The procedure |show_token_list|, which prints a symbolic form of
the token list that starts at a given node |p|, illustrates these
conventions. The token list being displayed should not begin with a reference
count. However, the procedure is intended to be robust, so that if the
memory links are awry or if |p| is not really a pointer to a token list,
nothing catastrophic will happen.

An additional parameter |q| is also given; this parameter is either null
or it points to a node in the token list where a certain magic computation
takes place that will be explained later. (Basically, |q| is non-null when
we are printing the two-line context information at the time of an error
message; |q| marks the place corresponding to where the second line
should begin.)

For example, if |p| points to the node containing the first \.a in the
token list above, then |show_token_list| will print the string
$$\hbox{`\.{a\#1\#2\ \\b\ ->\#1\\-a\ \#\#1\#2\ \#2}';}$$
and if |q| points to the node containing the second \.a,
the magic computation will be performed just before the second \.a is printed.

The generation will stop, and `\.{\\ETC.}' will be printed, if the length
of printing exceeds a given limit~|l|. Anomalous entries are printed in the
form of control sequences that are not followed by a blank space, e.g.,
`\.{\\BAD.}'; this cannot be confused with actual control sequences because
a real control sequence named \.{BAD} would come out `\.{\\BAD\ }'.

@c
/* Prints the token list starting at node |p|. The trick-count computation
   is triggered when node |q| is reached, and printing stops once |tally|
   reaches |l| (a negative |l| means ``no practical limit''). */
void show_token_list(int p, int q, int l)
{
    ASCII_code match_chr = '#'; /* character used in a `|match|' */
    ASCII_code n = '0';         /* the highest parameter number, as an ASCII digit */
    tally = 0;
    if (l < 0)
        l = 0x3FFFFFFF;
    for (; (p != null) && (tally < l); p = token_link(p)) {
        halfword tok;           /* the token stored in node |p| */
        int m, c;               /* pieces of a token */
        if (p == q) {
            /* Do magic computation */
            set_trick_count();
        }
        /* Display token |p|, and |return| if there are problems */
        if ((p < (int) fix_mem_min) || (p > (int) fix_mem_end)) {
            tprint_esc("CLOBBERED.");
            return;
        }
        tok = token_info(p);
        if (tok >= cs_token_flag) {
            if (!((inhibit_par_tokens) && (tok == par_token)))
                print_cs(tok - cs_token_flag);
            continue;
        }
        if (tok < 0) {
            tprint_esc("BAD.");
            continue;
        }
        m = token_cmd(tok);
        c = token_chr(tok);
        /* Display the token $(|m|,|c|)$ */
        /* The procedure usually ``learns'' the character code used for macro
           parameters by seeing one in a |match| command before it runs into any
           |out_param| commands. */
        switch (m) {
        case left_brace_cmd:
        case right_brace_cmd:
        case math_shift_cmd:
        case tab_mark_cmd:
        case sup_mark_cmd:
        case sub_mark_cmd:
        case spacer_cmd:
        case letter_cmd:
        case other_char_cmd:
            print(c);
            break;
        case mac_param_cmd:
            if (!in_lua_escape)
                print(c);       /* the parameter character is doubled outside a lua escape */
            print(c);
            break;
        case out_param_cmd:
            print(match_chr);
            if (c > 9) {
                print_char('!');
                return;
            }
            print_char(c + '0');
            break;
        case match_cmd:
            match_chr = c;
            print(c);
            incr(n);
            print_char(n);
            if (n > '9')
                return;
            break;
        case end_match_cmd:
            if (c == 0)
                tprint("->");
            break;
        default:
            tprint_esc("BAD.");
            break;
        }
    }
    if (p != null)
        tprint_esc("ETC.");
}

@ @c
#define do_buffer_to_unichar(a,b) do {                         \
    a = (halfword)str2uni(buffer+b);                           \
    b += utf8_size(a);                                         \
  } while (0)


@ Here's the way we sometimes want to display a token list, given a pointer
to its reference count; the pointer may be null.

@c
/* Shows the token list whose reference-count node is |p|; a null |p|
   prints nothing. */
void token_show(halfword p)
{
    if (p == null)
        return;
    show_token_list(token_link(p), null, 10000000);
}



@ |delete_token_ref| is called when
a pointer to a token list's reference count is being removed. This means
that the token list should disappear if the reference count was |null|,
otherwise the count should be decreased by one.
@^reference counts@>

@c
/* |p| points to the reference count of a token list that is losing one
   reference. A count of zero means this was the last reference, so the
   whole list is released; otherwise the count is decremented. */
void delete_token_ref(halfword p)
{
    assert(token_ref_count(p) >= 0);
    if (token_ref_count(p) != 0) {
        decr(token_ref_count(p));
        return;
    }
    flush_list(p);
}

@ @c
/* Returns the category code of |curchr|, looked up in the table attached
   to the current line unless that is |DEFAULT_CAT_TABLE|, in which case
   the |cat_code_table| parameter is used. */
int get_char_cat_code(int curchr)
{
    if (line_catcode_table != DEFAULT_CAT_TABLE)
        return get_cat_code(line_catcode_table, curchr);
    return get_cat_code(cat_code_table, curchr);
}

@ @c
/* Reports an invalid input character; deletions are disabled while the
   error is being handled. */
static void invalid_character_error(void)
{
    const char *help_lines[] = {
        "A funny symbol that I can't read has just been input.",
        "Continue, and I'll forget that it ever happened.",
        NULL
    };
    deletions_allowed = false;
    tex_error("Text line contains an invalid character", help_lines);
    deletions_allowed = true;
}

@ @c
static boolean process_sup_mark(void);  /* below */

static int scan_control_sequence(void); /* below */

typedef enum { next_line_ok, next_line_return,
    next_line_restart
} next_line_retval;

static next_line_retval next_line(void);        /* below */


@ In case you are getting bored, here is a slightly less trivial routine:
 Given a string of lowercase letters, like `\.{pt}' or `\.{plus}' or
 `\.{width}', the |scan_keyword| routine checks to see whether the next
 tokens of input match this string. The match must be exact, except that
 uppercase letters will match their lowercase counterparts; uppercase
 equivalents are determined by subtracting |"a"-"A"|, rather than using the
 |uc_code| table, since \TeX\ uses this routine only for its own limited
 set of keywords.

 If a match is found, the characters are effectively removed from the input
 and |true| is returned. Otherwise |false| is returned, and the input
 is left essentially unchanged (except for the fact that some macros
 may have been expanded, etc.).
473 @^inner loop@> 474 475@c 476boolean scan_keyword(const char *s) 477{ /* look for a given string */ 478 halfword p; /* tail of the backup list */ 479 halfword q; /* new node being added to the token list via |store_new_token| */ 480 const char *k; /* index into |str_pool| */ 481 halfword save_cur_cs = cur_cs; 482 int saved_align_state = align_state; 483 if (strlen(s) == 0) /* was assert (strlen(s) > 1); */ 484 return false ; /* but not with newtokenlib zero keyword simply doesn't match */ 485 p = backup_head; 486 token_link(p) = null; 487 k = s; 488 while (*k) { 489 get_x_token(); /* recursion is possible here */ 490 if ((cur_cs == 0) && 491 ((cur_chr == *k) || (cur_chr == *k - 'a' + 'A'))) { 492 store_new_token(cur_tok); 493 k++; 494 } else if ((cur_cmd != spacer_cmd) || (p != backup_head)) { 495 if (p != backup_head) { 496 q = get_avail(); 497 token_info(q) = cur_tok; 498 token_link(q) = null; 499 token_link(p) = q; 500 begin_token_list(token_link(backup_head), backed_up); 501 if (cur_cmd != endv_cmd) 502 align_state = saved_align_state; 503 } else { 504 back_input(); 505 } 506 cur_cs = save_cur_cs; 507 return false; 508 } 509 } 510 flush_list(token_link(backup_head)); 511 cur_cs = save_cur_cs; 512 if (cur_cmd != endv_cmd) 513 align_state = saved_align_state; 514 return true; 515} 516 517@ We can not return |undefined_control_sequence| under some conditions 518 (inside |shift_case|, for example). This needs thinking. 
@c
/* Returns the result of |string_lookup| for the name of the active
   character |curchr|. The looked-up name is the |uni2str| encoding of
   0xFFFF followed by the encoding of |curchr|; when |curchr| is not
   positive the lookup instead uses a fixed length of four bytes with a
   zero byte at index 3. When |force| is nonzero,
   |no_new_control_sequence| is switched off for the duration of the
   lookup and restored afterwards. */
halfword active_to_cs(int curchr, int force)
{
    halfword curcs;
    char *a, *b;
    char *utfbytes = xmalloc(10);
    int nncs = no_new_control_sequence;
    a = (char *) uni2str(0xFFFF);
    utfbytes = strcpy(utfbytes, a);
    if (force)
        no_new_control_sequence = false;
    if (curchr > 0) {
        b = (char *) uni2str((unsigned) curchr);
        utfbytes = strcat(utfbytes, b);
        free(b);
        curcs = string_lookup(utfbytes, strlen(utfbytes));
    } else {
        utfbytes[3] = '\0';
        curcs = string_lookup(utfbytes, 4);     /* the length includes the zero byte */
    }
    no_new_control_sequence = nncs;
    free(a);
    free(utfbytes);
    return curcs;
}

@ TODO this function should listen to \.{\\escapechar}

@c
/* Returns a printable form of control sequence |p| in a static buffer,
   which is overwritten by the next call. The null control sequence gives
   a backslash, `csname', a backslash and `endcsname'; an active character
   gives its name with the first three bytes skipped; anything else gives
   a backslash followed by the name. Output is truncated to fit the
   buffer, where the former unbounded copy loops would have overrun it
   for long names. */
static char *cs_to_string(halfword p)
{                               /* prints a control sequence */
    static char ret[256] = { 0 };
    if (p == 0 || p == null_cs) {
        snprintf(ret, sizeof(ret), "%s", "\\csname\\endcsname");
    } else {
        str_number txt = cs_text(p);
        char *sh = makecstring(txt);
        if (is_active_cs(txt))
            snprintf(ret, sizeof(ret), "%s", sh + 3);   /* skip the active-character prefix */
        else
            snprintf(ret, sizeof(ret), "\\%s", sh);
        free(sh);
    }
    return (char *) ret;
}

@ TODO this is a quick hack, will be solved differently soon

@c
/* Returns what |print_cmd_chr| prints for |cmd| and |chr|, captured by
   temporarily redirecting |selector| to |new_string|. The result comes
   from |makecstring|; the caller in this file releases it with |free|. */
static char *cmd_chr_to_string(int cmd, int chr)
{
    char *s;
    str_number str;
    int sel = selector;
    selector = new_string;
    print_cmd_chr((quarterword) cmd, chr);
    str = make_string();
    s = makecstring(str);
    selector = sel;
    flush_str(str);
    return s;
}

@ The heart of \TeX's input mechanism is the |get_next| procedure, which
we shall develop in the next few sections of the program.
Perhaps we
shouldn't actually call it the ``heart,'' however, because it really acts
as \TeX's eyes and mouth, reading the source files and gobbling them up.
And it also helps \TeX\ to regurgitate stored token lists that are to be
processed again.
@^eyes and mouth@>

The main duty of |get_next| is to input one token and to set |cur_cmd|
and |cur_chr| to that token's command code and modifier. Furthermore, if
the input token is a control sequence, the |eqtb| location of that control
sequence is stored in |cur_cs|; otherwise |cur_cs| is set to zero.

Underlying this simple description is a certain amount of complexity
because of all the cases that need to be handled.
However, the inner loop of |get_next| is reasonably short and fast.

When |get_next| is asked to get the next token of a \.{\\read} line,
it sets |cur_cmd=cur_chr=cur_cs=0| in the case that no more tokens
appear on that line. (There might not be any tokens at all, if the
|end_line_char| has |ignore| as its catcode.)


@ The value of |par_loc| is the |eqtb| address of `\.{\\par}'. This quantity
is needed because a blank line of input is supposed to be exactly equivalent
to the appearance of \.{\\par}; we must set |cur_cs:=par_loc|
when detecting a blank line.

@c
halfword par_loc;               /* location of `\.{\\par}' in |eqtb| */
halfword par_token;             /* token representing `\.{\\par}' */


@ Parts of |get_next| are executed more often than any other instructions of \TeX.
@^mastication@>@^inner loop@>



@ The global variable |force_eof| is normally |false|; it is set |true|
by an \.{\\endinput} command. |luacstrings| is the number of lua print
statements waiting to be input, it is changed by |luatokencall|.

@c
boolean force_eof;              /* should the next \.{\\input} be aborted early? */
int luacstrings;                /* how many lua strings are waiting to be input?
*/


@ If the user has set the |pausing| parameter to some positive value,
and if nonstop mode has not been selected, each line of input is displayed
on the terminal and the transcript file, followed by `\.{=>}'.
\TeX\ waits for a response. If the response is simply |carriage_return|, the
line is accepted as it stands, otherwise the line typed is
used instead of the line in the file.

@c
/* Sets |ilimit| to |last|. When |pausing| is positive and the interaction
   level is above |nonstop_mode|, the current line is echoed and the user
   is prompted; a non-empty reply is moved down to |istart| and replaces
   the line. */
void firm_up_the_line(void)
{
    int k;                      /* an index into |buffer| */
    ilimit = last;
    if (pausing <= 0)
        return;
    if (interaction <= nonstop_mode)
        return;
    wake_up_terminal();
    print_ln();
    if (istart < ilimit) {
        for (k = istart; k <= ilimit - 1; k++)
            print_char(buffer[k]);
    }
    first = ilimit;
    prompt_input("=>");         /* wait for user response */
    if (last > first) {
        /* move line down in buffer; all of |buffer[first..last-1]| must be
           copied (the former bound |k < last - 1| dropped the final
           character while |ilimit| still counted it) */
        for (k = first; k < last; k++)
            buffer[k + istart - first] = buffer[k];
        ilimit = istart + last - first;
    }
}



@ Before getting into |get_next|, let's consider the subroutine that
 is called when an `\.{\\outer}' control sequence has been scanned or
 when the end of a file has been reached. These two cases are distinguished
 by |cur_cs|, which is zero at the end of a file.

@c
/* Does nothing when |suppress_outer_error| is set or |scanner_status| is
   |normal|. Otherwise the offending control sequence (if any) is backed up
   and replaced by a space, recovery tokens are inserted according to
   |scanner_status|, and an error is reported. */
void check_outer_validity(void)
{
    halfword p;                 /* points to inserted token list */
    halfword q;                 /* auxiliary pointer */
    if (suppress_outer_error)
        return;
    if (scanner_status != normal) {
        deletions_allowed = false;
        /* Back up an outer control sequence so that it can be reread; */
        /* An outer control sequence that occurs in a \.{\\read} will not be reread,
           since the error recovery for \.{\\read} is not very powerful. */
        if (cur_cs != 0) {
            if ((istate == token_list) || (iname < 1) || (iname > 17)) {
                p = get_avail();
                token_info(p) = cs_token_flag + cur_cs;
                begin_token_list(p, backed_up); /* prepare to read the control sequence again */
            }
            cur_cmd = spacer_cmd;
            cur_chr = ' ';      /* replace it by a space */
        }
        if (scanner_status > skipping) {
            const char *errhlp[] =
                { "I suspect you have forgotten a `}', causing me",
                "to read past where you wanted me to stop.",
                "I'll try to recover; but if the error is serious,",
                "you'd better type `E' or `X' now and fix your file.",
                NULL
            };
            char errmsg[256];
            const char *startmsg;
            const char *scannermsg;
            /* Tell the user what has run away and try to recover */
            runaway();          /* print a definition, argument, or preamble */
            if (cur_cs == 0) {
                startmsg = "File ended";
            } else {
                cur_cs = 0;
                startmsg = "Forbidden control sequence found";
            }
            /* Print either `\.{definition}' or `\.{use}' or `\.{preamble}' or `\.{text}',
               and insert tokens that should lead to recovery; */
            /* The recovery procedure can't be fully understood without knowing more
               about the \TeX\ routines that should be aborted, but we can sketch the
               ideas here: For a runaway definition we will insert a right brace; for a
               runaway preamble, we will insert a special \.{\\cr} token and a right
               brace; and for a runaway argument, we will set |long_state| to
               |outer_call| and insert \.{\\par}. */
            p = get_avail();
            switch (scanner_status) {
            case defining:
                scannermsg = "definition";
                token_info(p) = right_brace_token + '}';
                break;
            case matching:
                scannermsg = "use";
                token_info(p) = par_token;
                long_state = outer_call_cmd;
                break;
            case aligning:
                scannermsg = "preamble";
                token_info(p) = right_brace_token + '}';
                q = p;
                p = get_avail();
                token_link(p) = q;
                token_info(p) = cs_token_flag + frozen_cr;
                align_state = -1000000;
                break;
            case absorbing:
                scannermsg = "text";
                token_info(p) = right_brace_token + '}';
                break;
            default:           /* can't happen */
                scannermsg = "unknown";
                break;
            }                   /*there are no other cases */
            begin_token_list(p, inserted);
            snprintf(errmsg, 255, "%s while scanning %s of %s",
                     startmsg, scannermsg, cs_to_string(warning_index));
            tex_error(errmsg, errhlp);
        } else {
            char errmsg[256];
            const char *errhlp_no[] =
                { "The file ended while I was skipping conditional text.",
                "This kind of error happens when you say `\\if...' and forget",
                "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
                NULL
            };
            const char *errhlp_cs[] =
                { "A forbidden control sequence occurred in skipped text.",
                "This kind of error happens when you say `\\if...' and forget",
                "the matching `\\fi'. I've inserted a `\\fi'; this might work.",
                NULL
            };
            const char **errhlp = (const char **) errhlp_no;
            char *ss;
            if (cur_cs != 0) {
                errhlp = errhlp_cs;
                cur_cs = 0;
            }
            ss = cmd_chr_to_string(if_test_cmd, cur_if);
            snprintf(errmsg, 255,
                     "Incomplete %s; all text was ignored after line %d",
                     ss, (int) skip_line);
            free(ss);
            /* Incomplete \\if... */
            cur_tok = cs_token_flag + frozen_fi;
            /* back up one inserted token and call |error| */
            {
                OK_to_interrupt = false;
                back_input();
                token_type = inserted;
                OK_to_interrupt = true;
                tex_error(errmsg, errhlp);
            }
        }
        deletions_allowed = true;
    }
}

@ @c
/* Reads the next token from the current file-like input level into
   |cur_cmd|, |cur_chr| and |cur_cs|. Returns |false| when the caller
   has to restart (after an invalid character, or when |next_line| asks
   for a restart) and |true| otherwise. */
static boolean get_next_file(void)
{
  SWITCH:
    if (iloc <= ilimit) {       /* current line not yet finished */
        do_buffer_to_unichar(cur_chr, iloc);

      RESWITCH:
        if (detokenized_line()) {
            cur_cmd = (cur_chr == ' ' ? 10 : 12);
        } else {
            do_get_cat_code(cur_cmd, cur_chr);
        }
        /*
           Change state if necessary, and |goto switch| if the current
           character should be ignored, or |goto reswitch| if the current
           character changes to another;
         */
        /* The following 48-way switch accomplishes the scanning quickly, assuming
           that a decent C compiler has translated the code. Note that the numeric
           values for |mid_line|, |skip_blanks|, and |new_line| are spaced
           apart from each other by |max_char_code+1|, so we can add a character's
           command code to the state to get a single number that characterizes both.
         */
        switch (istate + cur_cmd) {
        case mid_line + ignore_cmd:
        case skip_blanks + ignore_cmd:
        case new_line + ignore_cmd:
        case skip_blanks + spacer_cmd:
        case new_line + spacer_cmd:    /* Cases where character is ignored */
            goto SWITCH;
            break;
        case mid_line + escape_cmd:
        case new_line + escape_cmd:
        case skip_blanks + escape_cmd: /* Scan a control sequence ...; */
            istate = (unsigned char) scan_control_sequence();
            if (cur_cmd >= outer_call_cmd)
                check_outer_validity();
            break;
        case mid_line + active_char_cmd:
        case new_line + active_char_cmd:
        case skip_blanks + active_char_cmd:    /* Process an active-character */
            cur_cs = active_to_cs(cur_chr, false);
            cur_cmd = eq_type(cur_cs);
            cur_chr = equiv(cur_cs);
            istate = mid_line;
            if (cur_cmd >= outer_call_cmd)
                check_outer_validity();
            break;
        case mid_line + sup_mark_cmd:
        case new_line + sup_mark_cmd:
        case skip_blanks + sup_mark_cmd:       /* If this |sup_mark| starts */
            if (process_sup_mark())
                goto RESWITCH;
            else
                istate = mid_line;
            break;
        case mid_line + invalid_char_cmd:
        case new_line + invalid_char_cmd:
        case skip_blanks + invalid_char_cmd:   /* Decry the invalid character and |goto restart|; */
            invalid_character_error();
            return false;       /* because state may be |token_list| now */
            break;
        case mid_line + spacer_cmd:    /* Enter |skip_blanks| state, emit a space; */
            istate = skip_blanks;
            cur_chr = ' ';
            break;
        case mid_line + car_ret_cmd:   /* Finish line, emit a space; */
            /* When a character of type |spacer| gets through, its character code is
               changed to $\.{"\ "}=040$. This means that the ASCII codes for tab and space,
               and for the space inserted at the end of a line, will
               be treated alike when macro parameters are being matched. We do this
               since such characters are indistinguishable on most computer terminal displays.
             */
            iloc = ilimit + 1;
            cur_cmd = spacer_cmd;
            cur_chr = ' ';
            break;
        case skip_blanks + car_ret_cmd:
        case mid_line + comment_cmd:
        case new_line + comment_cmd:
        case skip_blanks + comment_cmd:        /* Finish line, |goto switch|; */
            iloc = ilimit + 1;
            goto SWITCH;
            break;
        case new_line + car_ret_cmd:   /* Finish line, emit a \.{\\par}; */
            iloc = ilimit + 1;
            cur_cs = par_loc;
            cur_cmd = eq_type(cur_cs);
            cur_chr = equiv(cur_cs);
            if (cur_cmd >= outer_call_cmd)
                check_outer_validity();
            break;
        case skip_blanks + left_brace_cmd:
        case new_line + left_brace_cmd:
            istate = mid_line;  /* fall through */
        case mid_line + left_brace_cmd:
            align_state++;
            break;
        case skip_blanks + right_brace_cmd:
        case new_line + right_brace_cmd:
            istate = mid_line;  /* fall through */
        case mid_line + right_brace_cmd:
            align_state--;
            break;
        case mid_line + math_shift_cmd:
        case mid_line + tab_mark_cmd:
        case mid_line + mac_param_cmd:
        case mid_line + sub_mark_cmd:
        case mid_line + letter_cmd:
        case mid_line + other_char_cmd:
            break;
#if 0
        case skip_blanks + math_shift:
        case skip_blanks + tab_mark:
        case skip_blanks + mac_param:
        case skip_blanks + sub_mark:
        case skip_blanks + letter:
        case skip_blanks + other_char:
        case new_line + math_shift:
        case new_line + tab_mark:
        case new_line + mac_param:
        case new_line + sub_mark:
        case new_line + letter:
        case new_line + other_char:
#else
        default:
#endif
            istate = mid_line;
            break;
        }
    } else {
        if (iname != 21)
            istate = new_line;

        /*
           Move to next line of file,
           or |goto restart| if there is no next line,
           or |return| if a \.{\\read} line has finished;
         */
        do {
            next_line_retval r = next_line();
            if (r == next_line_return) {
                return true;
            } else if (r == next_line_restart) {
                return false;
            }
        } while (0);
        check_interrupt();
        goto SWITCH;
    }
    return true;
}

@ @c
/* helpers for hexadecimal character notation; only lowercase hex digits
   are recognized, and the arguments are parenthesized for macro hygiene */
#define is_hex(a) (((a)>='0'&&(a)<='9')||((a)>='a'&&(a)<='f'))

#define add_nybble(a) do {                                     \
    if ((a)<='9') cur_chr=(cur_chr<<4)+(a)-'0';                \
    else          cur_chr=(cur_chr<<4)+(a)-'a'+10;             \
  } while (0)

#define hex_to_cur_chr do {                                    \
    if (c<='9') cur_chr=c-'0';                                 \
    else        cur_chr=c-'a'+10;                              \
    add_nybble(cc);                                            \
  } while (0)

#define four_hex_to_cur_chr do {                               \
    hex_to_cur_chr;                                            \
    add_nybble(ccc); add_nybble(cccc);                         \
  } while (0)

#define five_hex_to_cur_chr do {                               \
    four_hex_to_cur_chr;                                       \
    add_nybble(ccccc);                                         \
  } while (0)

#define six_hex_to_cur_chr do {                                \
    five_hex_to_cur_chr;                                       \
    add_nybble(cccccc);                                        \
  } while (0)


@ Notice that a code like \.{\^\^8} becomes \.x if not followed by a hex digit.

@c
/* Called when |cur_chr| is a |sup_mark| character. If that character is
   repeated in the buffer and introduces an expanded-character notation
   (six, five, four or two hex digits, or a single character below 0200
   that is shifted by 0100), |cur_chr| is set to the denoted character,
   |iloc| is advanced past the notation and |true| is returned; otherwise
   |false| is returned. Each multi-character form checks its extent
   against |ilimit| before it looks at the buffer positions involved. */
static boolean process_sup_mark(void)
{
    if (cur_chr == buffer[iloc]) {
        int c, cc;
        if (iloc < ilimit) {
            if (((iloc + 10) <= ilimit)
                && (cur_chr == buffer[iloc + 1])
                && (cur_chr == buffer[iloc + 2])
                && (cur_chr == buffer[iloc + 3])
                && (cur_chr == buffer[iloc + 4])) {
                int ccc, cccc, ccccc, cccccc;   /* constituents of a possible expanded code */
                c = buffer[iloc + 5];
                cc = buffer[iloc + 6];
                ccc = buffer[iloc + 7];
                cccc = buffer[iloc + 8];
                ccccc = buffer[iloc + 9];
                cccccc = buffer[iloc + 10];
                if ((is_hex(c)) && (is_hex(cc)) && (is_hex(ccc))
                    && (is_hex(cccc))
                    && (is_hex(ccccc)) && (is_hex(cccccc))) {
                    iloc = iloc + 11;
                    six_hex_to_cur_chr;
                    return true;
                }
            }
            if (((iloc + 8) <= ilimit)
                && (cur_chr == buffer[iloc + 1])
                && (cur_chr == buffer[iloc + 2])
                && (cur_chr == buffer[iloc + 3])) {
                int ccc, cccc, ccccc;   /* constituents of a possible expanded code */
                c = buffer[iloc + 4];
                cc = buffer[iloc + 5];
                ccc = buffer[iloc + 6];
                cccc = buffer[iloc + 7];
                ccccc = buffer[iloc + 8];
                if ((is_hex(c)) && (is_hex(cc)) && (is_hex(ccc))
                    && (is_hex(cccc)) && (is_hex(ccccc))) {
                    iloc = iloc + 9;
                    five_hex_to_cur_chr;
                    return true;
                }
            }
            if (((iloc + 6) <= ilimit)
                && (cur_chr == buffer[iloc + 1])
                && (cur_chr == buffer[iloc + 2])) {
                int ccc, cccc;  /* constituents of a possible expanded code */
                c = buffer[iloc + 3];
                cc = buffer[iloc + 4];
                ccc = buffer[iloc + 5];
                cccc = buffer[iloc + 6];
                if ((is_hex(c)) && (is_hex(cc)) && (is_hex(ccc))
                    && (is_hex(cccc))) {
                    iloc = iloc + 7;
                    four_hex_to_cur_chr;
                    return true;
                }
            }
            c = buffer[iloc + 1];
            if (c < 0200) {     /* yes we have an expanded char */
                iloc = iloc + 2;
                if (is_hex(c) && iloc <= ilimit) {
                    cc = buffer[iloc];
                    if (is_hex(cc)) {
                        incr(iloc);
                        hex_to_cur_chr;
                        return true;
                    }
                }
                cur_chr = (c < 0100 ? c + 0100 : c - 0100);
                return true;
            }
        }
    }
    return false;
}

@ Control sequence names are scanned only when they appear in some line of
 a file; once they have been scanned the first time, their |eqtb| location
 serves as a unique identification, so \TeX\ doesn't need to refer to the
 original name any more except when it prints the equivalent in symbolic form.

 The program that scans a control sequence has been written carefully
 in order to avoid the blowups that might otherwise occur if a malicious
 user tried something like `\.{\\catcode\'15=0}'. The algorithm might
 look at |buffer[ilimit+1]|, but it never looks at |buffer[ilimit+2]|.

 If expanded characters like `\.{\^\^A}' or `\.{\^\^df}'
 appear in or just following
 a control sequence name, they are converted to single characters in the
 buffer and the process is repeated, slowly but surely.

@c
static boolean check_expanded_code(int *kk);    /* below */

/* Scan the control sequence name that starts at |buffer[iloc]| (the escape
   character has already been consumed by the caller).  On return |cur_cs|,
   |cur_cmd| and |cur_chr| describe the control sequence and |iloc| points
   just past its name.  The return value is the scanner state to continue
   with: |skip_blanks| after a multi-letter name or a single space-category
   character, |mid_line| otherwise. */
static int scan_control_sequence(void)
{
    int retval = mid_line;
    if (iloc > ilimit) {
        cur_cs = null_cs;       /* |state| is irrelevant in this case */
    } else {
        register int cat;       /* |cat_code(cur_chr)|, usually */
        /* The loop body runs once unless |check_expanded_code| rewrote the
           buffer, in which case the name is rescanned from |iloc|. */
        while (1) {
            int k = iloc;
            /* presumably decodes one UTF-8 character at |buffer[k]| into
               |cur_chr| and advances |k| past it -- the |decr(k)| chain
               below undoes 1..4 bytes; confirm against the macro */
            do_buffer_to_unichar(cur_chr, k);
            do_get_cat_code(cat, cur_chr);
            if (cat != letter_cmd || k > ilimit) {
                /* single-character control sequence */
                retval = (cat == spacer_cmd ? skip_blanks : mid_line);
                if (cat == sup_mark_cmd && check_expanded_code(&k))     /* If an expanded...; */
                    continue;
            } else {
                /* multi-letter name: keep reading while letters follow */
                retval = skip_blanks;
                do {
                    do_buffer_to_unichar(cur_chr, k);
                    do_get_cat_code(cat, cur_chr);
                } while (cat == letter_cmd && k <= ilimit);

                if (cat == sup_mark_cmd && check_expanded_code(&k))     /* If an expanded...; */
                    continue;
                if (cat != letter_cmd) {
                    /* back up over the nonletter just read; one step per
                       byte of its encoding, judged by the size of |cur_chr| */
                    decr(k);
                    if (cur_chr > 0xFFFF)
                        decr(k);
                    if (cur_chr > 0x7FF)
                        decr(k);
                    if (cur_chr > 0x7F)
                        decr(k);
                }               /* now |k| points to first nonletter */
            }
            cur_cs = id_lookup(iloc, k - iloc);
            iloc = k;
            break;
        }
    }
    cur_cmd = eq_type(cur_cs);
    cur_chr = equiv(cur_cs);
    return retval;
}

@ Whenever we reach the following piece of code, we will have
 |cur_chr=buffer[k-1]| and |k<=ilimit+1| and |cat=get_cat_code(cat_code_table,cur_chr)|. If an
 expanded code like \.{\^\^A} or \.{\^\^df} appears in |buffer[(k-1)..(k+1)]|
 or |buffer[(k-1)..(k+2)]|, we
 will store the corresponding code in |buffer[k-1]| and shift the rest of
 the buffer left two or three places.
1139 1140@c 1141static boolean check_expanded_code(int *kk) 1142{ 1143 int l; 1144 int k = *kk; 1145 int d = 1; /* number of excess characters in an expanded code */ 1146 int c, cc, ccc, cccc, ccccc, cccccc; /* constituents of a possible expanded code */ 1147 if (buffer[k] == cur_chr && k < ilimit) { 1148 if ((cur_chr == buffer[k + 1]) && (cur_chr == buffer[k + 2]) 1149 && ((k + 6) <= ilimit)) { 1150 d = 4; 1151 if ((cur_chr == buffer[k + 3]) && ((k + 8) <= ilimit)) 1152 d = 5; 1153 if ((cur_chr == buffer[k + 4]) && ((k + 10) <= ilimit)) 1154 d = 6; 1155 c = buffer[k + d - 1]; 1156 cc = buffer[k + d]; 1157 ccc = buffer[k + d + 1]; 1158 cccc = buffer[k + d + 2]; 1159 if (d == 6) { 1160 ccccc = buffer[k + d + 3]; 1161 cccccc = buffer[k + d + 4]; 1162 if (is_hex(c) && is_hex(cc) && is_hex(ccc) && is_hex(cccc) 1163 && is_hex(ccccc) && is_hex(cccccc)) 1164 six_hex_to_cur_chr; 1165 } else if (d == 5) { 1166 ccccc = buffer[k + d + 3]; 1167 if (is_hex(c) && is_hex(cc) && is_hex(ccc) && is_hex(cccc) 1168 && is_hex(ccccc)) 1169 five_hex_to_cur_chr; 1170 } else { 1171 if (is_hex(c) && is_hex(cc) && is_hex(ccc) && is_hex(cccc)) 1172 four_hex_to_cur_chr; 1173 } 1174 } else { 1175 c = buffer[k + 1]; 1176 if (c < 0200) { 1177 d = 1; 1178 if (is_hex(c) && (k + 2) <= ilimit) { 1179 cc = buffer[k + 2]; 1180 if (is_hex(c) && is_hex(cc)) { 1181 d = 2; 1182 hex_to_cur_chr; 1183 } 1184 } else if (c < 0100) { 1185 cur_chr = c + 0100; 1186 } else { 1187 cur_chr = c - 0100; 1188 } 1189 } 1190 } 1191 if (d > 2) 1192 d = 2 * d - 1; 1193 else 1194 d++; 1195 if (cur_chr <= 0x7F) { 1196 buffer[k - 1] = (packed_ASCII_code) cur_chr; 1197 } else if (cur_chr <= 0x7FF) { 1198 buffer[k - 1] = (packed_ASCII_code) (0xC0 + cur_chr / 0x40); 1199 k++; 1200 d--; 1201 buffer[k - 1] = (packed_ASCII_code) (0x80 + cur_chr % 0x40); 1202 } else if (cur_chr <= 0xFFFF) { 1203 buffer[k - 1] = (packed_ASCII_code) (0xE0 + cur_chr / 0x1000); 1204 k++; 1205 d--; 1206 buffer[k - 1] = 1207 (packed_ASCII_code) (0x80 + 
(cur_chr % 0x1000) / 0x40); 1208 k++; 1209 d--; 1210 buffer[k - 1] = 1211 (packed_ASCII_code) (0x80 + (cur_chr % 0x1000) % 0x40); 1212 } else { 1213 buffer[k - 1] = (packed_ASCII_code) (0xF0 + cur_chr / 0x40000); 1214 k++; 1215 d--; 1216 buffer[k - 1] = 1217 (packed_ASCII_code) (0x80 + (cur_chr % 0x40000) / 0x1000); 1218 k++; 1219 d--; 1220 buffer[k - 1] = 1221 (packed_ASCII_code) (0x80 + 1222 ((cur_chr % 0x40000) % 0x1000) / 0x40); 1223 k++; 1224 d--; 1225 buffer[k - 1] = 1226 (packed_ASCII_code) (0x80 + 1227 ((cur_chr % 0x40000) % 0x1000) % 0x40); 1228 } 1229 l = k; 1230 ilimit = ilimit - d; 1231 while (l <= ilimit) { 1232 buffer[l] = buffer[l + d]; 1233 l++; 1234 } 1235 *kk = k; 1236 return true; 1237 } 1238 return false; 1239} 1240 1241 1242@ All of the easy branches of |get_next| have now been taken care of. 1243 There is one more branch. 1244 1245@c 1246static next_line_retval next_line(void) 1247{ 1248 boolean inhibit_eol = false; /* a way to end a pseudo file without trailing space */ 1249 if (iname > 17) { 1250 /* Read next line of file into |buffer|, or |goto restart| if the file has ended */ 1251 incr(line); 1252 first = istart; 1253 if (!force_eof) { 1254 if (iname <= 20) { 1255 if (pseudo_input()) { /* not end of file */ 1256 firm_up_the_line(); /* this sets |ilimit| */ 1257 line_catcode_table = DEFAULT_CAT_TABLE; 1258 if ((iname == 19) && (pseudo_lines(pseudo_files) == null)) 1259 inhibit_eol = true; 1260 } else if ((every_eof != null) && !eof_seen[iindex]) { 1261 ilimit = first - 1; 1262 eof_seen[iindex] = true; /* fake one empty line */ 1263 if (iname != 19) 1264 begin_token_list(every_eof, every_eof_text); 1265 return next_line_restart; 1266 } else { 1267 force_eof = true; 1268 } 1269 } else { 1270 if (iname == 21) { 1271 if (luacstring_input()) { /* not end of strings */ 1272 firm_up_the_line(); 1273 line_catcode_table = (short) luacstring_cattable(); 1274 line_partial = (signed char) luacstring_partial(); 1275 if (luacstring_final_line() || 
line_partial 1276 || line_catcode_table == NO_CAT_TABLE) 1277 inhibit_eol = true; 1278 if (!line_partial) 1279 istate = new_line; 1280 } else { 1281 force_eof = true; 1282 } 1283 } else { 1284 if (lua_input_ln(cur_file, 0, true)) { /* not end of file */ 1285 firm_up_the_line(); /* this sets |ilimit| */ 1286 line_catcode_table = DEFAULT_CAT_TABLE; 1287 } else if ((every_eof != null) && (!eof_seen[iindex])) { 1288 ilimit = first - 1; 1289 eof_seen[iindex] = true; /* fake one empty line */ 1290 begin_token_list(every_eof, every_eof_text); 1291 return next_line_restart; 1292 } else { 1293 force_eof = true; 1294 } 1295 } 1296 } 1297 } 1298 if (force_eof) { 1299 if (tracing_nesting > 0) 1300 if ((grp_stack[in_open] != cur_boundary) 1301 || (if_stack[in_open] != cond_ptr)) 1302 if (!((iname == 19) || (iname == 21))) 1303 file_warning(); /* give warning for some unfinished groups and/or conditionals */ 1304 if ((iname > 21) || (iname == 20)) { 1305 report_stop_file(filetype_tex); 1306 decr(open_parens); 1307#if 0 1308 update_terminal(); /* show user that file has been read */ 1309#endif 1310 } 1311 force_eof = false; 1312 if (iname == 21 || /* lua input */ 1313 iname == 19) { /* \.{\\scantextokens} */ 1314 end_file_reading(); 1315 } else { 1316 end_file_reading(); 1317 check_outer_validity(); 1318 } 1319 return next_line_restart; 1320 } 1321 if (inhibit_eol || end_line_char_inactive) 1322 ilimit--; 1323 else 1324 buffer[ilimit] = (packed_ASCII_code) end_line_char; 1325 first = ilimit + 1; 1326 iloc = istart; /* ready to read */ 1327 } else { 1328 if (!terminal_input) { /* \.{\\read} line has ended */ 1329 cur_cmd = 0; 1330 cur_chr = 0; 1331 return next_line_return; /* OUTER */ 1332 } 1333 if (input_ptr > 0) { /* text was inserted during error recovery */ 1334 end_file_reading(); 1335 return next_line_restart; /* resume previous level */ 1336 } 1337 if (selector < log_only) 1338 open_log_file(); 1339 if (interaction > nonstop_mode) { 1340 if (end_line_char_inactive) 1341 
ilimit++; 1342 if (ilimit == istart) { /* previous line was empty */ 1343 tprint_nl("(Please type a command or say `\\end')"); 1344 } 1345 print_ln(); 1346 first = istart; 1347 prompt_input("*"); /* input on-line into |buffer| */ 1348 ilimit = last; 1349 if (end_line_char_inactive) 1350 ilimit--; 1351 else 1352 buffer[ilimit] = (packed_ASCII_code) end_line_char; 1353 first = ilimit + 1; 1354 iloc = istart; 1355 } else { 1356 fatal_error("*** (job aborted, no legal \\end found)"); 1357 /* nonstop mode, which is intended for overnight batch processing, 1358 never waits for on-line input */ 1359 } 1360 } 1361 return next_line_ok; 1362} 1363 1364@ Let's consider now what happens when |get_next| is looking at a token list. 1365 1366@c 1367static boolean get_next_tokenlist(void) 1368{ 1369 register halfword t; /* a token */ 1370 t = token_info(iloc); 1371 iloc = token_link(iloc); /* move to next */ 1372 if (t >= cs_token_flag) { /* a control sequence token */ 1373 cur_cs = t - cs_token_flag; 1374 cur_cmd = eq_type(cur_cs); 1375 if (cur_cmd >= outer_call_cmd) { 1376 if (cur_cmd == dont_expand_cmd) { /* Get the next token, suppressing expansion */ 1377 /* The present point in the program is reached only when the |expand| 1378 routine has inserted a special marker into the input. In this special 1379 case, |token_info(iloc)| is known to be a control sequence token, and |token_link(iloc)=null|. 
1380 */ 1381 cur_cs = token_info(iloc) - cs_token_flag; 1382 iloc = null; 1383 cur_cmd = eq_type(cur_cs); 1384 if (cur_cmd > max_command_cmd) { 1385 cur_cmd = relax_cmd; 1386 cur_chr = no_expand_flag; 1387 return true; 1388 } 1389 } else { 1390 check_outer_validity(); 1391 } 1392 } 1393 cur_chr = equiv(cur_cs); 1394 } else { 1395 cur_cmd = token_cmd(t); 1396 cur_chr = token_chr(t); 1397 switch (cur_cmd) { 1398 case left_brace_cmd: 1399 align_state++; 1400 break; 1401 case right_brace_cmd: 1402 align_state--; 1403 break; 1404 case out_param_cmd: /* Insert macro parameter and |goto restart|; */ 1405 begin_token_list(param_stack[param_start + cur_chr - 1], parameter); 1406 return false; 1407 break; 1408 } 1409 } 1410 return true; 1411} 1412 1413@ Now we're ready to take the plunge into |get_next| itself. Parts of 1414 this routine are executed more often than any other instructions of \TeX. 1415 @^mastication@>@^inner loop@> 1416 1417@ sets |cur_cmd|, |cur_chr|, |cur_cs| to next token 1418 1419@c 1420void get_next(void) 1421{ 1422 RESTART: 1423 cur_cs = 0; 1424 if (istate != token_list) { 1425 /* Input from external file, |goto restart| if no input found */ 1426 if (!get_next_file()) 1427 goto RESTART; 1428 } else { 1429 if (iloc == null) { 1430 end_token_list(); 1431 goto RESTART; /* list exhausted, resume previous level */ 1432 } else if (!get_next_tokenlist()) { 1433 goto RESTART; /* parameter needs to be expanded */ 1434 } 1435 } 1436 /* If an alignment entry has just ended, take appropriate action */ 1437 if ((cur_cmd == tab_mark_cmd || cur_cmd == car_ret_cmd) && align_state == 0) { 1438 insert_vj_template(); 1439 goto RESTART; 1440 } 1441} 1442 1443 1444@ Since |get_next| is used so frequently in \TeX, it is convenient 1445to define three related procedures that do a little more: 1446 1447\yskip\hang|get_token| not only sets |cur_cmd| and |cur_chr|, it 1448also sets |cur_tok|, a packed halfword version of the current token. 
1449 1450\yskip\hang|get_x_token|, meaning ``get an expanded token,'' is like 1451|get_token|, but if the current token turns out to be a user-defined 1452control sequence (i.e., a macro call), or a conditional, 1453or something like \.{\\topmark} or \.{\\expandafter} or \.{\\csname}, 1454it is eliminated from the input by beginning the expansion of the macro 1455or the evaluation of the conditional. 1456 1457\yskip\hang|x_token| is like |get_x_token| except that it assumes that 1458|get_next| has already been called. 1459 1460\yskip\noindent 1461In fact, these three procedures account for almost every use of |get_next|. 1462 1463No new control sequences will be defined except during a call of 1464|get_token|, or when \.{\\csname} compresses a token list, because 1465|no_new_control_sequence| is always |true| at other times. 1466 1467@c 1468void get_token(void) 1469{ /* sets |cur_cmd|, |cur_chr|, |cur_tok| */ 1470 no_new_control_sequence = false; 1471 get_token_lua(); 1472 no_new_control_sequence = true; 1473 if (cur_cs == 0) 1474 cur_tok = token_val(cur_cmd, cur_chr); 1475 else 1476 cur_tok = cs_token_flag + cur_cs; 1477} 1478 1479@ @c 1480void get_token_lua(void) 1481{ 1482 register int callback_id; 1483 callback_id = callback_defined(token_filter_callback); 1484 if (callback_id > 0) { 1485 while (istate == token_list && iloc == null && iindex != v_template) 1486 end_token_list(); 1487 /* there is some stuff we don't want to see inside the callback */ 1488 if (!(istate == token_list && 1489 ((nofilter == true) || (iindex == backed_up && iloc != null)))) { 1490 do_get_token_lua(callback_id); 1491 return; 1492 } 1493 } 1494 get_next(); 1495} 1496 1497 1498@ changes the string |s| to a token list 1499@c 1500halfword string_to_toks(char *ss) 1501{ 1502 halfword p; /* tail of the token list */ 1503 halfword q; /* new node being added to the token list via |store_new_token| */ 1504 halfword t; /* token being appended */ 1505 char *s = ss, *se = ss + strlen(s); 1506 p = 
temp_token_head; 1507 set_token_link(p, null); 1508 while (s < se) { 1509 t = (halfword) str2uni((unsigned char *) s); 1510 s += utf8_size(t); 1511 if (t == ' ') 1512 t = space_token; 1513 else 1514 t = other_token + t; 1515 fast_store_new_token(t); 1516 } 1517 return token_link(temp_token_head); 1518} 1519 1520@ The token lists for macros and for other things like \.{\\mark} and \.{\\output} 1521and \.{\\write} are produced by a procedure called |scan_toks|. 1522 1523Before we get into the details of |scan_toks|, let's consider a much 1524simpler task, that of converting the current string into a token list. 1525The |str_toks| function does this; it classifies spaces as type |spacer| 1526and everything else as type |other_char|. 1527 1528The token list created by |str_toks| begins at |link(temp_token_head)| and ends 1529at the value |p| that is returned. (If |p=temp_token_head|, the list is empty.) 1530 1531|lua_str_toks| is almost identical, but it also escapes the three 1532symbols that |lua| considers special while scanning a literal string 1533 1534@c 1535static halfword lua_str_toks(lstring b) 1536{ /* changes the string |str_pool[b..pool_ptr]| to a token list */ 1537 halfword p; /* tail of the token list */ 1538 halfword q; /* new node being added to the token list via |store_new_token| */ 1539 halfword t; /* token being appended */ 1540 unsigned char *k; /* index into string */ 1541 p = temp_token_head; 1542 set_token_link(p, null); 1543 k = (unsigned char *) b.s; 1544 while (k < (unsigned char *) b.s + b.l) { 1545 t = pool_to_unichar(k); 1546 k += utf8_size(t); 1547 if (t == ' ') { 1548 t = space_token; 1549 } else { 1550 if ((t == '\\') || (t == '"') || (t == '\'') || (t == 10) 1551 || (t == 13)) 1552 fast_store_new_token(other_token + '\\'); 1553 if (t == 10) 1554 t = 'n'; 1555 if (t == 13) 1556 t = 'r'; 1557 t = other_token + t; 1558 } 1559 fast_store_new_token(t); 1560 } 1561 return p; 1562} 1563 1564 1565@ Incidentally, the main reason for wanting 
|str_toks| is the function |the_toks|, 1566which has similar input/output characteristics. 1567 1568@c 1569halfword str_toks(lstring s) 1570{ /* changes the string |str_pool[b..pool_ptr]| to a token list */ 1571 halfword p; /* tail of the token list */ 1572 halfword q; /* new node being added to the token list via |store_new_token| */ 1573 halfword t; /* token being appended */ 1574 unsigned char *k, *l; /* index into string */ 1575 p = temp_token_head; 1576 set_token_link(p, null); 1577 k = s.s; 1578 l = k + s.l; 1579 while (k < l) { 1580 t = pool_to_unichar(k); 1581 k += utf8_size(t); 1582 if (t == ' ') 1583 t = space_token; 1584 else 1585 t = other_token + t; 1586 fast_store_new_token(t); 1587 } 1588 return p; 1589} 1590 1591@ Here's part of the |expand| subroutine that we are now ready to complete: 1592@c 1593void ins_the_toks(void) 1594{ 1595 (void) the_toks(); 1596 ins_list(token_link(temp_token_head)); 1597} 1598 1599@ This routine, used in the next one, prints the job name, possibly 1600modified by the |process_jobname| callback. 1601 1602@c 1603static void print_job_name(void) 1604{ 1605 if (job_name) { 1606 char *s, *ss; /* C strings for jobname before and after processing */ 1607 int callback_id, lua_retval; 1608 s = (char*)str_string(job_name); 1609 callback_id = callback_defined(process_jobname_callback); 1610 if (callback_id > 0) { 1611 lua_retval = run_callback(callback_id, "S->S", s, &ss); 1612 if ((lua_retval == true) && (ss != NULL)) 1613 s = ss; 1614 } 1615 tprint(s); 1616 } else { 1617 print(job_name); 1618 } 1619} 1620 1621@ Here is a routine that print the result of a convert command, using 1622 the argument |i|. It returns |false | if it does not know to print 1623 the code |c|. The function exists because lua code and tex code can 1624 both call it to convert something. 
1625 1626@c 1627static boolean print_convert_string(halfword c, int i) 1628{ 1629 int ff; /* for use with |set_ff| */ 1630 boolean ret = true; 1631 switch (c) { 1632 case number_code: 1633 print_int(i); 1634 break; 1635 case uchar_code: 1636 print(i); 1637 break; 1638 case roman_numeral_code: 1639 print_roman_int(i); 1640 break; 1641 case etex_code: 1642 tprint(eTeX_version_string); 1643 break; 1644 case pdftex_revision_code: 1645 tprint(pdftex_revision); 1646 break; 1647 case luatex_revision_code: 1648 print(get_luatexrevision()); 1649 break; 1650 case luatex_date_code: 1651 print_int(get_luatex_date_info()); 1652 break; 1653 case luatex_banner_code: 1654 tprint(luatex_banner); 1655 break; 1656 case uniform_deviate_code: 1657 print_int(unif_rand(i)); 1658 break; 1659 case normal_deviate_code: 1660 print_int(norm_rand()); 1661 break; 1662 case format_name_code: 1663 print(format_name); 1664 break; 1665 case job_name_code: 1666 print_job_name(); 1667 break; 1668 case font_name_code: 1669 append_string((unsigned char *) font_name(i), 1670 (unsigned) strlen(font_name(i))); 1671 if (font_size(i) != font_dsize(i)) { 1672 tprint(" at "); 1673 print_scaled(font_size(i)); 1674 tprint("pt"); 1675 } 1676 break; 1677 case font_id_code: 1678 print_int(i); 1679 break; 1680 case math_style_code: 1681 print_math_style(); 1682 break; 1683 case pdf_font_name_code: 1684 case pdf_font_objnum_code: 1685 set_ff(i); 1686 if (c == pdf_font_name_code) 1687 print_int(obj_info(static_pdf, pdf_font_num(ff))); 1688 else 1689 print_int(pdf_font_num(ff)); 1690 break; 1691 case pdf_font_size_code: 1692 print_scaled(font_size(i)); 1693 tprint("pt"); 1694 break; 1695 case pdf_page_ref_code: 1696 print_int(pdf_get_obj(static_pdf, obj_type_page, i, false)); 1697 break; 1698 case pdf_xform_name_code: 1699 print_int(obj_info(static_pdf, i)); 1700 break; 1701 case eTeX_revision_code: 1702 tprint(eTeX_revision); 1703 break; 1704 default: 1705 ret = false; 1706 break; 1707 } 1708 return ret; 1709} 1710 
1711@ @c 1712int scan_lua_state(void) /* hh-ls: optional name or number (not optional name optional number) */ 1713{ 1714 /* Parse optional lua state integer, or an instance name to be stored in |sn| */ 1715 /* Get the next non-blank non-relax non-call token */ 1716 int sn = 0; 1717 do { 1718 get_x_token(); 1719 } while ((cur_cmd == spacer_cmd) || (cur_cmd == relax_cmd)); 1720 back_input(); /* have to push it back, whatever it is */ 1721 if (cur_cmd != left_brace_cmd) { 1722 if (scan_keyword("name")) { 1723 (void) scan_toks(false, true); 1724 sn = def_ref; 1725 } else { 1726 scan_register_num(); 1727 if (get_lua_name(cur_val)) 1728 sn = (cur_val - 65536); 1729 } 1730 } 1731 return sn; 1732} 1733 1734 1735 1736@ The procedure |conv_toks| uses |str_toks| to insert the token list 1737for |convert| functions into the scanner; `\.{\\outer}' control sequences 1738are allowed to follow `\.{\\string}' and `\.{\\meaning}'. 1739 1740The extra temp string |u| is needed because |pdf_scan_ext_toks| incorporates 1741any pending string in its output. In order to save such a pending string, 1742we have to create a temporary string that is destroyed immediately after. 

@c
/* Expand a |convert| command whose code is in |cur_chr|.  The work is done
   in two phases: the first |switch| scans whatever argument the command
   needs (leaving it in |cur_val|, |i|, |j| or |cur_cs|/|cur_chr|), the
   second part prints the result with |selector=new_string|, turns that
   string into tokens with |str_toks| and inserts them into the input.
   Commands that produce their tokens directly (|pdf_creation_date_code|,
   |lua_escape_string_code|, |expanded_code|, |lua_code|,
   |lua_function_code|) return from inside the first |switch|. */
void conv_toks(void)
{
    int old_setting;            /* holds |selector| setting */
    halfword p, q;
    int save_scanner_status;    /* |scanner_status| upon entry */
    halfword save_def_ref;      /* |def_ref| upon entry, important if inside `\.{\\message}' */
    halfword save_warning_index;
    boolean bool;               /* temp boolean */
    str_number s;               /* first temp string */
    int sn;                     /* lua chunk name */
    str_number u = 0;           /* third temp string, will become non-nil if a string is already being built */
    int i = 0;                  /* first temp integer */
    int j = 0;                  /* second temp integer */
    int c = cur_chr;            /* desired type of conversion */
    str_number str;
    /* Scan the argument for command |c| */
    switch (c) {
    case uchar_code:
        scan_char_num();
        break;
    case number_code:
    case roman_numeral_code:
        scan_int();
        break;
    case string_code:
    case meaning_code:
        /* read the token with |scanner_status=normal| so that `\.{\\outer}'
           control sequences are accepted here */
        save_scanner_status = scanner_status;
        scanner_status = normal;
        get_token();
        scanner_status = save_scanner_status;
        break;
    case etex_code:
        break;
    case font_name_code:
    case font_id_code:
        scan_font_ident();
        break;
    case pdftex_revision_code:
    case luatex_revision_code:
    case luatex_date_code:
    case luatex_banner_code:
        break;
    case pdf_font_name_code:
    case pdf_font_objnum_code:
    case pdf_font_size_code:
        scan_font_ident();
        if (cur_val == null_font)
            pdf_error("font", "invalid font identifier");
        if (c != pdf_font_size_code) {
            pdf_check_vf(cur_val);
            if (!font_used(cur_val))
                pdf_init_font(static_pdf, cur_val);
        }
        break;
    case pdf_page_ref_code:
        scan_int();
        if (cur_val <= 0)
            pdf_error("pageref", "invalid page number");
        break;
    case left_margin_kern_code:
    case right_margin_kern_code:
        scan_int();
        if ((box(cur_val) == null) || (type(box(cur_val)) != hlist_node))
            pdf_error("marginkern", "a non-empty hbox expected");
        break;
    case pdf_xform_name_code:
        scan_int();
        check_obj_type(static_pdf, obj_type_xform, cur_val);
        break;
    case pdf_creation_date_code:
        ins_list(string_to_toks(getcreationdate(static_pdf)));
        return;
        break;
    case format_name_code:
    case job_name_code:
        if (job_name == 0)
            open_log_file();
        break;
    case pdf_colorstack_init_code:
        bool = scan_keyword("page");
        if (scan_keyword("direct"))
            cur_val = direct_always;
        else if (scan_keyword("page"))
            cur_val = direct_page;
        else
            cur_val = set_origin;
        save_scanner_status = scanner_status;
        save_warning_index = warning_index;
        save_def_ref = def_ref;
        u = save_cur_string();
        scan_toks(false, true); /*hh-ls was scan_pdf_ext_toks();*/
        s = tokens_to_string(def_ref);
        delete_token_ref(def_ref);
        def_ref = save_def_ref;
        warning_index = save_warning_index;
        scanner_status = save_scanner_status;
        cur_val = newcolorstack(s, cur_val, bool);
        flush_str(s);
        cur_val_level = int_val_level;
        if (cur_val < 0) {
            print_err("Too many color stacks");
            help2("The number of color stacks is limited to 32768.",
                  "I'll use the default color stack 0 here.");
            error();
            cur_val = 0;
            /* NOTE(review): |u| is restored only on this error path, whereas
               the |expanded_code| and |lua_code| cases restore it
               unconditionally -- confirm whether that is intended */
            restore_cur_string(u);
        }
        break;
    case uniform_deviate_code:
        scan_int();
        break;
    case normal_deviate_code:
        break;
    case lua_escape_string_code:
        {
            lstring escstr;
            int l = 0;
            save_scanner_status = scanner_status;
            save_def_ref = def_ref;
            save_warning_index = warning_index;
            scan_toks(false, true);     /*hh-ls was scan_pdf_ext_toks();*/
            /* |in_lua_escape| is set only while the token list is converted */
            bool = in_lua_escape;
            in_lua_escape = true;
            escstr.s = (unsigned char *) tokenlist_to_cstring(def_ref, false, &l);
            escstr.l = (unsigned) l;
            in_lua_escape = bool;
            delete_token_ref(def_ref);
            def_ref = save_def_ref;
            warning_index = save_warning_index;
            scanner_status = save_scanner_status;
            (void) lua_str_toks(escstr);
            ins_list(token_link(temp_token_head));
            free(escstr.s);
            return;
        }
        break;
    case math_style_code:
        break;
    case expanded_code:
        save_scanner_status = scanner_status;
        save_warning_index = warning_index;
        save_def_ref = def_ref;
        u = save_cur_string();
        scan_toks(false, true); /*hh-ls was scan_pdf_ext_toks();*/
        warning_index = save_warning_index;
        scanner_status = save_scanner_status;
        /* the scanned list itself becomes the expansion */
        ins_list(token_link(def_ref));
        def_ref = save_def_ref;
        restore_cur_string(u);
        return;
        break;
    case lua_code:
        u = save_cur_string();
        save_scanner_status = scanner_status;
        save_def_ref = def_ref;
        save_warning_index = warning_index;
        sn = scan_lua_state();
        scan_toks(false, true); /*hh-ls was scan_pdf_ext_toks();*/
        s = def_ref;
        warning_index = save_warning_index;
        def_ref = save_def_ref;
        scanner_status = save_scanner_status;
        luacstrings = 0;
        luatokencall(s, sn);
        delete_token_ref(s);
        restore_cur_string(u);  /* TODO: check this, was different */
        if (luacstrings > 0)
            lua_string_start();
        return;
        break;
    case lua_function_code:
        scan_int();
        if (cur_val <= 0) {
            pdf_error("luafunction", "invalid number");
        } else {
            u = save_cur_string();
            luacstrings = 0;
            luafunctioncall(cur_val);
            restore_cur_string(u);
            if (luacstrings > 0)
                lua_string_start();
        }
        return;
        break;
    case pdf_insert_ht_code:
        scan_register_num();
        break;
    case pdf_ximage_bbox_code:
        /* first number: the image object (its data pointer goes to |i|);
           second number: which of the four bounding box values (|j|) */
        scan_int();
        check_obj_type(static_pdf, obj_type_ximage, cur_val);
        i = obj_data_ptr(static_pdf, cur_val);
        scan_int();
        j = cur_val;
        if ((j < 1) || (j > 4))
            pdf_error("pdfximagebbox", "invalid parameter");
        break;
        /* Cases of 'Scan the argument for command |c|' */
    case eTeX_revision_code:
        break;
    default:
        confusion("convert");
        break;
    }

    old_setting = selector;
    selector = new_string;

    /* Print the result of command |c| */
    if (!print_convert_string(c, cur_val)) {
        /* codes that |print_convert_string| does not handle */
        switch (c) {
        case string_code:
            if (cur_cs != 0)
                sprint_cs(cur_cs);
            else
                print(cur_chr);
            break;
        case meaning_code:
            print_meaning();
            break;
        case left_margin_kern_code:
            p = list_ptr(box(cur_val));
            /* step over one leading glue node with the |left_skip_code+1| subtype */
            if ((p != null) && (!is_char_node(p)) &&
                (type(p) == glue_node) && (subtype(p) == left_skip_code + 1))
                p = vlink(p);
            if ((p != null) && (!is_char_node(p)) &&
                (type(p) == margin_kern_node) && (subtype(p) == left_side))
                print_scaled(width(p));
            else
                print_char('0');
            tprint("pt");
            break;
        case right_margin_kern_code:
            q = list_ptr(box(cur_val));
            p = null;
            if (q != null) {
                p = prev_rightmost(q, null);
                /* step back over one trailing glue node with the |right_skip_code+1| subtype */
                if ((p != null) && (!is_char_node(p)) && (type(p) == glue_node)
                    && (subtype(p) == right_skip_code + 1))
                    p = prev_rightmost(q, p);
            }
            if ((p != null) && (!is_char_node(p)) &&
                (type(p) == margin_kern_node) && (subtype(p) == right_side))
                print_scaled(width(p));
            else
                print_char('0');
            tprint("pt");
            break;
        case pdf_colorstack_init_code:
            print_int(cur_val);
            break;
        case pdf_insert_ht_code:
            i = cur_val;
            p = page_ins_head;
            /* advance while the next node's subtype does not exceed |i| */
            while (i >= subtype(vlink(p)))
                p = vlink(p);
            if (subtype(p) == i)
                print_scaled(height(p));
            else
                print_char('0');
            tprint("pt");
            break;
        case pdf_ximage_bbox_code:
            if (is_pdf_image(i)) {
                switch (j) {
                case 1:
                    print_scaled(epdf_orig_x(i));
                    break;
                case 2:
                    print_scaled(epdf_orig_y(i));
                    break;
                case 3:
                    print_scaled(epdf_orig_x(i) + epdf_xsize(i));
                    break;
                case 4:
                    print_scaled(epdf_orig_y(i) + epdf_ysize(i));
                    break;
                }
            } else {
                print_scaled(0);
            }
            tprint("pt");
            break;
        case pdf_creation_date_code:
        case lua_escape_string_code:
        case lua_code:
        case lua_function_code:
        case expanded_code:
            break;
        default:
            confusion("convert");
            break;
        }
    }

    /* turn what was printed into a token list and insert it */
    selector = old_setting;
    str = make_string();
    (void) str_toks(str_lstring(str));
    flush_str(str);
    ins_list(token_link(temp_token_head));
}

@ This boolean is keeping track of the lua string escape state
@c
boolean in_lua_escape;

@ probably not needed anymore
@c
/* True when |c| is the |convert_cmd| command code. */
boolean is_convert(halfword c)
{
    return (c == convert_cmd);
}

/* Return a new string holding the expansion of convert code |c| with
   argument |i|, or 0 when the code is handled neither by
   |print_convert_string| nor as |font_identifier_code|. */
str_number the_convert_string(halfword c, int i)
{
    int old_setting;            /* saved |selector| setting */
    str_number ret = 0;
    old_setting = selector;
    selector = new_string;
    if (print_convert_string(c, i)) {
        ret = make_string();
    } else if (c == font_identifier_code) {
        print_font_identifier(i);
        ret = make_string();
    }
    selector = old_setting;
    return ret;
}

@ Another way to create a token list is via the \.{\\read} command. The
sixteen files potentially usable for reading appear in the following
global variables. The value of |read_open[n]| will be |closed| if
stream number |n| has not been opened or if it has been fully read;
|just_open| if an \.{\\openin} but not a \.{\\read} has been done;
and |normal| if it is open and ready to read the next line.

@c
FILE *read_file[16];            /* used for \.{\\read} */
int read_open[17];              /* state of |read_file[n]| */

/* Mark all seventeen entries of |read_open| as |closed|. */
void initialize_read(void)
{
    int k;
    for (k = 0; k <= 16; k++)
        read_open[k] = closed;
}

@ The |read_toks| procedure constructs a token list like that for any
macro definition, and makes |cur_val| point to it.
Parameter |r| points 2093to the control sequence that will receive this token list. 2094 2095@c 2096void read_toks(int n, halfword r, halfword j) 2097{ 2098 halfword p; /* tail of the token list */ 2099 halfword q; /* new node being added to the token list via |store_new_token| */ 2100 int s; /* saved value of |align_state| */ 2101 int m; /* stream number */ 2102 scanner_status = defining; 2103 warning_index = r; 2104 p = get_avail(); 2105 def_ref = p; 2106 set_token_ref_count(def_ref, 0); 2107 p = def_ref; /* the reference count */ 2108 store_new_token(end_match_token); 2109 if ((n < 0) || (n > 15)) 2110 m = 16; 2111 else 2112 m = n; 2113 s = align_state; 2114 align_state = 1000000; /* disable tab marks, etc. */ 2115 do { 2116 /* Input and store tokens from the next line of the file */ 2117 begin_file_reading(); 2118 iname = m + 1; 2119 if (read_open[m] == closed) { 2120 /* Input for \.{\\read} from the terminal */ 2121 /* Here we input on-line into the |buffer| array, prompting the user explicitly 2122 if |n>=0|. The value of |n| is set negative so that additional prompts 2123 will not be given in the case of multi-line input. */ 2124 if (interaction > nonstop_mode) { 2125 if (n < 0) { 2126 prompt_input(""); 2127 } else { 2128 wake_up_terminal(); 2129 print_ln(); 2130 sprint_cs(r); 2131 prompt_input(" ="); 2132 n = -1; 2133 } 2134 } else { 2135 fatal_error 2136 ("*** (cannot \\read from terminal in nonstop modes)"); 2137 } 2138 2139 } else if (read_open[m] == just_open) { 2140 /* Input the first line of |read_file[m]| */ 2141 /* The first line of a file must be treated specially, since |lua_input_ln| 2142 must be told not to start with |get|. */ 2143 if (lua_input_ln(read_file[m], (m + 1), false)) { 2144 read_open[m] = normal; 2145 } else { 2146 lua_a_close_in(read_file[m], (m + 1)); 2147 read_open[m] = closed; 2148 } 2149 2150 } else { 2151 /* Input the next line of |read_file[m]| */ 2152 /* An empty line is appended at the end of a |read_file|. 
*/ 2153 if (!lua_input_ln(read_file[m], (m + 1), true)) { 2154 lua_a_close_in(read_file[m], (m + 1)); 2155 read_open[m] = closed; 2156 if (align_state != 1000000) { 2157 runaway(); 2158 print_err("File ended within \\read"); 2159 help1("This \\read has unbalanced braces."); 2160 align_state = 1000000; 2161 error(); 2162 } 2163 } 2164 2165 } 2166 ilimit = last; 2167 if (end_line_char_inactive) 2168 decr(ilimit); 2169 else 2170 buffer[ilimit] = (packed_ASCII_code) int_par(end_line_char_code); 2171 first = ilimit + 1; 2172 iloc = istart; 2173 istate = new_line; 2174 /* Handle \.{\\readline} and |goto done|; */ 2175 if (j == 1) { 2176 while (iloc <= ilimit) { /* current line not yet finished */ 2177 do_buffer_to_unichar(cur_chr, iloc); 2178 if (cur_chr == ' ') 2179 cur_tok = space_token; 2180 else 2181 cur_tok = cur_chr + other_token; 2182 store_new_token(cur_tok); 2183 } 2184 } else { 2185 while (1) { 2186 get_token(); 2187 if (cur_tok == 0) 2188 break; /* |cur_cmd=cur_chr=0| will occur at the end of the line */ 2189 if (align_state < 1000000) { /* unmatched `\.\}' aborts the line */ 2190 do { 2191 get_token(); 2192 } while (cur_tok != 0); 2193 align_state = 1000000; 2194 break; 2195 } 2196 store_new_token(cur_tok); 2197 } 2198 } 2199 end_file_reading(); 2200 2201 } while (align_state != 1000000); 2202 cur_val = def_ref; 2203 scanner_status = normal; 2204 align_state = s; 2205} 2206 2207@ @c 2208str_number tokens_to_string(halfword p) 2209{ /* return a string from tokens list */ 2210 int old_setting; 2211 if (selector == new_string) 2212 pdf_error("tokens", 2213 "tokens_to_string() called while selector = new_string"); 2214 old_setting = selector; 2215 selector = new_string; 2216 show_token_list(token_link(p), null, -1); 2217 selector = old_setting; 2218 return make_string(); 2219} 2220 2221@ @c 2222#define make_room(a) \ 2223 if ((unsigned)i+a+1>alloci) { \ 2224 ret = xrealloc(ret,(alloci+64)); \ 2225 alloci = alloci + 64; \ 2226 } 2227 2228 2229#define 
append_i_byte(a) ret[i++] = (char)(a) 2230 2231#define Print_char(a) make_room(1); append_i_byte(a) 2232 2233#define Print_uchar(s) { \ 2234 make_room(4); \ 2235 if (s<=0x7F) { \ 2236 append_i_byte(s); \ 2237 } else if (s<=0x7FF) { \ 2238 append_i_byte(0xC0 + (s / 0x40)); \ 2239 append_i_byte(0x80 + (s % 0x40)); \ 2240 } else if (s<=0xFFFF) { \ 2241 append_i_byte(0xE0 + (s / 0x1000)); \ 2242 append_i_byte(0x80 + ((s % 0x1000) / 0x40)); \ 2243 append_i_byte(0x80 + ((s % 0x1000) % 0x40)); \ 2244 } else if (s>=0x110000) { \ 2245 append_i_byte(s-0x11000); \ 2246 } else { \ 2247 append_i_byte(0xF0 + (s / 0x40000)); \ 2248 append_i_byte(0x80 + ((s % 0x40000) / 0x1000)); \ 2249 append_i_byte(0x80 + (((s % 0x40000) % 0x1000) / 0x40)); \ 2250 append_i_byte(0x80 + (((s % 0x40000) % 0x1000) % 0x40)); \ 2251 } } 2252 2253 2254#define Print_esc(b) { \ 2255 const char *v = b; \ 2256 if (e>0 && e<STRING_OFFSET) { \ 2257 Print_uchar (e); \ 2258 } \ 2259 make_room(strlen(v)); \ 2260 while (*v) { append_i_byte(*v); v++; } \ 2261 } 2262 2263#define is_cat_letter(a) \ 2264 (get_char_cat_code(pool_to_unichar(str_string((a)))) == 11) 2265 2266@ the actual token conversion in this function is now functionally 2267 equivalent to |show_token_list|, except that it always prints the 2268 whole token list. 2269 TODO: check whether this causes problems in the lua library. 
2270 2271@c 2272char *tokenlist_to_cstring(int pp, int inhibit_par, int *siz) 2273{ 2274 register int p, c, m; 2275 int q; 2276 int infop; 2277 char *s, *sh; 2278 int e = 0; 2279 char *ret; 2280 int match_chr = '#'; 2281 int n = '0'; 2282 unsigned alloci = 1024; 2283 int i = 0; 2284 p = pp; 2285 if (p == null) { 2286 if (siz != NULL) 2287 *siz = 0; 2288 return NULL; 2289 } 2290 ret = xmalloc(alloci); 2291 p = token_link(p); /* skip refcount */ 2292 if (p != null) { 2293 e = int_par(escape_char_code); 2294 } 2295 while (p != null) { 2296 if (p < (int) fix_mem_min || p > (int) fix_mem_end) { 2297 Print_esc("CLOBBERED."); 2298 break; 2299 } 2300 infop = token_info(p); 2301 if (infop >= cs_token_flag) { 2302 if (!(inhibit_par && infop == par_token)) { 2303 q = infop - cs_token_flag; 2304 if (q < hash_base) { 2305 if (q == null_cs) { 2306 Print_esc("csname"); 2307 Print_esc("endcsname"); 2308 } else { 2309 Print_esc("IMPOSSIBLE."); 2310 } 2311 } else if ((q >= undefined_control_sequence) 2312 && ((q <= eqtb_size) 2313 || (q > eqtb_size + hash_extra))) { 2314 Print_esc("IMPOSSIBLE."); 2315 } else if ((cs_text(q) < 0) || (cs_text(q) >= str_ptr)) { 2316 Print_esc("NONEXISTENT."); 2317 } else { 2318 str_number txt = cs_text(q); 2319 sh = makecstring(txt); 2320 s = sh; 2321 if (is_active_cs(txt)) { 2322 s = s + 3; 2323 while (*s) { 2324 Print_char(*s); 2325 s++; 2326 } 2327 } else { 2328 if (e>=0 && e<0x110000) Print_uchar(e); 2329 while (*s) { 2330 Print_char(*s); 2331 s++; 2332 } 2333 if ((!single_letter(txt)) || is_cat_letter(txt)) { 2334 Print_char(' '); 2335 } 2336 } 2337 free(sh); 2338 } 2339 } 2340 } else { 2341 if (infop < 0) { 2342 Print_esc("BAD."); 2343 } else { 2344 m = token_cmd(infop); 2345 c = token_chr(infop); 2346 switch (m) { 2347 case left_brace_cmd: 2348 case right_brace_cmd: 2349 case math_shift_cmd: 2350 case tab_mark_cmd: 2351 case sup_mark_cmd: 2352 case sub_mark_cmd: 2353 case spacer_cmd: 2354 case letter_cmd: 2355 case other_char_cmd: 2356 
Print_uchar(c); 2357 break; 2358 case mac_param_cmd: 2359 if (!in_lua_escape) 2360 Print_uchar(c); 2361 Print_uchar(c); 2362 break; 2363 case out_param_cmd: 2364 Print_uchar(match_chr); 2365 if (c <= 9) { 2366 Print_char(c + '0'); 2367 } else { 2368 Print_char('!'); 2369 goto EXIT; 2370 } 2371 break; 2372 case match_cmd: 2373 match_chr = c; 2374 Print_uchar(c); 2375 n++; 2376 Print_char(n); 2377 if (n > '9') 2378 goto EXIT; 2379 break; 2380 case end_match_cmd: 2381 if (c == 0) { 2382 Print_char('-'); 2383 Print_char('>'); 2384 } 2385 break; 2386 default: 2387 Print_esc("BAD."); 2388 break; 2389 } 2390 } 2391 } 2392 p = token_link(p); 2393 } 2394 EXIT: 2395 ret[i] = '\0'; 2396 if (siz != NULL) 2397 *siz = i; 2398 return ret; 2399} 2400 2401@ @c 2402lstring *tokenlist_to_lstring(int pp, int inhibit_par) 2403{ 2404 int siz; 2405 lstring *ret = xmalloc(sizeof(lstring)); 2406 ret->s = (unsigned char *) tokenlist_to_cstring(pp, inhibit_par, &siz); 2407 ret->l = (size_t) siz; 2408 return ret; 2409} 2410 2411@ @c 2412void free_lstring(lstring * ls) 2413{ 2414 if (ls == NULL) 2415 return; 2416 if (ls->s != NULL) 2417 free(ls->s); 2418 free(ls); 2419} 2420