1% This file is part of CWEB. 2% This program by Silvio Levy and Donald E. Knuth 3% is based on a program by Knuth. 4% It is distributed WITHOUT ANY WARRANTY, express or implied. 5% Version 3.64 --- February 2002 6% (same as Version 3.5 except for minor corrections) 7% (also quotes backslashes in file names of #line directives) 8 9% Copyright (C) 1987,1990,1993,2000 Silvio Levy and Donald E. Knuth 10 11% Permission is granted to make and distribute verbatim copies of this 12% document provided that the copyright notice and this permission notice 13% are preserved on all copies. 14 15% Permission is granted to copy and distribute modified versions of this 16% document under the conditions for verbatim copying, provided that the 17% entire resulting derived work is given a different name and distributed 18% under the terms of a permission notice identical to this one. 19 20% Here is TeX material that gets inserted after \input cwebmac 21\def\hang{\hangindent 3em\indent\ignorespaces} 22\def\pb{$\.|\ldots\.|$} % C brackets (|...|) 23\def\v{\char'174} % vertical (|) in typewriter font 24\mathchardef\RA="3221 % right arrow 25\mathchardef\BA="3224 % double arrow 26 27\def\title{CTANGLE (Version 3.64)} 28\def\topofcontents{\null\vfill 29 \centerline{\titlefont The {\ttitlefont CTANGLE} processor} 30 \vskip 15pt 31 \centerline{(Version 3.64)} 32 \vfill} 33\def\botofcontents{\vfill 34\noindent 35Copyright \copyright\ 1987, 1990, 1993, 2000 Silvio Levy and Donald E. Knuth 36\bigskip\noindent 37Permission is granted to make and distribute verbatim copies of this 38document provided that the copyright notice and this permission notice 39are preserved on all copies. 40 41\smallskip\noindent 42Permission is granted to copy and distribute modified versions of this 43document under the conditions for verbatim copying, provided that the 44entire resulting derived work is given a different name and distributed 45under the terms of a permission notice identical to this one. 
}
\pageno=\contentspagenumber \advance\pageno by 1
\let\maybe=\iftrue
@s not_eq normal @q unreserve a C++ keyword @>

@** Introduction.
This is the \.{CTANGLE} program by Silvio Levy and Donald E. Knuth,
based on \.{TANGLE} by Knuth.
We are thankful to
Nelson Beebe, Hans-Hermann Bode (to whom the \CPLUSPLUS/ adaptation is due),
Klaus Guntermann, Norman Ramsey, Tomas Rokicki, Joachim Schnitter,
Joachim Schrod, Lee Wittenberg, and others who have contributed improvements.

The ``banner line'' defined here should be changed whenever \.{CTANGLE}
is modified.

@d banner "This is CTANGLE (Version 3.64)\n"

@c
@<Include files@>@/
@h
@<Common code for \.{CWEAVE} and \.{CTANGLE}@>@/
@<Typedef declarations@>@/
@<Global variables@>@/
@<Predeclaration of procedures@>@/

@ The string functions |strlen|, |strcmp|, |strcpy|, |strncmp|, and
|strncpy| are declared by the standard header \.{<string.h>}. We include
that header instead of predeclaring the functions in the old style,
because a declaration like `|extern int strlen()|' disagrees with the
standard prototype (the function returns |size_t|, not |int|) and is
diagnosed or rejected by present-day \CEE/ compilers.

@<Predecl...@>=
#include <string.h> /* |strlen|, |strcmp|, |strcpy|, |strncmp|, |strncpy| */

@ \.{CTANGLE} has a fairly straightforward outline. It operates in
two phases: First it reads the source file, saving the \CEE/ code in
compressed form; then it shuffles and outputs the code.

Please read the documentation for \.{common}, the set of routines common
to \.{CTANGLE} and \.{CWEAVE}, before proceeding further.

@c
int main (ac, av)
int ac; /* argument count handed over by the operating system */
char **av; /* argument vector handed over by the operating system */
{
  argc=ac; argv=av; /* make the command line visible to the common routines */
  program=ctangle;
  @<Set initial values@>;
  common_init();
  if (show_banner) printf("%s",banner);
    /* print a ``banner line''; |banner| is an argument here, not the format */
  phase_one(); /* read all the user's text and compress it into |tok_mem| */
  phase_two(); /* output the contents of the compressed tables */
  return wrap_up(); /* and exit gracefully */
}

@ The following parameters were sufficient in the original \.{TANGLE} to
handle \TEX/,
so they should be sufficient for most applications of \.{CTANGLE}.
If you change |max_bytes|, |max_names|, or |hash_size| you should also
change them in the file |"common.w"|.

@d max_bytes 90000 /* the number of bytes in identifiers,
  index entries, and section names; used in |"common.w"| */
@d max_toks 270000 /* number of bytes in compressed \CEE/ code */
@d max_names 4000 /* number of identifiers, strings, section names;
  must be less than 10240; used in |"common.w"| */
@d max_texts 2500 /* number of replacement texts, must be less than 10240 */
@d hash_size 353 /* should be prime; used in |"common.w"| */
@d longest_name 10000 /* section names shouldn't be longer than this */
@d stack_size 50 /* number of simultaneous levels of macro expansion */
@d buf_size 100 /* for \.{CWEAVE} and \.{CTANGLE} */

@ The next few sections contain stuff from the file |"common.w"| that must
be included in both |"ctangle.w"| and |"cweave.w"|. It appears in
file |"common.h"|, which needs to be updated when |"common.w"| changes.

@i common.h

@* Data structures exclusive to {\tt CTANGLE}.
We've already seen that the |byte_mem| array holds the names of identifiers,
strings, and sections;
the |tok_mem| array holds the replacement texts
for sections. Allocation is sequential, since things are deleted only
during Phase II, and only in a last-in-first-out manner.

A \&{text} variable is a structure containing a pointer into
|tok_mem|, which tells where the corresponding text starts, and an
integer |text_link|, which, as we shall see later, is used to connect
pieces of text that have the same name. All the \&{text}s are stored in
the array |text_info|, and we use a |text_pointer| variable to refer
to them.

The first position of |tok_mem| that is unoccupied by
replacement text is called |tok_ptr|, and the first unused location of
|text_info| is called |text_ptr|. Thus we usually have the identity
|text_ptr->tok_start==tok_ptr|.

If your machine does not support |unsigned char| you should change
the definition of \&{eight\_bits} to |unsigned short|.
@^system dependencies@>

@<Typed...@>=
typedef struct {
  eight_bits *tok_start; /* where this text begins in |tok_mem| */
  sixteen_bits text_link; /* ties together texts that belong together */
} text;
typedef text *text_pointer;

@ @<Glob...@>=
text text_info[max_texts];
text_pointer text_info_end=&text_info[max_texts-1];
text_pointer text_ptr; /* first unused position in |text_info| */
eight_bits tok_mem[max_toks];
eight_bits *tok_mem_end=&tok_mem[max_toks-1];
eight_bits *tok_ptr; /* first unused position in |tok_mem| */

@ @<Set init...@>=
tok_ptr=tok_mem;
text_info->tok_start=tok_mem;
text_ptr=text_info+1;
text_ptr->tok_start=tok_mem;
  /* texts 0 and 1 start at the same place, so text 0 has length zero */

@ If |p| is a pointer to a section name, |p->equiv| is a pointer to its
replacement text, an element of the array |text_info|.

@d equiv equiv_or_xref /* info corresponding to names */

@ @<Set init...@>=
name_dir->equiv=(char *)text_info; /* the undefined section has no replacement text */

@ Here's the procedure that decides whether a name of length |l|
starting at position |first| equals the identifier pointed to by |p|:

@c
int names_match(p,first,l)
name_pointer p; /* points to the proposed match */
char *first; /* position of first character of string */
int l; /* length of identifier */
{
  return length(p)==l && strncmp(first,p->byte_start,l)==0;
    /* the characters are compared only when the lengths agree */
}

@ The common lookup routine refers to separate routines |init_node| and
|init_p| when the data structure grows. Actually |init_p| is called only by
\.{CWEAVE}, but we need to declare a dummy version so that
the loader won't complain of its absence.

@c
void
init_node(node)
name_pointer node;
{
  node->equiv=(char *)text_info; /* same setting as for the undefined section */
}

void
init_p()
{ /* deliberately empty; see the remark above */
}

@* Tokens.
Replacement texts, which represent \CEE/ code in a compressed format,
appear in |tok_mem| as mentioned above. The codes in
these texts are called `tokens'; some tokens occupy two consecutive
eight-bit byte positions, and the others take just one byte.

If $p$ points to a replacement text, |p->tok_start| is the |tok_mem| position
of the first eight-bit code of that text. If |p->text_link==0|,
this is the replacement text for a macro, otherwise it is the replacement
text for a section. In the latter case |p->text_link| is either equal to
|section_flag|, which means that there is no further text for this section, or
|p->text_link| points to a continuation of this replacement text; such
links are created when several sections have \CEE/ texts with the same
name, and they also tie together all the \CEE/ texts of unnamed sections.
The replacement text pointer for the first unnamed section appears in
|text_info->text_link|, and the most recent such pointer is |last_unnamed|.

@d section_flag max_texts /* final |text_link| in section replacement texts */

@<Glob...@>=
text_pointer last_unnamed; /* most recent replacement text of unnamed section */

@ @<Set init...@>=
last_unnamed=text_info;
text_info->text_link=0;

@ If the first byte of a token is less than |0200|, the token occupies a
single byte. Otherwise we make a sixteen-bit token by combining two consecutive
bytes |a| and |b|. If |0200<=a<0250|, then |(a-0200)@t${}\times2^8$@>+b|
points to an identifier; if |0250<=a<0320|, then
|(a-0250)@t${}\times2^8$@>+b| points to a section name
(or, if it has the special value |output_defs_flag|,
to the area where the preprocessor definitions are stored); and if
|0320<=a<0400|, then |(a-0320)@t${}\times2^8$@>+b| is the number of the section
in which the current replacement text appears.

Codes less than |0200| are 7-bit |char| codes that represent themselves.
Some of the 7-bit codes will not be present, however, so we can
use them for special purposes. The following symbolic names are used:

\yskip \hang |join| denotes the concatenation of adjacent items with no
space or line breaks allowed between them (the \.{@@\&} operation of \.{CWEB}).

\hang |string| denotes the beginning or end of a string, verbatim
construction or numerical constant.
@^ASCII code dependencies@>

@d string 02 /* takes the place of extended ASCII \.{\char2} */
@d join 0177 /* takes the place of ASCII delete */
@d output_defs_flag (2*024000-1)

@ The following procedure is used to enter a two-byte value into
|tok_mem| when a replacement text is being generated.

@c
void
store_two_bytes(x)
sixteen_bits x; /* the value to be appended to |tok_mem| */
{
  if (tok_ptr+2>tok_mem_end) overflow("token");
  tok_ptr[0]=x>>8; /* the high byte goes first */
  tok_ptr[1]=x&0377; /* the low byte follows */
  tok_ptr+=2;
}

@** Stacks for output. The output process uses a stack to keep track
of what is going on at different ``levels'' as the sections are being
written out. Entries on this stack have five parts:

\yskip\hang |end_field| is the |tok_mem| location where the replacement
text of a particular level will end;

\hang |byte_field| is the |tok_mem| location from which the next token
on a particular level will be read;

\hang |name_field| points to the name corresponding to a particular level;

\hang |repl_field| points to the replacement text currently being read
at a particular level;

\hang |section_field| is the section number, or zero if this is a macro.

\yskip\noindent The current values of these five quantities are referred to
quite frequently, so they are stored in a separate place instead of in
the |stack| array. We call the current values |cur_end|, |cur_byte|,
|cur_name|, |cur_repl|, and |cur_section|.

The global variable |stack_ptr| tells how many levels of output are
currently in progress. The end of all output occurs when the stack is
empty, i.e., when |stack_ptr==stack|.

@<Typed...@>=
typedef struct {
  eight_bits *end_field; /* where the replacement text of this level ends */
  eight_bits *byte_field; /* where the next token of this level will be read */
  name_pointer name_field; /* the name corresponding to this level */
  text_pointer repl_field; /* the replacement text being read at this level */
  sixteen_bits section_field; /* section number, or zero if this is a macro */
} output_state;
typedef output_state *stack_pointer;

@ @d cur_end cur_state.end_field /* current ending location in |tok_mem| */
@d cur_byte cur_state.byte_field /* location of next output byte in |tok_mem|*/
@d cur_name cur_state.name_field /* pointer to current name being expanded */
@d cur_repl cur_state.repl_field /* pointer to current replacement text */
@d cur_section cur_state.section_field /* current section number being expanded */

@<Global...@>=
output_state cur_state; /* |cur_end|, |cur_byte|, |cur_name|, |cur_repl|,
  and |cur_section| */
output_state stack[stack_size+1]; /* info for non-current levels */
stack_pointer stack_ptr; /* first unused location in the output state stack */
stack_pointer stack_end=&stack[stack_size]; /* end of |stack| */

@ To get the output process started, we will perform the following
initialization steps. We may assume that |text_info->text_link| is nonzero,
since it points to the \CEE/ text in the first unnamed section that generates
code; if there are no such sections, there is nothing to output, and an
error message will have been generated before we do any of the initialization.

@<Initialize the output stacks@>=
stack_ptr=stack+1;
cur_name=name_dir;
cur_repl=text_info+text_info->text_link;
cur_byte=cur_repl->tok_start;
cur_end=(cur_repl+1)->tok_start;
cur_section=0;

@ When the replacement text for name |p| is to be inserted into the output,
the following subroutine is called to save the old level of output and get
the new one going.

We assume that the \CEE/ compiler can copy structures.
@^system dependencies@>

@c
void
push_level(p) /* suspends the current level */
name_pointer p;
{
  if (stack_ptr==stack_end) overflow("stack");
  *stack_ptr++=cur_state; /* save the level that is being suspended */
  if (p==NULL) return; /* |p==NULL| means we are in |output_defs| */
  cur_name=p;
  cur_repl=(text_pointer)p->equiv;
  cur_byte=cur_repl->tok_start;
  cur_end=(cur_repl+1)->tok_start;
  cur_section=0;
}

@ When we come to the end of a replacement text, the |pop_level| subroutine
does the right thing: It either moves to the continuation of this replacement
text or returns the state to the most recently stacked level.

@c
void
pop_level(flag) /* do this when |cur_byte| reaches |cur_end| */
int flag; /* |flag==0| means we are in |output_defs| */
{
  if (!flag || cur_repl->text_link>=section_flag) {
    stack_ptr--; /* go down to the previous level */
    if (stack_ptr>stack) cur_state=*stack_ptr;
    return;
  }
  cur_repl=text_info+cur_repl->text_link;
    /* link to a continuation, staying on the same level */
  cur_byte=cur_repl->tok_start;
  cur_end=(cur_repl+1)->tok_start;
}

@ The heart of the output procedure is the function |get_output|,
which produces the next token of output and sends it on to the lower-level
function |out_char|. The main purpose of |get_output| is to handle the
necessary stacking and unstacking.
It sends the value |section_number|
if the next output begins or ends the replacement text of some section,
in which case |cur_val| is that section's number (if beginning) or the
negative of that value (if ending). (A section number of 0 indicates
not the beginning or ending of a section, but a \&{\#line} command.)
And it sends the value |identifier|
if the next output is an identifier, in which case
|cur_val| points to that identifier name.

@d section_number 0201 /* code returned by |get_output| for section numbers */
@d identifier 0202 /* code returned by |get_output| for identifiers */

@<Global...@>=
int cur_val; /* additional information corresponding to output token */

@ If |get_output| finds that no more output remains, it returns with
|stack_ptr==stack|.
@^high-bit character handling@>

@c
void
get_output() /* sends next token to |out_char| */
{
  sixteen_bits a; /* value of current byte */
restart:
  if (stack_ptr==stack) return;
  if (cur_byte==cur_end) { /* the current replacement text is exhausted */
    cur_val=-((int)cur_section); /* cast needed because of sign extension */
    pop_level(1);
    if (cur_val==0) goto restart;
    out_char(section_number);
    return;
  }
  a=*cur_byte++;
  if (out_state==verbatim && a!=string && a!=constant && a!='\n') {
    C_putc(a); /* a high-bit character can occur in a string */
  }
  else if (a<0200) {
    out_char(a); /* one-byte token */
  }
  else {
    a=(a-0200)*0400+*cur_byte++; /* combine the two bytes of the token */
    switch (a/024000) { /* |024000==(0250-0200)*0400| */
    case 0:
      cur_val=a;
      out_char(identifier);
      break;
    case 1:
      if (a==output_defs_flag) output_defs();
      else @<Expand section |a-024000|, |goto restart|@>;
      break;
    default:
      cur_val=a-050000;
      if (cur_val>0) cur_section=cur_val;
      out_char(section_number);
    }
  }
}

@ The user may have forgotten to give any \CEE/ text for a section name,
or the \CEE/ text may have been associated with a different name by mistake.

@<Expand section |a-...@>=
{
  name_pointer wanted; /* the section whose replacement text is needed */
  a-=024000;
  wanted=a+name_dir;
  if (wanted->equiv!=(char *)text_info) push_level(wanted);
  else if (a!=0) {
    printf("\n! Not present: <");
    print_section_name(wanted);
    err_print(">");
@.Not present: <section name>@>
  }
  goto restart;
}

@* Producing the output.
The |get_output| routine above handles most of the complexity of output
generation, but there are two further considerations that have a nontrivial
effect on \.{CTANGLE}'s algorithms.

@ First,
we want to make sure that the output has spaces and line breaks in
the right places (e.g., not in the middle of a string or a constant or an
identifier, not at a `\.{@@\&}' position
where quantities are being joined together, and certainly after an \.=
because the \CEE/ compiler thinks \.{=-} is ambiguous).

The output process can be in one of the following states:

\yskip\hang |num_or_id| means that the last item in the buffer is a number or
identifier, hence a blank space or line break must be inserted if the next
item is also a number or identifier.

\yskip\hang |unbreakable| means that the last item in the buffer was followed
by the \.{@@\&} operation that inhibits spaces between it and the next item.

\yskip\hang |verbatim| means we're copying only character tokens, and
that they are to be output exactly as stored. This is the case during
strings, verbatim constructions and numerical constants.

\yskip\hang |post_slash| means we've just output a slash.

\yskip\hang |normal| means none of the above.

\yskip\noindent Furthermore, if the variable |protect| is positive, newlines
are preceded by a `\.\\'.

@d normal 0 /* non-unusual state */
@d num_or_id 1 /* state associated with numbers and identifiers */
@d post_slash 2 /* state following a \./ */
@d unbreakable 3 /* state associated with \.{@@\&} */
@d verbatim 4 /* state in the middle of a string */

@<Global...@>=
eight_bits out_state; /* current status of partial output */
boolean protect; /* should newline characters be quoted? */

@ Here is a routine that is invoked when we want to output the current line.
During the output process, |cur_line| equals the number of the next line
to be output.

@c
void
flush_buffer() /* writes one line to output file */
{
  C_putc('\n');
  if (show_progress && cur_line % 100 == 0) { /* progress report */
    printf(".");
    if (cur_line % 500 == 0) printf("%d",cur_line);
    update_terminal;
  }
  cur_line++;
}

@ Second, we have modified the original \.{TANGLE} so that it will write output
on multiple files.
If a section name is introduced in at least one place by \.{@@(}
instead of \.{@@<}, we treat it as the name of a file.
All these special sections are saved on a stack, |output_files|.
We write them out after we've done the unnamed section.

@d max_files 256
@<Glob...@>=
name_pointer output_files[max_files];
name_pointer *cur_out_file, *end_output_files, *an_output_file;
char cur_section_name_char; /* is it |'<'| or |'('| */
char output_file_name[longest_name]; /* name of the file */

@ We make |end_output_files| point just beyond the end of
|output_files|. The stack pointer
|cur_out_file| starts out there. Every time we see a new file, we
decrement |cur_out_file| and then write it in.
@<Set initial...@>=
cur_out_file=end_output_files=output_files+max_files;

@ @<If it's not there, add |cur_section_name| to the output file stack, or
complain we're out of room@>=
{
  for (an_output_file=cur_out_file;
        an_output_file<end_output_files; an_output_file++)
    if (*an_output_file==cur_section_name) break;
  if (an_output_file==end_output_files) { /* the name is not yet on the stack */
    if (cur_out_file>output_files)
        *--cur_out_file=cur_section_name;
    else {
      overflow("output files");
    }
  }
}

@* The big output switch. Here then is the routine that does the
output.

@<Predecl...@>=
void phase_two();

@ @c
void
phase_two () {
  web_file_open=0;
  cur_line=1;
  @<Initialize the output stacks@>;
  @<Output macro definitions if appropriate@>;
  if (text_info->text_link==0 && cur_out_file==end_output_files) {
    printf("\n! No program text was specified."); mark_harmless;
@.No program text...@>
  }
  else {
    if(cur_out_file==end_output_files) {
      if(show_progress)
        printf("\nWriting the output file (%s):",C_file_name);
    }
    else {
      if (show_progress) {
        printf("\nWriting the output files:");
@.Writing the output...@>
        printf(" (%s)",C_file_name);
        update_terminal;
      }
      if (text_info->text_link==0) goto writeloop;
        /* no unnamed section, so only the named files are written */
    }
    while (stack_ptr>stack) get_output();
    flush_buffer();
writeloop: @<Write all the named output files@>;
    if(show_happiness) printf("\nDone.");
  }
}

@ To write the named output files, we proceed as for the unnamed
section.
The only subtlety is that we have to open each one.
The name of each file is reported only if |show_progress| is set,
just like the other progress messages of |phase_two|.

@<Write all the named output files@>=
for (an_output_file=end_output_files; an_output_file>cur_out_file;) {
    an_output_file--;
    sprint_section_name(output_file_name,*an_output_file);
    fclose(C_file);
    C_file=fopen(output_file_name,"w");
    if (C_file ==0) fatal("! Cannot open output file:",output_file_name);
@.Cannot open output file@>
    if (show_progress) {
      printf("\n(%s)",output_file_name); update_terminal;
    }
    cur_line=1;
    stack_ptr=stack+1;
    cur_name= (*an_output_file);
    cur_repl= (text_pointer)cur_name->equiv;
    cur_byte=cur_repl->tok_start;
    cur_end=(cur_repl+1)->tok_start;
    while (stack_ptr > stack) get_output();
    flush_buffer();
}

@ If a \.{@@h} was not encountered in the input,
we go through the list of replacement texts and copy the ones
that refer to macros, preceded by the \.{\#define} preprocessor command.

@<Output macro definitions if appropriate@>=
  if (!output_defs_seen)
    output_defs();

@ @<Glob...@>=
boolean output_defs_seen=0;

@ @<Predecl...@>=
void output_defs();

@ @c
void
output_defs()
{
  sixteen_bits a; /* the current byte or two-byte token */
  push_level(NULL);
  for (cur_text=text_info+1; cur_text<text_ptr; cur_text++)
    if (cur_text->text_link==0) { /* |cur_text| is the text for a macro */
      cur_byte=cur_text->tok_start;
      cur_end=(cur_text+1)->tok_start;
      C_printf("%s","#define ");
      out_state=normal;
      protect=1; /* newlines should be preceded by |'\\'| */
      while (cur_byte<cur_end) {
        a=*cur_byte++;
        if (cur_byte==cur_end && a=='\n') break; /* disregard a final newline */
        if (out_state==verbatim && a!=string && a!=constant && a!='\n')
          C_putc(a); /* a high-bit character can occur in a string */
@^high-bit character handling@>
        else if (a<0200) out_char(a); /* one-byte token */
        else {
          a=(a-0200)*0400+*cur_byte++;
          if (a<024000) { /* |024000==(0250-0200)*0400| */
            cur_val=a; out_char(identifier);
          }
          else if (a<050000) { confusion("macro defs have strange char");}
          else {
            cur_val=a-050000; cur_section=cur_val; out_char(section_number);
          }
      /* no other cases */
        }
      }
      protect=0;
      flush_buffer();
    }
  pop_level(0);
}

@ A many-way switch is used to send the output.
Note that this function
is not called if |out_state==verbatim|, except perhaps with arguments
|'\n'| (protect the newline), |string| (end the string), or |constant|
(end the constant).

@<Predecl...@>=
static void out_char();

@ @c
static void
out_char(cur_char)
eight_bits cur_char; /* the token code to be sent to the output */
{
  char *j, *k; /* pointer into |byte_mem| */
restart:
  switch (cur_char) {
  case '\n':
    if (protect) {
      if (out_state!=verbatim) C_putc(' ');
      C_putc('\\');
    }
    else if (out_state==verbatim) C_putc('\\');
    flush_buffer();
    if (out_state!=verbatim) out_state=normal;
    break;
  @/@t\4@>@<Case of an identifier@>;
  @/@t\4@>@<Case of a section number@>;
  @/@t\4@>@<Cases like \.{!=}@>;
  case '=': case '>':
    C_putc(cur_char); C_putc(' ');
    out_state=normal;
    break;
  case join:
    out_state=unbreakable;
    break;
  case constant:
    if (out_state==verbatim) out_state=num_or_id; /* the constant ends here */
    else {
      if (out_state==num_or_id) C_putc(' ');
      out_state=verbatim; /* the constant begins here */
    }
    break;
  case string:
    out_state=(out_state==verbatim)? normal: verbatim;
    break;
  case '/':
    C_putc('/');
    out_state=post_slash;
    break;
  case '*':
    if (out_state==post_slash) C_putc(' ');
    /* fall through */
  default:
    C_putc(cur_char);
    out_state=normal;
    break;
  }
}

@ @<Cases like \.{!=}@>=
case plus_plus: C_putc('+'); C_putc('+'); out_state=normal; break;
case minus_minus: C_putc('-'); C_putc('-'); out_state=normal; break;
case minus_gt: C_putc('-'); C_putc('>'); out_state=normal; break;
case gt_gt: C_putc('>'); C_putc('>'); out_state=normal; break;
case eq_eq: C_putc('='); C_putc('='); out_state=normal; break;
case lt_lt: C_putc('<'); C_putc('<'); out_state=normal; break;
case gt_eq: C_putc('>'); C_putc('='); out_state=normal; break;
case lt_eq: C_putc('<'); C_putc('='); out_state=normal; break;
case not_eq: C_putc('!'); C_putc('='); out_state=normal; break;
case and_and: C_putc('&'); C_putc('&'); out_state=normal; break;
case or_or: C_putc('|'); C_putc('|'); out_state=normal; break;
case dot_dot_dot: C_putc('.'); C_putc('.'); C_putc('.'); out_state=normal;
    break;
case colon_colon: C_putc(':'); C_putc(':'); out_state=normal; break;
case period_ast: C_putc('.'); C_putc('*'); out_state=normal; break;
case minus_gt_ast: C_putc('-'); C_putc('>'); C_putc('*'); out_state=normal;
    break;

@ When an identifier is output to the \CEE/ file, characters in the
range 128--255 must be changed into something else, so the \CEE/
compiler won't complain. By default, \.{CTANGLE} converts the
character with code $16 x+y$ to the three characters `\.X$xy$', but
a different transliteration table can be specified. Thus a German
might want {\it gr\"un\/} to appear as a still readable \.{gruen}.
This makes debugging a lot less confusing.

@d translit_length 10

@<Glo...@>=
char translit[128][translit_length];

@ @<Set init...@>=
{
  int i; /* index into |translit| */
  for (i=0;i<128;i++)
    sprintf(translit[i],"X%02X",(unsigned)(128+i));
}

@ @<Case of an identifier@>=
case identifier:
  if (out_state==num_or_id) C_putc(' ');
  for (j=(cur_val+name_dir)->byte_start, k=(cur_val+name_dir+1)->byte_start;
       j<k; j++) {
    if ((unsigned char)(*j)<0200) C_putc(*j);
@^high-bit character handling@>
    else C_printf("%s",translit[(unsigned char)(*j)-0200]);
  }
  out_state=num_or_id;
  break;

@ @<Case of a sec...@>=
case section_number:
  if (cur_val>0) C_printf("/*%d:*/",cur_val);
  else if(cur_val<0) C_printf("/*:%d*/",-cur_val);
  else if (protect) {
    cur_byte +=4; /* skip line number and file name */
    cur_char = '\n';
    goto restart;
  } else {
    sixteen_bits a; /* the line number, stored as two bytes */
    a=0400* *cur_byte++;
    a+=*cur_byte++; /* gets the line number */
    C_printf("\n#line %d \"",a);
@:line}{\.{\#line}@>
    cur_val=*cur_byte++;
    cur_val=0400*(cur_val-0200)+ *cur_byte++; /* points to the file name */
    for (j=(cur_val+name_dir)->byte_start, k=(cur_val+name_dir+1)->byte_start;
         j<k; j++) {
      if (*j=='\\' || *j=='"') C_putc('\\');
        /* backslashes and quotes in the file name are quoted */
      C_putc(*j);
    }
    C_printf("%s","\"\n");
  }
  break;

@** Introduction to the input phase.
We have now seen that \.{CTANGLE} will be able to output the full
\CEE/ program, if we can only get that program into the byte memory in
the proper format. The input process is something like the output process
in reverse, since we compress the text as we read it in and we expand it
as we write it out.

There are three main input routines. The most interesting is the one that gets
the next token of a \CEE/ text; the other two are used to scan rapidly past
\TEX/ text in the \.{CWEB} source code. One of the latter routines will jump to
the next token that starts with `\.{@@}', and the other skips to the end
of a \CEE/ comment.

@ Control codes in \.{CWEB} begin with `\.{@@}', and the next character
identifies the code. Some of these are of interest only to \.{CWEAVE},
so \.{CTANGLE} ignores them; the others are converted by \.{CTANGLE} into
internal code numbers by the |ccode| table below. The ordering
of these internal code numbers has been chosen to simplify the program logic;
larger numbers are given to the control codes that denote more significant
milestones.

@d ignore 0 /* control code of no interest to \.{CTANGLE} */
@d ord 0302 /* control code for `\.{@@'}' */
@d control_text 0303 /* control code for `\.{@@t}', `\.{@@\^}', etc. */
@d translit_code 0304 /* control code for `\.{@@l}' */
@d output_defs_code 0305 /* control code for `\.{@@h}' */
@d format_code 0306 /* control code for `\.{@@f}' */
@d definition 0307 /* control code for `\.{@@d}' */
@d begin_C 0310 /* control code for `\.{@@c}' */
@d section_name 0311 /* control code for `\.{@@<}' */
@d new_section 0312 /* control code for `\.{@@\ }' and `\.{@@*}' */

@<Global...@>=
eight_bits ccode[256]; /* meaning of a char following \.{@@} */

@ @<Set ini...@>= {
  int c; /* must be |int| so the |for| loop will end */
  for (c=0; c<256; c++) ccode[c]=ignore;
  ccode[' ']=ccode['\t']=ccode['\n']=ccode['\v']=ccode['\r']=ccode['\f']
   =ccode['*']=new_section;
  ccode['@@']='@@'; ccode['=']=string;
  ccode['d']=ccode['D']=definition;
  ccode['f']=ccode['F']=ccode['s']=ccode['S']=format_code;
  ccode['c']=ccode['C']=ccode['p']=ccode['P']=begin_C;
  ccode['^']=ccode[':']=ccode['.']=ccode['t']=ccode['T']=
   ccode['q']=ccode['Q']=control_text;
  ccode['h']=ccode['H']=output_defs_code;
  ccode['l']=ccode['L']=translit_code;
  ccode['&']=join;
  ccode['<']=ccode['(']=section_name;
  ccode['\'']=ord;
}

@ The |skip_ahead| procedure reads through the input at fairly high speed
until finding the next non-ignorable control code, which it returns.

@c
eight_bits
skip_ahead() /* skip to next control code */
{
  eight_bits c; /* control code found */
  for (;;) {
    if (loc>limit && (get_line()==0)) return(new_section);
    *(limit+1)='@@'; /* a sentinel that stops the scan below */
    while (*loc!='@@') loc++;
    if (loc<=limit) { /* a real `\.{@@}', not the sentinel */
      c=ccode[(eight_bits)loc[1]];
      loc+=2;
      if (c!=ignore || loc[-1]=='>') return(c);
    }
  }
}

@ The |skip_comment| procedure reads through the input at somewhat high
speed in order to pass over comments, which \.{CTANGLE} does not transmit
to the output.
If the comment is introduced by \.{/*}, |skip_comment|
proceeds until finding the end-comment token \.{*/} or a newline; in the
latter case |skip_comment| will be called again by |get_next|, since the
comment is not finished.  This is done so that each newline in the
\CEE/ part of a section is copied to the output; otherwise the \&{\#line}
commands inserted into the \CEE/ file by the output routines become useless.
On the other hand, if the comment is introduced by \.{//} (i.e., if it
is a \CPLUSPLUS/ ``short comment''), it always is simply delimited by the next
newline. The boolean argument |is_long_comment| distinguishes between
the two types of comments.

If |skip_comment| comes to the end of the section, it prints an error message.
No comment, long or short, is allowed to contain `\.{@@\ }' or `\.{@@*}'.

@<Global...@>=
boolean comment_continues=0; /* are we scanning a comment? */

@ The value returned by |skip_comment| is the new value of
|comment_continues|: it is 1 only when a long comment has reached the end
of a line and another line could be read.

@c
int skip_comment(is_long_comment) /* skips over comments */
boolean is_long_comment;
{
  char c; /* current character */
  while (1) {
    if (loc>limit) {
      if (is_long_comment) {
        if(get_line()) return(comment_continues=1);
          /* the comment goes on; the caller will come back for the rest */
        else{
          err_print("! Input ended in mid-comment");
@.Input ended in mid-comment@>
          return(comment_continues=0);
        }
      }
      else return(comment_continues=0); /* a short comment ends with its line */
    }
    c=*(loc++);
    if (is_long_comment && c=='*' && *loc=='/') {
      loc++; return(comment_continues=0);
    }
    if (c=='@@') {
      if (ccode[(eight_bits)*loc]==new_section) {
        err_print("! Section name ended in mid-comment"); loc--;
          /* back up, so that the \.{@@} will be scanned again */
@.Section name ended in mid-comment@>
        return(comment_continues=0);
      }
      else loc++; /* any other control code is simply passed over */
    }
  }
}

@* Inputting the next token.

@d constant 03

@<Global...@>=
name_pointer cur_section_name; /* name of section just scanned */
int no_where; /* suppress |print_where|? */

@ @<Include...@>=
#include <ctype.h> /* definition of |isalpha|, |isdigit| and so on */
#include <stdlib.h> /* definition of |exit| */

@ As one might expect, |get_next| consists mostly of a big switch
that branches to the various special cases that can arise.

The static variable |preprocessing| is set when a \.{\#} is found in the
first position of a line, and it is cleared at the end of a line that does
not end with a backslash. While it is set, white space inside a line is
returned as a blank token instead of being skipped.

@d isxalpha(c) ((c)=='_' || (c)=='$')
   /* non-alpha characters allowed in identifier */
@d ishigh(c) ((unsigned char)(c)>0177)
@^high-bit character handling@>

@c
eight_bits
get_next() /* produces the next input token */
{
  static int preprocessing=0;
  eight_bits c; /* the current character */
  while (1) {
    if (loc>limit) {
      if (preprocessing && (limit==buffer || *(limit-1)!='\\')) preprocessing=0;
        /* an empty line has no final backslash; the test |limit==buffer|
           keeps us from looking at the position before |buffer| */
      if (get_line()==0) return(new_section);
      else if (print_where && !no_where) {
          print_where=0;
          @<Insert the line number into |tok_mem|@>;
        }
        else return ('\n');
    }
    c=*loc;
    if (comment_continues || (c=='/' && (*(loc+1)=='*' || *(loc+1)=='/'))) {
      skip_comment(comment_continues||*(loc+1)=='*');
          /* scan to end of comment or newline */
      if (comment_continues) return('\n');
      else continue;
    }
    loc++;
    if (xisdigit(c) || c=='.') @<Get a constant@>@;
    else if (c=='\'' || c=='"' || (c=='L'&&(*loc=='\'' || *loc=='"')))
        @<Get a string@>@;
    else if (isalpha(c) || isxalpha(c) || ishigh(c))
      @<Get an identifier@>@;
    else if (c=='@@') @<Get control code and possible section name@>@;
    else if (xisspace(c)) {
        if (!preprocessing || loc>limit) continue;
          /* we don't want a blank after a final backslash */
        else return(' '); /* ignore spaces and tabs, unless preprocessing */
    }
    else if (c=='#' && loc==buffer+1) preprocessing=1;
    mistake: @<Compress two-symbol operator@>@;
    return(c);
  }
}

@ The following code assigns values to the combinations \.{++},
\.{--}, \.{->}, \.{>=}, \.{<=}, \.{==}, \.{<<}, \.{>>}, \.{!=}, \.{||} and
\.{\&\&}, and to the \CPLUSPLUS/
combinations \.{...}, \.{::}, \.{.*} and \.{->*}.
The compound assignment operators (e.g., \.{+=}) are
treated as separate tokens.

@d compress(c) if (loc++<=limit) return(c)

@<Compress tw...@>=
switch(c) {
  case '+': if (*loc=='+') compress(plus_plus); break;
  case '-': if (*loc=='-') {compress(minus_minus);}
    else if (*loc=='>') if (*(loc+1)=='*') {loc++; compress(minus_gt_ast);}
                        else compress(minus_gt); break;
  case '.': if (*loc=='*') {compress(period_ast);}
            else if (*loc=='.' && *(loc+1)=='.') {
              loc++; compress(dot_dot_dot);
            }
            break;
  case ':': if (*loc==':') compress(colon_colon); break;
  case '=': if (*loc=='=') compress(eq_eq); break;
  case '>': if (*loc=='=') {compress(gt_eq);}
    else if (*loc=='>') compress(gt_gt); break;
  case '<': if (*loc=='=') {compress(lt_eq);}
    else if (*loc=='<') compress(lt_lt); break;
  case '&': if (*loc=='&') compress(and_and); break;
  case '|': if (*loc=='|') compress(or_or); break;
  case '!': if (*loc=='=') compress(not_eq); break;
}

@ An identifier extends over letters, digits, the characters accepted by
|isxalpha|, and high-bit characters. The characters are converted to
|eight_bits| before they are given to |isalpha| and |isdigit|, because
a plain |char| may be negative for high-bit characters and the
\.{<ctype.h>} functions are not defined for such arguments.
@^high-bit character handling@>

@<Get an identifier@>= {
  id_first=--loc;
  while (isalpha((eight_bits)*++loc) || isdigit((eight_bits)*loc)
      || isxalpha(*loc) || ishigh(*loc));
  id_loc=loc; return(identifier);
}

@ A constant is left in the buffer, delimited by |id_first| and |id_loc|.
A period that is not followed by a digit is not a constant at all; it
is handed to the operator-compression code at |mistake|.

@<Get a constant@>= {
  id_first=loc-1;
  if (*id_first=='.' && !xisdigit(*loc)) goto mistake; /* not a constant */
  if (*id_first=='0') {
    if (*loc=='x' || *loc=='X') { /* hex constant */
      loc++; while (xisxdigit(*loc)) loc++; goto found;
    }
  }
  while (xisdigit(*loc)) loc++;
  if (*loc=='.') {
    loc++;
    while (xisdigit(*loc)) loc++;
  }
  if (*loc=='e' || *loc=='E') { /* float constant */
    if (*++loc=='+' || *loc=='-') loc++;
    while (xisdigit(*loc)) loc++;
  }
 found: while (*loc=='u' || *loc=='U' || *loc=='l' || *loc=='L'
             || *loc=='f' || *loc=='F') loc++;
  id_loc=loc;
  return(constant);
}

@ \CEE/ strings and character constants, delimited by double and single
quotes, respectively, can contain newlines or instances of their own
delimiters if they are protected by a backslash.  We follow this
convention, but do not allow the string to be longer than |longest_name|.

The string is copied into |section_text|, since it may extend over
several input lines. A continuation line may be empty, in which case
|limit==buffer| and there is no character before |limit| to examine;
such a line certainly does not end with a backslash.

@<Get a string@>= {
  char delim = c; /* what started the string */
  id_first = section_text+1;
  id_loc = section_text; *++id_loc=delim;
  if (delim=='L') { /* wide character constant */
    delim=*loc++; *++id_loc=delim;
  }
  while (1) {
    if (loc>=limit) {
      if(limit==buffer || *(limit-1)!='\\') {
        err_print("! String didn't end"); loc=limit; break;
@.String didn't end@>
      }
      if(get_line()==0) {
        err_print("! Input ended in middle of string"); loc=buffer; break;
@.Input ended in middle of string@>
      }
      else if (++id_loc<=section_text_end) *id_loc='\n'; /* will print as
      \.{"\\\\\\n"} */
    }
    if ((c=*loc++)==delim) {
      if (++id_loc<=section_text_end) *id_loc=c;
      break;
    }
    if (c=='\\') {
      if (loc>=limit) continue; /* a final backslash: go get the next line */
      if (++id_loc<=section_text_end) *id_loc = '\\';
      c=*loc++;
    }
    if (++id_loc<=section_text_end) *id_loc=c;
  }
  if (id_loc>=section_text_end) {
    printf("\n! String too long: ");
@.String too long@>
    term_write(section_text+1,25);
    err_print("...");
  }
  id_loc++;
  return(string);
}

@ After an \.{@@} sign has been scanned, the next character tells us
whether there is more work to do.

@<Get control code and possible section name@>= {
  c=ccode[(eight_bits)*loc++];
  switch(c) {
    case ignore: continue;
    case translit_code: err_print("! Use @@l in limbo only"); continue;
@.Use @@l in limbo...@>
    case control_text: while ((c=skip_ahead())=='@@');
      /* only \.{@@@@} and \.{@@>} are expected */
      if (*(loc-1)!='>')
        err_print("! Double @@ should be used in control text");
@.Double @@ should be used...@>
      continue;
    case section_name:
      cur_section_name_char=*(loc-1);
      @<Scan the section name and make |cur_section_name| point to it@>;
    case string: @<Scan a verbatim string@>;
    case ord: @<Scan an ASCII constant@>;
    default: return(c);
  }
}

@ After scanning a valid ASCII constant that follows
\.{@@'}, this code plows ahead until it finds the next single quote.
(Special care is taken if the quote is part of the constant.)
Anything after a valid ASCII constant is ignored;
thus, \.{@@'\\nopq'} gives the same result as \.{@@'\\n'}.

@<Scan an ASCII constant@>=
  id_first=loc;
  if (*loc=='\\') {
    if (*++loc=='\'') loc++; /* an escaped quote does not end the constant */
  }
  while (*loc!='\'') {
    if (*loc=='@@') {
      if (*(loc+1)!='@@')
        err_print("! Double @@ should be used in ASCII constant");
@.Double @@ should be used...@>
      else loc++;
    }
    loc++;
    if (loc>limit) {
        err_print("! String didn't end"); loc=limit-1; break;
@.String didn't end@>
    }
  }
  loc++;
  return(ord);

@ @<Scan the section name...@>= {
  char *k; /* pointer into |section_text| */
  @<Put section name into |section_text|@>;
  if (k-section_text>3 && strncmp(k-2,"...",3)==0)
    cur_section_name=section_lookup(section_text+1,k-3,1); /* 1 means is a prefix */
  else cur_section_name=section_lookup(section_text+1,k,0);
  if (cur_section_name_char=='(')
    @<If it's not there, add |cur_section_name| to the output file stack, or
          complain we're out of room@>;
  return(section_name);
}

@ Section names are placed into the |section_text| array with consecutive spaces,
tabs, and carriage-returns replaced by single spaces. There will be no
spaces at the beginning or the end. (We set |section_text[0]=' '| to facilitate
this, since the |section_lookup| routine uses |section_text[1]| as the first
character of the name.)

@<Set init...@>=section_text[0]=' ';

@ @<Put section name...@>=
k=section_text;
while (1) {
  if (loc>limit && get_line()==0) {
    err_print("! Input ended in section name");
@.Input ended in section name@>
    loc=buffer+1; break;
  }
  c=*loc;
  @<If end of name or erroneous nesting, |break|@>;
  loc++; if (k<section_text_end) k++;
  if (xisspace(c)) {
    c=' '; if (*(k-1)==' ') k--; /* a run of white space becomes one blank */
  }
*k=c;
}
if (k>=section_text_end) {
  printf("\n! Section name too long: ");
@.Section name too long@>
  term_write(section_text+1,25);
  printf("..."); mark_harmless;
}
if (*k==' ' && k>section_text) k--; /* drop a trailing blank */

@ A doubled \.{@@} inside a section name is stored as it stands. The
pointer |k| is advanced for the first \.{@@} with the same bounds test
that is used for ordinary characters, so that an overlong name cannot
carry |k| beyond |section_text_end|.

@<If end of name or erroneous nesting,...@>=
if (c=='@@') {
  c=*(loc+1);
  if (c=='>') {
    loc+=2; break;
  }
  if (ccode[(eight_bits)c]==new_section) {
    err_print("! Section name didn't end"); break;
@.Section name didn't end@>
  }
  if (ccode[(eight_bits)c]==section_name) {
    err_print("! Nesting of section names not allowed"); break;
@.Nesting of section names...@>
  }
  if (k<section_text_end) k++; /* stay inside |section_text| */
  *k='@@'; loc++; /* now |c==*loc| again */
}

@ At the present point in the program we
have |ccode[(eight_bits)*(loc-1)]==string|; we set |id_first| to the beginning
of the string itself, and |id_loc| to its ending-plus-one location in the
buffer.  We also set |loc| to the position just after the ending delimiter.

@<Scan a verbatim string@>= {
  id_first=loc++; *(limit+1)='@@'; *(limit+2)='>';
    /* the sentinel \.{@@>} guarantees that the next loop stops */
  while (*loc!='@@' || *(loc+1)!='>') loc++;
  if (loc>=limit) err_print("! Verbatim string didn't end");
@.Verbatim string didn't end@>
  id_loc=loc; loc+=2;
  return(string);
}

@* Scanning a macro definition.
The rules for generating the replacement texts corresponding to macros and
\CEE/ texts of a section are almost identical; the only differences are that

\yskip \item{a)}Section names are not allowed in macros;
in fact, the appearance of a section name terminates such macros and denotes
the name of the current section.

\item{b)}The symbols \.{@@d} and \.{@@f} and \.{@@c} are not allowed after
section names, while they terminate macro definitions.

\item{c)}Spaces are inserted after right parentheses in macros, because the
ANSI \CEE/ preprocessor sometimes requires it.

\yskip Therefore there is a single procedure |scan_repl| whose parameter
|t| specifies either |macro| or |section_name|. After |scan_repl| has
acted, |cur_text| will point to the replacement text just generated, and
|next_control| will contain the control code that terminated the activity.
@d macro 0
@d app_repl(c) {if (tok_ptr==tok_mem_end) overflow("token"); *tok_ptr++=c;}

@<Global...@>=
text_pointer cur_text; /* replacement text formed by |scan_repl| */
eight_bits next_control;

@ The tokens delivered by |get_next| are appended to |tok_mem| until one
of them ends the replacement text; that token is left in |next_control|.
Then the entry at |text_ptr| becomes |cur_text|, and the following entry
is made to record where the tokens of the new text stop. Since that
following entry is written here, the overflow test must fail as soon as
|text_ptr| has reached |text_info_end|, not only after it has passed it.
(This assumes that |text_info_end| is the last entry of |text_info|; if it
should lie one position further on, the test merely leaves one entry unused.)

@c
void
scan_repl(t) /* creates a replacement text */
eight_bits t;
{
  sixteen_bits a; /* the current token */
  if (t==section_name) {@<Insert the line number into |tok_mem|@>;}
  while (1) switch (a=get_next()) {
      @<In cases that |a| is a non-|char| token (|identifier|,
        |section_name|, etc.), either process it and change |a| to a byte
        that should be stored, or |continue| if |a| should be ignored,
        or |goto done| if |a| signals the end of this replacement text@>@;
      case ')': app_repl(a);
        if (t==macro) app_repl(' '); /* a macro gets a blank after each `\.)' */
        break;
      default: app_repl(a); /* store |a| in |tok_mem| */
    }
  done: next_control=(eight_bits) a;
  if (text_ptr>=text_info_end) overflow("text");
    /* |++text_ptr| below needs one more entry */
  cur_text=text_ptr; (++text_ptr)->tok_start=tok_ptr;
}

@ Here is the code for the line number: first a |sixteen_bits| equal
to |0150000|; then the numeric line number; then a pointer to the
file name.
@<Insert the line...@>=
store_two_bytes(0150000);
if (changing) id_first=change_file_name;
else id_first=cur_file_name;
id_loc=id_first+strlen(id_first);
if (changing) store_two_bytes((sixteen_bits)change_line);
else store_two_bytes((sixteen_bits)cur_line);
{int a=id_lookup(id_first,id_loc,0)-name_dir; app_repl((a / 0400)+0200);
  app_repl(a % 0400);} /* the file name is stored like an identifier */

@ @<In cases that |a| is...@>=
case identifier: a=id_lookup(id_first,id_loc,0)-name_dir;
  app_repl((a / 0400)+0200);
  app_repl(a % 0400); break;
case section_name: if (t!=section_name) goto done;
  else {
    @<Was an `@@' missed here?@>;
    a=cur_section_name-name_dir;
    app_repl((a / 0400)+0250);
    app_repl(a % 0400);
    @<Insert the line number into |tok_mem|@>; break;
  }
case output_defs_code: if (t!=section_name) err_print("! Misplaced @@h");
@.Misplaced @@h@>
  else {
    output_defs_seen=1;
    a=output_defs_flag;
    app_repl((a / 0400)+0200);
    app_repl(a % 0400);
    @<Insert the line number into |tok_mem|@>;
  }
 break;
case constant: case string:
  @<Copy a string or verbatim construction or numerical constant@>;
case ord:
  @<Copy an ASCII constant@>;
case definition: case format_code: case begin_C: if (t!=section_name) goto done;
  else {
    err_print("! @@d, @@f and @@c are ignored in C text"); continue;
@.@@d, @@f and @@c are ignored in C text@>
  }
case new_section: goto done;

@ @<Was an `@@'...@>= {
  char *try_loc=loc;
  while (*try_loc==' ' && try_loc<limit) try_loc++;
  if (*try_loc=='+' && try_loc<limit) try_loc++;
  while (*try_loc==' ' && try_loc<limit) try_loc++;
  if (*try_loc=='=') err_print ("! Missing `@@ ' before a named section");
@.Missing `@@ '...@>
  /* user who isn't defining a section should put newline after the name,
     as explained in the manual */
}

@ @<Copy a string...@>=
  app_repl(a); /* |string| or |constant| */
  while (id_first < id_loc) { /* simplify \.{@@@@} pairs */
    if (*id_first=='@@') {
      if (*(id_first+1)=='@@') id_first++;
      else err_print("! Double @@ should be used in string");
@.Double @@ should be used...@>
    }
    app_repl(*id_first++);
  }
  app_repl(a); break;

@ This section should be rewritten on machines that don't use ASCII
code internally.
@^ASCII code dependencies@>

The character after a backslash is converted to |eight_bits|, just as
the first character is, so that |c| cannot be negative when its decimal
digits are appended below.

@<Copy an ASCII constant@>= {
  int c=(eight_bits) *id_first;
  if (c=='\\') {
    c=(eight_bits) *++id_first;
    if (c>='0' && c<='7') {
      c-='0';
      if (*(id_first+1)>='0' && *(id_first+1)<='7') {
        c=8*c+*(++id_first) - '0';
        if (*(id_first+1)>='0' && *(id_first+1)<='7' && c<32)
          c=8*c+*(++id_first)- '0'; /* a third digit only if the value stays below 256 */
      }
    }
    else switch (c) {
    case 't':c='\t';@+break;
    case 'n':c='\n';@+break;
    case 'b':c='\b';@+break;
    case 'f':c='\f';@+break;
    case 'v':c='\v';@+break;
    case 'r':c='\r';@+break;
    case 'a':c='\7';@+break;
    case '?':c='?';@+break;
    case 'x':
      if (xisdigit(*(id_first+1))) c=*(++id_first)-'0';
      else if (xisxdigit(*(id_first+1))) {
        ++id_first;
        c=toupper(*id_first)-'A'+10;
      }
      if (xisdigit(*(id_first+1))) c=16*c+*(++id_first)-'0';
      else if (xisxdigit(*(id_first+1))) {
        ++id_first;
        c=16*c+toupper(*id_first)-'A'+10;
      }
      break;
    case '\\':c='\\';@+break;
    case '\'':c='\'';@+break;
    case '\"':c='\"';@+break;
    default: err_print("! Unrecognized escape sequence");
@.Unrecognized escape sequence@>
    }
  }@/
  /* at this point |c| should have been converted to its ASCII code number */
  app_repl(constant);
  if (c>=100) app_repl('0'+c/100);
  if (c>=10) app_repl('0'+(c/10)%10);
  app_repl('0'+c%10);
  app_repl(constant);
}
break;

@* Scanning a section.
The |scan_section| procedure starts when `\.{@@\ }' or `\.{@@*}' has been
sensed in the input, and it proceeds until the end of that section.  It
uses |section_count| to keep track of the current section number; with luck,
\.{CWEAVE} and \.{CTANGLE} will both assign the same numbers to sections.

@<Global...@>=
extern sixteen_bits section_count; /* the current section number */

@ The body of |scan_section| is a loop where we look for control codes
that are significant to \.{CTANGLE}: those
that delimit a definition, the \CEE/ part of a section, or a new section.

@c
void
scan_section()
{
  name_pointer p; /* section name for the current section */
  text_pointer q; /* text for the current section */
  sixteen_bits a; /* token for left-hand side of definition */
  section_count++; @+ no_where=1;
  if (*(loc-1)=='*' && show_progress) { /* starred section */
    printf("*%d",section_count); update_terminal;
  }
  next_control=0;
  while (1) {
    @<Skip ahead until |next_control| corresponds to \.{@@d}, \.{@@<},
      \.{@@\ } or the like@>;
    if (next_control == definition) {  /* \.{@@d} */
        @<Scan a definition@>@;
        continue;
    }
    if (next_control == begin_C) {  /* \.{@@c} or \.{@@p} */
      p=name_dir; break;
    }
    if (next_control == section_name) { /* \.{@@<} or \.{@@(} */
      p=cur_section_name;
      @<If section is not being defined, |continue| @>;
      break;
    }
    return; /* \.{@@\ } or \.{@@*} */
  }
  no_where=print_where=0;
  @<Scan the \CEE/ part of the current section@>;
}

@ At the top of this loop, if |next_control==section_name|, the
section name has already been scanned (see |@<Get control code
and...@>|).  Thus, if we encounter |next_control==section_name| in the
skip-ahead process, we should likewise scan the section name, so later
processing will be the same in both cases.

@<Skip ahead until |next_control| ...@>=
while (next_control<definition)
      /* |definition| is the lowest of the ``significant'' codes */
  if((next_control=skip_ahead())==section_name){
    loc-=2; next_control=get_next();
      /* back up over the control code, so that |get_next| scans the name */
  }

@ @<Scan a definition@>= {
  while ((next_control=get_next())=='\n'); /*allow newline before definition */
  if (next_control!=identifier) {
    err_print("! Definition flushed, must start with identifier");
@.Definition flushed...@>
    continue;
  }
  app_repl(((a=id_lookup(id_first,id_loc,0)-name_dir) / 0400)+0200);
        /* append the lhs */
  app_repl(a % 0400);
  if (*loc!='(') { /* identifier must be separated from replacement text */
    app_repl(string); app_repl(' '); app_repl(string);
  }
  scan_repl(macro);
  cur_text->text_link=0; /* |text_link==0| characterizes a macro */
}

@ If the section name is not followed by \.{=} or \.{+=}, no \CEE/
code is forthcoming: the section is being cited, not being
defined.  This use is illegal after the definition part of the
current section has started, except inside a comment, but
\.{CTANGLE} does not enforce this rule; it simply ignores the offending
section name and everything following it, up to the next significant
control code.
@<If section is not being defined, |continue| @>=
do next_control=get_next();
while (next_control=='+'); /* allow optional \.{+=} */
if (!(next_control=='=' || next_control==eq_eq))
  continue; /* the name is only being cited */

@ @<Scan the \CEE/...@>=
@<Insert the section number into |tok_mem|@>;
scan_repl(section_name); /* now |cur_text| points to the replacement text */
@<Update the data structure so that the replacement text is accessible@>;

@ @<Insert the section number...@>=
store_two_bytes((sixteen_bits)(section_count+0150000));
  /* |0150000==0320*0400| */

@ The new replacement text is appended to the list of texts that belong
to the section |p|, or to the list of unnamed texts.

@<Update the data...@>=
if (p==0 || p==name_dir) { /* bad section name, or unnamed section */
  last_unnamed->text_link=cur_text-text_info;
  last_unnamed=cur_text;
}
else if (p->equiv!=(char *)text_info) { /* the name already has some text */
  for (q=(text_pointer)p->equiv; q->text_link<section_flag;
       q=text_info+q->text_link) ; /* find end of list */
  q->text_link=cur_text-text_info;
}
else p->equiv=(char *)cur_text; /* first section of this name */
cur_text->text_link=section_flag;
  /* mark this replacement text as a nonmacro */

@ @<Predec...@>=
void phase_one();

@ The first phase reads the whole input: first the limbo material,
then one section after another until the input has ended.

@c
void
phase_one() {
  phase=1;
  section_count=0;
  reset_input();
  skip_limbo();
  while (input_has_ended==0) {
    scan_section();
  }
  check_complete();
  phase=2;
}

@ Only a small subset of the control codes is legal in limbo, so limbo
processing is straightforward.
@<Predecl...@>=
void skip_limbo();

@ The procedure returns when the first section begins, or when the
input ends.

@c
void
skip_limbo()
{
  char ch; /* the character that follows \.{@@} */
  for (;;) {
    if (loc>limit) {
      if (get_line()==0) return; /* the input ended while still in limbo */
    }
    limit[1]='@@'; /* sentinel, so that the scan below always stops */
    while (*loc!='@@') loc++;
    if (loc++>limit) continue; /* only the sentinel was found */
    ch=*loc++;
    switch (ccode[(eight_bits)ch]) {
      case new_section: return; /* limbo is over */
      case translit_code: @<Read in transliteration of a character@>; break;
      case format_code: case '@@': break;
      case control_text:
        if (ch=='q' || ch=='Q') {
          while (skip_ahead()=='@@') ; /* pass over doubled \.{@@} signs */
          if (loc[-1]!='>')
            err_print("! Double @@ should be used in control text");
@.Double @@ should be used...@>
          break;
        } /* otherwise fall through */
      default: err_print("! Double @@ should be used in limbo");
@.Double @@ should be used...@>
    }
  }
}

@ The code after \.{@@l} must consist of two hexadecimal digits whose
first digit is not in the range \.0 to \.7, followed by white space;
the replacement string consists of letters, digits and underscores.

@<Read in transliteration of a character@>=
  while (loc<limit && xisspace(*loc)) loc++;
  loc+=3; /* the two digits and the character after them */
  if (loc>limit || !xisxdigit(loc[-3]) || !xisxdigit(loc[-2]) @|
         || (loc[-3]>='0' && loc[-3]<='7') || !xisspace(loc[-1]))
    err_print("! Improper hex number following @@l");
@.Improper hex number...@>
  else {
    unsigned code; /* value of the two hexadecimal digits */
    char *start; /* where the replacement string begins */
    sscanf(loc-3,"%x",&code);
    while (loc<limit && xisspace(*loc)) loc++;
    for (start=loc; loc<limit; loc++)
      if (!(xisalpha(*loc) || xisdigit(*loc) || *loc=='_')) break;
    if (loc-start>=translit_length)
      err_print("! Replacement string in @@l too long");
@.Replacement string in @@l...@>
    else {
      strncpy(translit[code-0200],start,loc-start);
      translit[code-0200][loc-start]='\0'; /* |strncpy| has not ended the string */
    }
  }

@ Because on some systems the difference between two pointers is a |long|
but not an |int|, we use \.{\%ld} to print these quantities.
@c
void
print_stats() {
  long names_used=(long)(name_ptr-name_dir); /* entries of |name_dir| in use */
  long texts_used=(long)(text_ptr-text_info); /* entries of |text_info| in use */
  long bytes_used=(long)(byte_ptr-byte_mem); /* bytes of |byte_mem| in use */
  long toks_used=(long)(tok_ptr-tok_mem); /* bytes of |tok_mem| in use */
  printf("\nMemory usage statistics:\n");
  printf("%ld names (out of %ld)\n",names_used,(long)max_names);
  printf("%ld replacement texts (out of %ld)\n",texts_used,(long)max_texts);
  printf("%ld bytes (out of %ld)\n",bytes_used,(long)max_bytes);
  printf("%ld tokens (out of %ld)\n",toks_used,(long)max_toks);
}

@** Index.
Here is a cross-reference table for \.{CTANGLE}.
All sections in which an identifier is
used are listed with that identifier, except that reserved words are
indexed only when they appear in format definitions, and the appearances
of identifiers in section names are not indexed. Underlined entries
correspond to where the identifier was declared. Error messages and
a few other things like ``ASCII code dependencies'' are indexed here too.