1% This file is part of CWEB. 2% This program by Silvio Levy and Donald E. Knuth 3% is based on a program by Knuth. 4% It is distributed WITHOUT ANY WARRANTY, express or implied. 5% Version 3.64 --- February 2002 6% (essentially the same as version 3.6, which added 7% recently introduced features of standard C++ to version 3.4) 8 9% Copyright (C) 1987,1990,1993,2000 Silvio Levy and Donald E. Knuth 10 11% Permission is granted to make and distribute verbatim copies of this 12% document provided that the copyright notice and this permission notice 13% are preserved on all copies. 14 15% Permission is granted to copy and distribute modified versions of this 16% document under the conditions for verbatim copying, provided that the 17% entire resulting derived work is given a different name and distributed 18% under the terms of a permission notice identical to this one. 19 20% Here is TeX material that gets inserted after \input cwebmac 21\def\hang{\hangindent 3em\indent\ignorespaces} 22\def\pb{$\.|\ldots\.|$} % C brackets (|...|) 23\def\v{\char'174} % vertical (|) in typewriter font 24\def\dleft{[\![} \def\dright{]\!]} % double brackets 25\mathchardef\RA="3221 % right arrow 26\mathchardef\BA="3224 % double arrow 27\def\({} % ) kludge for alphabetizing certain section names 28\def\TeXxstring{\\{\TEX/\_string}} 29\def\skipxTeX{\\{skip\_\TEX/}} 30\def\copyxTeX{\\{copy\_\TEX/}} 31 32\def\title{CWEAVE (Version 3.64)} 33\def\topofcontents{\null\vfill 34 \centerline{\titlefont The {\ttitlefont CWEAVE} processor} 35 \vskip 15pt 36 \centerline{(Version 3.64)} 37 \vfill} 38\def\botofcontents{\vfill 39\noindent 40Copyright \copyright\ 1987, 1990, 1993, 2000 Silvio Levy and Donald E. Knuth 41\bigskip\noindent 42Permission is granted to make and distribute verbatim copies of this 43document provided that the copyright notice and this permission notice 44are preserved on all copies. 45 46\smallskip\noindent 47Permission is granted to copy and distribute modified versions of this 48document under the conditions for verbatim copying, provided that the 49entire resulting derived work is given a different name and distributed 50under the terms of a permission notice identical to this one. 51} 52\pageno=\contentspagenumber \advance\pageno by 1 53\let\maybe=\iftrue 54@s not_eq normal @q unreserve a C++ keyword @> 55 56@** Introduction. 57This is the \.{CWEAVE} program by Silvio Levy and Donald E. Knuth, 58based on \.{WEAVE} by Knuth. 59We are thankful to Steve Avery, 60Nelson Beebe, Hans-Hermann Bode (to whom the original \CPLUSPLUS/ adaptation 61is due), Klaus Guntermann, Norman Ramsey, Tomas Rokicki, Joachim Schnitter, 62Joachim Schrod, Lee Wittenberg, Saroj Mahapatra, Cesar Augusto Rorato 63Crusius, and others who have contributed improvements. 64 65The ``banner line'' defined here should be changed whenever \.{CWEAVE} 66is modified. 67 68@d banner "This is CWEAVE (Version 3.64)\n" 69 70@c @<Include files@>@/ 71@h 72@<Common code for \.{CWEAVE} and \.{CTANGLE}@>@/ 73@<Typedef declarations@>@/ 74@<Global variables@>@/ 75@<Predeclaration of procedures@> 76 77@ We predeclare several standard system functions here instead of including 78their system header files, because the names of the header files are not as 79standard as the names of the functions. (For example, some \CEE/ environments 80have \.{<string.h>} where others have \.{<strings.h>}.) 81 82@<Predecl...@>= 83extern int strlen(); /* length of string */ 84extern int strcmp(); /* compare strings lexicographically */ 85extern char* strcpy(); /* copy one string to another */ 86extern int strncmp(); /* compare up to $n$ string characters */ 87extern char* strncpy(); /* copy up to $n$ string characters */ 88 89@ \.{CWEAVE} has a fairly straightforward outline. It operates in 90three phases: First it inputs the source file and stores cross-reference 91data, then it inputs the source once again and produces the \TEX/ output 92file, finally it sorts and outputs the index. 93 94Please read the documentation for \.{common}, the set of routines common 95to \.{CTANGLE} and \.{CWEAVE}, before proceeding further. 96 97@c 98int main (ac, av) 99int ac; /* argument count */ 100char **av; /* argument values */ 101{ 102 argc=ac; argv=av; 103 program=cweave; 104 make_xrefs=force_lines=make_pb=1; /* controlled by command-line options */ 105 common_init(); 106 @<Set initial values@>; 107 if (show_banner) printf(banner); /* print a ``banner line'' */ 108 @<Store all the reserved words@>; 109 phase_one(); /* read all the user's text and store the cross-references */ 110 phase_two(); /* read all the text again and translate it to \TEX/ form */ 111 phase_three(); /* output the cross-reference index */ 112 return wrap_up(); /* and exit gracefully */ 113} 114 115@ The following parameters were sufficient in the original \.{WEAVE} to 116handle \TEX/, so they should be sufficient for most applications of \.{CWEAVE}. 117If you change |max_bytes|, |max_names|, |hash_size|, or |buf_size| 118you have to change them also in the file |"common.w"|. 119 120@d max_bytes 90000 /* the number of bytes in identifiers, 121 index entries, and section names */ 122@d max_names 4000 /* number of identifiers, strings, section names; 123 must be less than 10240; used in |"common.w"| */ 124@d max_sections 2000 /* greater than the total number of sections */ 125@d hash_size 353 /* should be prime */ 126@d buf_size 100 /* maximum length of input line, plus one */ 127@d longest_name 10000 /* section names and strings shouldn't be longer than this */ 128@d long_buf_size (buf_size+longest_name) 129@d line_length 80 /* lines of \TEX/ output have at most this many characters; 130 should be less than 256 */ 131@d max_refs 20000 /* number of cross-references; must be less than 65536 */ 132@d max_toks 20000 /* number of symbols in \CEE/ texts being parsed; 133 must be less than 65536 */ 134@d max_texts 4000 /* number of phrases in \CEE/ texts being parsed; 135 must be less than 10240 */ 136@d max_scraps 2000 /* number of tokens in \CEE/ texts being parsed */ 137@d stack_size 400 /* number of simultaneous output levels */ 138 139@ The next few sections contain stuff from the file |"common.w"| that must 140be included in both |"ctangle.w"| and |"cweave.w"|. It appears in 141file |"common.h"|, which needs to be updated when |"common.w"| changes. 142 143@i common.h 144 145@* Data structures exclusive to {\tt CWEAVE}. 146As explained in \.{common.w}, the field of a |name_info| structure 147that contains the |rlink| of a section name is used for a completely 148different purpose in the case of identifiers. It is then called the 149|ilk| of the identifier, and it is used to 150distinguish between various types of identifiers, as follows: 151 152\yskip\hang |normal| and |func_template| identifiers are part of the 153\CEE/ program that will appear in italic type (or in typewriter type 154if all uppercase). 155 156\yskip\hang |custom| identifiers are part of the \CEE/ program that 157will be typeset in special ways. 158 159\yskip\hang |roman| identifiers are index entries that appear after 160\.{@@\^} in the \.{CWEB} file. 161 162\yskip\hang |wildcard| identifiers are index entries that appear after 163\.{@@:} in the \.{CWEB} file. 164 165\yskip\hang |typewriter| identifiers are index entries that appear after 166\.{@@.} in the \.{CWEB} file. 167 168\yskip\hang |alfop|, \dots, |template_like| 169identifiers are \CEE/ or \CPLUSPLUS/ reserved words whose |ilk| 170explains how they are to be treated when \CEE/ code is being 171formatted. 172 173@d ilk dummy.Ilk 174@d normal 0 /* ordinary identifiers have |normal| ilk */ 175@d roman 1 /* normal index entries have |roman| ilk */ 176@d wildcard 2 /* user-formatted index entries have |wildcard| ilk */ 177@d typewriter 3 /* `typewriter type' entries have |typewriter| ilk */ 178@d abnormal(a) (a->ilk>typewriter) /* tells if a name is special */ 179@d func_template 4 /* identifiers that can be followed by optional template */ 180@d custom 5 /* identifiers with user-given control sequence */ 181@d alfop 22 /* alphabetic operators like \&{and} or \&{not\_eq} */ 182@d else_like 26 /* \&{else} */ 183@d public_like 40 /* \&{public}, \&{private}, \&{protected} */ 184@d operator_like 41 /* \&{operator} */ 185@d new_like 42 /* \&{new} */ 186@d catch_like 43 /* \&{catch} */ 187@d for_like 45 /* \&{for}, \&{switch}, \&{while} */ 188@d do_like 46 /* \&{do} */ 189@d if_like 47 /* \&{if}, \&{ifdef}, \&{endif}, \&{pragma}, \dots */ 190@d delete_like 48 /* \&{delete} */ 191@d raw_ubin 49 /* `\.\&' or `\.*' when looking for \&{const} following */ 192@d const_like 50 /* \&{const}, \&{volatile} */ 193@d raw_int 51 /* \&{int}, \&{char}, \dots; also structure and class names */ 194@d int_like 52 /* same, when not followed by left parenthesis or \DC\ */ 195@d case_like 53 /* \&{case}, \&{return}, \&{goto}, \&{break}, \&{continue} */ 196@d sizeof_like 54 /* \&{sizeof} */ 197@d struct_like 55 /* \&{struct}, \&{union}, \&{enum}, \&{class} */ 198@d typedef_like 56 /* \&{typedef} */ 199@d define_like 57 /* \&{define} */ 200@d template_like 58 /* \&{template} */ 201 202@ We keep track of the current section number in |section_count|, which 203is the total number of sections that have started. Sections which have 204been altered by a change file entry have their |changed_section| flag 205turned on during the first phase. 206 207@<Global...@>= 208boolean change_exists; /* has any section changed? */ 209 210@ The other large memory area in \.{CWEAVE} keeps the cross-reference data. 211All uses of the name |p| are recorded in a linked list beginning at 212|p->xref|, which points into the |xmem| array. The elements of |xmem| 213are structures consisting of an integer, |num|, and a pointer |xlink| 214to another element of |xmem|. If |x=p->xref| is a pointer into |xmem|, 215the value of |x->num| is either a section number where |p| is used, 216or |cite_flag| plus a section number where |p| is mentioned, 217or |def_flag| plus a section number where |p| is defined; 218and |x->xlink| points to the next such cross-reference for |p|, 219if any. This list of cross-references is in decreasing order by 220section number. The next unused slot in |xmem| is |xref_ptr|. 221The linked list ends at |&xmem[0]|. 222 223The global variable |xref_switch| is set either to |def_flag| or to zero, 224depending on whether the next cross-reference to an identifier is to be 225underlined or not in the index. This switch is set to |def_flag| when 226\.{@@!} or \.{@@d} is scanned, and it is cleared to zero when 227the next identifier or index entry cross-reference has been made. 228Similarly, the global variable |section_xref_switch| is either 229|def_flag| or |cite_flag| or zero, depending 230on whether a section name is being defined, cited or used in \CEE/ text. 231 232@<Type...@>= 233typedef struct xref_info { 234 sixteen_bits num; /* section number plus zero or |def_flag| */ 235 struct xref_info *xlink; /* pointer to the previous cross-reference */ 236} xref_info; 237typedef xref_info *xref_pointer; 238 239@ @<Global...@>= 240xref_info xmem[max_refs]; /* contains cross-reference information */ 241xref_pointer xmem_end = xmem+max_refs-1; 242xref_pointer xref_ptr; /* the largest occupied position in |xmem| */ 243sixteen_bits xref_switch,section_xref_switch; /* either zero or |def_flag| */ 244 245@ A section that is used for multi-file output (with the \.{@@(} feature) 246has a special first cross-reference whose |num| field is |file_flag|. 247 248@d file_flag (3*cite_flag) 249@d def_flag (2*cite_flag) 250@d cite_flag 10240 /* must be strictly larger than |max_sections| */ 251@d xref equiv_or_xref 252 253@<Set init...@>= 254xref_ptr=xmem; name_dir->xref=(char*)xmem; xref_switch=0; section_xref_switch=0; 255xmem->num=0; /* sentinel value */ 256 257@ A new cross-reference for an identifier is formed by calling |new_xref|, 258which discards duplicate entries and ignores non-underlined references 259to one-letter identifiers or \CEE/'s reserved words. 260 261If the user has sent the |no_xref| flag (the \.{-x} option of the command line), 262it is unnecessary to keep track of cross-references for identifiers. 263If one were careful, one could probably make more changes around section 264100 to avoid a lot of identifier looking up. 265 266@d append_xref(c) if (xref_ptr==xmem_end) overflow("cross-reference"); 267 else (++xref_ptr)->num=c; 268@d no_xref (flags['x']==0) 269@d make_xrefs flags['x'] /* should cross references be output? */ 270@d is_tiny(p) ((p+1)->byte_start==(p)->byte_start+1) 271@d unindexed(a) (a<res_wd_end && a->ilk>=custom) 272 /* tells if uses of a name are to be indexed */ 273 274@c 275void 276new_xref(p) 277name_pointer p; 278{ 279 xref_pointer q; /* pointer to previous cross-reference */ 280 sixteen_bits m, n; /* new and previous cross-reference value */ 281 if (no_xref) return; 282 if ((unindexed(p) || is_tiny(p)) && xref_switch==0) return; 283 m=section_count+xref_switch; xref_switch=0; q=(xref_pointer)p->xref; 284 if (q != xmem) { 285 n=q->num; 286 if (n==m || n==m+def_flag) return; 287 else if (m==n+def_flag) { 288 q->num=m; return; 289 } 290 } 291 append_xref(m); xref_ptr->xlink=q; p->xref=(char*)xref_ptr; 292} 293 294@ The cross-reference lists for section names are slightly different. 295Suppose that a section name is defined in sections $m_1$, \dots, 296$m_k$, cited in sections $n_1$, \dots, $n_l$, and used in sections 297$p_1$, \dots, $p_j$. Then its list will contain $m_1+|def_flag|$, 298\dots, $m_k+|def_flag|$, $n_1+|cite_flag|$, \dots, 299$n_l+|cite_flag|$, $p_1$, \dots, $p_j$, in this order. 300 301Although this method of storage takes quadratic time with respect to 302the length of the list, under foreseeable uses of \.{CWEAVE} this inefficiency 303is insignificant. 304 305@c 306void 307new_section_xref(p) 308name_pointer p; 309{ 310 xref_pointer q,r; /* pointers to previous cross-references */ 311 q=(xref_pointer)p->xref; r=xmem; 312 if (q>xmem) 313 while (q->num>section_xref_switch) {r=q; q=q->xlink;} 314 if (r->num==section_count+section_xref_switch) 315 return; /* don't duplicate entries */ 316 append_xref(section_count+section_xref_switch); 317 xref_ptr->xlink=q; section_xref_switch=0; 318 if (r==xmem) p->xref=(char*)xref_ptr; 319 else r->xlink=xref_ptr; 320} 321 322@ The cross-reference list for a section name may also begin with 323|file_flag|. Here's how that flag gets put~in. 324 325@c 326void 327set_file_flag(p) 328name_pointer p; 329{ 330 xref_pointer q; 331 q=(xref_pointer)p->xref; 332 if (q->num==file_flag) return; 333 append_xref(file_flag); 334 xref_ptr->xlink = q; 335 p->xref = (char *)xref_ptr; 336} 337 338@ A third large area of memory is used for sixteen-bit `tokens', which appear 339in short lists similar to the strings of characters in |byte_mem|. Token lists 340are used to contain the result of \CEE/ code translated into \TEX/ form; 341further details about them will be explained later. A |text_pointer| variable 342is an index into |tok_start|. 343 344@<Typed...@>= 345typedef sixteen_bits token; 346typedef token *token_pointer; 347typedef token_pointer *text_pointer; 348 349@ The first position of |tok_mem| 350that is unoccupied by replacement text is called |tok_ptr|, and the first 351unused location of |tok_start| is called |text_ptr|. 352Thus, we usually have |*text_ptr==tok_ptr|. 353 354@<Global...@>= 355token tok_mem[max_toks]; /* tokens */ 356token_pointer tok_mem_end = tok_mem+max_toks-1; /* end of |tok_mem| */ 357token_pointer tok_start[max_texts]; /* directory into |tok_mem| */ 358token_pointer tok_ptr; /* first unused position in |tok_mem| */ 359text_pointer text_ptr; /* first unused position in |tok_start| */ 360text_pointer tok_start_end = tok_start+max_texts-1; /* end of |tok_start| */ 361token_pointer max_tok_ptr; /* largest value of |tok_ptr| */ 362text_pointer max_text_ptr; /* largest value of |text_ptr| */ 363 364@ @<Set init...@>= 365tok_ptr=tok_mem+1; text_ptr=tok_start+1; tok_start[0]=tok_mem+1; 366tok_start[1]=tok_mem+1; 367max_tok_ptr=tok_mem+1; max_text_ptr=tok_start+1; 368 369@ Here are the three procedures needed to complete |id_lookup|: 370@c 371int names_match(p,first,l,t) 372name_pointer p; /* points to the proposed match */ 373char *first; /* position of first character of string */ 374int l; /* length of identifier */ 375eight_bits t; /* desired ilk */ 376{ 377 if (length(p)!=l) return 0; 378 if (p->ilk!=t && !(t==normal && abnormal(p))) return 0; 379 return !strncmp(first,p->byte_start,l); 380} 381 382void 383init_p(p,t) 384name_pointer p; 385eight_bits t; 386{ 387 p->ilk=t; p->xref=(char*)xmem; 388} 389 390void 391init_node(p) 392name_pointer p; 393{ 394 p->xref=(char*)xmem; 395} 396 397@ We have to get \CEE/'s 398reserved words into the hash table, and the simplest way to do this is 399to insert them every time \.{CWEAVE} is run. Fortunately there are relatively 400few reserved words. (Some of these are not strictly ``reserved,'' but 401are defined in header files of the ISO Standard \CEE/ Library.) 402@^reserved words@> 403 404@<Store all the reserved words@>= 405id_lookup("and",NULL,alfop); 406id_lookup("and_eq",NULL,alfop); 407id_lookup("asm",NULL,sizeof_like); 408id_lookup("auto",NULL,int_like); 409id_lookup("bitand",NULL,alfop); 410id_lookup("bitor",NULL,alfop); 411id_lookup("bool",NULL,raw_int); 412id_lookup("break",NULL,case_like); 413id_lookup("case",NULL,case_like); 414id_lookup("catch",NULL,catch_like); 415id_lookup("char",NULL,raw_int); 416id_lookup("class",NULL,struct_like); 417id_lookup("clock_t",NULL,raw_int); 418id_lookup("compl",NULL,alfop); 419id_lookup("const",NULL,const_like); 420id_lookup("const_cast",NULL,raw_int); 421id_lookup("continue",NULL,case_like); 422id_lookup("default",NULL,case_like); 423id_lookup("define",NULL,define_like); 424id_lookup("defined",NULL,sizeof_like); 425id_lookup("delete",NULL,delete_like); 426id_lookup("div_t",NULL,raw_int); 427id_lookup("do",NULL,do_like); 428id_lookup("double",NULL,raw_int); 429id_lookup("dynamic_cast",NULL,raw_int); 430id_lookup("elif",NULL,if_like); 431id_lookup("else",NULL,else_like); 432id_lookup("endif",NULL,if_like); 433id_lookup("enum",NULL,struct_like); 434id_lookup("error",NULL,if_like); 435id_lookup("explicit",NULL,int_like); 436id_lookup("export",NULL,int_like); 437id_lookup("extern",NULL,int_like); 438id_lookup("FILE",NULL,raw_int); 439id_lookup("float",NULL,raw_int); 440id_lookup("for",NULL,for_like); 441id_lookup("fpos_t",NULL,raw_int); 442id_lookup("friend",NULL,int_like); 443id_lookup("goto",NULL,case_like); 444id_lookup("if",NULL,if_like); 445id_lookup("ifdef",NULL,if_like); 446id_lookup("ifndef",NULL,if_like); 447id_lookup("include",NULL,if_like); 448id_lookup("inline",NULL,int_like); 449id_lookup("int",NULL,raw_int); 450id_lookup("jmp_buf",NULL,raw_int); 451id_lookup("ldiv_t",NULL,raw_int); 452id_lookup("line",NULL,if_like); 453id_lookup("long",NULL,raw_int); 454id_lookup("mutable",NULL,int_like); 455id_lookup("namespace",NULL,struct_like); 456id_lookup("new",NULL,new_like); 457id_lookup("not",NULL,alfop); 458id_lookup("not_eq",NULL,alfop); 459id_lookup("NULL",NULL,custom); 460id_lookup("offsetof",NULL,raw_int); 461id_lookup("operator",NULL,operator_like); 462id_lookup("or",NULL,alfop); 463id_lookup("or_eq",NULL,alfop); 464id_lookup("pragma",NULL,if_like); 465id_lookup("private",NULL,public_like); 466id_lookup("protected",NULL,public_like); 467id_lookup("ptrdiff_t",NULL,raw_int); 468id_lookup("public",NULL,public_like); 469id_lookup("register",NULL,int_like); 470id_lookup("reinterpret_cast",NULL,raw_int); 471id_lookup("return",NULL,case_like); 472id_lookup("short",NULL,raw_int); 473id_lookup("sig_atomic_t",NULL,raw_int); 474id_lookup("signed",NULL,raw_int); 475id_lookup("size_t",NULL,raw_int); 476id_lookup("sizeof",NULL,sizeof_like); 477id_lookup("static",NULL,int_like); 478id_lookup("static_cast",NULL,raw_int); 479id_lookup("struct",NULL,struct_like); 480id_lookup("switch",NULL,for_like); 481id_lookup("template",NULL,template_like); 482id_lookup("this",NULL,custom); 483id_lookup("throw",NULL,case_like); 484id_lookup("time_t",NULL,raw_int); 485id_lookup("try",NULL,else_like); 486id_lookup("typedef",NULL,typedef_like); 487id_lookup("typeid",NULL,raw_int); 488id_lookup("typename",NULL,struct_like); 489id_lookup("undef",NULL,if_like); 490id_lookup("union",NULL,struct_like); 491id_lookup("unsigned",NULL,raw_int); 492id_lookup("using",NULL,int_like); 493id_lookup("va_dcl",NULL,decl); /* Berkeley's variable-arg-list convention */ 494id_lookup("va_list",NULL,raw_int); /* ditto */ 495id_lookup("virtual",NULL,int_like); 496id_lookup("void",NULL,raw_int); 497id_lookup("volatile",NULL,const_like); 498id_lookup("wchar_t",NULL,raw_int); 499id_lookup("while",NULL,for_like); 500id_lookup("xor",NULL,alfop); 501id_lookup("xor_eq",NULL,alfop); 502res_wd_end=name_ptr; 503id_lookup("TeX",NULL,custom); 504id_lookup("make_pair",NULL,func_template); 505 506@* Lexical scanning. 507Let us now consider the subroutines that read the \.{CWEB} source file 508and break it into meaningful units. There are four such procedures: 509One simply skips to the next `\.{@@\ }' or `\.{@@*}' that begins a 510section; another passes over the \TEX/ text at the beginning of a 511section; the third passes over the \TEX/ text in a \CEE/ comment; 512and the last, which is the most interesting, gets the next token of 513a \CEE/ text. They all use the pointers |limit| and |loc| into 514the line of input currently being studied. 515 516@ Control codes in \.{CWEB}, which begin with `\.{@@}', are converted 517into a numeric code designed to simplify \.{CWEAVE}'s logic; for example, 518larger numbers are given to the control codes that denote more significant 519milestones, and the code of |new_section| should be the largest of 520all. Some of these numeric control codes take the place of |char| 521control codes that will not otherwise appear in the output of the 522scanning routines. 523@^ASCII code dependencies@> 524 525@d ignore 00 /* control code of no interest to \.{CWEAVE} */ 526@d verbatim 02 /* takes the place of extended ASCII \.{\char2} */ 527@d begin_short_comment 03 /* \CPLUSPLUS/ short comment */ 528@d begin_comment '\t' /* tab marks will not appear */ 529@d underline '\n' /* this code will be intercepted without confusion */ 530@d noop 0177 /* takes the place of ASCII delete */ 531@d xref_roman 0203 /* control code for `\.{@@\^}' */ 532@d xref_wildcard 0204 /* control code for `\.{@@:}' */ 533@d xref_typewriter 0205 /* control code for `\.{@@.}' */ 534@d TeX_string 0206 /* control code for `\.{@@t}' */ 535@f TeX_string TeX 536@d ord 0207 /* control code for `\.{@@'}' */ 537@d join 0210 /* control code for `\.{@@\&}' */ 538@d thin_space 0211 /* control code for `\.{@@,}' */ 539@d math_break 0212 /* control code for `\.{@@\v}' */ 540@d line_break 0213 /* control code for `\.{@@/}' */ 541@d big_line_break 0214 /* control code for `\.{@@\#}' */ 542@d no_line_break 0215 /* control code for `\.{@@+}' */ 543@d pseudo_semi 0216 /* control code for `\.{@@;}' */ 544@d macro_arg_open 0220 /* control code for `\.{@@[}' */ 545@d macro_arg_close 0221 /* control code for `\.{@@]}' */ 546@d trace 0222 /* control code for `\.{@@0}', `\.{@@1}' and `\.{@@2}' */ 547@d translit_code 0223 /* control code for `\.{@@l}' */ 548@d output_defs_code 0224 /* control code for `\.{@@h}' */ 549@d format_code 0225 /* control code for `\.{@@f}' and `\.{@@s}' */ 550@d definition 0226 /* control code for `\.{@@d}' */ 551@d begin_C 0227 /* control code for `\.{@@c}' */ 552@d section_name 0230 /* control code for `\.{@@<}' */ 553@d new_section 0231 /* control code for `\.{@@\ }' and `\.{@@*}' */ 554 555@ Control codes are converted to \.{CWEAVE}'s internal 556representation by means of the table |ccode|. 557 558@<Global...@>= 559eight_bits ccode[256]; /* meaning of a char following \.{@@} */ 560 561@ @<Set ini...@>= 562{int c; for (c=0; c<256; c++) ccode[c]=0;} 563ccode[' ']=ccode['\t']=ccode['\n']=ccode['\v']=ccode['\r']=ccode['\f'] 564 =ccode['*']=new_section; 565ccode['@@']='@@'; /* `quoted' at sign */ 566ccode['=']=verbatim; 567ccode['d']=ccode['D']=definition; 568ccode['f']=ccode['F']=ccode['s']=ccode['S']=format_code; 569ccode['c']=ccode['C']=ccode['p']=ccode['P']=begin_C; 570ccode['t']=ccode['T']=TeX_string; 571ccode['l']=ccode['L']=translit_code; 572ccode['q']=ccode['Q']=noop; 573ccode['h']=ccode['H']=output_defs_code; 574ccode['&']=join; ccode['<']=ccode['(']=section_name; 575ccode['!']=underline; ccode['^']=xref_roman; 576ccode[':']=xref_wildcard; ccode['.']=xref_typewriter; ccode[',']=thin_space; 577ccode['|']=math_break; ccode['/']=line_break; ccode['#']=big_line_break; 578ccode['+']=no_line_break; ccode[';']=pseudo_semi; 579ccode['[']=macro_arg_open; ccode[']']=macro_arg_close; 580ccode['\'']=ord; 581@<Special control codes for debugging@>@; 582 583@ Users can write 584\.{@@2}, \.{@@1}, and \.{@@0} to turn tracing fully on, partly on, 585and off, respectively. 586 587@<Special control codes...@>= 588ccode['0']=ccode['1']=ccode['2']=trace; 589 590@ The |skip_limbo| routine is used on the first pass to skip through 591portions of the input that are not in any sections, i.e., that precede 592the first section. After this procedure has been called, the value of 593|input_has_ended| will tell whether or not a section has actually been found. 594 595There's a complication that we will postpone until later: If the \.{@@s} 596operation appears in limbo, we want to use it to adjust the default 597interpretation of identifiers. 598 599@<Predec...@>= 600void skip_limbo(); 601 602@ @c 603void 604skip_limbo() { 605 while(1) { 606 if (loc>limit && get_line()==0) return; 607 *(limit+1)='@@'; 608 while (*loc!='@@') loc++; /* look for '@@', then skip two chars */ 609 if (loc++ <=limit) { int c=ccode[(eight_bits)*loc++]; 610 if (c==new_section) return; 611 if (c==noop) skip_restricted(); 612 else if (c==format_code) @<Process simple format in limbo@>; 613 } 614 } 615} 616 617@ The |skip_TeX| routine is used on the first pass to skip through 618the \TEX/ code at the beginning of a section. It returns the next 619control code or `\.{\v}' found in the input. A |new_section| is 620assumed to exist at the very end of the file. 621 622@f skip_TeX TeX 623 624@c 625unsigned 626skip_TeX() /* skip past pure \TEX/ code */ 627{ 628 while (1) { 629 if (loc>limit && get_line()==0) return(new_section); 630 *(limit+1)='@@'; 631 while (*loc!='@@' && *loc!='|') loc++; 632 if (*loc++ =='|') return('|'); 633 if (loc<=limit) return(ccode[(eight_bits)*(loc++)]); 634 } 635} 636 637@*1 Inputting the next token. 638As stated above, \.{CWEAVE}'s most interesting lexical scanning routine is the 639|get_next| function that inputs the next token of \CEE/ input. However, 640|get_next| is not especially complicated. 641 642The result of |get_next| is either a |char| code for some special character, 643or it is a special code representing a pair of characters (e.g., `\.{!=}'), 644or it is the numeric value computed by the |ccode| 645table, or it is one of the following special codes: 646 647\yskip\hang |identifier|: In this case the global variables |id_first| and 648|id_loc| will have been set to the beginning and ending-plus-one locations 649in the buffer, as required by the |id_lookup| routine. 650 651\yskip\hang |string|: The string will have been copied into the array 652|section_text|; |id_first| and |id_loc| are set as above (now they are 653pointers into |section_text|). 654 655\yskip\hang |constant|: The constant is copied into |section_text|, with 656slight modifications; |id_first| and |id_loc| are set. 657 658\yskip\noindent Furthermore, some of the control codes cause 659|get_next| to take additional actions: 660 661\yskip\hang |xref_roman|, |xref_wildcard|, |xref_typewriter|, |TeX_string|, 662|verbatim|: The values of |id_first| and |id_loc| will have been set to 663the beginning and ending-plus-one locations in the buffer. 664 665\yskip\hang |section_name|: In this case the global variable |cur_section| will 666point to the |byte_start| entry for the section name that has just been scanned. 667The value of |cur_section_char| will be |'('| if the section name was 668preceded by \.{@@(} instead of \.{@@<}. 669 670\yskip\noindent If |get_next| sees `\.{@@!}' 671it sets |xref_switch| to |def_flag| and goes on to the next token. 672 673@d constant 0200 /* \CEE/ constant */ 674@d string 0201 /* \CEE/ string */ 675@d identifier 0202 /* \CEE/ identifier or reserved word */ 676 677@<Global...@>= 678name_pointer cur_section; /* name of section just scanned */ 679char cur_section_char; /* the character just before that name */ 680 681@ @<Include...@>= 682#include <ctype.h> /* definition of |isalpha|, |isdigit| and so on */ 683#include <stdlib.h> /* definition of |exit| */ 684 685@ As one might expect, |get_next| consists mostly of a big switch 686that branches to the various special cases that can arise. 687\CEE/ allows underscores to appear in identifiers, and some \CEE/ 688compilers even allow the dollar sign. 689 690@d isxalpha(c) ((c)=='_' || (c)=='$') 691 /* non-alpha characters allowed in identifier */ 692@d ishigh(c) ((eight_bits)(c)>0177) 693@^high-bit character handling@> 694 695@<Predecl...@>= 696eight_bits get_next(); 697 698@ @c 699eight_bits 700get_next() /* produces the next input token */ 701{@+eight_bits c; /* the current character */ 702 while (1) { 703 @<Check if we're at the end of a preprocessor command@>; 704 if (loc>limit && get_line()==0) return(new_section); 705 c=*(loc++); 706 if (xisdigit(c) || c=='.') @<Get a constant@>@; 707 else if (c=='\'' || c=='"' || (c=='L'&&(*loc=='\'' || *loc=='"'))@| 708 || (c=='<' && sharp_include_line==1)) 709 @<Get a string@>@; 710 else if (xisalpha(c) || isxalpha(c) || ishigh(c)) 711 @<Get an identifier@>@; 712 else if (c=='@@') @<Get control code and possible section name@>@; 713 else if (xisspace(c)) continue; /* ignore spaces and tabs */ 714 if (c=='#' && loc==buffer+1) @<Raise preprocessor flag@>; 715 mistake: @<Compress two-symbol operator@>@; 716 return(c); 717 } 718} 719 720@ Because preprocessor commands do not fit in with the rest of the syntax 721of \CEE/, 722we have to deal with them separately. One solution is to enclose such 723commands between special markers. Thus, when a \.\# is seen as the 724first character of a line, |get_next| returns a special code 725|left_preproc| and raises a flag |preprocessing|. 726 727We can use the same internal code number for |left_preproc| as we do 728for |ord|, since |get_next| changes |ord| into a string. 729 730@d left_preproc ord /* begins a preprocessor command */ 731@d right_preproc 0217 /* ends a preprocessor command */ 732 733@<Glob...@>= 734boolean preprocessing=0; /* are we scanning a preprocessor command? */ 735 736@ @<Raise prep...@>= { 737 preprocessing=1; 738 @<Check if next token is |include|@>; 739 return (left_preproc); 740} 741 742@ An additional complication is the freakish use of \.< and \.> to delimit 743a file name in lines that start with \.{\#include}. We must treat this file 744name as a string. 745 746@<Glob...@>= 747boolean sharp_include_line=0; /* are we scanning a |#include| line? */ 748 749@ @<Check if next token is |include|@>= 750while (loc<=buffer_end-7 && xisspace(*loc)) loc++; 751if (loc<=buffer_end-6 && strncmp(loc,"include",7)==0) sharp_include_line=1; 752 753@ When we get to the end of a preprocessor line, 754we lower the flag and send a code |right_preproc|, unless 755the last character was a \.\\. 756 757@<Check if we're at...@>= 758 while (loc==limit-1 && preprocessing && *loc=='\\') 759 if (get_line()==0) return(new_section); /* still in preprocessor mode */ 760 if (loc>=limit && preprocessing) { 761 preprocessing=sharp_include_line=0; 762 return(right_preproc); 763 } 764 765@ The following code assigns values to the combinations \.{++}, 766\.{--}, \.{->}, \.{>=}, \.{<=}, \.{==}, \.{<<}, \.{>>}, \.{!=}, \.{\v\v}, and 767\.{\&\&}, and to the \CPLUSPLUS/ 768combinations \.{...}, \.{::}, \.{.*} and \.{->*}. 769The compound assignment operators (e.g., \.{+=}) are 770treated as separate tokens. 771 772@d compress(c) if (loc++<=limit) return(c) 773 774@<Compress tw...@>= 775switch(c) { 776 case '/': if (*loc=='*') {compress(begin_comment);} 777 else if (*loc=='/') compress(begin_short_comment); break; 778 case '+': if (*loc=='+') compress(plus_plus); break; 779 case '-': if (*loc=='-') {compress(minus_minus);} 780 else if (*loc=='>') if (*(loc+1)=='*') {loc++; compress(minus_gt_ast);} 781 else compress(minus_gt); break; 782 case '.': if (*loc=='*') {compress(period_ast);} 783 else if (*loc=='.' && *(loc+1)=='.') { 784 loc++; compress(dot_dot_dot); 785 } 786 break; 787 case ':': if (*loc==':') compress(colon_colon); break; 788 case '=': if (*loc=='=') compress(eq_eq); break; 789 case '>': if (*loc=='=') {compress(gt_eq);} 790 else if (*loc=='>') compress(gt_gt); break; 791 case '<': if (*loc=='=') {compress(lt_eq);} 792 else if (*loc=='<') compress(lt_lt); break; 793 case '&': if (*loc=='&') compress(and_and); break; 794 case '|': if (*loc=='|') compress(or_or); break; 795 case '!': if (*loc=='=') compress(not_eq); break; 796} 797 798@ @<Get an identifier@>= { 799 id_first=--loc; 800 while (isalpha(*++loc) || isdigit(*loc) || isxalpha(*loc) || ishigh(*loc)); 801 id_loc=loc; return(identifier); 802} 803 804@ Different conventions are followed by \TEX/ and \CEE/ to express octal 805and hexadecimal numbers; it is reasonable to stick to each convention 806within its realm. Thus the \CEE/ part of a \.{CWEB} file has octals 807introduced by \.0 and hexadecimals by \.{0x}, but \.{CWEAVE} will print 808with \TEX/ macros that the user can redefine to fit the context. 809In order to simplify such macros, we replace some of the characters. 810 811Notice that in this section and the next, |id_first| and |id_loc| 812are pointers into the array |section_text|, not into |buffer|. 813 814@<Get a constant@>= { 815 id_first=id_loc=section_text+1; 816 if (*(loc-1)=='0') { 817 if (*loc=='x' || *loc=='X') {*id_loc++='^'; loc++; 818 while (xisxdigit(*loc)) *id_loc++=*loc++;} /* hex constant */ 819 else if (xisdigit(*loc)) {*id_loc++='~'; 820 while (xisdigit(*loc)) *id_loc++=*loc++;} /* octal constant */ 821 else goto dec; /* decimal constant */ 822 } 823 else { /* decimal constant */ 824 if (*(loc-1)=='.' && !xisdigit(*loc)) goto mistake; /* not a constant */ 825 dec: *id_loc++=*(loc-1); 826 while (xisdigit(*loc) || *loc=='.') *id_loc++=*loc++; 827 if (*loc=='e' || *loc=='E') { /* float constant */ 828 *id_loc++='_'; loc++; 829 if (*loc=='+' || *loc=='-') *id_loc++=*loc++; 830 while (xisdigit(*loc)) *id_loc++=*loc++; 831 } 832 } 833 while (*loc=='u' || *loc=='U' || *loc=='l' || *loc=='L' 834 || *loc=='f' || *loc=='F') { 835 *id_loc++='$'; *id_loc++=toupper(*loc); loc++; 836 } 837 return(constant); 838} 839 840@ \CEE/ strings and character constants, delimited by double and single 841quotes, respectively, can contain newlines or instances of their own 842delimiters if they are protected by a backslash. We follow this 843convention, but do not allow the string to be longer than |longest_name|. 844 845@<Get a string@>= { 846 char delim = c; /* what started the string */ 847 id_first = section_text+1; 848 id_loc = section_text; 849 if (delim=='\'' && *(loc-2)=='@@') {*++id_loc='@@'; *++id_loc='@@';} 850 *++id_loc=delim; 851 if (delim=='L') { /* wide character constant */ 852 delim=*loc++; *++id_loc=delim; 853 } 854 if (delim=='<') delim='>'; /* for file names in |#include| lines */ 855 while (1) { 856 if (loc>=limit) { 857 if(*(limit-1)!='\\') { 858 err_print("! String didn't end"); loc=limit; break; 859@.String didn't end@> 860 } 861 if(get_line()==0) { 862 err_print("! Input ended in middle of string"); loc=buffer; break; 863@.Input ended in middle of string@> 864 } 865 } 866 if ((c=*loc++)==delim) { 867 if (++id_loc<=section_text_end) *id_loc=c; 868 break; 869 } 870 if (c=='\\') if (loc>=limit) continue; 871 else if (++id_loc<=section_text_end) { 872 *id_loc = '\\'; c=*loc++; 873 } 874 if (++id_loc<=section_text_end) *id_loc=c; 875 } 876 if (id_loc>=section_text_end) { 877 printf("\n! String too long: "); 878@.String too long@> 879 term_write(section_text+1,25); 880 printf("..."); mark_error; 881 } 882 id_loc++; 883 return(string); 884} 885 886@ After an \.{@@} sign has been scanned, the next character tells us 887whether there is more work to do. 888 889@<Get control code and possible section name@>= { 890 c=*loc++; 891 switch(ccode[(eight_bits)c]) { 892 case translit_code: err_print("! Use @@l in limbo only"); continue; 893@.Use @@l in limbo...@> 894 case underline: xref_switch=def_flag; continue; 895 case trace: tracing=c-'0'; continue; 896 case xref_roman: case xref_wildcard: case xref_typewriter: 897 case noop: case TeX_string: c=ccode[c]; skip_restricted(); return(c); 898 case section_name: 899 @<Scan the section name and make |cur_section| point to it@>; 900 case verbatim: @<Scan a verbatim string@>; 901 case ord: @<Get a string@>; 902 default: return(ccode[(eight_bits)c]); 903 } 904} 905 906@ The occurrence of a section name sets |xref_switch| to zero, 907because the section name might (for example) follow \&{int}. 908 909@<Scan the section name...@>= { 910 char *k; /* pointer into |section_text| */ 911 cur_section_char=*(loc-1); 912 @<Put section name into |section_text|@>; 913 if (k-section_text>3 && strncmp(k-2,"...",3)==0) 914 cur_section=section_lookup(section_text+1,k-3,1); /* 1 indicates a prefix */ 915 else cur_section=section_lookup(section_text+1,k,0); 916 xref_switch=0; return(section_name); 917} 918 919@ Section names are placed into the |section_text| array with consecutive spaces, 920tabs, and carriage-returns replaced by single spaces. There will be no 921spaces at the beginning or the end. (We set |section_text[0]=' '| to facilitate 922this, since the |section_lookup| routine uses |section_text[1]| as the first 923character of the name.) 924 925@<Set init...@>=section_text[0]=' '; 926 927@ @<Put section name...@>= 928k=section_text; 929while (1) { 930 if (loc>limit && get_line()==0) { 931 err_print("! Input ended in section name"); 932@.Input ended in section name@> 933 loc=buffer+1; break; 934 } 935 c=*loc; 936 @<If end of name or erroneous control code, |break|@>; 937 loc++; if (k<section_text_end) k++; 938 if (xisspace(c)) { 939 c=' '; if (*(k-1)==' ') k--; 940 } 941*k=c; 942} 943if (k>=section_text_end) { 944 printf("\n! Section name too long: "); 945@.Section name too long@> 946 term_write(section_text+1,25); 947 printf("..."); mark_harmless; 948} 949if (*k==' ' && k>section_text) k--; 950 951@ @<If end of name...@>= 952if (c=='@@') { 953 c=*(loc+1); 954 if (c=='>') { 955 loc+=2; break; 956 } 957 if (ccode[(eight_bits)c]==new_section) { 958 err_print("! Section name didn't end"); break; 959@.Section name didn't end@> 960 } 961 if (c!='@@') { 962 err_print("! Control codes are forbidden in section name"); break; 963@.Control codes are forbidden...@> 964 } 965 *(++k)='@@'; loc++; /* now |c==*loc| again */ 966} 967 968@ This function skips over a restricted context at relatively high speed. 969 970@<Predecl...@>= 971void skip_restricted(); 972 973@ @c 974void 975skip_restricted() 976{ 977 id_first=loc; *(limit+1)='@@'; 978false_alarm: 979 while (*loc!='@@') loc++; 980 id_loc=loc; 981 if (loc++>limit) { 982 err_print("! Control text didn't end"); loc=limit; 983@.Control text didn't end@> 984 } 985 else { 986 if (*loc=='@@'&&loc<=limit) {loc++; goto false_alarm;} 987 if (*loc++!='>') 988 err_print("! Control codes are forbidden in control text"); 989@.Control codes are forbidden...@> 990 } 991} 992 993@ At the present point in the program we 994have |*(loc-1)==verbatim|; we set |id_first| to the beginning 995of the string itself, and |id_loc| to its ending-plus-one location in the 996buffer. We also set |loc| to the position just after the ending delimiter. 997 998@<Scan a verbatim string@>= { 999 id_first=loc++; *(limit+1)='@@'; *(limit+2)='>'; 1000 while (*loc!='@@' || *(loc+1)!='>') loc++; 1001 if (loc>=limit) err_print("! Verbatim string didn't end"); 1002@.Verbatim string didn't end@> 1003 id_loc=loc; loc+=2; 1004 return (verbatim); 1005} 1006 1007@** Phase one processing. 1008We now have accumulated enough subroutines to make it possible to carry out 1009\.{CWEAVE}'s first pass over the source file. If everything works right, 1010both phase one and phase two of \.{CWEAVE} will assign the same numbers to 1011sections, and these numbers will agree with what \.{CTANGLE} does. 1012 1013The global variable |next_control| often contains the most recent output of 1014|get_next|; in interesting cases, this will be the control code that 1015ended a section or part of a section. 1016 1017@<Global...@>= 1018eight_bits next_control; /* control code waiting to be acting upon */ 1019 1020@ The overall processing strategy in phase one has the following 1021straightforward outline. 1022 1023@<Predecl...@>= 1024void phase_one(); 1025 1026@ @c 1027void 1028phase_one() { 1029 phase=1; reset_input(); section_count=0; 1030 skip_limbo(); change_exists=0; 1031 while (!input_has_ended) 1032 @<Store cross-reference data for the current section@>; 1033 changed_section[section_count]=change_exists; 1034 /* the index changes if anything does */ 1035 phase=2; /* prepare for second phase */ 1036 @<Print error messages about unused or undefined section names@>; 1037} 1038 1039@ @<Store cross-reference data...@>= 1040{ 1041 if (++section_count==max_sections) overflow("section number"); 1042 changed_section[section_count]=changing; 1043 /* it will become 1 if any line changes */ 1044 if (*(loc-1)=='*' && show_progress) { 1045 printf("*%d",section_count); 1046 update_terminal; /* print a progress report */ 1047 } 1048 @<Store cross-references in the \TEX/ part of a section@>; 1049 @<Store cross-references in the definition part of a section@>; 1050 @<Store cross-references in the \CEE/ part of a section@>; 1051 if (changed_section[section_count]) change_exists=1; 1052} 1053 1054@ The |C_xref| subroutine stores references to identifiers in 1055\CEE/ text material beginning with the current value of |next_control| 1056and continuing until |next_control| is `\.\{' or `\.{\v}', or until the next 1057``milestone'' is passed (i.e., |next_control>=format_code|). If 1058|next_control>=format_code| when |C_xref| is called, nothing will happen; 1059but if |next_control=='|'| upon entry, the procedure assumes that this is 1060the `\.{\v}' preceding \CEE/ text that is to be processed. 1061 1062The parameter |spec_ctrl| is used to change this behavior. In most cases 1063|C_xref| is called with |spec_ctrl==ignore|, which triggers the default 1064processing described above. If |spec_ctrl==section_name|, section names will 1065be gobbled. This is used when \CEE/ text in the \TEX/ part or inside comments 1066is parsed: It allows for section names to appear in \pb, but these 1067strings will not be entered into the cross reference lists since they are not 1068definitions of section names. 1069 1070The program uses the fact that our internal code numbers satisfy 1071the relations |xref_roman==identifier+roman| and |xref_wildcard==identifier 1072+wildcard| and |xref_typewriter==identifier+typewriter|, 1073as well as |normal==0|. 1074 1075@<Predecl...@>= 1076void C_xref(); 1077 1078@ @c 1079void 1080C_xref( spec_ctrl ) /* makes cross-references for \CEE/ identifiers */ 1081 eight_bits spec_ctrl; 1082{ 1083 name_pointer p; /* a referenced name */ 1084 while (next_control<format_code || next_control==spec_ctrl) { 1085 if (next_control>=identifier && next_control<=xref_typewriter) { 1086 if (next_control>identifier) @<Replace |"@@@@"| by |"@@"| @>@; 1087 p=id_lookup(id_first, id_loc,next_control-identifier); new_xref(p); 1088 } 1089 if (next_control==section_name) { 1090 section_xref_switch=cite_flag; 1091 new_section_xref(cur_section); 1092 } 1093 next_control=get_next(); 1094 if (next_control=='|' || next_control==begin_comment || 1095 next_control==begin_short_comment) return; 1096 } 1097} 1098 1099@ The |outer_xref| subroutine is like |C_xref| except that it begins 1100with |next_control!='|'| and ends with |next_control>=format_code|. Thus, it 1101handles \CEE/ text with embedded comments. 1102 1103@<Predecl...@>= 1104void outer_xref(); 1105 1106@ @c 1107void 1108outer_xref() /* extension of |C_xref| */ 1109{ 1110 int bal; /* brace level in comment */ 1111 while (next_control<format_code) 1112 if (next_control!=begin_comment && next_control!=begin_short_comment) 1113 C_xref(ignore); 1114 else { 1115 boolean is_long_comment=(next_control==begin_comment); 1116 bal=copy_comment(is_long_comment,1); next_control='|'; 1117 while (bal>0) { 1118 C_xref(section_name); /* do not reference section names in comments */ 1119 if (next_control=='|') bal=copy_comment(is_long_comment,bal); 1120 else bal=0; /* an error message will occur in phase two */ 1121 } 1122 } 1123} 1124 1125@ In the \TEX/ part of a section, cross-reference entries are made only for 1126the identifiers in \CEE/ texts enclosed in \pb, or for control texts 1127enclosed in \.{@@\^}$\,\ldots\,$\.{@@>} or \.{@@.}$\,\ldots\,$\.{@@>} 1128or \.{@@:}$\,\ldots\,$\.{@@>}. 1129 1130@<Store cross-references in the \T...@>= 1131while (1) { 1132 switch (next_control=skip_TeX()) { 1133 case translit_code: err_print("! Use @@l in limbo only"); continue; 1134@.Use @@l in limbo...@> 1135 case underline: xref_switch=def_flag; continue; 1136 case trace: tracing=*(loc-1)-'0'; continue; 1137 case '|': C_xref(section_name); break; 1138 case xref_roman: case xref_wildcard: case xref_typewriter: 1139 case noop: case section_name: 1140 loc-=2; next_control=get_next(); /* scan to \.{@@>} */ 1141 if (next_control>=xref_roman && next_control<=xref_typewriter) { 1142 @<Replace |"@@@@"| by |"@@"| @>@; 1143 new_xref(id_lookup(id_first, id_loc,next_control-identifier)); 1144 } 1145 break; 1146 } 1147 if (next_control>=format_code) break; 1148} 1149 1150@ @<Replace |"@@@@"| by |"@@"| @>= 1151{ 1152 char *src=id_first,*dst=id_first; 1153 while(src<id_loc){ 1154 if(*src=='@@') src++; 1155 *dst++=*src++; 1156 } 1157 id_loc=dst; 1158 while (dst<src) *dst++=' '; /* clean up in case of error message display */ 1159} 1160 1161@ During the definition and \CEE/ parts of a section, cross-references 1162are made for all identifiers except reserved words. However, the right 1163identifier in a format definition is not referenced, and the left 1164identifier is referenced only if it has been explicitly 1165underlined (preceded by \.{@@!}). 1166The \TEX/ code in comments is, of course, ignored, except for 1167\CEE/ portions enclosed in \pb; the text of a section name is skipped 1168entirely, even if it contains \pb\ constructions. 1169 1170The variables |lhs| and |rhs| point to the respective identifiers involved 1171in a format definition. 1172 1173@<Global...@>= 1174name_pointer lhs, rhs; /* pointers to |byte_start| for format identifiers */ 1175name_pointer res_wd_end; /* pointer to the first nonreserved identifier */ 1176 1177@ When we get to the following code we have |next_control>=format_code|. 1178 1179@<Store cross-references in the d...@>= 1180while (next_control<=definition) { /* |format_code| or |definition| */ 1181 if (next_control==definition) { 1182 xref_switch=def_flag; /* implied \.{@@!} */ 1183 next_control=get_next(); 1184 } else @<Process a format definition@>; 1185 outer_xref(); 1186} 1187 1188@ Error messages for improper format definitions will be issued in phase 1189two. Our job in phase one is to define the |ilk| of a properly formatted 1190identifier, and to remove cross-references to identifiers that we now 1191discover should be unindexed. 1192 1193@<Process a form...@>= { 1194 next_control=get_next(); 1195 if (next_control==identifier) { 1196 lhs=id_lookup(id_first, id_loc,normal); lhs->ilk=normal; 1197 if (xref_switch) new_xref(lhs); 1198 next_control=get_next(); 1199 if (next_control==identifier) { 1200 rhs=id_lookup(id_first, id_loc,normal); 1201 lhs->ilk=rhs->ilk; 1202 if (unindexed(lhs)) { /* retain only underlined entries */ 1203 xref_pointer q,r=NULL; 1204 for (q=(xref_pointer)lhs->xref;q>xmem;q=q->xlink) 1205 if (q->num<def_flag) 1206 if (r) r->xlink=q->xlink; 1207 else lhs->xref=(char*)q->xlink; 1208 else r=q; 1209 } 1210 next_control=get_next(); 1211 } 1212 } 1213} 1214 1215@ A much simpler processing of format definitions occurs when the 1216definition is found in limbo. 1217 1218@<Process simple format in limbo@>= 1219{ 1220 if (get_next()!=identifier) 1221 err_print("! Missing left identifier of @@s"); 1222@.Missing left identifier...@> 1223 else { 1224 lhs=id_lookup(id_first,id_loc,normal); 1225 if (get_next()!=identifier) 1226 err_print("! Missing right identifier of @@s"); 1227@.Missing right identifier...@> 1228 else { 1229 rhs=id_lookup(id_first,id_loc,normal); 1230 lhs->ilk=rhs->ilk; 1231 } 1232 } 1233} 1234 1235@ Finally, when the \TEX/ and definition parts have been treated, we have 1236|next_control>=begin_C|. 1237 1238@<Store cross-references in the \CEE/...@>= 1239if (next_control<=section_name) { /* |begin_C| or |section_name| */ 1240 if (next_control==begin_C) section_xref_switch=0; 1241 else { 1242 section_xref_switch=def_flag; 1243 if(cur_section_char=='(' && cur_section!=name_dir) 1244 set_file_flag(cur_section); 1245 } 1246 do { 1247 if (next_control==section_name && cur_section!=name_dir) 1248 new_section_xref(cur_section); 1249 next_control=get_next(); outer_xref(); 1250 } while ( next_control<=section_name); 1251} 1252 1253@ After phase one has looked at everything, we want to check that each 1254section name was both defined and used. The variable |cur_xref| will point 1255to cross-references for the current section name of interest. 1256 1257@<Global...@>= 1258xref_pointer cur_xref; /* temporary cross-reference pointer */ 1259boolean an_output; /* did |file_flag| precede |cur_xref|? */ 1260 1261@ The following recursive procedure 1262walks through the tree of section names and prints out anomalies. 1263@^recursion@> 1264 1265@<Predecl...@>= 1266void section_check(); 1267 1268@ @c 1269void 1270section_check(p) 1271name_pointer p; /* print anomalies in subtree |p| */ 1272{ 1273 if (p) { 1274 section_check(p->llink); 1275 cur_xref=(xref_pointer)p->xref; 1276 if (cur_xref->num==file_flag) {an_output=1; cur_xref=cur_xref->xlink;} 1277 else an_output=0; 1278 if (cur_xref->num <def_flag) { 1279 printf("\n! Never defined: <"); print_section_name(p); putchar('>'); mark_harmless; 1280@.Never defined: <section name>@> 1281 } 1282 while (cur_xref->num >=cite_flag) cur_xref=cur_xref->xlink; 1283 if (cur_xref==xmem && !an_output) { 1284 printf("\n! Never used: <"); print_section_name(p); putchar('>'); mark_harmless; 1285@.Never used: <section name>@> 1286 } 1287 section_check(p->rlink); 1288 } 1289} 1290 1291@ @<Print error messages about un...@>=section_check(root) 1292 1293@* Low-level output routines. 1294The \TEX/ output is supposed to appear in lines at most |line_length| 1295characters long, so we place it into an output buffer. During the output 1296process, |out_line| will hold the current line number of the line about to 1297be output. 1298 1299@<Global...@>= 1300char out_buf[line_length+1]; /* assembled characters */ 1301char *out_ptr; /* just after last character in |out_buf| */ 1302char *out_buf_end = out_buf+line_length; /* end of |out_buf| */ 1303int out_line; /* number of next line to be output */ 1304 1305@ The |flush_buffer| routine empties the buffer up to a given breakpoint, 1306and moves any remaining characters to the beginning of the next line. 1307If the |per_cent| parameter is 1 a |'%'| is appended to the line 1308that is being output; in this case the breakpoint |b| should be strictly 1309less than |out_buf_end|. If the |per_cent| parameter is |0|, 1310trailing blanks are suppressed. 1311The characters emptied from the buffer form a new line of output; 1312if the |carryover| parameter is true, a |"%"| in that line will be 1313carried over to the next line (so that \TEX/ will ignore the completion 1314of commented-out text). 1315 1316@d c_line_write(c) fflush(active_file),fwrite(out_buf+1,sizeof(char),c,active_file) 1317@d tex_putc(c) putc(c,active_file) 1318@d tex_new_line putc('\n',active_file) 1319@d tex_printf(c) fprintf(active_file,c) 1320 1321@c 1322void 1323flush_buffer(b,per_cent,carryover) 1324char *b; /* outputs from |out_buf+1| to |b|,where |b<=out_ptr| */ 1325boolean per_cent,carryover; 1326{ 1327 char *j; j=b; /* pointer into |out_buf| */ 1328 if (! per_cent) /* remove trailing blanks */ 1329 while (j>out_buf && *j==' ') j--; 1330 c_line_write(j-out_buf); 1331 if (per_cent) tex_putc('%'); 1332 tex_new_line; out_line++; 1333 if (carryover) 1334 while (j>out_buf) 1335 if (*j--=='%' && (j==out_buf || *j!='\\')) { 1336 *b--='%'; break; 1337 } 1338 if (b<out_ptr) strncpy(out_buf+1,b+1,out_ptr-b); 1339 out_ptr-=b-out_buf; 1340} 1341 1342@ When we are copying \TEX/ source material, we retain line breaks 1343that occur in the input, except that an empty line is not 1344output when the \TEX/ source line was nonempty. For example, a line 1345of the \TEX/ file that contains only an index cross-reference entry 1346will not be copied. The |finish_line| routine is called just before 1347|get_line| inputs a new line, and just after a line break token has 1348been emitted during the output of translated \CEE/ text. 1349 1350@c 1351void 1352finish_line() /* do this at the end of a line */ 1353{ 1354 char *k; /* pointer into |buffer| */ 1355 if (out_ptr>out_buf) flush_buffer(out_ptr,0,0); 1356 else { 1357 for (k=buffer; k<=limit; k++) 1358 if (!(xisspace(*k))) return; 1359 flush_buffer(out_buf,0,0); 1360 } 1361} 1362 1363@ In particular, the |finish_line| procedure is called near the very 1364beginning of phase two. We initialize the output variables in a slightly 1365tricky way so that the first line of the output file will be 1366`\.{\\input cwebmac}'. 1367 1368@<Set init...@>= 1369out_ptr=out_buf+1; out_line=1; active_file=tex_file; 1370*out_ptr='c'; tex_printf("\\input cwebma"); 1371 1372@ When we wish to append one character |c| to the output buffer, we write 1373`|out(c)|'; this will cause the buffer to be emptied if it was already 1374full. If we want to append more than one character at once, we say 1375|out_str(s)|, where |s| is a string containing the characters. 1376 1377A line break will occur at a space or after a single-nonletter 1378\TEX/ control sequence. 1379 1380@d out(c) {if (out_ptr>=out_buf_end) break_out(); *(++out_ptr)=c;} 1381 1382@c 1383void 1384out_str(s) /* output characters from |s| to end of string */ 1385char *s; 1386{ 1387 while (*s) out(*s++); 1388} 1389 1390@ The |break_out| routine is called just before the output buffer is about 1391to overflow. To make this routine a little faster, we initialize position 13920 of the output buffer to `\.\\'; this character isn't really output. 1393 1394@<Set init...@>= 1395out_buf[0]='\\'; 1396 1397@ A long line is broken at a blank space or just before a backslash that isn't 1398preceded by another backslash. In the latter case, a |'%'| is output at 1399the break. 1400 1401@<Predecl...@>= 1402void break_out(); 1403 1404@ @c 1405void 1406break_out() /* finds a way to break the output line */ 1407{ 1408 char *k=out_ptr; /* pointer into |out_buf| */ 1409 while (1) { 1410 if (k==out_buf) @<Print warning message, break the line, |return|@>; 1411 if (*k==' ') { 1412 flush_buffer(k,0,1); return; 1413 } 1414 if (*(k--)=='\\' && *k!='\\') { /* we've decreased |k| */ 1415 flush_buffer(k,1,1); return; 1416 } 1417 } 1418} 1419 1420@ We get to this section only in the unusual case that the entire output line 1421consists of a string of backslashes followed by a string of nonblank 1422non-backslashes. In such cases it is almost always safe to break the 1423line by putting a |'%'| just before the last character. 1424 1425@<Print warning message...@>= 1426{ 1427 printf("\n! Line had to be broken (output l. %d):\n",out_line); 1428@.Line had to be broken@> 1429 term_write(out_buf+1, out_ptr-out_buf-1); 1430 new_line; mark_harmless; 1431 flush_buffer(out_ptr-1,1,1); return; 1432} 1433 1434@ Here is a macro that outputs a section number in decimal notation. 1435The number to be converted by |out_section| is known to be less than 1436|def_flag|, so it cannot have more than five decimal digits. If 1437the section is changed, we output `\.{\\*}' just after the number. 1438 1439@c 1440void 1441out_section(n) 1442sixteen_bits n; 1443{ 1444 char s[6]; 1445 sprintf(s,"%d",n); out_str(s); 1446 if(changed_section[n]) out_str ("\\*"); 1447@.\\*@> 1448} 1449 1450@ The |out_name| procedure is used to output an identifier or index 1451entry, enclosing it in braces. 1452 1453@c 1454void 1455out_name(p,quote_xalpha) 1456name_pointer p; 1457boolean quote_xalpha; 1458{ 1459 char *k, *k_end=(p+1)->byte_start; /* pointers into |byte_mem| */ 1460 out('{'); 1461 for (k=p->byte_start; k<k_end; k++) { 1462 if (isxalpha(*k) && quote_xalpha) out('\\'); 1463@.\\\$@> 1464@.\\\_@> 1465 out(*k); 1466 } 1467 out('}'); 1468} 1469 1470@* Routines that copy \TEX/ material. 1471During phase two, we use subroutines |copy_limbo|, |copy_TeX|, and 1472|copy_comment| in place of the analogous |skip_limbo|, |skip_TeX|, and 1473|skip_comment| that were used in phase one. (Well, |copy_comment| 1474was actually written in such a way that it functions as |skip_comment| 1475in phase one.) 1476 1477The |copy_limbo| routine, for example, takes \TEX/ material that is not 1478part of any section and transcribes it almost verbatim to the output file. 1479The use of `\.{@@}' signs is severely restricted in such material: 1480`\.{@@@@}' pairs are replaced by singletons; `\.{@@l}' and `\.{@@q}' and 1481`\.{@@s}' are interpreted. 1482 1483@c 1484void 1485copy_limbo() 1486{ 1487 char c; 1488 while (1) { 1489 if (loc>limit && (finish_line(), get_line()==0)) return; 1490 *(limit+1)='@@'; 1491 while (*loc!='@@') out(*(loc++)); 1492 if (loc++<=limit) { 1493 c=*loc++; 1494 if (ccode[(eight_bits)c]==new_section) break; 1495 switch (ccode[(eight_bits)c]) { 1496 case translit_code: out_str("\\ATL"); break; 1497@.\\ATL@> 1498 case '@@': out('@@'); break; 1499 case noop: skip_restricted(); break; 1500 case format_code: if (get_next()==identifier) get_next(); 1501 if (loc>=limit) get_line(); /* avoid blank lines in output */ 1502 break; /* the operands of \.{@@s} are ignored on this pass */ 1503 default: err_print("! Double @@ should be used in limbo"); 1504@.Double @@ should be used...@> 1505 out('@@'); 1506 } 1507 } 1508 } 1509} 1510 1511@ The |copy_TeX| routine processes the \TEX/ code at the beginning of a 1512section; for example, the words you are now reading were copied in this 1513way. It returns the next control code or `\.{\v}' found in the input. 1514We don't copy spaces or tab marks into the beginning of a line. This 1515makes the test for empty lines in |finish_line| work. 1516 1517@ @f copy_TeX TeX 1518@c 1519eight_bits 1520copy_TeX() 1521{ 1522 char c; /* current character being copied */ 1523 while (1) { 1524 if (loc>limit && (finish_line(), get_line()==0)) return(new_section); 1525 *(limit+1)='@@'; 1526 while ((c=*(loc++))!='|' && c!='@@') { 1527 out(c); 1528 if (out_ptr==out_buf+1 && (xisspace(c))) out_ptr--; 1529 } 1530 if (c=='|') return('|'); 1531 if (loc<=limit) return(ccode[(eight_bits)*(loc++)]); 1532 } 1533} 1534 1535@ The |copy_comment| function issues a warning if more braces are opened than 1536closed, and in the case of a more serious error it supplies enough 1537braces to keep \TEX/ from complaining about unbalanced braces. 1538Instead of copying the \TEX/ material 1539into the output buffer, this function copies it into the token memory 1540(in phase two only). 1541The abbreviation |app_tok(t)| is used to append token |t| to the current 1542token list, and it also makes sure that it is possible to append at least 1543one further token without overflow. 1544 1545@d app_tok(c) {if (tok_ptr+2>tok_mem_end) overflow("token"); *(tok_ptr++)=c;} 1546 1547@<Predec...@>= 1548int copy_comment(); 1549 1550@ @c 1551int copy_comment(is_long_comment,bal) /* copies \TEX/ code in comments */ 1552boolean is_long_comment; /* is this a traditional \CEE/ comment? */ 1553int bal; /* brace balance */ 1554{ 1555 char c; /* current character being copied */ 1556 while (1) { 1557 if (loc>limit) { 1558 if (is_long_comment) { 1559 if (get_line()==0) { 1560 err_print("! Input ended in mid-comment"); 1561@.Input ended in mid-comment@> 1562 loc=buffer+1; goto done; 1563 } 1564 } 1565 else { 1566 if (bal>1) err_print("! Missing } in comment"); 1567@.Missing \} in comment@> 1568 goto done; 1569 } 1570 } 1571 c=*(loc++); 1572 if (c=='|') return(bal); 1573 if (is_long_comment) @<Check for end of comment@>; 1574 if (phase==2) { 1575 if (ishigh(c)) app_tok(quoted_char); 1576 app_tok(c); 1577 } 1578 @<Copy special things when |c=='@@', '\\'|@>; 1579 if (c=='{') bal++; 1580 else if (c=='}') { 1581 if(bal>1) bal--; 1582 else {err_print("! Extra } in comment"); 1583@.Extra \} in comment@> 1584 if (phase==2) tok_ptr--; 1585 } 1586 } 1587 } 1588done:@<Clear |bal| and |return|@>; 1589} 1590 1591@ @<Check for end of comment@>= 1592if (c=='*' && *loc=='/') { 1593 loc++; 1594 if (bal>1) err_print("! Missing } in comment"); 1595@.Missing \} in comment@> 1596 goto done; 1597} 1598 1599@ @<Copy special things when |c=='@@'...@>= 1600if (c=='@@') { 1601 if (*(loc++)!='@@') { 1602 err_print("! Illegal use of @@ in comment"); 1603@.Illegal use of @@...@> 1604 loc-=2; if (phase==2) *(tok_ptr-1)=' '; goto done; 1605 } 1606} 1607else if (c=='\\' && *loc!='@@') 1608 if (phase==2) app_tok(*(loc++)) else loc++; 1609 1610@ We output 1611enough right braces to keep \TEX/ happy. 1612 1613@<Clear |bal|...@>= 1614if (phase==2) while (bal-- >0) app_tok('}'); 1615return(0); 1616 1617@** Parsing. 1618The most intricate part of \.{CWEAVE} is its mechanism for converting 1619\CEE/-like code into \TEX/ code, and we might as well plunge into this 1620aspect of the program now. A ``bottom up'' approach is used to parse the 1621\CEE/-like material, since \.{CWEAVE} must deal with fragmentary 1622constructions whose overall ``part of speech'' is not known. 1623 1624At the lowest level, the input is represented as a sequence of entities 1625that we shall call {\it scraps}, where each scrap of information consists 1626of two parts, its {\it category} and its {\it translation}. The category 1627is essentially a syntactic class, and the translation is a token list that 1628represents \TEX/ code. Rules of syntax and semantics tell us how to 1629combine adjacent scraps into larger ones, and if we are lucky an entire 1630\CEE/ text that starts out as hundreds of small scraps will join 1631together into one gigantic scrap whose translation is the desired \TEX/ 1632code. If we are unlucky, we will be left with several scraps that don't 1633combine; their translations will simply be output, one by one. 1634 1635The combination rules are given as context-sensitive productions that are 1636applied from left to right. Suppose that we are currently working on the 1637sequence of scraps $s_1\,s_2\ldots s_n$. We try first to find the longest 1638production that applies to an initial substring $s_1\,s_2\ldots\,$; but if 1639no such productions exist, we try to find the longest production 1640applicable to the next substring $s_2\,s_3\ldots\,$; and if that fails, we 1641try to match $s_3\,s_4\ldots\,$, etc. 1642 1643A production applies if the category codes have a given pattern. For 1644example, one of the productions (see rule~3) is 1645$$\hbox{|exp| }\left\{\matrix{\hbox{|binop|}\cr\hbox{|ubinop|}}\right\} 1646\hbox{ |exp| }\RA\hbox{ |exp|}$$ 1647and it means that three consecutive scraps whose respective categories are 1648|exp|, |binop| (or |ubinop|), 1649and |exp| are converted to one scrap whose category 1650is |exp|. The translations of the original 1651scraps are simply concatenated. The case of 1652$$\hbox{|exp| |comma| |exp| $\RA$ |exp|} \hskip4emE_1C\,\\{opt}9\,E_2$$ 1653(rule 4) is only slightly more complicated: 1654Here the resulting |exp| translation 1655consists not only of the three original translations, but also of the 1656tokens |opt| and 9 between the translations of the 1657|comma| and the following |exp|. 1658In the \TEX/ file, this will specify an optional line break after the 1659comma, with penalty 90. 1660 1661At each opportunity the longest possible production is applied. For 1662example, if the current sequence of scraps is |int_like| |cast| 1663|lbrace|, rule 31 is applied; but if the sequence is |int_like| |cast| 1664followed by anything other than |lbrace|, rule 32 takes effect. 1665 1666Translation rules such as `$E_1C\,\\{opt}9\,E_2$' above use subscripts 1667to distinguish between translations of scraps whose categories have the 1668same initial letter; these subscripts are assigned from left to right. 1669 1670@ Here is a list of the category codes that scraps can have. 1671(A few others, like |int_like|, have already been defined; the 1672|cat_name| array contains a complete list.) 1673 1674@d exp 1 /* denotes an expression, including perhaps a single identifier */ 1675@d unop 2 /* denotes a unary operator */ 1676@d binop 3 /* denotes a binary operator */ 1677@d ubinop 4 1678 /* denotes an operator that can be unary or binary, depending on context */ 1679@d cast 5 /* denotes a cast */ 1680@d question 6 /* denotes a question mark and possibly the expressions flanking it */ 1681@d lbrace 7 /* denotes a left brace */ 1682@d rbrace 8 /* denotes a right brace */ 1683@d decl_head 9 /* denotes an incomplete declaration */ 1684@d comma 10 /* denotes a comma */ 1685@d lpar 11 /* denotes a left parenthesis or left bracket */ 1686@d rpar 12 /* denotes a right parenthesis or right bracket */ 1687@d prelangle 13 /* denotes `$<$' before we know what it is */ 1688@d prerangle 14 /* denotes `$>$' before we know what it is */ 1689@d langle 15 /* denotes `$<$' when it's used as angle bracket in a template */ 1690@d colcol 18 /* denotes `::' */ 1691@d base 19 /* denotes a colon that introduces a base specifier */ 1692@d decl 20 /* denotes a complete declaration */ 1693@d struct_head 21 /* denotes the beginning of a structure specifier */ 1694@d stmt 23 /* denotes a complete statement */ 1695@d function 24 /* denotes a complete function */ 1696@d fn_decl 25 /* denotes a function declarator */ 1697@d semi 27 /* denotes a semicolon */ 1698@d colon 28 /* denotes a colon */ 1699@d tag 29 /* denotes a statement label */ 1700@d if_head 30 /* denotes the beginning of a compound conditional */ 1701@d else_head 31 /* denotes a prefix for a compound statement */ 1702@d if_clause 32 /* pending \.{if} together with a condition */ 1703@d lproc 35 /* begins a preprocessor command */ 1704@d rproc 36 /* ends a preprocessor command */ 1705@d insert 37 /* a scrap that gets combined with its neighbor */ 1706@d section_scrap 38 /* section name */ 1707@d dead 39 /* scrap that won't combine */ 1708@d ftemplate 59 /* \\{make\_pair} */ 1709@d new_exp 60 /* \&{new} and a following type identifier */ 1710@d begin_arg 61 /* \.{@@[} */ 1711@d end_arg 62 /* \.{@@]} */ 1712 1713@<Glo...@>= 1714char cat_name[256][12]; 1715eight_bits cat_index; 1716 1717@ @<Set in...@>= 1718 for (cat_index=0;cat_index<255;cat_index++) 1719 strcpy(cat_name[cat_index],"UNKNOWN"); 1720@.UNKNOWN@> 1721 strcpy(cat_name[exp],"exp"); 1722 strcpy(cat_name[unop],"unop"); 1723 strcpy(cat_name[binop],"binop"); 1724 strcpy(cat_name[ubinop],"ubinop"); 1725 strcpy(cat_name[cast],"cast"); 1726 strcpy(cat_name[question],"?"); 1727 strcpy(cat_name[lbrace],"{"@q}@>); 1728 strcpy(cat_name[rbrace],@q{@>"}"); 1729 strcpy(cat_name[decl_head],"decl_head"); 1730 strcpy(cat_name[comma],","); 1731 strcpy(cat_name[lpar],"("); 1732 strcpy(cat_name[rpar],")"); 1733 strcpy(cat_name[prelangle],"<"); 1734 strcpy(cat_name[prerangle],">"); 1735 strcpy(cat_name[langle],"\\<"); 1736 strcpy(cat_name[colcol],"::"); 1737 strcpy(cat_name[base],"\\:"); 1738 strcpy(cat_name[decl],"decl"); 1739 strcpy(cat_name[struct_head],"struct_head"); 1740 strcpy(cat_name[alfop],"alfop"); 1741 strcpy(cat_name[stmt],"stmt"); 1742 strcpy(cat_name[function],"function"); 1743 strcpy(cat_name[fn_decl],"fn_decl"); 1744 strcpy(cat_name[else_like],"else_like"); 1745 strcpy(cat_name[semi],";"); 1746 strcpy(cat_name[colon],":"); 1747 strcpy(cat_name[tag],"tag"); 1748 strcpy(cat_name[if_head],"if_head"); 1749 strcpy(cat_name[else_head],"else_head"); 1750 strcpy(cat_name[if_clause],"if()"); 1751 strcpy(cat_name[lproc],"#{"@q}@>); 1752 strcpy(cat_name[rproc],@q{@>"#}"); 1753 strcpy(cat_name[insert],"insert"); 1754 strcpy(cat_name[section_scrap],"section"); 1755 strcpy(cat_name[dead],"@@d"); 1756 strcpy(cat_name[public_like],"public"); 1757 strcpy(cat_name[operator_like],"operator"); 1758 strcpy(cat_name[new_like],"new"); 1759 strcpy(cat_name[catch_like],"catch"); 1760 strcpy(cat_name[for_like],"for"); 1761 strcpy(cat_name[do_like],"do"); 1762 strcpy(cat_name[if_like],"if"); 1763 strcpy(cat_name[delete_like],"delete"); 1764 strcpy(cat_name[raw_ubin],"ubinop?"); 1765 strcpy(cat_name[const_like],"const"); 1766 strcpy(cat_name[raw_int],"raw"); 1767 strcpy(cat_name[int_like],"int"); 1768 strcpy(cat_name[case_like],"case"); 1769 strcpy(cat_name[sizeof_like],"sizeof"); 1770 strcpy(cat_name[struct_like],"struct"); 1771 strcpy(cat_name[typedef_like],"typedef"); 1772 strcpy(cat_name[define_like],"define"); 1773 strcpy(cat_name[template_like],"template"); 1774 strcpy(cat_name[ftemplate],"ftemplate"); 1775 strcpy(cat_name[new_exp],"new_exp"); 1776 strcpy(cat_name[begin_arg],"@@["@q]@>); 1777 strcpy(cat_name[end_arg],@q[@>"@@]"); 1778 strcpy(cat_name[0],"zero"); 1779 1780@ This code allows \.{CWEAVE} to display its parsing steps. 1781 1782@c 1783void 1784print_cat(c) /* symbolic printout of a category */ 1785eight_bits c; 1786{ 1787 printf(cat_name[c]); 1788} 1789 1790@ The token lists for translated \TEX/ output contain some special control 1791symbols as well as ordinary characters. These control symbols are 1792interpreted by \.{CWEAVE} before they are written to the output file. 1793 1794\yskip\hang |break_space| denotes an optional line break or an en space; 1795 1796\yskip\hang |force| denotes a line break; 1797 1798\yskip\hang |big_force| denotes a line break with additional vertical space; 1799 1800\yskip\hang |preproc_line| denotes that the line will be printed flush left; 1801 1802\yskip\hang |opt| denotes an optional line break (with the continuation 1803line indented two ems with respect to the normal starting position)---this 1804code is followed by an integer |n|, and the break will occur with penalty 1805$10n$; 1806 1807\yskip\hang |backup| denotes a backspace of one em; 1808 1809\yskip\hang |cancel| obliterates any |break_space|, |opt|, |force|, or 1810|big_force| tokens that immediately precede or follow it and also cancels any 1811|backup| tokens that follow it; 1812 1813\yskip\hang |indent| causes future lines to be indented one more em; 1814 1815\yskip\hang |outdent| causes future lines to be indented one less em. 1816 1817\yskip\noindent All of these tokens are removed from the \TEX/ output that 1818comes from \CEE/ text between \pb\ signs; |break_space| and |force| and 1819|big_force| become single spaces in this mode. The translation of other 1820\CEE/ texts results in \TEX/ control sequences \.{\\1}, \.{\\2}, 1821\.{\\3}, \.{\\4}, \.{\\5}, \.{\\6}, \.{\\7}, \.{\\8} 1822corresponding respectively to 1823|indent|, |outdent|, |opt|, |backup|, |break_space|, |force|, 1824|big_force| and |preproc_line|. 1825However, a sequence of consecutive `\.\ ', |break_space|, 1826|force|, and/or |big_force| tokens is first replaced by a single token 1827(the maximum of the given ones). 1828 1829The token |math_rel| will be translated into 1830\.{\\MRL\{}, and it will get a matching \.\} later. 1831Other control sequences in the \TEX/ output will be 1832`\.{\\\\\{}$\,\ldots\,$\.\}' 1833surrounding identifiers, `\.{\\\&\{}$\,\ldots\,$\.\}' surrounding 1834reserved words, `\.{\\.\{}$\,\ldots\,$\.\}' surrounding strings, 1835`\.{\\C\{}$\,\ldots\,$\.\}$\,$|force|' surrounding comments, and 1836`\.{\\X$n$:}$\,\ldots\,$\.{\\X}' surrounding section names, where 1837|n| is the section number. 1838 1839@d math_rel 0206 1840@d big_cancel 0210 /* like |cancel|, also overrides spaces */ 1841@d cancel 0211 /* overrides |backup|, |break_space|, |force|, |big_force| */ 1842@d indent 0212 /* one more tab (\.{\\1}) */ 1843@d outdent 0213 /* one less tab (\.{\\2}) */ 1844@d opt 0214 /* optional break in mid-statement (\.{\\3}) */ 1845@d backup 0215 /* stick out one unit to the left (\.{\\4}) */ 1846@d break_space 0216 /* optional break between statements (\.{\\5}) */ 1847@d force 0217 /* forced break between statements (\.{\\6}) */ 1848@d big_force 0220 /* forced break with additional space (\.{\\7}) */ 1849@d preproc_line 0221 /* begin line without indentation (\.{\\8}) */ 1850@^high-bit character handling@> 1851@d quoted_char 0222 1852 /* introduces a character token in the range |0200|--|0377| */ 1853@d end_translation 0223 /* special sentinel token at end of list */ 1854@d inserted 0224 /* sentinel to mark translations of inserts */ 1855@d qualifier 0225 /* introduces an explicit namespace qualifier */ 1856 1857@ The raw input is converted into scraps according to the following table, 1858which gives category codes followed by the translations. 1859\def\stars {\.{**}}% 1860The symbol `\stars' stands for `\.{\\\&\{{\rm identifier}\}}', 1861i.e., the identifier itself treated as a reserved word. 1862The right-hand column is the so-called |mathness|, which is explained 1863further below. 1864 1865An identifier |c| of length 1 is translated as \.{\\\v c} instead of 1866as \.{\\\\\{c\}}. An identifier \.{CAPS} in all caps is translated as 1867\.{\\.\{CAPS\}} instead of as \.{\\\\\{CAPS\}}. An identifier that has 1868become a reserved word via |typedef| is translated with \.{\\\&} replacing 1869\.{\\\\} and |raw_int| replacing |exp|. 1870 1871A string of length greater than 20 is broken into pieces of size at most~20 1872with discretionary breaks in between. 1873 1874\yskip\halign{\quad#\hfil&\quad#\hfil&\quad\hfil#\hfil\cr 1875\.{!=}&|binop|: \.{\\I}&yes\cr 1876\.{<=}&|binop|: \.{\\Z}&yes\cr 1877\.{>=}&|binop|: \.{\\G}&yes\cr 1878\.{==}&|binop|: \.{\\E}&yes\cr 1879\.{\&\&}&|binop|: \.{\\W}&yes\cr 1880\.{\v\v}&|binop|: \.{\\V}&yes\cr 1881\.{++}&|unop|: \.{\\PP}&yes\cr 1882\.{--}&|unop|: \.{\\MM}&yes\cr 1883\.{->}&|binop|: \.{\\MG}&yes\cr 1884\.{>>}&|binop|: \.{\\GG}&yes\cr 1885\.{<<}&|binop|: \.{\\LL}&yes\cr 1886\.{::}&|colcol|: \.{\\DC}&maybe\cr 1887\.{.*}&|binop|: \.{\\PA}&yes\cr 1888\.{->*}&|binop|: \.{\\MGA}&yes\cr 1889\.{...}&|raw_int|: \.{\\,\\ldots\\,}&yes\cr 1890\."string\."&|exp|: \.{\\.\{}string with special characters quoted\.\}&maybe\cr 1891\.{@@=}string\.{@@>}&|exp|: \.{\\vb\{}string with special characters 1892 quoted\.\}&maybe\cr 1893\.{@@'7'}&|exp|: \.{\\.\{@@'7'\}}&maybe\cr 1894\.{077} or \.{\\77}&|exp|: \.{\\T\{\\\~77\}}&maybe\cr 1895\.{0x7f}&|exp|: \.{\\T\{\\\^7f\}}&maybe\cr 1896\.{77}&|exp|: \.{\\T\{77\}}&maybe\cr 1897\.{77L}&|exp|: \.{\\T\{77\\\$L\}}&maybe\cr 1898\.{0.1E5}&|exp|: \.{\\T\{0.1\\\_5\}}&maybe\cr 1899\.+&|ubinop|: \.+&yes\cr 1900\.-&|ubinop|: \.-&yes\cr 1901\.*&|raw_ubin|: \.*&yes\cr 1902\./&|binop|: \./&yes\cr 1903\.<&|prelangle|: \.{\\langle}&yes\cr 1904\.=&|binop|: \.{\\K}&yes\cr 1905\.>&|prerangle|: \.{\\rangle}&yes\cr 1906\..&|binop|: \..&yes\cr 1907\.{\v}&|binop|: \.{\\OR}&yes\cr 1908\.\^&|binop|: \.{\\XOR}&yes\cr 1909\.\%&|binop|: \.{\\MOD}&yes\cr 1910\.?&|question|: \.{\\?}&yes\cr 1911\.!&|unop|: \.{\\R}&yes\cr 1912\.\~&|unop|: \.{\\CM}&yes\cr 1913\.\&&|raw_ubin|: \.{\\AND}&yes\cr 1914\.(&|lpar|: \.(&maybe\cr 1915\.[&|lpar|: \.[&maybe\cr 1916\.)&|rpar|: \.)&maybe\cr 1917\.]&|rpar|: \.]&maybe\cr 1918\.\{&|lbrace|: \.\{&yes\cr 1919\.\}&|lbrace|: \.\}&yes\cr 1920\.,&|comma|: \.,&yes\cr 1921\.;&|semi|: \.;&maybe\cr 1922\.:&|colon|: \.:&no\cr 1923\.\# (within line)&|ubinop|: \.{\\\#}&yes\cr 1924\.\# (at beginning)&|lproc|: |force| |preproc_line| \.{\\\#}&no\cr 1925end of \.\# line&|rproc|: |force|&no\cr 1926identifier&|exp|: \.{\\\\\{}identifier with underlines and 1927 dollar signs quoted\.\}&maybe\cr 1928\.{and}&|alfop|: \stars&yes\cr 1929\.{and\_eq}&|alfop|: \stars&yes\cr 1930\.{asm}&|sizeof_like|: \stars&maybe\cr 1931\.{auto}&|int_like|: \stars&maybe\cr 1932\.{bitand}&|alfop|: \stars&yes\cr 1933\.{bitor}&|alfop|: \stars&yes\cr 1934\.{bool}&|raw_int|: \stars&maybe\cr 1935\.{break}&|case_like|: \stars&maybe\cr 1936\.{case}&|case_like|: \stars&maybe\cr 1937\.{catch}&|catch_like|: \stars&maybe\cr 1938\.{char}&|raw_int|: \stars&maybe\cr 1939\.{class}&|struct_like|: \stars&maybe\cr 1940\.{clock\_t}&|raw_int|: \stars&maybe\cr 1941\.{compl}&|alfop|: \stars&yes\cr 1942\.{const}&|const_like|: \stars&maybe\cr 1943\.{const\_cast}&|raw_int|: \stars&maybe\cr 1944\.{continue}&|case_like|: \stars&maybe\cr 1945\.{default}&|case_like|: \stars&maybe\cr 1946\.{define}&|define_like|: \stars&maybe\cr 1947\.{defined}&|sizeof_like|: \stars&maybe\cr 1948\.{delete}&|delete_like|: \stars&maybe\cr 1949\.{div\_t}&|raw_int|: \stars&maybe\cr 1950\.{do}&|do_like|: \stars&maybe\cr 1951\.{double}&|raw_int|: \stars&maybe\cr 1952\.{dynamic\_cast}&|raw_int|: \stars&maybe\cr 1953\.{elif}&|if_like|: \stars&maybe\cr 1954\.{else}&|else_like|: \stars&maybe\cr 1955\.{endif}&|if_like|: \stars&maybe\cr 1956\.{enum}&|struct_like|: \stars&maybe\cr 1957\.{error}&|if_like|: \stars&maybe\cr 1958\.{explicit}&|int_like|: \stars&maybe\cr 1959\.{export}&|int_like|: \stars&maybe\cr 1960\.{extern}&|int_like|: \stars&maybe\cr 1961\.{FILE}&|raw_int|: \stars&maybe\cr 1962\.{float}&|raw_int|: \stars&maybe\cr 1963\.{for}&|for_like|: \stars&maybe\cr 1964\.{fpos\_t}&|raw_int|: \stars&maybe\cr 1965\.{friend}&|int_like|: \stars&maybe\cr 1966\.{goto}&|case_like|: \stars&maybe\cr 1967\.{if}&|if_like|: \stars&maybe\cr 1968\.{ifdef}&|if_like|: \stars&maybe\cr 1969\.{ifndef}&|if_like|: \stars&maybe\cr 1970\.{include}&|if_like|: \stars&maybe\cr 1971\.{inline}&|int_like|: \stars&maybe\cr 1972\.{int}&|raw_int|: \stars&maybe\cr 1973\.{jmp\_buf}&|raw_int|: \stars&maybe\cr 1974\.{ldiv\_t}&|raw_int|: \stars&maybe\cr 1975\.{line}&|if_like|: \stars&maybe\cr 1976\.{long}&|raw_int|: \stars&maybe\cr 1977\.{make\_pair}&|ftemplate|: \.{\\\\\{make\\\_pair\}}&maybe\cr 1978\.{mutable}&|int_like|: \stars&maybe\cr 1979\.{namespace}&|struct_like|: \stars&maybe\cr 1980\.{new}&|new_like|: \stars&maybe\cr 1981\.{not}&|alfop|: \stars&yes\cr 1982\.{not\_eq}&|alfop|: \stars&yes\cr 1983\.{NULL}&|exp|: \.{\\NULL}&yes\cr 1984\.{offsetof}&|raw_int|: \stars&maybe\cr 1985\.{operator}&|operator_like|: \stars&maybe\cr 1986\.{or}&|alfop|: \stars&yes\cr 1987\.{or\_eq}&|alfop|: \stars&yes\cr 1988\.{pragma}&|if_like|: \stars&maybe\cr 1989\.{private}&|public_like|: \stars&maybe\cr 1990\.{protected}&|public_like|: \stars&maybe\cr 1991\.{ptrdiff\_t}&|raw_int|: \stars&maybe\cr 1992\.{public}&|public_like|: \stars&maybe\cr 1993\.{register}&|int_like|: \stars&maybe\cr 1994\.{reinterpret\_cast}&|raw_int|: \stars&maybe\cr 1995\.{return}&|case_like|: \stars&maybe\cr 1996\.{short}&|raw_int|: \stars&maybe\cr 1997\.{sig\_atomic\_t}&|raw_int|: \stars&maybe\cr 1998\.{signed}&|raw_int|: \stars&maybe\cr 1999\.{size\_t}&|raw_int|: \stars&maybe\cr 2000\.{sizeof}&|sizeof_like|: \stars&maybe\cr 2001\.{static}&|int_like|: \stars&maybe\cr 2002\.{static\_cast}&|raw_int|: \stars&maybe\cr 2003\.{struct}&|struct_like|: \stars&maybe\cr 2004\.{switch}&|for_like|: \stars&maybe\cr 2005\.{template}&|template_like|: \stars&maybe\cr 2006\.{TeX}&|exp|: \.{\\TeX}&yes\cr 2007\.{this}&|exp|: \.{\\this}&yes\cr 2008\.{throw}&|case_like|: \stars&maybe\cr 2009\.{time\_t}&|raw_int|: \stars&maybe\cr 2010\.{try}&|else_like|: \stars&maybe\cr 2011\.{typedef}&|typedef_like|: \stars&maybe\cr 2012\.{typeid}&|raw_int|: \stars&maybe\cr 2013\.{typename}&|struct_like|: \stars&maybe\cr 2014\.{undef}&|if_like|: \stars&maybe\cr 2015\.{union}&|struct_like|: \stars&maybe\cr 2016\.{unsigned}&|raw_int|: \stars&maybe\cr 2017\.{using}&|int_like|: \stars&maybe\cr 2018\.{va\_dcl}&|decl|: \stars&maybe\cr 2019\.{va\_list}&|raw_int|: \stars&maybe\cr 2020\.{virtual}&|int_like|: \stars&maybe\cr 2021\.{void}&|raw_int|: \stars&maybe\cr 2022\.{volatile}&|const_like|: \stars&maybe\cr 2023\.{wchar\_t}&|raw_int|: \stars&maybe\cr 2024\.{while}&|for_like|: \stars&maybe\cr 2025\.{xor}&|alfop|: \stars&yes\cr 2026\.{xor\_eq}&|alfop|: \stars&yes\cr 2027\.{@@,}&|insert|: \.{\\,}&maybe\cr 2028\.{@@\v}&|insert|: |opt| \.0&maybe\cr 2029\.{@@/}&|insert|: |force|&no\cr 2030\.{@@\#}&|insert|: |big_force|&no\cr 2031\.{@@+}&|insert|: |big_cancel| \.{\{\}} |break_space| 2032 \.{\{\}} |big_cancel|&no\cr 2033\.{@@;}&|semi|: &maybe\cr 2034\.{@@[@q]@>}&|begin_arg|: &maybe\cr 2035\.{@q[@>@@]}&|end_arg|: &maybe\cr 2036\.{@@\&}&|insert|: \.{\\J}&maybe\cr 2037\.{@@h}&|insert|: |force| \.{\\ATH} |force|&no\cr 2038\.{@@<}\thinspace section name\thinspace\.{@@>}&|section_scrap|: 2039 \.{\\X}$n$\.:translated section name\.{\\X}&maybe\cr 2040\.{@@(@q)@>}\thinspace section name\thinspace\.{@@>}&|section_scrap|: 2041 \.{\\X}$n$\.{:\\.\{}section name with special characters 2042 quoted\.{\ \}\\X}&maybe\cr 2043\.{/*}comment\.{*/}&|insert|: |cancel| 2044 \.{\\C\{}translated comment\.\} |force|&no\cr 2045\.{//}comment&|insert|: |cancel| 2046 \.{\\SHC\{}translated comment\.\} |force|&no\cr 2047} 2048 2049\smallskip 2050The construction \.{@@t}\thinspace stuff\/\thinspace\.{@@>} contributes 2051\.{\\hbox\{}\thinspace stuff\/\thinspace\.\} to the following scrap. 2052 2053@i prod.w 2054 2055@* Implementing the productions. 2056More specifically, a scrap is a structure consisting of a category 2057|cat| and a |text_pointer| |trans|, which points to the translation in 2058|tok_start|. When \CEE/ text is to be processed with the grammar above, 2059we form an array |scrap_info| containing the initial scraps. 2060Our production rules have the nice property that the right-hand side is never 2061longer than the left-hand side. Therefore it is convenient to use sequential 2062allocation for the current sequence of scraps. Five pointers are used to 2063manage the parsing: 2064 2065\yskip\hang |pp| is a pointer into |scrap_info|. We will try to match 2066the category codes |pp->cat,@,@,(pp+1)->cat|$,\,\,\ldots\,$ 2067to the left-hand sides of productions. 2068 2069\yskip\hang |scrap_base|, |lo_ptr|, |hi_ptr|, and |scrap_ptr| are such that 2070the current sequence of scraps appears in positions |scrap_base| through 2071|lo_ptr| and |hi_ptr| through |scrap_ptr|, inclusive, in the |cat| and 2072|trans| arrays. Scraps located between |scrap_base| and |lo_ptr| have 2073been examined, while those in positions |>=hi_ptr| have not yet been 2074looked at by the parsing process. 2075 2076\yskip\noindent Initially |scrap_ptr| is set to the position of the final 2077scrap to be parsed, and it doesn't change its value. The parsing process 2078makes sure that |lo_ptr>=pp+3|, since productions have as many as four terms, 2079by moving scraps from |hi_ptr| to |lo_ptr|. If there are 2080fewer than |pp+3| scraps left, the positions up to |pp+3| are filled with 2081blanks that will not match in any productions. Parsing stops when 2082|pp==lo_ptr+1| and |hi_ptr==scrap_ptr+1|. 2083 2084Since the |scrap| structure will later be used for other purposes, we 2085declare its second element as a union. 2086 2087@<Type...@>= 2088typedef struct { 2089 eight_bits cat; 2090 eight_bits mathness; 2091 union { 2092 text_pointer Trans; 2093 @<Rest of |trans_plus| union@>@; 2094 } trans_plus; 2095} scrap; 2096typedef scrap *scrap_pointer; 2097 2098@ @d trans trans_plus.Trans /* translation texts of scraps */ 2099 2100@<Global...@>= 2101scrap scrap_info[max_scraps]; /* memory array for scraps */ 2102scrap_pointer scrap_info_end=scrap_info+max_scraps -1; /* end of |scrap_info| */ 2103scrap_pointer pp; /* current position for reducing productions */ 2104scrap_pointer scrap_base; /* beginning of the current scrap sequence */ 2105scrap_pointer scrap_ptr; /* ending of the current scrap sequence */ 2106scrap_pointer lo_ptr; /* last scrap that has been examined */ 2107scrap_pointer hi_ptr; /* first scrap that has not been examined */ 2108scrap_pointer max_scr_ptr; /* largest value assumed by |scrap_ptr| */ 2109 2110@ @<Set init...@>= 2111scrap_base=scrap_info+1; 2112max_scr_ptr=scrap_ptr=scrap_info; 2113 2114@ Token lists in |@!tok_mem| are composed of the following kinds of 2115items for \TEX/ output. 2116 2117\yskip\item{$\bullet$}Character codes and special codes like |force| and 2118|math_rel| represent themselves; 2119 2120\item{$\bullet$}|id_flag+p| represents \.{\\\\\{{\rm identifier $p$}\}}; 2121 2122\item{$\bullet$}|res_flag+p| represents \.{\\\&\{{\rm identifier $p$}\}}; 2123 2124\item{$\bullet$}|section_flag+p| represents section name |p|; 2125 2126\item{$\bullet$}|tok_flag+p| represents token list number |p|; 2127 2128\item{$\bullet$}|inner_tok_flag+p| represents token list number |p|, to be 2129translated without line-break controls. 2130 2131@d id_flag 10240 /* signifies an identifier */ 2132@d res_flag 2*id_flag /* signifies a reserved word */ 2133@d section_flag 3*id_flag /* signifies a section name */ 2134@d tok_flag 4*id_flag /* signifies a token list */ 2135@d inner_tok_flag 5*id_flag /* signifies a token list in `\pb' */ 2136 2137@c 2138void 2139print_text(p) /* prints a token list for debugging; not used in |main| */ 2140text_pointer p; 2141{ 2142 token_pointer j; /* index into |tok_mem| */ 2143 sixteen_bits r; /* remainder of token after the flag has been stripped off */ 2144 if (p>=text_ptr) printf("BAD"); 2145 else for (j=*p; j<*(p+1); j++) { 2146 r=*j%id_flag; 2147 switch (*j/id_flag) { 2148 case 1: printf("\\\\{"@q}@>); print_id((name_dir+r)); printf(@q{@>"}"); 2149 break; /* |id_flag| */ 2150 case 2: printf("\\&{"@q}@>); print_id((name_dir+r)); printf(@q{@>"}"); 2151 break; /* |res_flag| */ 2152 case 3: printf("<"); print_section_name((name_dir+r)); printf(">"); 2153 break; /* |section_flag| */ 2154 case 4: printf("[[%d]]",r); break; /* |tok_flag| */ 2155 case 5: printf("|[[%d]]|",r); break; /* |inner_tok_flag| */ 2156 default: @<Print token |r| in symbolic form@>; 2157 } 2158 } 2159 fflush(stdout); 2160} 2161 2162@ @<Print token |r|...@>= 2163switch (r) { 2164 case math_rel: printf("\\mathrel{"@q}@>); break; 2165 case big_cancel: printf("[ccancel]"); break; 2166 case cancel: printf("[cancel]"); break; 2167 case indent: printf("[indent]"); break; 2168 case outdent: printf("[outdent]"); break; 2169 case backup: printf("[backup]"); break; 2170 case opt: printf("[opt]"); break; 2171 case break_space: printf("[break]"); break; 2172 case force: printf("[force]"); break; 2173 case big_force: printf("[fforce]"); break; 2174 case preproc_line: printf("[preproc]"); break; 2175 case quoted_char: j++; printf("[%o]",(unsigned)*j); break; 2176 case end_translation: printf("[quit]"); break; 2177 case inserted: printf("[inserted]"); break; 2178 default: putxchar(r); 2179} 2180 2181@ The production rules listed above are embedded directly into \.{CWEAVE}, 2182since it is easier to do this than to write an interpretive system 2183that would handle production systems in general. Several macros are defined 2184here so that the program for each production is fairly short. 2185 2186All of our productions conform to the general notion that some |k| 2187consecutive scraps starting at some position |j| are to be replaced by a 2188single scrap of some category |c| whose translation is composed from the 2189translations of the disappearing scraps. After this production has been 2190applied, the production pointer |pp| should change by an amount |d|. Such 2191a production can be represented by the quadruple |(j,k,c,d)|. For example, 2192the production `|exp@,comma@,exp| $\RA$ |exp|' would be represented by 2193`|(pp,3,exp,-2)|'; in this case the pointer |pp| should decrease by 2 2194after the production has been applied, because some productions with 2195|exp| in their second or third positions might now match, 2196but no productions have 2197|exp| in the fourth position of their left-hand sides. Note that 2198the value of |d| is determined by the whole collection of productions, not 2199by an individual one. 2200The determination of |d| has been 2201done by hand in each case, based on the full set of productions but not on 2202the grammar of \CEE/ or on the rules for constructing the initial 2203scraps. 2204 2205We also attach a serial number to each production, so that additional 2206information is available when debugging. For example, the program below 2207contains the statement `|reduce(pp,3,exp,-2,4)|' when it implements 2208the production just mentioned. 2209 2210Before calling |reduce|, the program should have appended the tokens of 2211the new translation to the |tok_mem| array. We commonly want to append 2212copies of several existing translations, and macros are defined to 2213simplify these common cases. For example, \\{app2}|(pp)| will append the 2214translations of two consecutive scraps, |pp->trans| and |(pp+1)->trans|, to 2215the current token list. If the entire new translation is formed in this 2216way, we write `|squash(j,k,c,d,n)|' instead of `|reduce(j,k,c,d,n)|'. For 2217example, `|squash(pp,3,exp,-2,3)|' is an abbreviation for `\\{app3}|(pp); 2218reduce(pp,3,exp,-2,3)|'. 2219 2220A couple more words of explanation: 2221Both |big_app| and |app| append a token (while |big_app1| to |big_app4| 2222append the specified number of scrap translations) to the current token list. 2223The difference between |big_app| and |app| is simply that |big_app| 2224checks whether there can be a conflict between math and non-math 2225tokens, and intercalates a `\.{\$}' token if necessary. When in 2226doubt what to use, use |big_app|. 2227 2228The |mathness| is an attribute of scraps that says whether they are 2229to be printed in a math mode context or not. It is separate from the 2230``part of speech'' (the |cat|) because to make each |cat| have 2231a fixed |mathness| (as in the original \.{WEAVE}) would multiply the 2232number of necessary production rules. 2233 2234The low two bits (i.e. |mathness % 4|) control the left boundary. 2235(We need two bits because we allow cases |yes_math|, |no_math| and 2236|maybe_math|, which can go either way.) 2237The next two bits (i.e. |mathness / 4|) control the right boundary. 2238If we combine two scraps and the right boundary of the first has 2239a different mathness from the left boundary of the second, we 2240insert a \.{\$} in between. Similarly, if at printing time some 2241irreducible scrap has a |yes_math| boundary the scrap gets preceded 2242or followed by a \.{\$}. The left boundary is |maybe_math| if and 2243only if the right boundary is. 2244 2245The code below is an exact translation of the production rules into 2246\CEE/, using such macros, and the reader should have no difficulty 2247understanding the format by comparing the code with the symbolic 2248productions as they were listed earlier. 2249 2250@d no_math 2 /* should be in horizontal mode */ 2251@d yes_math 1 /* should be in math mode */ 2252@d maybe_math 0 /* works in either horizontal or math mode */ 2253@d big_app2(a) big_app1(a);big_app1(a+1) 2254@d big_app3(a) big_app2(a);big_app1(a+2) 2255@d big_app4(a) big_app3(a);big_app1(a+3) 2256@d app(a) *(tok_ptr++)=a 2257@d app1(a) *(tok_ptr++)=tok_flag+(int)((a)->trans-tok_start) 2258 2259@<Global...@>= 2260int cur_mathness, init_mathness; 2261 2262@ @c 2263void 2264app_str(s) 2265char *s; 2266{ 2267 while (*s) app_tok(*(s++)); 2268} 2269 2270void 2271big_app(a) 2272token a; 2273{ 2274 if (a==' ' || (a>=big_cancel && a<=big_force)) /* non-math token */ { 2275 if (cur_mathness==maybe_math) init_mathness=no_math; 2276 else if (cur_mathness==yes_math) app_str("{}$"); 2277 cur_mathness=no_math; 2278 } 2279 else { 2280 if (cur_mathness==maybe_math) init_mathness=yes_math; 2281 else if (cur_mathness==no_math) app_str("${}"); 2282 cur_mathness=yes_math; 2283 } 2284 app(a); 2285} 2286 2287void 2288big_app1(a) 2289scrap_pointer a; 2290{ 2291 switch (a->mathness % 4) { /* left boundary */ 2292 case (no_math): 2293 if (cur_mathness==maybe_math) init_mathness=no_math; 2294 else if (cur_mathness==yes_math) app_str("{}$"); 2295 cur_mathness=a->mathness / 4; /* right boundary */ 2296 break; 2297 case (yes_math): 2298 if (cur_mathness==maybe_math) init_mathness=yes_math; 2299 else if (cur_mathness==no_math) app_str("${}"); 2300 cur_mathness=a->mathness / 4; /* right boundary */ 2301 break; 2302 case (maybe_math): /* no changes */ break; 2303 } 2304 app(tok_flag+(int)((a)->trans-tok_start)); 2305} 2306 2307@ Let us consider the big switch for productions now, before looking 2308at its context. We want to design the program so that this switch 2309works, so we might as well not keep ourselves in suspense about exactly what 2310code needs to be provided with a proper environment. 2311 2312@d cat1 (pp+1)->cat 2313@d cat2 (pp+2)->cat 2314@d cat3 (pp+3)->cat 2315@d lhs_not_simple (pp->cat!=public_like 2316 && pp->cat!=semi 2317 && pp->cat!=prelangle 2318 && pp->cat!=prerangle 2319 && pp->cat!=template_like 2320 && pp->cat!=new_like 2321 && pp->cat!=new_exp 2322 && pp->cat!=ftemplate 2323 && pp->cat!=raw_ubin 2324 && pp->cat!=const_like 2325 && pp->cat!=raw_int 2326 && pp->cat!=operator_like) 2327 /* not a production with left side length 1 */ 2328 2329@<Match a production at |pp|, or increase |pp| if there is no match@>= { 2330 if (cat1==end_arg && lhs_not_simple) 2331 if (pp->cat==begin_arg) squash(pp,2,exp,-2,124); 2332 else squash(pp,2,end_arg,-1,125); 2333 else if (cat1==insert) squash(pp,2,pp->cat,-2,0); 2334 else if (cat2==insert) squash(pp+1,2,(pp+1)->cat,-1,0); 2335 else if (cat3==insert) squash(pp+2,2,(pp+2)->cat,0,0); 2336 else 2337 switch (pp->cat) { 2338 case exp: @<Cases for |exp|@>; @+break; 2339 case lpar: @<Cases for |lpar|@>; @+break; 2340 case unop: @<Cases for |unop|@>; @+break; 2341 case ubinop: @<Cases for |ubinop|@>; @+break; 2342 case binop: @<Cases for |binop|@>; @+break; 2343 case cast: @<Cases for |cast|@>; @+break; 2344 case sizeof_like: @<Cases for |sizeof_like|@>; @+break; 2345 case int_like: @<Cases for |int_like|@>; @+break; 2346 case public_like: @<Cases for |public_like|@>; @+break; 2347 case colcol: @<Cases for |colcol|@>; @+break; 2348 case decl_head: @<Cases for |decl_head|@>; @+break; 2349 case decl: @<Cases for |decl|@>; @+break; 2350 case base: @<Cases for |base|@>; @+break; 2351 case struct_like: @<Cases for |struct_like|@>; @+break; 2352 case struct_head: @<Cases for |struct_head|@>; @+break; 2353 case fn_decl: @<Cases for |fn_decl|@>; @+break; 2354 case function: @<Cases for |function|@>; @+break; 2355 case lbrace: @<Cases for |lbrace|@>; @+break; 2356 case if_like: @<Cases for |if_like|@>; @+break; 2357 case else_like: @<Cases for |else_like|@>; @+break; 2358 case else_head: @<Cases for |else_head|@>; @+break; 2359 case if_clause: @<Cases for |if_clause|@>; @+break; 2360 case if_head: @<Cases for |if_head|@>; @+break; 2361 case do_like: @<Cases for |do_like|@>; @+break; 2362 case case_like: @<Cases for |case_like|@>; @+break; 2363 case catch_like: @<Cases for |catch_like|@>; @+break; 2364 case tag: @<Cases for |tag|@>; @+break; 2365 case stmt: @<Cases for |stmt|@>; @+break; 2366 case semi: @<Cases for |semi|@>; @+break; 2367 case lproc: @<Cases for |lproc|@>; @+break; 2368 case section_scrap: @<Cases for |section_scrap|@>; @+break; 2369 case insert: @<Cases for |insert|@>; @+break; 2370 case prelangle: @<Cases for |prelangle|@>; @+break; 2371 case prerangle: @<Cases for |prerangle|@>; @+break; 2372 case langle: @<Cases for |langle|@>; @+break; 2373 case template_like: @<Cases for |template_like|@>; @+break; 2374 case new_like: @<Cases for |new_like|@>; @+break; 2375 case new_exp: @<Cases for |new_exp|@>; @+break; 2376 case ftemplate: @<Cases for |ftemplate|@>; @+break; 2377 case for_like: @<Cases for |for_like|@>; @+break; 2378 case raw_ubin: @<Cases for |raw_ubin|@>; @+break; 2379 case const_like: @<Cases for |const_like|@>; @+break; 2380 case raw_int: @<Cases for |raw_int|@>; @+break; 2381 case operator_like: @<Cases for |operator_like|@>; @+break; 2382 case typedef_like: @<Cases for |typedef_like|@>; @+break; 2383 case delete_like: @<Cases for |delete_like|@>; @+break; 2384 case question: @<Cases for |question|@>; @+break; 2385 } 2386 pp++; /* if no match was found, we move to the right */ 2387} 2388 2389@ In \CEE/, new specifier names can be defined via |typedef|, and we want 2390to make the parser recognize future occurrences of the identifier thus 2391defined as specifiers. This is done by the procedure |make_reserved|, 2392which changes the |ilk| of the relevant identifier. 2393 2394We first need a procedure to recursively seek the first 2395identifier in a token list, because the identifier might 2396be enclosed in parentheses, as when one defines a function 2397returning a pointer. 2398 2399If the first identifier found is a keyword like `\&{case}', we 2400return the special value |case_found|; this prevents underlining 2401of identifiers in case labels. 2402 2403If the first identifier is the keyword `\&{operator}', we give up; 2404users who want to index definitions of overloaded \CPLUSPLUS/ operators 2405should say, for example, `\.{@@!@@\^\\\&\{operator\} \$+\{=\}\$@@>}' (or, 2406more properly alphebetized, 2407`\.{@@!@@:operator+=\}\{\\\&\{operator\} \$+\{=\}\$@@>}'). 2408 2409@d no_ident_found (token_pointer)0 /* distinct from any identifier token */ 2410@d case_found (token_pointer)1 /* likewise */ 2411@d operator_found (token_pointer)2 /* likewise */ 2412 2413@c 2414token_pointer 2415find_first_ident(p) 2416text_pointer p; 2417{ 2418 token_pointer q; /* token to be returned */ 2419 token_pointer j; /* token being looked at */ 2420 sixteen_bits r; /* remainder of token after the flag has been stripped off */ 2421 if (p>=text_ptr) confusion("find_first_ident"); 2422 for (j=*p; j<*(p+1); j++) { 2423 r=*j%id_flag; 2424 switch (*j/id_flag) { 2425 case 2: /* |res_flag| */ 2426 if (name_dir[r].ilk==case_like) return case_found; 2427 if (name_dir[r].ilk==operator_like) return operator_found; 2428 if (name_dir[r].ilk!=raw_int) break; 2429 case 1: return j; 2430 case 4: case 5: /* |tok_flag| or |inner_tok_flag| */ 2431 if ((q=find_first_ident(tok_start+r))!=no_ident_found) 2432 return q; 2433 default: ; /* char, |section_flag|, fall thru: move on to next token */ 2434 if (*j==inserted) return no_ident_found; /* ignore inserts */ 2435 else if (*j==qualifier) j++; /* bypass namespace qualifier */ 2436 } 2437 } 2438 return no_ident_found; 2439} 2440 2441@ The scraps currently being parsed must be inspected for any 2442occurrence of the identifier that we're making reserved; hence 2443the |for| loop below. 2444 2445@c 2446void 2447make_reserved(p) /* make the first identifier in |p->trans| like |int| */ 2448scrap_pointer p; 2449{ 2450 sixteen_bits tok_value; /* the name of this identifier, plus its flag*/ 2451 token_pointer tok_loc; /* pointer to |tok_value| */ 2452 if ((tok_loc=find_first_ident(p->trans))<=operator_found) 2453 return; /* this should not happen */ 2454 tok_value=*tok_loc; 2455 for (;p<=scrap_ptr; p==lo_ptr? p=hi_ptr: p++) { 2456 if (p->cat==exp) { 2457 if (**(p->trans)==tok_value) { 2458 p->cat=raw_int; 2459 **(p->trans)=tok_value%id_flag+res_flag; 2460 } 2461 } 2462 } 2463 (name_dir+(sixteen_bits)(tok_value%id_flag))->ilk=raw_int; 2464 *tok_loc=tok_value%id_flag+res_flag; 2465} 2466 2467@ In the following situations we want to mark the occurrence of 2468an identifier as a definition: when |make_reserved| is just about to be 2469used; after a specifier, as in |char **argv|; 2470before a colon, as in \\{found}:; and in the declaration of a function, 2471as in \\{main}()$\{\ldots;\}$. This is accomplished by the invocation 2472of |make_underlined| at appropriate times. Notice that, in the declaration 2473of a function, we find out that the identifier is being defined only after 2474it has been swallowed up by an |exp|. 2475 2476@c 2477void 2478make_underlined(p) 2479/* underline the entry for the first identifier in |p->trans| */ 2480scrap_pointer p; 2481{ 2482 token_pointer tok_loc; /* where the first identifier appears */ 2483 if ((tok_loc=find_first_ident(p->trans))<=operator_found) 2484 return; /* this happens, for example, in |case found:| */ 2485 xref_switch=def_flag; 2486 underline_xref(*tok_loc%id_flag+name_dir); 2487} 2488 2489@ We cannot use |new_xref| to underline a cross-reference at this point 2490because this would just make a new cross-reference at the end of the list. 2491We actually have to search through the list for the existing 2492cross-reference. 2493 2494@<Predecl...@>= 2495void underline_xref(); 2496 2497@ @c 2498void 2499underline_xref(p) 2500name_pointer p; 2501{ 2502 xref_pointer q=(xref_pointer)p->xref; /* pointer to cross-reference being examined */ 2503 xref_pointer r; /* temporary pointer for permuting cross-references */ 2504 sixteen_bits m; /* cross-reference value to be installed */ 2505 sixteen_bits n; /* cross-reference value being examined */ 2506 if (no_xref) return; 2507 m=section_count+xref_switch; 2508 while (q != xmem) { 2509 n=q->num; 2510 if (n==m) return; 2511 else if (m==n+def_flag) { 2512 q->num=m; return; 2513 } 2514 else if (n>=def_flag && n<m) break; 2515 q=q->xlink; 2516 } 2517 @<Insert new cross-reference at |q|, not at beginning of list@>; 2518} 2519 2520@ We get to this section only when the identifier is one letter long, 2521so it didn't get a non-underlined entry during phase one. But it may 2522have got some explicitly underlined entries in later sections, so in order 2523to preserve the numerical order of the entries in the index, we have 2524to insert the new cross-reference not at the beginning of the list 2525(namely, at |p->xref|), but rather right before |q|. 2526 2527@<Insert new cross-reference at |q|...@>= 2528 append_xref(0); /* this number doesn't matter */ 2529 xref_ptr->xlink=(xref_pointer)p->xref; r=xref_ptr; 2530 p->xref=(char*)xref_ptr; 2531 while (r->xlink!=q) {r->num=r->xlink->num; r=r->xlink;} 2532 r->num=m; /* everything from |q| on is left undisturbed */ 2533 2534@ Now comes the code that tries to match each production starting 2535with a particular type of scrap. Whenever a match is discovered, 2536the |squash| or |reduce| macro will cause the appropriate action 2537to be performed, followed by |goto found|. 2538 2539@<Cases for |exp|@>= 2540if (cat1==lbrace || cat1==int_like || cat1==decl) { 2541 make_underlined(pp); big_app1(pp); big_app(indent); app(indent); 2542 reduce(pp,1,fn_decl,0,1); 2543} 2544else if (cat1==unop) squash(pp,2,exp,-2,2); 2545else if ((cat1==binop || cat1==ubinop) && cat2==exp) 2546 squash(pp,3,exp,-2,3); 2547else if (cat1==comma && cat2==exp) { 2548 big_app2(pp); 2549 app(opt); app('9'); big_app1(pp+2); reduce(pp,3,exp,-2,4); 2550} 2551else if (cat1==lpar && cat2==rpar && cat3==colon) squash(pp+3,1,base,0,5); 2552else if (cat1==cast && cat2==colon) squash(pp+2,1,base,0,5); 2553else if (cat1==semi) squash(pp,2,stmt,-1,6); 2554else if (cat1==colon) { 2555 make_underlined (pp); squash(pp,2,tag,-1,7); 2556} 2557else if (cat1==rbrace) squash(pp,1,stmt,-1,8); 2558else if (cat1==lpar && cat2==rpar && (cat3==const_like || cat3==case_like)) { 2559 big_app1(pp+2); big_app(' '); big_app1(pp+3); reduce(pp+2,2,rpar,0,9); 2560} 2561else if (cat1==cast && (cat2==const_like || cat2==case_like)) { 2562 big_app1(pp+1); big_app(' '); big_app1(pp+2); reduce(pp+1,2,cast,0,9); 2563} 2564else if (cat1==exp || cat1==cast) squash(pp,2,exp,-2,10); 2565 2566@ @<Cases for |lpar|@>= 2567if ((cat1==exp||cat1==ubinop) && cat2==rpar) squash(pp,3,exp,-2,11); 2568else if (cat1==rpar) { 2569 big_app1(pp); app('\\'); app(','); big_app1(pp+1); 2570@.\\,@> 2571 reduce(pp,2,exp,-2,12); 2572} 2573else if ((cat1==decl_head || cat1==int_like || cat1==cast) && cat2==rpar) 2574 squash(pp,3,cast,-2,13); 2575else if ((cat1==decl_head || cat1==int_like || cat1==exp) && cat2==comma) { 2576 big_app3(pp); app(opt); app('9'); reduce(pp,3,lpar,-1,14); 2577} 2578else if (cat1==stmt || cat1==decl) { 2579 big_app2(pp); big_app(' '); reduce(pp,2,lpar,-1,15); 2580} 2581 2582@ @<Cases for |unop|@>= 2583if (cat1==exp || cat1==int_like) squash(pp,2,exp,-2,16); 2584 2585@ @<Cases for |ubinop|@>= 2586if (cat1==cast && cat2==rpar) { 2587 big_app('{'); big_app1(pp); big_app('}'); big_app1(pp+1); 2588 reduce(pp,2,cast,-2,17); 2589} 2590else if (cat1==exp || cat1==int_like) { 2591 big_app('{'); big_app1(pp); big_app('}'); big_app1(pp+1); 2592 reduce(pp,2,cat1,-2,18); 2593} 2594else if (cat1==binop) { 2595 big_app(math_rel); big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}'); 2596 big_app('}'); reduce(pp,2,binop,-1,19); 2597} 2598 2599@ @<Cases for |binop|@>= 2600if (cat1==binop) { 2601 big_app(math_rel); big_app('{'); big_app1(pp); big_app('}'); 2602 big_app('{'); big_app1(pp+1); big_app('}'); 2603 big_app('}'); reduce(pp,2,binop,-1,20); 2604} 2605 2606@ @<Cases for |cast|@>= 2607if (cat1==lpar) squash(pp,2,lpar,-1,21); 2608else if (cat1==exp) { 2609 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,21); 2610} 2611else if (cat1==semi) squash(pp,1,exp,-2,22); 2612 2613@ @<Cases for |sizeof_like|@>= 2614if (cat1==cast) squash(pp,2,exp,-2,23); 2615else if (cat1==exp) { 2616 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,24); 2617} 2618 2619@ @<Cases for |int_like|@>= 2620if (cat1==int_like|| cat1==struct_like) { 2621 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,cat1,-2,25); 2622} 2623else if (cat1==exp && (cat2==raw_int||cat2==struct_like)) 2624 squash(pp,2,int_like,-2,26); 2625else if (cat1==exp || cat1==ubinop || cat1==colon) { 2626 big_app1(pp); big_app(' '); reduce(pp,1,decl_head,-1,27); 2627} 2628else if (cat1==semi || cat1==binop) squash(pp,1,decl_head,0,28); 2629 2630@ @<Cases for |public_like|@>= 2631if (cat1==colon) squash(pp,2,tag,-1,29); 2632else squash(pp,1,int_like,-2,30); 2633 2634@ @<Cases for |colcol|@>= 2635if (cat1==exp||cat1==int_like) { 2636 app(qualifier); squash(pp,2,cat1,-2,31); 2637}@+else if (cat1==colcol) squash(pp,2,colcol,-1,32); 2638 2639@ @<Cases for |decl_head|@>= 2640if (cat1==comma) { 2641 big_app2(pp); big_app(' '); reduce(pp,2,decl_head,-1,33); 2642} 2643else if (cat1==ubinop) { 2644 big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}'); 2645 reduce(pp,2,decl_head,-1,34); 2646} 2647else if (cat1==exp && cat2!=lpar && cat2!=exp && cat2!=cast) { 2648 make_underlined(pp+1); squash(pp,2,decl_head,-1,35); 2649} 2650else if ((cat1==binop||cat1==colon) && cat2==exp && (cat3==comma || 2651 cat3==semi || cat3==rpar)) 2652 squash(pp,3,decl_head,-1,36); 2653else if (cat1==cast) squash(pp,2,decl_head,-1,37); 2654else if (cat1==lbrace || cat1==int_like || cat1==decl) { 2655 big_app1(pp); big_app(indent); app(indent); reduce(pp,1,fn_decl,0,38); 2656} 2657else if (cat1==semi) squash(pp,2,decl,-1,39); 2658 2659@ @<Cases for |decl|@>= 2660if (cat1==decl) { 2661 big_app1(pp); big_app(force); big_app1(pp+1); 2662 reduce(pp,2,decl,-1,40); 2663} 2664else if (cat1==stmt || cat1==function) { 2665 big_app1(pp); big_app(big_force); 2666 big_app1(pp+1); reduce(pp,2,cat1,-1,41); 2667} 2668 2669@ @<Cases for |base|@>= 2670if (cat1==int_like || cat1==exp) { 2671 if (cat2==comma) { 2672 big_app1(pp); big_app(' '); big_app2(pp+1); 2673 app(opt); app('9'); reduce(pp,3,base,0,42); 2674 } 2675 else if (cat2==lbrace) { 2676 big_app1(pp); big_app(' '); big_app1(pp+1); big_app(' '); big_app1(pp+2); 2677 reduce(pp,3,lbrace,-2,43); 2678 } 2679} 2680 2681@ @<Cases for |struct_like|@>= 2682if (cat1==lbrace) { 2683 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,struct_head,0,44); 2684} 2685else if (cat1==exp||cat1==int_like) { 2686 if (cat2==lbrace || cat2==semi) { 2687 make_underlined(pp+1); make_reserved(pp+1); 2688 big_app1(pp); big_app(' '); big_app1(pp+1); 2689 if (cat2==semi) reduce(pp,2,decl_head,0,45); 2690 else { 2691 big_app(' '); big_app1(pp+2);reduce(pp,3,struct_head,0,46); 2692 } 2693 } 2694 else if (cat2==colon) squash(pp+2,1,base,2,47); 2695 else if (cat2!=base) { 2696 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,int_like,-2,48); 2697 } 2698} 2699 2700@ @<Cases for |struct_head|@>= 2701if ((cat1==decl || cat1==stmt || cat1==function) && cat2==rbrace) { 2702 big_app1(pp); big_app(indent); big_app(force); big_app1(pp+1); 2703 big_app(outdent); big_app(force); big_app1(pp+2); 2704 reduce(pp,3,int_like,-2,49); 2705} 2706else if (cat1==rbrace) { 2707 big_app1(pp); app_str("\\,"); big_app1(pp+1); 2708@.\\,@> 2709 reduce(pp,2,int_like,-2,50); 2710} 2711 2712@ @<Cases for |fn_decl|@>= 2713if (cat1==decl) { 2714 big_app1(pp); big_app(force); big_app1(pp+1); reduce(pp,2,fn_decl,0,51); 2715} 2716else if (cat1==stmt) { 2717 big_app1(pp); app(outdent); app(outdent); big_app(force); 2718 big_app1(pp+1); reduce(pp,2,function,-1,52); 2719} 2720 2721@ @<Cases for |function|@>= 2722if (cat1==function || cat1==decl || cat1==stmt) { 2723 big_app1(pp); big_app(big_force); big_app1(pp+1); reduce(pp,2,cat1,-1,53); 2724} 2725 2726@ @<Cases for |lbrace|@>= 2727if (cat1==rbrace) { 2728 big_app1(pp); app('\\'); app(','); big_app1(pp+1); 2729@.\\,@> 2730 reduce(pp,2,stmt,-1,54); 2731} 2732else if ((cat1==stmt||cat1==decl||cat1==function) && cat2==rbrace) { 2733 big_app(force); big_app1(pp); big_app(indent); big_app(force); 2734 big_app1(pp+1); big_app(force); big_app(backup); big_app1(pp+2); 2735 big_app(outdent); big_app(force); reduce(pp,3,stmt,-1,55); 2736} 2737else if (cat1==exp) { 2738 if (cat2==rbrace) squash(pp,3,exp,-2,56); 2739 else if (cat2==comma && cat3==rbrace) squash(pp,4,exp,-2,56); 2740} 2741 2742@ @<Cases for |if_like|@>= 2743if (cat1==exp) { 2744 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,if_clause,0,57); 2745} 2746 2747@ @<Cases for |else_like|@>= 2748if (cat1==colon) squash(pp+1,1,base,1,58); 2749else if (cat1==lbrace) squash(pp,1,else_head,0,59); 2750else if (cat1==stmt) { 2751 big_app(force); big_app1(pp); big_app(indent); big_app(break_space); 2752 big_app1(pp+1); big_app(outdent); big_app(force); 2753 reduce(pp,2,stmt,-1,60); 2754} 2755 2756@ @<Cases for |else_head|@>= 2757if (cat1==stmt || cat1==exp) { 2758 big_app(force); big_app1(pp); big_app(break_space); app(noop); 2759 big_app(cancel); big_app1(pp+1); big_app(force); 2760 reduce(pp,2,stmt,-1,61); 2761} 2762 2763@ @<Cases for |if_clause|@>= 2764if (cat1==lbrace) squash(pp,1,if_head,0,62); 2765else if (cat1==stmt) { 2766 if (cat2==else_like) { 2767 big_app(force); big_app1(pp); big_app(indent); big_app(break_space); 2768 big_app1(pp+1); big_app(outdent); big_app(force); big_app1(pp+2); 2769 if (cat3==if_like) { 2770 big_app(' '); big_app1(pp+3); reduce(pp,4,if_like,0,63); 2771 }@+else reduce(pp,3,else_like,0,64); 2772 } 2773 else squash(pp,1,else_like,0,65); 2774} 2775 2776@ @<Cases for |if_head|@>= 2777if (cat1==stmt || cat1==exp) { 2778 if (cat2==else_like) { 2779 big_app(force); big_app1(pp); big_app(break_space); app(noop); 2780 big_app(cancel); big_app1(pp+1); big_app(force); big_app1(pp+2); 2781 if (cat3==if_like) { 2782 big_app(' '); big_app1(pp+3); reduce(pp,4,if_like,0,66); 2783 }@+else reduce(pp,3,else_like,0,67); 2784 } 2785 else squash(pp,1,else_head,0,68); 2786} 2787 2788@ @<Cases for |do_like|@>= 2789if (cat1==stmt && cat2==else_like && cat3==semi) { 2790 big_app1(pp); big_app(break_space); app(noop); big_app(cancel); 2791 big_app1(pp+1); big_app(cancel); app(noop); big_app(break_space); 2792 big_app2(pp+2); reduce(pp,4,stmt,-1,69); 2793} 2794 2795@ @<Cases for |case_like|@>= 2796if (cat1==semi) squash(pp,2,stmt,-1,70); 2797else if (cat1==colon) squash(pp,2,tag,-1,71); 2798else if (cat1==exp) { 2799 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,72); 2800} 2801 2802@ @<Cases for |catch_like|@>= 2803if (cat1==cast || cat1==exp) { 2804 big_app2(pp); big_app(indent); big_app(indent); reduce(pp,2,fn_decl,0,73); 2805} 2806 2807@ @<Cases for |tag|@>= 2808if (cat1==tag) { 2809 big_app1(pp); big_app(break_space); big_app1(pp+1); reduce(pp,2,tag,-1,74); 2810} 2811else if (cat1==stmt||cat1==decl||cat1==function) { 2812 big_app(force); big_app(backup); big_app1(pp); big_app(break_space); 2813 big_app1(pp+1); reduce(pp,2,cat1,-1,75); 2814} 2815 2816@ The user can decide at run-time whether short statements should be 2817grouped together on the same line. 2818 2819@d force_lines flags['f'] /* should each statement be on its own line? */ 2820@<Cases for |stmt|@>= 2821if (cat1==stmt||cat1==decl||cat1==function) { 2822 big_app1(pp); 2823 if (cat1==function) big_app(big_force); 2824 else if (cat1==decl) big_app(big_force); 2825 else if (force_lines) big_app(force); 2826 else big_app(break_space); 2827 big_app1(pp+1); reduce(pp,2,cat1,-1,76); 2828} 2829 2830@ @<Cases for |semi|@>= 2831big_app(' '); big_app1(pp); reduce(pp,1,stmt,-1,77); 2832 2833@ @<Cases for |lproc|@>= 2834if (cat1==define_like) make_underlined(pp+2); 2835if (cat1==else_like || cat1==if_like ||cat1==define_like) 2836 squash(pp,2,lproc,0,78); 2837else if (cat1==rproc) { 2838 app(inserted); big_app2(pp); reduce(pp,2,insert,-1,79); 2839} else if (cat1==exp || cat1==function) { 2840 if (cat2==rproc) { 2841 app(inserted); big_app1(pp); big_app(' '); big_app2(pp+1); 2842 reduce(pp,3,insert,-1,80); 2843 } 2844 else if (cat2==exp && cat3==rproc && cat1==exp) { 2845 app(inserted); big_app1(pp); big_app(' '); big_app1(pp+1); app_str(" \\5"); 2846@.\\5@> 2847 big_app2(pp+2); reduce(pp,4,insert,-1,80); 2848 } 2849} 2850 2851@ @<Cases for |section_scrap|@>= 2852if (cat1==semi) { 2853 big_app2(pp); big_app(force); reduce(pp,2,stmt,-2,81); 2854} 2855else squash(pp,1,exp,-2,82); 2856 2857@ @<Cases for |insert|@>= 2858if (cat1) 2859 squash(pp,2,cat1,0,83); 2860 2861@ @<Cases for |prelangle|@>= 2862init_mathness=cur_mathness=yes_math; 2863app('<'); reduce(pp,1,binop,-2,84); 2864 2865@ @<Cases for |prerangle|@>= 2866init_mathness=cur_mathness=yes_math; 2867app('>'); reduce(pp,1,binop,-2,85); 2868 2869@ @<Cases for |langle|@>= 2870if (cat1==prerangle) { 2871 big_app1(pp); app('\\'); app(','); big_app1(pp+1); 2872@.\\,@> 2873 reduce(pp,2,cast,-1,86); 2874} 2875else if (cat1==decl_head || cat1==int_like || cat1==exp) { 2876 if (cat2==prerangle) squash(pp,3,cast,-1,87); 2877 else if (cat2==comma) { 2878 big_app3(pp); app(opt); app('9'); reduce(pp,3,langle,0,88); 2879 } 2880} 2881 2882@ @<Cases for |template_like|@>= 2883if (cat1==exp && cat2==prelangle) squash(pp+2,1,langle,2,89); 2884else if (cat1==exp || cat1==raw_int) { 2885 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,cat1,-2,90); 2886}@+ else squash(pp,1,raw_int,0,91); 2887 2888@ @<Cases for |new_like|@>= 2889if (cat1==lpar && cat2==exp && cat3==rpar) squash(pp,4,new_like,0,92); 2890else if (cat1==cast) { 2891 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,93); 2892} 2893else if (cat1!=lpar) squash(pp,1,new_exp,0,94); 2894 2895@ @<Cases for |new_exp|@>= 2896if (cat1==int_like || cat1==const_like) { 2897 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,new_exp,0,95); 2898} 2899else if (cat1==struct_like && (cat2==exp || cat2==int_like)) { 2900 big_app1(pp); big_app(' '); big_app1(pp+1); big_app(' '); 2901 big_app1(pp+2); reduce(pp,3,new_exp,0,96); 2902} 2903else if (cat1==raw_ubin) { 2904 big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}'); 2905 reduce(pp,2,new_exp,0,97); 2906} 2907else if (cat1==lpar) squash(pp,1,exp,-2,98); 2908else if (cat1==exp) { 2909 big_app1(pp); big_app(' '); reduce(pp,1,exp,-2,98); 2910} 2911else if (cat1!=raw_int && cat1!=struct_like && cat1!=colcol) 2912 squash(pp,1,exp,-2,99); 2913 2914@ @<Cases for |ftemplate|@>= 2915if (cat1==prelangle) squash(pp+1,1,langle,1,100); 2916else squash(pp,1,exp,-2,101); 2917 2918@ @<Cases for |for_like|@>= 2919if (cat1==exp) { 2920 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,else_like,-2,102); 2921} 2922 2923@ @<Cases for |raw_ubin|@>= 2924if (cat1==const_like) { 2925 big_app2(pp); app_str("\\ "); reduce(pp,2,raw_ubin,0,103); 2926@.\\\ @> 2927} else squash(pp,1,ubinop,-2,104); 2928 2929@ @<Cases for |const_like|@>= 2930squash(pp,1,int_like,-2,105); 2931 2932@ @<Cases for |raw_int|@>= 2933if (cat1==prelangle) squash(pp+1,1,langle,1,106); 2934else if (cat1==colcol) squash(pp,2,colcol,-1,107); 2935else if (cat1==cast) squash(pp,2,raw_int,0,108); 2936else if (cat1==lpar) squash(pp,1,exp,-2,109); 2937else if (cat1!=langle) squash(pp,1,int_like,-3,110); 2938 2939@ @<Cases for |operator_like|@>= 2940if (cat1==binop || cat1==unop || cat1==ubinop) { 2941 if (cat2==binop) break; 2942 big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}'); 2943 reduce(pp,2,exp,-2,111); 2944} 2945else if (cat1==new_like || cat1==delete_like) { 2946 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,112); 2947} 2948else if (cat1==comma) squash(pp,2,exp,-2,113); 2949else if (cat1!=raw_ubin) squash(pp,1,new_exp,0,114); 2950 2951@ @<Cases for |typedef_like|@>= 2952if ((cat1==int_like || cat1==cast) && (cat2==comma || cat2==semi)) 2953 squash(pp+1,1,exp,-1,115); 2954else if (cat1==int_like) { 2955 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,typedef_like,0,116); 2956} 2957else if (cat1==exp && cat2!=lpar && cat2!=exp && cat2!=cast) { 2958 make_underlined(pp+1); make_reserved(pp+1); 2959 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,typedef_like,0,117); 2960} 2961else if (cat1==comma) { 2962 big_app2(pp); big_app(' '); reduce(pp,2,typedef_like,0,118); 2963} 2964else if (cat1==semi) squash(pp,2,decl,-1,119); 2965else if (cat1==ubinop && (cat2==ubinop || cat2==cast)) { 2966 big_app('{'); big_app1(pp+1); big_app('}'); big_app1(pp+2); 2967 reduce(pp+1,2,cat2,0,120); 2968} 2969 2970@ @<Cases for |delete_like|@>= 2971if (cat1==lpar && cat2==rpar) { 2972 big_app2(pp); app('\\'); app(','); big_app1(pp+2); 2973@.\\,@> 2974 reduce(pp,3,delete_like,0,121); 2975} 2976else if (cat1==exp) { 2977 big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,122); 2978} 2979 2980@ @<Cases for |question|@>= 2981if (cat1==exp && (cat2==colon || cat2==base)) { 2982 (pp+2)->mathness=5*yes_math; /* this colon should be in math mode */ 2983 squash(pp,3,binop,-2,123); 2984} 2985 2986@ Now here's the |reduce| procedure used in our code for productions. 2987 2988The `|freeze_text|' macro is used to give official status to a token list. 2989Before saying |freeze_text|, items are appended to the current token list, 2990and we know that the eventual number of this token list will be the current 2991value of |text_ptr|. But no list of that number really exists as yet, 2992because no ending point for the current list has been 2993stored in the |tok_start| array. After saying |freeze_text|, the 2994old current token list becomes legitimate, and its number is the current 2995value of |text_ptr-1| since |text_ptr| has been increased. The new 2996current token list is empty and ready to be appended to. 2997Note that |freeze_text| does not check to see that |text_ptr| hasn't gotten 2998too large, since it is assumed that this test was done beforehand. 2999 3000@d freeze_text *(++text_ptr)=tok_ptr 3001 3002@c 3003void 3004reduce(j,k,c,d,n) 3005scrap_pointer j; 3006eight_bits c; 3007short k, d, n; 3008{ 3009 scrap_pointer i, i1; /* pointers into scrap memory */ 3010 j->cat=c; j->trans=text_ptr; 3011 j->mathness=4*cur_mathness+init_mathness; 3012 freeze_text; 3013 if (k>1) { 3014 for (i=j+k, i1=j+1; i<=lo_ptr; i++, i1++) { 3015 i1->cat=i->cat; i1->trans=i->trans; 3016 i1->mathness=i->mathness; 3017 } 3018 lo_ptr=lo_ptr-k+1; 3019 } 3020 pp=(pp+d<scrap_base? scrap_base: pp+d); 3021 @<Print a snapshot of the scrap list if debugging @>; 3022 pp--; /* we next say |pp++| */ 3023} 3024 3025@ Here's the |squash| procedure, which 3026takes advantage of the simplification that occurs when |k==1|. 3027 3028@c 3029void 3030squash(j,k,c,d,n) 3031scrap_pointer j; 3032eight_bits c; 3033short k, d, n; 3034{ 3035 scrap_pointer i; /* pointers into scrap memory */ 3036 if (k==1) { 3037 j->cat=c; pp=(pp+d<scrap_base? scrap_base: pp+d); 3038 @<Print a snapshot...@>; 3039 pp--; /* we next say |pp++| */ 3040 return; 3041 } 3042 for (i=j; i<j+k; i++) big_app1(i); 3043 reduce(j,k,c,d,n); 3044} 3045 3046@ And here now is the code that applies productions as long as possible. 3047Before applying the production mechanism, we must make sure 3048it has good input (at least four scraps, the length of the lhs of the 3049longest rules), and that there is enough room in the memory arrays 3050to hold the appended tokens and texts. Here we use a very 3051conservative test; it's more important to make sure the program 3052will still work if we change the production rules (within reason) 3053than to squeeze the last bit of space from the memory arrays. 3054 3055@d safe_tok_incr 20 3056@d safe_text_incr 10 3057@d safe_scrap_incr 10 3058 3059@<Reduce the scraps using the productions until no more rules apply@>= 3060while (1) { 3061 @<Make sure the entries |pp| through |pp+3| of |cat| are defined@>; 3062 if (tok_ptr+safe_tok_incr>tok_mem_end) { 3063 if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr; 3064 overflow("token"); 3065 } 3066 if (text_ptr+safe_text_incr>tok_start_end) { 3067 if (text_ptr>max_text_ptr) max_text_ptr=text_ptr; 3068 overflow("text"); 3069 } 3070 if (pp>lo_ptr) break; 3071 init_mathness=cur_mathness=maybe_math; 3072 @<Match a production...@>; 3073} 3074 3075@ If we get to the end of the scrap list, category codes equal to zero are 3076stored, since zero does not match anything in a production. 3077 3078@<Make sure the entries...@>= 3079if (lo_ptr<pp+3) { 3080 while (hi_ptr<=scrap_ptr && lo_ptr!=pp+3) { 3081 (++lo_ptr)->cat=hi_ptr->cat; lo_ptr->mathness=(hi_ptr)->mathness; 3082 lo_ptr->trans=(hi_ptr++)->trans; 3083 } 3084 for (i=lo_ptr+1;i<=pp+3;i++) i->cat=0; 3085} 3086 3087@ If \.{CWEAVE} is being run in debugging mode, the production numbers and 3088current stack categories will be printed out when |tracing| is set to 2; 3089a sequence of two or more irreducible scraps will be printed out when 3090|tracing| is set to 1. 3091 3092@<Global...@>= 3093int tracing; /* can be used to show parsing details */ 3094 3095@ @<Print a snapsh...@>= 3096{ scrap_pointer k; /* pointer into |scrap_info| */ 3097 if (tracing==2) { 3098 printf("\n%d:",n); 3099 for (k=scrap_base; k<=lo_ptr; k++) { 3100 if (k==pp) putxchar('*'); else putxchar(' '); 3101 if (k->mathness %4 == yes_math) putchar('+'); 3102 else if (k->mathness %4 == no_math) putchar('-'); 3103 print_cat(k->cat); 3104 if (k->mathness /4 == yes_math) putchar('+'); 3105 else if (k->mathness /4 == no_math) putchar('-'); 3106 } 3107 if (hi_ptr<=scrap_ptr) printf("..."); /* indicate that more is coming */ 3108 } 3109} 3110 3111@ The |translate| function assumes that scraps have been stored in 3112positions |scrap_base| through |scrap_ptr| of |cat| and |trans|. It 3113applies productions as much as 3114possible. The result is a token list containing the translation of 3115the given sequence of scraps. 3116 3117After calling |translate|, we will have |text_ptr+3<=max_texts| and 3118|tok_ptr+6<=max_toks|, so it will be possible to create up to three token 3119lists with up to six tokens without checking for overflow. Before calling 3120|translate|, we should have |text_ptr<max_texts| and |scrap_ptr<max_scraps|, 3121since |translate| might add a new text and a new scrap before it checks 3122for overflow. 3123 3124@c 3125text_pointer 3126translate() /* converts a sequence of scraps */ 3127{ 3128 scrap_pointer i, /* index into |cat| */ 3129 j; /* runs through final scraps */ 3130 pp=scrap_base; lo_ptr=pp-1; hi_ptr=pp; 3131 @<If tracing, print an indication of where we are@>; 3132 @<Reduce the scraps...@>; 3133 @<Combine the irreducible scraps that remain@>; 3134} 3135 3136@ If the initial sequence of scraps does not reduce to a single scrap, 3137we concatenate the translations of all remaining scraps, separated by 3138blank spaces, with dollar signs surrounding the translations of scraps 3139where appropriate. 3140 3141@<Combine the irreducible...@>= { 3142 @<If semi-tracing, show the irreducible scraps@>; 3143 for (j=scrap_base; j<=lo_ptr; j++) { 3144 if (j!=scrap_base) app(' '); 3145 if (j->mathness % 4 == yes_math) app('$'); 3146 app1(j); 3147 if (j->mathness / 4 == yes_math) app('$'); 3148 if (tok_ptr+6>tok_mem_end) overflow("token"); 3149 } 3150 freeze_text; return(text_ptr-1); 3151} 3152 3153@ @<If semi-tracing, show the irreducible scraps@>= 3154if (lo_ptr>scrap_base && tracing==1) { 3155 printf("\nIrreducible scrap sequence in section %d:",section_count); 3156@.Irreducible scrap sequence...@> 3157 mark_harmless; 3158 for (j=scrap_base; j<=lo_ptr; j++) { 3159 printf(" "); print_cat(j->cat); 3160 } 3161} 3162 3163@ @<If tracing,...@>= 3164if (tracing==2) { 3165 printf("\nTracing after l. %d:\n",cur_line); mark_harmless; 3166@.Tracing after...@> 3167 if (loc>buffer+50) { 3168 printf("..."); 3169 term_write(loc-51,51); 3170 } 3171 else term_write(buffer,loc-buffer); 3172} 3173 3174@* Initializing the scraps. 3175If we are going to use the powerful production mechanism just developed, we 3176must get the scraps set up in the first place, given a \CEE/ text. A table 3177of the initial scraps corresponding to \CEE/ tokens appeared above in the 3178section on parsing; our goal now is to implement that table. We shall do this 3179by implementing a subroutine called |C_parse| that is analogous to the 3180|C_xref| routine used during phase one. 3181 3182Like |C_xref|, the |C_parse| procedure starts with the current 3183value of |next_control| and it uses the operation |next_control=get_next()| 3184repeatedly to read \CEE/ text until encountering the next `\.{\v}' or 3185`\.{/*}', or until |next_control>=format_code|. The scraps corresponding to 3186what it reads are appended into the |cat| and |trans| arrays, and |scrap_ptr| 3187is advanced. 3188 3189@c 3190void 3191C_parse(spec_ctrl) /* creates scraps from \CEE/ tokens */ 3192 eight_bits spec_ctrl; 3193{ 3194 int count; /* characters remaining before string break */ 3195 while (next_control<format_code || next_control==spec_ctrl) { 3196 @<Append the scrap appropriate to |next_control|@>; 3197 next_control=get_next(); 3198 if (next_control=='|' || next_control==begin_comment || 3199 next_control==begin_short_comment) return; 3200 } 3201} 3202 3203@ The following macro is used to append a scrap whose tokens have just 3204been appended: 3205 3206@d app_scrap(c,b) { 3207 (++scrap_ptr)->cat=(c); scrap_ptr->trans=text_ptr; 3208 scrap_ptr->mathness=5*(b); /* no no, yes yes, or maybe maybe */ 3209 freeze_text; 3210} 3211 3212@ @<Append the scr...@>= 3213@<Make sure that there is room for the new scraps, tokens, and texts@>; 3214switch (next_control) { 3215 case section_name: 3216 app(section_flag+(int)(cur_section-name_dir)); 3217 app_scrap(section_scrap,maybe_math); 3218 app_scrap(exp,yes_math);@+break; 3219 case string: case constant: case verbatim: @<Append a string or constant@>; 3220 @+break; 3221 case identifier: app_cur_id(1);@+break; 3222 case TeX_string: @<Append a \TEX/ string, without forming a scrap@>;@+break; 3223 case '/': case '.': 3224 app(next_control); app_scrap(binop,yes_math);@+break; 3225 case '<': app_str("\\langle");@+app_scrap(prelangle,yes_math);@+break; 3226@.\\langle@> 3227 case '>': app_str("\\rangle");@+app_scrap(prerangle,yes_math);@+break; 3228@.\\rangle@> 3229 case '=': app_str("\\K"); app_scrap(binop,yes_math);@+break; 3230@.\\K@> 3231 case '|': app_str("\\OR"); app_scrap(binop,yes_math);@+break; 3232@.\\OR@> 3233 case '^': app_str("\\XOR"); app_scrap(binop,yes_math);@+break; 3234@.\\XOR@> 3235 case '%': app_str("\\MOD"); app_scrap(binop,yes_math);@+break; 3236@.\\MOD@> 3237 case '!': app_str("\\R"); app_scrap(unop,yes_math);@+break; 3238@.\\R@> 3239 case '~': app_str("\\CM"); app_scrap(unop,yes_math);@+break; 3240@.\\CM@> 3241 case '+': case '-': app(next_control); app_scrap(ubinop,yes_math);@+break; 3242 case '*': app(next_control); app_scrap(raw_ubin,yes_math);@+break; 3243 case '&': app_str("\\AND"); app_scrap(raw_ubin,yes_math);@+break; 3244@.\\AND@> 3245 case '?': app_str("\\?"); app_scrap(question,yes_math);@+break; 3246@.\\?@> 3247 case '#': app_str("\\#"); app_scrap(ubinop,yes_math);@+break; 3248@.\\\#@> 3249 case ignore: case xref_roman: case xref_wildcard: 3250 case xref_typewriter: case noop:@+break; 3251 case '(': case '[': app(next_control); app_scrap(lpar,maybe_math);@+break; 3252 case ')': case ']': app(next_control); app_scrap(rpar,maybe_math);@+break; 3253 case '{': app_str("\\{"@q}@>); app_scrap(lbrace,yes_math);@+break; 3254@.\\\{@>@q}@> 3255 case '}': app_str(@q{@>"\\}"); app_scrap(rbrace,yes_math);@+break; 3256@q{@>@.\\\}@> 3257 case ',': app(','); app_scrap(comma,yes_math);@+break; 3258 case ';': app(';'); app_scrap(semi,maybe_math);@+break; 3259 case ':': app(':'); app_scrap(colon,no_math);@+break;@/ 3260 @t\4@> @<Cases involving nonstandard characters@>@; 3261 case thin_space: app_str("\\,"); app_scrap(insert,maybe_math);@+break; 3262@.\\,@> 3263 case math_break: app(opt); app_str("0"); 3264 app_scrap(insert,maybe_math);@+break; 3265 case line_break: app(force); app_scrap(insert,no_math);@+break; 3266 case left_preproc: app(force); app(preproc_line); 3267 app_str("\\#"); app_scrap(lproc,no_math);@+break; 3268@.\\\#@> 3269 case right_preproc: app(force); app_scrap(rproc,no_math);@+break; 3270 case big_line_break: app(big_force); app_scrap(insert,no_math);@+break; 3271 case no_line_break: app(big_cancel); app(noop); app(break_space); 3272 app(noop); app(big_cancel); 3273 app_scrap(insert,no_math);@+break; 3274 case pseudo_semi: app_scrap(semi,maybe_math);@+break; 3275 case macro_arg_open: app_scrap(begin_arg,maybe_math);@+break; 3276 case macro_arg_close: app_scrap(end_arg,maybe_math);@+break; 3277 case join: app_str("\\J"); app_scrap(insert,no_math);@+break; 3278@.\\J@> 3279 case output_defs_code: app(force); app_str("\\ATH"); app(force); 3280 app_scrap(insert,no_math);@+break; 3281@.\\ATH@> 3282 default: app(inserted); app(next_control); 3283 app_scrap(insert,maybe_math);@+break; 3284} 3285 3286@ @<Make sure that there is room for the new...@>= 3287if (scrap_ptr+safe_scrap_incr>scrap_info_end || 3288 tok_ptr+safe_tok_incr>tok_mem_end @| || 3289 text_ptr+safe_text_incr>tok_start_end) { 3290 if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr; 3291 if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr; 3292 if (text_ptr>max_text_ptr) max_text_ptr=text_ptr; 3293 overflow("scrap/token/text"); 3294} 3295 3296@ Some nonstandard characters may have entered \.{CWEAVE} by means of 3297standard ones. They are converted to \TEX/ control sequences so that it is 3298possible to keep \.{CWEAVE} from outputting unusual |char| codes. 3299 3300@<Cases involving nonstandard...@>= 3301case not_eq: app_str("\\I");@+app_scrap(binop,yes_math);@+break; 3302@.\\I@> 3303case lt_eq: app_str("\\Z");@+app_scrap(binop,yes_math);@+break; 3304@.\\Z@> 3305case gt_eq: app_str("\\G");@+app_scrap(binop,yes_math);@+break; 3306@.\\G@> 3307case eq_eq: app_str("\\E");@+app_scrap(binop,yes_math);@+break; 3308@.\\E@> 3309case and_and: app_str("\\W");@+app_scrap(binop,yes_math);@+break; 3310@.\\W@> 3311case or_or: app_str("\\V");@+app_scrap(binop,yes_math);@+break; 3312@.\\V@> 3313case plus_plus: app_str("\\PP");@+app_scrap(unop,yes_math);@+break; 3314@.\\PP@> 3315case minus_minus: app_str("\\MM");@+app_scrap(unop,yes_math);@+break; 3316@.\\MM@> 3317case minus_gt: app_str("\\MG");@+app_scrap(binop,yes_math);@+break; 3318@.\\MG@> 3319case gt_gt: app_str("\\GG");@+app_scrap(binop,yes_math);@+break; 3320@.\\GG@> 3321case lt_lt: app_str("\\LL");@+app_scrap(binop,yes_math);@+break; 3322@.\\LL@> 3323case dot_dot_dot: app_str("\\,\\ldots\\,");@+app_scrap(raw_int,yes_math); 3324 @+break; 3325@.\\,@> 3326@.\\ldots@> 3327case colon_colon: app_str("\\DC");@+app_scrap(colcol,maybe_math);@+break; 3328@.\\DC@> 3329case period_ast: app_str("\\PA");@+app_scrap(binop,yes_math);@+break; 3330@.\\PA@> 3331case minus_gt_ast: app_str("\\MGA");@+app_scrap(binop,yes_math);@+break; 3332@.\\MGA@> 3333 3334@ The following code must use |app_tok| instead of |app| in order to 3335protect against overflow. Note that |tok_ptr+1<=max_toks| after |app_tok| 3336has been used, so another |app| is legitimate before testing again. 3337 3338Many of the special characters in a string must be prefixed by `\.\\' so that 3339\TEX/ will print them properly. 3340@^special string characters@> 3341 3342@<Append a string or...@>= 3343count= -1; 3344if (next_control==constant) app_str("\\T{"@q}@>); 3345@.\\T@> 3346else if (next_control==string) { 3347 count=20; app_str("\\.{"@q}@>); 3348} 3349@.\\.@> 3350else app_str("\\vb{"@q}@>); 3351@.\\vb@> 3352while (id_first<id_loc) { 3353 if (count==0) { /* insert a discretionary break in a long string */ 3354 app_str(@q(@>@q{@>"}\\)\\.{"@q}@>); count=20; 3355@q(@>@.\\)@> 3356 } 3357@^high-bit character handling@> 3358 if((eight_bits)(*id_first)>0177) { 3359 app_tok(quoted_char); 3360 app_tok((eight_bits)(*id_first++)); 3361 } 3362 else { 3363 switch (*id_first) { 3364 case ' ':case '\\':case '#':case '%':case '$':case '^': 3365 case '{': case '}': case '~': case '&': case '_': app('\\'); break; 3366@.\\\ @> 3367@.\\\\@> 3368@.\\\#@> 3369@.\\\%@> 3370@.\\\$@> 3371@.\\\^@> 3372@.\\\{@>@q}@> 3373@q{@>@.\\\}@> 3374@.\\\~@> 3375@.\\\&@> 3376@.\\\_@> 3377 case '@@': if (*(id_first+1)=='@@') id_first++; 3378 else err_print("! Double @@ should be used in strings"); 3379@.Double @@ should be used...@> 3380 } 3381 app_tok(*id_first++); 3382 } 3383 count--; 3384} 3385app(@q{@>'}'); 3386app_scrap(exp,maybe_math); 3387 3388@ We do not make the \TEX/ string into a scrap, because there is no 3389telling what the user will be putting into it; instead we leave it 3390open, to be picked up by the next scrap. If it comes at the end of a 3391section, it will be made into a scrap when |finish_C| is called. 3392 3393There's a known bug here, in cases where an adjacent scrap is 3394|prelangle| or |prerangle|. Then the \TEX/ string can disappear 3395when the \.{\\langle} or \.{\\rangle} becomes \.{<} or \.{>}. 3396For example, if the user writes \.{\v x<@@ty@@>\v}, the \TEX/ string 3397\.{\\hbox\{y\}} eventually becomes part of an |insert| scrap, which is combined 3398with a |prelangle| scrap and eventually lost. The best way to work around 3399this bug is probably to enclose the \.{@@t...@@>} in \.{@@[...@@]} so that 3400the \TEX/ string is treated as an expression. 3401@^bug, known@> 3402 3403@<Append a \TEX/ string, without forming a scrap@>= 3404app_str("\\hbox{"@q}@>); 3405@^high-bit character handling@> 3406while (id_first<id_loc) 3407 if((eight_bits)(*id_first)>0177) { 3408 app_tok(quoted_char); 3409 app_tok((eight_bits)(*id_first++)); 3410 } 3411 else { 3412 if (*id_first=='@@') id_first++; 3413 app_tok(*id_first++); 3414 } 3415app(@q{@>'}'); 3416 3417@ The function |app_cur_id| appends the current identifier to the 3418token list; it also builds a new scrap if |scrapping==1|. 3419 3420@<Predec...@>= 3421void app_cur_id(); 3422 3423@ @c 3424void 3425app_cur_id(scrapping) 3426boolean scrapping; /* are we making this into a scrap? */ 3427{ 3428 name_pointer p=id_lookup(id_first,id_loc,normal); 3429 if (p->ilk<=custom) { /* not a reserved word */ 3430 app(id_flag+(int)(p-name_dir)); 3431 if (scrapping) app_scrap(p->ilk==func_template? ftemplate: exp, 3432 p->ilk==custom? yes_math: maybe_math); 3433@.\\NULL@> 3434 } else { 3435 app(res_flag+(int)(p-name_dir)); 3436 if (scrapping) { 3437 if (p->ilk==alfop) app_scrap(ubinop,yes_math)@; 3438 else app_scrap(p->ilk,maybe_math); 3439 } 3440 } 3441} 3442 3443@ When the `\.{\v}' that introduces \CEE/ text is sensed, a call on 3444|C_translate| will return a pointer to the \TEX/ translation of 3445that text. If scraps exist in |scrap_info|, they are 3446unaffected by this translation process. 3447 3448@c 3449text_pointer 3450C_translate() 3451{ 3452 text_pointer p; /* points to the translation */ 3453 scrap_pointer save_base; /* holds original value of |scrap_base| */ 3454 save_base=scrap_base; scrap_base=scrap_ptr+1; 3455 C_parse(section_name); /* get the scraps together */ 3456 if (next_control!='|') err_print("! Missing '|' after C text"); 3457@.Missing '|'...@> 3458 app_tok(cancel); app_scrap(insert,maybe_math); 3459 /* place a |cancel| token as a final ``comment'' */ 3460 p=translate(); /* make the translation */ 3461 if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr; 3462 scrap_ptr=scrap_base-1; scrap_base=save_base; /* scrap the scraps */ 3463 return(p); 3464} 3465 3466@ The |outer_parse| routine is to |C_parse| as |outer_xref| 3467is to |C_xref|: It constructs a sequence of scraps for \CEE/ text 3468until |next_control>=format_code|. Thus, it takes care of embedded comments. 3469 3470The token list created from within `\pb' brackets is output as an argument 3471to \.{\\PB}, if the user has invoked \.{CWEAVE} with the \.{+e} flag. 3472Although \.{cwebmac} ignores \.{\\PB}, other macro packages 3473might use it to localize the special meaning of the macros that mark up 3474program text. 3475 3476@d make_pb flags['e'] 3477 3478@c 3479void 3480outer_parse() /* makes scraps from \CEE/ tokens and comments */ 3481{ 3482 int bal; /* brace level in comment */ 3483 text_pointer p, q; /* partial comments */ 3484 while (next_control<format_code) 3485 if (next_control!=begin_comment && next_control!=begin_short_comment) 3486 C_parse(ignore); 3487 else { 3488 boolean is_long_comment=(next_control==begin_comment); 3489 @<Make sure that there is room for the new...@>; 3490 app(cancel); app(inserted); 3491 if (is_long_comment) app_str("\\C{"@q}@>); 3492@.\\C@> 3493 else app_str("\\SHC{"@q}@>); 3494@.\\SHC@> 3495 bal=copy_comment(is_long_comment,1); next_control=ignore; 3496 while (bal>0) { 3497 p=text_ptr; freeze_text; q=C_translate(); 3498 /* at this point we have |tok_ptr+6<=max_toks| */ 3499 app(tok_flag+(int)(p-tok_start)); 3500 if (make_pb) app_str("\\PB{"); 3501@.\\PB@> 3502 app(inner_tok_flag+(int)(q-tok_start)); 3503 if (make_pb) app_tok('}'); 3504 if (next_control=='|') { 3505 bal=copy_comment(is_long_comment,bal); 3506 next_control=ignore; 3507 } 3508 else bal=0; /* an error has been reported */ 3509 } 3510 app(force); app_scrap(insert,no_math); 3511 /* the full comment becomes a scrap */ 3512 } 3513} 3514 3515@* Output of tokens. 3516So far our programs have only built up multi-layered token lists in 3517\.{CWEAVE}'s internal memory; we have to figure out how to get them into 3518the desired final form. The job of converting token lists to characters in 3519the \TEX/ output file is not difficult, although it is an implicitly 3520recursive process. Four main considerations had to be kept in mind when 3521this part of \.{CWEAVE} was designed. (a) There are two modes of output: 3522|outer| mode, which translates tokens like |force| into line-breaking 3523control sequences, and |inner| mode, which ignores them except that blank 3524spaces take the place of line breaks. (b) The |cancel| instruction applies 3525to adjacent token or tokens that are output, and this cuts across levels 3526of recursion since `|cancel|' occurs at the beginning or end of a token 3527list on one level. (c) The \TEX/ output file will be semi-readable if line 3528breaks are inserted after the result of tokens like |break_space| and 3529|force|. (d) The final line break should be suppressed, and there should 3530be no |force| token output immediately after `\.{\\Y\\B}'. 3531 3532@ The output process uses a stack to keep track of what is going on at 3533different ``levels'' as the token lists are being written out. Entries on 3534this stack have three parts: 3535 3536\yskip\hang |end_field| is the |tok_mem| location where the token list of a 3537particular level will end; 3538 3539\yskip\hang |tok_field| is the |tok_mem| location from which the next token 3540on a particular level will be read; 3541 3542\yskip\hang |mode_field| is the current mode, either |inner| or |outer|. 3543 3544\yskip\noindent The current values of these quantities are referred to 3545quite frequently, so they are stored in a separate place instead of in the 3546|stack| array. We call the current values |cur_end|, |cur_tok|, and 3547|cur_mode|. 3548 3549The global variable |stack_ptr| tells how many levels of output are 3550currently in progress. The end of output occurs when an |end_translation| 3551token is found, so the stack is never empty except when we first begin the 3552output process. 3553 3554@d inner 0 /* value of |mode| for \CEE/ texts within \TEX/ texts */ 3555@d outer 1 /* value of |mode| for \CEE/ texts in sections */ 3556 3557@<Typed...@>= typedef int mode; 3558typedef struct { 3559 token_pointer end_field; /* ending location of token list */ 3560 token_pointer tok_field; /* present location within token list */ 3561 boolean mode_field; /* interpretation of control tokens */ 3562} output_state; 3563typedef output_state *stack_pointer; 3564 3565@ @d cur_end cur_state.end_field /* current ending location in |tok_mem| */ 3566@d cur_tok cur_state.tok_field /* location of next output token in |tok_mem| */ 3567@d cur_mode cur_state.mode_field /* current mode of interpretation */ 3568@d init_stack stack_ptr=stack;cur_mode=outer /* initialize the stack */ 3569 3570@<Global...@>= 3571output_state cur_state; /* |cur_end|, |cur_tok|, |cur_mode| */ 3572output_state stack[stack_size]; /* info for non-current levels */ 3573stack_pointer stack_ptr; /* first unused location in the output state stack */ 3574stack_pointer stack_end=stack+stack_size-1; /* end of |stack| */ 3575stack_pointer max_stack_ptr; /* largest value assumed by |stack_ptr| */ 3576 3577@ @<Set init...@>= 3578max_stack_ptr=stack; 3579 3580@ To insert token-list |p| into the output, the |push_level| subroutine 3581is called; it saves the old level of output and gets a new one going. 3582The value of |cur_mode| is not changed. 3583 3584@c 3585void 3586push_level(p) /* suspends the current level */ 3587text_pointer p; 3588{ 3589 if (stack_ptr==stack_end) overflow("stack"); 3590 if (stack_ptr>stack) { /* save current state */ 3591 stack_ptr->end_field=cur_end; 3592 stack_ptr->tok_field=cur_tok; 3593 stack_ptr->mode_field=cur_mode; 3594 } 3595 stack_ptr++; 3596 if (stack_ptr>max_stack_ptr) max_stack_ptr=stack_ptr; 3597 cur_tok=*p; cur_end=*(p+1); 3598} 3599 3600@ Conversely, the |pop_level| routine restores the conditions that were in 3601force when the current level was begun. This subroutine will never be 3602called when |stack_ptr==1|. 3603 3604@c 3605void 3606pop_level() 3607{ 3608 cur_end=(--stack_ptr)->end_field; 3609 cur_tok=stack_ptr->tok_field; cur_mode=stack_ptr->mode_field; 3610} 3611 3612@ The |get_output| function returns the next byte of output that is not a 3613reference to a token list. It returns the values |identifier| or |res_word| 3614or |section_code| if the next token is to be an identifier (typeset in 3615italics), a reserved word (typeset in boldface), or a section name (typeset 3616by a complex routine that might generate additional levels of output). 3617In these cases |cur_name| points to the identifier or section name in 3618question. 3619 3620@<Global...@>= 3621name_pointer cur_name; 3622 3623@ @d res_word 0201 /* returned by |get_output| for reserved words */ 3624@d section_code 0200 /* returned by |get_output| for section names */ 3625 3626@c 3627eight_bits 3628get_output() /* returns the next token of output */ 3629{ 3630 sixteen_bits a; /* current item read from |tok_mem| */ 3631 restart: while (cur_tok==cur_end) pop_level(); 3632 a=*(cur_tok++); 3633 if (a>=0400) { 3634 cur_name=a % id_flag + name_dir; 3635 switch (a / id_flag) { 3636 case 2: return(res_word); /* |a==res_flag+cur_name| */ 3637 case 3: return(section_code); /* |a==section_flag+cur_name| */ 3638 case 4: push_level(a % id_flag + tok_start); goto restart; 3639 /* |a==tok_flag+cur_name| */ 3640 case 5: push_level(a % id_flag + tok_start); cur_mode=inner; goto restart; 3641 /* |a==inner_tok_flag+cur_name| */ 3642 default: return(identifier); /* |a==id_flag+cur_name| */ 3643 } 3644 } 3645 return(a); 3646} 3647 3648@ The real work associated with token output is done by |make_output|. 3649This procedure appends an |end_translation| token to the current token list, 3650and then it repeatedly calls |get_output| and feeds characters to the output 3651buffer until reaching the |end_translation| sentinel. It is possible for 3652|make_output| to be called recursively, since a section name may include 3653embedded \CEE/ text; however, the depth of recursion never exceeds one 3654level, since section names cannot be inside of section names. 3655 3656A procedure called |output_C| does the scanning, translation, and 3657output of \CEE/ text within `\pb' brackets, and this procedure uses 3658|make_output| to output the current token list. Thus, the recursive call 3659of |make_output| actually occurs when |make_output| calls |output_C| 3660while outputting the name of a section. 3661@^recursion@> 3662 3663@c 3664void 3665output_C() /* outputs the current token list */ 3666{ 3667 token_pointer save_tok_ptr; 3668 text_pointer save_text_ptr; 3669 sixteen_bits save_next_control; /* values to be restored */ 3670 text_pointer p; /* translation of the \CEE/ text */ 3671 save_tok_ptr=tok_ptr; save_text_ptr=text_ptr; 3672 save_next_control=next_control; next_control=ignore; p=C_translate(); 3673 app(inner_tok_flag+(int)(p-tok_start)); 3674 if (make_pb) { 3675 out_str("\\PB{"); make_output(); out('}'); 3676@.\\PB@> 3677 }@+else make_output(); /* output the list */ 3678 if (text_ptr>max_text_ptr) max_text_ptr=text_ptr; 3679 if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr; 3680 text_ptr=save_text_ptr; tok_ptr=save_tok_ptr; /* forget the tokens */ 3681 next_control=save_next_control; /* restore |next_control| to original state */ 3682} 3683 3684@ Here is \.{CWEAVE}'s major output handler. 3685 3686@<Predecl...@>= 3687void make_output(); 3688 3689@ @c 3690void 3691make_output() /* outputs the equivalents of tokens */ 3692{ 3693 eight_bits a, /* current output byte */ 3694 b; /* next output byte */ 3695 int c; /* count of |indent| and |outdent| tokens */ 3696 char scratch[longest_name]; /* scratch area for section names */ 3697 char *k, *k_limit; /* indices into |scratch| */ 3698 char *j; /* index into |buffer| */ 3699 char *p; /* index into |byte_mem| */ 3700 char delim; /* first and last character of string being copied */ 3701 char *save_loc, *save_limit; /* |loc| and |limit| to be restored */ 3702 name_pointer cur_section_name; /* name of section being output */ 3703 boolean save_mode; /* value of |cur_mode| before a sequence of breaks */ 3704 app(end_translation); /* append a sentinel */ 3705 freeze_text; push_level(text_ptr-1); 3706 while (1) { 3707 a=get_output(); 3708 reswitch: switch(a) { 3709 case end_translation: return; 3710 case identifier: case res_word: @<Output an identifier@>; break; 3711 case section_code: @<Output a section name@>; break; 3712 case math_rel: out_str("\\MRL{"@q}@>); 3713@.\\MRL@> 3714 case noop: case inserted: break; 3715 case cancel: case big_cancel: c=0; b=a; 3716 while (1) { 3717 a=get_output(); 3718 if (a==inserted) continue; 3719 if ((a<indent && !(b==big_cancel&&a==' ')) || a>big_force) break; 3720 if (a==indent) c++; else if (a==outdent) c--; 3721 else if (a==opt) a=get_output(); 3722 } 3723 @<Output saved |indent| or |outdent| tokens@>; 3724 goto reswitch; 3725 case indent: case outdent: case opt: case backup: case break_space: 3726 case force: case big_force: case preproc_line: @<Output a control, 3727 look ahead in case of line breaks, possibly |goto reswitch|@>; break; 3728 case quoted_char: out(*(cur_tok++)); 3729 case qualifier: break; 3730 default: out(a); /* otherwise |a| is an ordinary character */ 3731 } 3732 } 3733} 3734 3735@ An identifier of length one does not have to be enclosed in braces, and it 3736looks slightly better if set in a math-italic font instead of a (slightly 3737narrower) text-italic font. Thus we output `\.{\\\v}\.{a}' but 3738`\.{\\\\\{aa\}}'. 3739 3740@<Output an identifier@>= 3741out('\\'); 3742if (a==identifier) { 3743 if (cur_name->ilk==custom && !doing_format) { 3744 custom_out: 3745 for (p=cur_name->byte_start;p<(cur_name+1)->byte_start;p++) 3746 out(*p=='_'? 'x': *p=='$'? 'X': *p); 3747 break; 3748 } else if (is_tiny(cur_name)) out('|')@; 3749@.\\|@> 3750 else { delim='.'; 3751 for (p=cur_name->byte_start;p<(cur_name+1)->byte_start;p++) 3752 if (xislower(*p)) { /* not entirely uppercase */ 3753 delim='\\'; break; 3754 } 3755 out(delim); 3756 } 3757@.\\\\@> 3758@.\\.@> 3759}@+else if (cur_name->ilk==alfop) { 3760 out('X'); 3761 goto custom_out; 3762}@+else out('&'); /* |a==res_word| */ 3763@.\\\&@> 3764if (is_tiny(cur_name)) { 3765 if (isxalpha((cur_name->byte_start)[0])) 3766 out('\\'); 3767 out((cur_name->byte_start)[0]); 3768} 3769else out_name(cur_name,1); 3770 3771@ The current mode does not affect the behavior of \.{CWEAVE}'s output routine 3772except when we are outputting control tokens. 3773 3774@<Output a control...@>= 3775if (a<break_space || a==preproc_line) { 3776 if (cur_mode==outer) { 3777 out('\\'); out(a-cancel+'0'); 3778@.\\1@> 3779@.\\2@> 3780@.\\3@> 3781@.\\4@> 3782@.\\8@> 3783 if (a==opt) { 3784 b=get_output(); /* |opt| is followed by a digit */ 3785 if (b!='0' || force_lines==0) out(b)@; 3786 else out_str("{-1}"); /* |force_lines| encourages more \.{@@\v} breaks */ 3787 } 3788 } else if (a==opt) b=get_output(); /* ignore digit following |opt| */ 3789 } 3790else @<Look ahead for strongest line break, |goto reswitch|@> 3791 3792@ If several of the tokens |break_space|, |force|, |big_force| occur in a 3793row, possibly mixed with blank spaces (which are ignored), 3794the largest one is used. A line break also occurs in the output file, 3795except at the very end of the translation. The very first line break 3796is suppressed (i.e., a line break that follows `\.{\\Y\\B}'). 3797 3798@<Look ahead for st...@>= { 3799 b=a; save_mode=cur_mode; c=0; 3800 while (1) { 3801 a=get_output(); 3802 if (a==inserted) continue; 3803 if (a==cancel || a==big_cancel) { 3804 @<Output saved |indent| or |outdent| tokens@>; 3805 goto reswitch; /* |cancel| overrides everything */ 3806 } 3807 if ((a!=' ' && a<indent) || a==backup || a>big_force) { 3808 if (save_mode==outer) { 3809 if (out_ptr>out_buf+3 && strncmp(out_ptr-3,"\\Y\\B",4)==0) 3810 goto reswitch; 3811 @<Output saved |indent| or |outdent| tokens@>; 3812 out('\\'); out(b-cancel+'0'); 3813@.\\5@> 3814@.\\6@> 3815@.\\7@> 3816 if (a!=end_translation) finish_line(); 3817 } 3818 else if (a!=end_translation && cur_mode==inner) out(' '); 3819 goto reswitch; 3820 } 3821 if (a==indent) c++; 3822 else if (a==outdent) c--; 3823 else if (a==opt) a=get_output(); 3824 else if (a>b) b=a; /* if |a==' '| we have |a<b| */ 3825 } 3826} 3827 3828@ @<Output saved...@>= 3829 for (;c>0;c--) out_str("\\1"); 3830@.\\1@> 3831 for (;c<0;c++) out_str("\\2"); 3832@.\\2@> 3833 3834@ The remaining part of |make_output| is somewhat more complicated. When we 3835output a section name, we may need to enter the parsing and translation 3836routines, since the name may contain \CEE/ code embedded in 3837\pb\ constructions. This \CEE/ code is placed at the end of the active 3838input buffer and the translation process uses the end of the active 3839|tok_mem| area. 3840 3841@<Output a section name@>= { 3842 out_str("\\X"); 3843@.\\X@> 3844 cur_xref=(xref_pointer)cur_name->xref; 3845 if (cur_xref->num==file_flag) {an_output=1; cur_xref=cur_xref->xlink;} 3846 else an_output=0; 3847 if (cur_xref->num>=def_flag) { 3848 out_section(cur_xref->num-def_flag); 3849 if (phase==3) { 3850 cur_xref=cur_xref->xlink; 3851 while (cur_xref->num>=def_flag) { 3852 out_str(", "); 3853 out_section(cur_xref->num-def_flag); 3854 cur_xref=cur_xref->xlink; 3855 } 3856 } 3857 } 3858 else out('0'); /* output the section number, or zero if it was undefined */ 3859 out(':'); 3860 if (an_output) out_str("\\.{"@q}@>); 3861@.\\.@> 3862 @<Output the text of the section name@>; 3863 if (an_output) out_str(@q{@>" }"); 3864 out_str("\\X"); 3865} 3866 3867@ @<Output the text...@>= 3868sprint_section_name(scratch,cur_name); 3869k=scratch; 3870k_limit=scratch+strlen(scratch); 3871cur_section_name=cur_name; 3872while (k<k_limit) { 3873 b=*(k++); 3874 if (b=='@@') @<Skip next character, give error if not `\.{@@}'@>; 3875 if (an_output) 3876 switch (b) { 3877 case ' ':case '\\':case '#':case '%':case '$':case '^': 3878 case '{': case '}': case '~': case '&': case '_': 3879 out('\\'); /* falls through */ 3880@.\\\ @> 3881@.\\\\@> 3882@.\\\#@> 3883@.\\\%@> 3884@.\\\$@> 3885@.\\\^@> 3886@.\\\{@>@q}@> 3887@q{@>@.\\\}@> 3888@.\\\~@> 3889@.\\\&@> 3890@.\\\_@> 3891 default: out(b); 3892 } 3893 else if (b!='|') out(b) 3894 else { 3895 @<Copy the \CEE/ text into the |buffer| array@>; 3896 save_loc=loc; save_limit=limit; loc=limit+2; limit=j+1; 3897 *limit='|'; output_C(); 3898 loc=save_loc; limit=save_limit; 3899 } 3900} 3901 3902@ @<Skip next char...@>= 3903if (*k++!='@@') { 3904 printf("\n! Illegal control code in section name: <"); 3905@.Illegal control code...@> 3906 print_section_name(cur_section_name); printf("> "); mark_error; 3907} 3908 3909@ The \CEE/ text enclosed in \pb\ should not contain `\.{\v}' characters, 3910except within strings. We put a `\.{\v}' at the front of the buffer, so that an 3911error message that displays the whole buffer will look a little bit sensible. 3912The variable |delim| is zero outside of strings, otherwise it 3913equals the delimiter that began the string being copied. 3914 3915@<Copy the \CEE/ text into...@>= 3916j=limit+1; *j='|'; delim=0; 3917while (1) { 3918 if (k>=k_limit) { 3919 printf("\n! C text in section name didn't end: <"); 3920@.C text...didn't end@> 3921 print_section_name(cur_section_name); printf("> "); mark_error; break; 3922 } 3923 b=*(k++); 3924 if (b=='@@' || (b=='\\' && delim!=0)) 3925 @<Copy a quoted character into the buffer@> 3926 else { 3927 if (b=='\'' || b=='"') 3928 if (delim==0) delim=b; 3929 else if (delim==b) delim=0; 3930 if (b!='|' || delim!=0) { 3931 if (j>buffer+long_buf_size-3) overflow("buffer"); 3932 *(++j)=b; 3933 } 3934 else break; 3935 } 3936} 3937 3938@ @<Copy a quoted char...@>= { 3939 if (j>buffer+long_buf_size-4) overflow("buffer"); 3940 *(++j)=b; *(++j)=*(k++); 3941} 3942 3943@** Phase two processing. 3944We have assembled enough pieces of the puzzle in order to be ready to specify 3945the processing in \.{CWEAVE}'s main pass over the source file. Phase two 3946is analogous to phase one, except that more work is involved because we must 3947actually output the \TEX/ material instead of merely looking at the 3948\.{CWEB} specifications. 3949 3950@<Predecl...@>= 3951void phase_two(); 3952 3953@ @c 3954void 3955phase_two() { 3956reset_input(); if (show_progress) printf("\nWriting the output file..."); 3957@.Writing the output file...@> 3958section_count=0; format_visible=1; copy_limbo(); 3959finish_line(); flush_buffer(out_buf,0,0); /* insert a blank line, it looks nice */ 3960while (!input_has_ended) @<Translate the current section@>; 3961} 3962 3963@ The output file will contain the control sequence \.{\\Y} between non-null 3964sections of a section, e.g., between the \TEX/ and definition parts if both 3965are nonempty. This puts a little white space between the parts when they are 3966printed. However, we don't want \.{\\Y} to occur between two definitions 3967within a single section. The variables |out_line| or |out_ptr| will 3968change if a section is non-null, so the following macros `|save_position|' 3969and `|emit_space_if_needed|' are able to handle the situation: 3970 3971@d save_position save_line=out_line; save_place=out_ptr 3972@d emit_space_if_needed if (save_line!=out_line || save_place!=out_ptr) 3973 out_str("\\Y"); 3974 space_checked=1 3975@.\\Y@> 3976 3977@<Global...@>= 3978int save_line; /* former value of |out_line| */ 3979char *save_place; /* former value of |out_ptr| */ 3980int sec_depth; /* the integer, if any, following \.{@@*} */ 3981boolean space_checked; /* have we done |emit_space_if_needed|? */ 3982boolean format_visible; /* should the next format declaration be output? */ 3983boolean doing_format=0; /* are we outputting a format declaration? */ 3984boolean group_found=0; /* has a starred section occurred? */ 3985 3986@ @<Translate the current section@>= { 3987 section_count++; 3988 @<Output the code for the beginning of a new section@>; 3989 save_position; 3990 @<Translate the \TEX/ part of the current section@>; 3991 @<Translate the definition part of the current section@>; 3992 @<Translate the \CEE/ part of the current section@>; 3993 @<Show cross-references to this section@>; 3994 @<Output the code for the end of a section@>; 3995} 3996 3997@ Sections beginning with the \.{CWEB} control sequence `\.{@@\ }' start in the 3998output with the \TEX/ control sequence `\.{\\M}', followed by the section 3999number. Similarly, `\.{@@*}' sections lead to the control sequence `\.{\\N}'. 4000In this case there's an additional parameter, representing one plus the 4001specified depth, immediately after the \.{\\N}. 4002If the section has changed, we put \.{\\*} just after the section number. 4003 4004@<Output the code for the beginning...@>= 4005if (*(loc-1)!='*') out_str("\\M"); 4006@.\\M@> 4007else { 4008 while (*loc == ' ') loc++; 4009 if (*loc=='*') { /* ``top'' level */ 4010 sec_depth = -1; 4011 loc++; 4012 } 4013 else { 4014 for (sec_depth=0; xisdigit(*loc);loc++) 4015 sec_depth = sec_depth*10 + (*loc) -'0'; 4016 } 4017 while (*loc == ' ') loc++; /* remove spaces before group title */ 4018 group_found=1; 4019 out_str("\\N"); 4020@.\\N@> 4021 {@+ char s[32];@+sprintf(s,"{%d}",sec_depth+1);@+out_str(s);@+} 4022 if (show_progress) 4023 printf("*%d",section_count); update_terminal; /* print a progress report */ 4024} 4025out_str("{");out_section(section_count); out_str("}"); 4026 4027@ In the \TEX/ part of a section, we simply copy the source text, except that 4028index entries are not copied and \CEE/ text within \pb\ is translated. 4029 4030@<Translate the \T...@>= do { 4031 next_control=copy_TeX(); 4032 switch (next_control) { 4033 case '|': init_stack; output_C(); break; 4034 case '@@': out('@@'); break; 4035 case TeX_string: case noop: 4036 case xref_roman: case xref_wildcard: case xref_typewriter: 4037 case section_name: loc-=2; next_control=get_next(); /* skip to \.{@@>} */ 4038 if (next_control==TeX_string) 4039 err_print("! TeX string should be in C text only"); break; 4040@.TeX string should be...@> 4041 case thin_space: case math_break: case ord: 4042 case line_break: case big_line_break: case no_line_break: case join: 4043 case pseudo_semi: case macro_arg_open: case macro_arg_close: 4044 case output_defs_code: 4045 err_print("! You can't do that in TeX text"); break; 4046@.You can't do that...@> 4047 } 4048} while (next_control<format_code); 4049 4050@ When we get to the following code we have |next_control>=format_code|, and 4051the token memory is in its initial empty state. 4052 4053@<Translate the d...@>= 4054space_checked=0; 4055while (next_control<=definition) { /* |format_code| or |definition| */ 4056 init_stack; 4057 if (next_control==definition) @<Start a macro definition@>@; 4058 else @<Start a format definition@>; 4059 outer_parse(); finish_C(format_visible); format_visible=1; 4060 doing_format=0; 4061} 4062 4063@ The |finish_C| procedure outputs the translation of the current 4064scraps, preceded by the control sequence `\.{\\B}' and followed by the 4065control sequence `\.{\\par}'. It also restores the token and scrap 4066memories to their initial empty state. 4067 4068A |force| token is appended to the current scraps before translation 4069takes place, so that the translation will normally end with \.{\\6} or 4070\.{\\7} (the \TEX/ macros for |force| and |big_force|). This \.{\\6} or 4071\.{\\7} is replaced by the concluding \.{\\par} or by \.{\\Y\\par}. 4072 4073@<Predecl...@>= 4074void finish_C(); 4075 4076@ @c 4077void 4078finish_C(visible) /* finishes a definition or a \CEE/ part */ 4079 boolean visible; /* nonzero if we should produce \TEX/ output */ 4080{ 4081 text_pointer p; /* translation of the scraps */ 4082 if (visible) { 4083 out_str("\\B"); app_tok(force); app_scrap(insert,no_math); 4084 p=translate(); 4085@.\\B@> 4086 app(tok_flag+(int)(p-tok_start)); make_output(); /* output the list */ 4087 if (out_ptr>out_buf+1) 4088 if (*(out_ptr-1)=='\\') 4089@.\\6@> 4090@.\\7@> 4091@.\\Y@> 4092 if (*out_ptr=='6') out_ptr-=2; 4093 else if (*out_ptr=='7') *out_ptr='Y'; 4094 out_str("\\par"); finish_line(); 4095 } 4096 if (text_ptr>max_text_ptr) max_text_ptr=text_ptr; 4097 if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr; 4098 if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr; 4099 tok_ptr=tok_mem+1; text_ptr=tok_start+1; scrap_ptr=scrap_info; 4100 /* forget the tokens and the scraps */ 4101} 4102 4103@ Keeping in line with the conventions of the \CEE/ preprocessor (and 4104otherwise contrary to the rules of \.{CWEB}) we distinguish here 4105between the case that `\.(' immediately follows an identifier and the 4106case that the two are separated by a space. In the latter case, and 4107if the identifier is not followed by `\.(' at all, the replacement 4108text starts immediately after the identifier. In the former case, 4109it starts after we scan the matching `\.)'. 4110 4111@<Start a macro...@>= { 4112 if (save_line!=out_line || save_place!=out_ptr || space_checked) app(backup); 4113 if(!space_checked){emit_space_if_needed;save_position;} 4114 app_str("\\D"); /* this will produce `\&{define }' */ 4115@.\\D@> 4116 if ((next_control=get_next())!=identifier) 4117 err_print("! Improper macro definition"); 4118@.Improper macro definition@> 4119 else { 4120 app('$'); app_cur_id(0); 4121 if (*loc=='(') 4122 reswitch: switch (next_control=get_next()) { 4123 case '(': case ',': app(next_control); goto reswitch; 4124 case identifier: app_cur_id(0); goto reswitch; 4125 case ')': app(next_control); next_control=get_next(); break; 4126 default: err_print("! Improper macro definition"); break; 4127 } 4128 else next_control=get_next(); 4129 app_str("$ "); app(break_space); 4130 app_scrap(dead,no_math); /* scrap won't take part in the parsing */ 4131 } 4132} 4133 4134@ @<Start a format...@>= { 4135 doing_format=1; 4136 if(*(loc-1)=='s' || *(loc-1)=='S') format_visible=0; 4137 if(!space_checked){emit_space_if_needed;save_position;} 4138 app_str("\\F"); /* this will produce `\&{format }' */ 4139@.\\F@> 4140 next_control=get_next(); 4141 if (next_control==identifier) { 4142 app(id_flag+(int)(id_lookup(id_first, id_loc,normal)-name_dir)); 4143 app(' '); 4144 app(break_space); /* this is syntactically separate from what follows */ 4145 next_control=get_next(); 4146 if (next_control==identifier) { 4147 app(id_flag+(int)(id_lookup(id_first, id_loc,normal)-name_dir)); 4148 app_scrap(exp,maybe_math); app_scrap(semi,maybe_math); 4149 next_control=get_next(); 4150 } 4151 } 4152 if (scrap_ptr!=scrap_info+2) err_print("! Improper format definition"); 4153@.Improper format definition@> 4154} 4155 4156@ Finally, when the \TEX/ and definition parts have been treated, we have 4157|next_control>=begin_C|. We will make the global variable |this_section| 4158point to the current section name, if it has a name. 4159 4160@<Global...@>= 4161name_pointer this_section; /* the current section name, or zero */ 4162 4163@ @<Translate the \CEE/...@>= 4164this_section=name_dir; 4165if (next_control<=section_name) { 4166 emit_space_if_needed; init_stack; 4167 if (next_control==begin_C) next_control=get_next(); 4168 else { 4169 this_section=cur_section; 4170 @<Check that '=' or '==' follows this section name, and 4171 emit the scraps to start the section definition@>; 4172 } 4173 while (next_control<=section_name) { 4174 outer_parse(); 4175 @<Emit the scrap for a section name if present@>; 4176 } 4177 finish_C(1); 4178} 4179 4180@ The title of the section and an $\E$ or $\mathrel+\E$ are made 4181into a scrap that should not take part in the parsing. 4182 4183@<Check that '='...@>= 4184do next_control=get_next(); 4185 while (next_control=='+'); /* allow optional `\.{+=}' */ 4186if (next_control!='=' && next_control!=eq_eq) 4187 err_print("! You need an = sign after the section name"); 4188@.You need an = sign...@> 4189 else next_control=get_next(); 4190if (out_ptr>out_buf+1 && *out_ptr=='Y' && *(out_ptr-1)=='\\') app(backup); 4191 /* the section name will be flush left */ 4192@.\\Y@> 4193app(section_flag+(int)(this_section-name_dir)); 4194cur_xref=(xref_pointer)this_section->xref; 4195if(cur_xref->num==file_flag) cur_xref=cur_xref->xlink; 4196app_str("${}"); 4197if (cur_xref->num!=section_count+def_flag) { 4198 app_str("\\mathrel+"); /*section name is multiply defined*/ 4199 this_section=name_dir; /*so we won't give cross-reference info here*/ 4200} 4201app_str("\\E"); /* output an equivalence sign */ 4202@.\\E@> 4203app_str("{}$"); 4204app(force); app_scrap(dead,no_math); 4205 /* this forces a line break unless `\.{@@+}' follows */ 4206 4207@ @<Emit the scrap...@>= 4208if (next_control<section_name) { 4209 err_print("! You can't do that in C text"); 4210@.You can't do that...@> 4211 next_control=get_next(); 4212} 4213else if (next_control==section_name) { 4214 app(section_flag+(int)(cur_section-name_dir)); 4215 app_scrap(section_scrap,maybe_math); 4216 next_control=get_next(); 4217} 4218 4219@ Cross references relating to a named section are given 4220after the section ends. 4221 4222@<Show cross...@>= 4223if (this_section>name_dir) { 4224 cur_xref=(xref_pointer)this_section->xref; 4225 if (cur_xref->num==file_flag){an_output=1;cur_xref=cur_xref->xlink;} 4226 else an_output=0; 4227 if (cur_xref->num>def_flag) 4228 cur_xref=cur_xref->xlink; /* bypass current section number */ 4229 footnote(def_flag); footnote(cite_flag); footnote(0); 4230} 4231 4232@ The |footnote| procedure gives cross-reference information about 4233multiply defined section names (if the |flag| parameter is 4234|def_flag|), or about references to a section name 4235(if |flag==cite_flag|), or to its uses (if |flag==0|). It assumes that 4236|cur_xref| points to the first cross-reference entry of interest, and it 4237leaves |cur_xref| pointing to the first element not printed. Typical outputs: 4238`\.{\\A101.}'; `\.{\\Us 370\\ET1009.}'; 4239`\.{\\As 8, 27\\*\\ETs64.}'. 4240 4241Note that the output of \.{CWEAVE} is not English-specific; users may 4242supply new definitions for the macros \.{\\A}, \.{\\As}, etc. 4243 4244@<Predecl...@>= 4245void footnote(); 4246 4247@ @c 4248void 4249footnote(flag) /* outputs section cross-references */ 4250sixteen_bits flag; 4251{ 4252 xref_pointer q; /* cross-reference pointer variable */ 4253 if (cur_xref->num<=flag) return; 4254 finish_line(); out('\\'); 4255@.\\A@> 4256@.\\Q@> 4257@.\\U@> 4258 out(flag==0? 'U': flag==cite_flag? 'Q': 'A'); 4259 @<Output all the section numbers on the reference list |cur_xref|@>; 4260 out('.'); 4261} 4262 4263@ The following code distinguishes three cases, according as the number 4264of cross-references is one, two, or more than two. Variable |q| points 4265to the first cross-reference, and the last link is a zero. 4266 4267@<Output all the section numbers...@>= 4268q=cur_xref; if (q->xlink->num>flag) out('s'); /* plural */ 4269while (1) { 4270 out_section(cur_xref->num-flag); 4271 cur_xref=cur_xref->xlink; /* point to the next cross-reference to output */ 4272 if (cur_xref->num<=flag) break; 4273 if (cur_xref->xlink->num>flag) out_str(", "); /* not the last */ 4274 else {out_str("\\ET"); /* the last */ 4275@.\\ET@> 4276 if (cur_xref != q->xlink) out('s'); /* the last of more than two */ 4277 } 4278} 4279 4280@ @<Output the code for the end of a section@>= 4281out_str("\\fi"); finish_line(); 4282@.\\fi@> 4283flush_buffer(out_buf,0,0); /* insert a blank line, it looks nice */ 4284 4285@** Phase three processing. 4286We are nearly finished! \.{CWEAVE}'s only remaining task is to write out the 4287index, after sorting the identifiers and index entries. 4288 4289If the user has set the |no_xref| flag (the \.{-x} option on the command line), 4290just finish off the page, omitting the index, section name list, and table of 4291contents. 4292 4293@<Predecl...@>= 4294void phase_three(); 4295 4296@ @c 4297void 4298phase_three() { 4299if (no_xref) { 4300 finish_line(); 4301 out_str("\\end"); 4302@.\\end@> 4303 finish_line(); 4304} 4305else { 4306 phase=3; if (show_progress) printf("\nWriting the index..."); 4307@.Writing the index...@> 4308 finish_line(); 4309 if ((idx_file=fopen(idx_file_name,"w"))==NULL) 4310 fatal("! Cannot open index file ",idx_file_name); 4311@.Cannot open index file@> 4312 if (change_exists) { 4313 @<Tell about changed sections@>; finish_line(); finish_line(); 4314 } 4315 out_str("\\inx"); finish_line(); 4316@.\\inx@> 4317 active_file=idx_file; /* change active file to the index file */ 4318 @<Do the first pass of sorting@>; 4319 @<Sort and output the index@>; 4320 finish_line(); fclose(active_file); /* finished with |idx_file| */ 4321 active_file=tex_file; /* switch back to |tex_file| for a tic */ 4322 out_str("\\fin"); finish_line(); 4323@.\\fin@> 4324 if ((scn_file=fopen(scn_file_name,"w"))==NULL) 4325 fatal("! Cannot open section file ",scn_file_name); 4326@.Cannot open section file@> 4327 active_file=scn_file; /* change active file to section listing file */ 4328 @<Output all the section names@>; 4329 finish_line(); fclose(active_file); /* finished with |scn_file| */ 4330 active_file=tex_file; 4331 if (group_found) out_str("\\con");@+else out_str("\\end"); 4332@.\\con@> 4333@.\\end@> 4334 finish_line(); 4335 fclose(active_file); 4336} 4337if (show_happiness) printf("\nDone."); 4338check_complete(); /* was all of the change file used? */ 4339} 4340 4341@ Just before the index comes a list of all the changed sections, including 4342the index section itself. 4343 4344@<Global...@>= 4345sixteen_bits k_section; /* runs through the sections */ 4346 4347@ @<Tell about changed sections@>= { 4348 /* remember that the index is already marked as changed */ 4349 k_section=0; 4350 while (!changed_section[++k_section]); 4351 out_str("\\ch "); 4352@.\\ch@> 4353 out_section(k_section); 4354 while (k_section<section_count) { 4355 while (!changed_section[++k_section]); 4356 out_str(", "); out_section(k_section); 4357 } 4358 out('.'); 4359} 4360 4361@ A left-to-right radix sorting method is used, since this makes it easy to 4362adjust the collating sequence and since the running time will be at worst 4363proportional to the total length of all entries in the index. We put the 4364identifiers into 102 different lists based on their first characters. 4365(Uppercase letters are put into the same list as the corresponding lowercase 4366letters, since we want to have `$t<\\{TeX}<\&{to}$'.) The 4367list for character |c| begins at location |bucket[c]| and continues through 4368the |blink| array. 4369 4370@<Global...@>= 4371name_pointer bucket[256]; 4372name_pointer next_name; /* successor of |cur_name| when sorting */ 4373name_pointer blink[max_names]; /* links in the buckets */ 4374 4375@ To begin the sorting, we go through all the hash lists and put each entry 4376having a nonempty cross-reference list into the proper bucket. 4377 4378@<Do the first pass...@>= { 4379int c; 4380for (c=0; c<=255; c++) bucket[c]=NULL; 4381for (h=hash; h<=hash_end; h++) { 4382 next_name=*h; 4383 while (next_name) { 4384 cur_name=next_name; next_name=cur_name->link; 4385 if (cur_name->xref!=(char*)xmem) { 4386 c=(eight_bits)((cur_name->byte_start)[0]); 4387 if (xisupper(c)) c=tolower(c); 4388 blink[cur_name-name_dir]=bucket[c]; bucket[c]=cur_name; 4389 } 4390 } 4391} 4392} 4393 4394@ During the sorting phase we shall use the |cat| and |trans| arrays from 4395\.{CWEAVE}'s parsing algorithm and rename them |depth| and |head|. They now 4396represent a stack of identifier lists for all the index entries that have 4397not yet been output. The variable |sort_ptr| tells how many such lists are 4398present; the lists are output in reverse order (first |sort_ptr|, then 4399|sort_ptr-1|, etc.). The |j|th list starts at |head[j]|, and if the first 4400|k| characters of all entries on this list are known to be equal we have 4401|depth[j]==k|. 4402 4403@ @<Rest of |trans_plus| union@>= 4404name_pointer Head; 4405 4406@ @d depth cat /* reclaims memory that is no longer needed for parsing */ 4407@d head trans_plus.Head /* ditto */ 4408@f sort_pointer int 4409@d sort_pointer scrap_pointer /* ditto */ 4410@d sort_ptr scrap_ptr /* ditto */ 4411@d max_sorts max_scraps /* ditto */ 4412 4413@<Global...@>= 4414eight_bits cur_depth; /* depth of current buckets */ 4415char *cur_byte; /* index into |byte_mem| */ 4416sixteen_bits cur_val; /* current cross-reference number */ 4417sort_pointer max_sort_ptr; /* largest value of |sort_ptr| */ 4418 4419@ @<Set init...@>= 4420max_sort_ptr=scrap_info; 4421 4422@ The desired alphabetic order is specified by the |collate| array; namely, 4423$|collate|[0]<|collate|[1]<\cdots<|collate|[100]$. 4424 4425@<Global...@>= 4426eight_bits collate[102+128]; /* collation order */ 4427@^high-bit character handling@> 4428 4429@ We use the order $\hbox{null}<\.\ <\hbox{other characters}<{}$\.\_${}< 4430\.A=\.a<\cdots<\.Z=\.z<\.0<\cdots<\.9.$ Warning: The collation mapping 4431needs to be changed if ASCII code is not being used. 4432@^ASCII code dependencies@> 4433@^high-bit character handling@> 4434 4435We initialize |collate| by copying a few characters at a time, because 4436some \CEE/ compilers choke on long strings. 4437 4438@<Set init...@>= 4439collate[0]=0; 4440strcpy(collate+1," \1\2\3\4\5\6\7\10\11\12\13\14\15\16\17"); 4441/* 16 characters + 1 = 17 */ 4442strcpy(collate+17,"\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37"); 4443/* 16 characters + 17 = 33 */ 4444strcpy(collate+33,"!\42#$%&'()*+,-./:;<=>?@@[\\]^`{|}~_"); 4445/* 32 characters + 33 = 65 */ 4446strcpy(collate+65,"abcdefghijklmnopqrstuvwxyz0123456789"); 4447/* (26 + 10) characters + 65 = 101 */ 4448strcpy(collate+101,"\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217"); 4449/* 16 characters + 101 = 117 */ 4450strcpy(collate+117,"\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237"); 4451/* 16 characters + 117 = 133 */ 4452strcpy(collate+133,"\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257"); 4453/* 16 characters + 133 = 149 */ 4454strcpy(collate+149,"\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277"); 4455/* 16 characters + 149 = 165 */ 4456strcpy(collate+165,"\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317"); 4457/* 16 characters + 165 = 181 */ 4458strcpy(collate+181,"\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337"); 4459/* 16 characters + 181 = 197 */ 4460strcpy(collate+197,"\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357"); 4461/* 16 characters + 197 = 213 */ 4462strcpy(collate+213,"\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"); 4463/* 16 characters + 213 = 229 */ 4464 4465@ Procedure |unbucket| goes through the buckets and adds nonempty lists 4466to the stack, using the collating sequence specified in the |collate| array. 4467The parameter to |unbucket| tells the current depth in the buckets. 4468Any two sequences that agree in their first 255 character positions are 4469regarded as identical. 4470 4471@d infinity 255 /* $\infty$ (approximately) */ 4472 4473@<Predecl...@>= 4474void unbucket(); 4475 4476@ @c 4477void 4478unbucket(d) /* empties buckets having depth |d| */ 4479eight_bits d; 4480{ 4481 int c; /* index into |bucket|; cannot be a simple |char| because of sign 4482 comparison below*/ 4483 for (c=100+128; c>= 0; c--) if (bucket[collate[c]]) { 4484@^high-bit character handling@> 4485 if (sort_ptr>=scrap_info_end) overflow("sorting"); 4486 sort_ptr++; 4487 if (sort_ptr>max_sort_ptr) max_sort_ptr=sort_ptr; 4488 if (c==0) sort_ptr->depth=infinity; 4489 else sort_ptr->depth=d; 4490 sort_ptr->head=bucket[collate[c]]; bucket[collate[c]]=NULL; 4491 } 4492} 4493 4494@ @<Sort and output...@>= 4495sort_ptr=scrap_info; unbucket(1); 4496while (sort_ptr>scrap_info) { 4497 cur_depth=sort_ptr->depth; 4498 if (blink[sort_ptr->head-name_dir]==0 || cur_depth==infinity) 4499 @<Output index entries for the list at |sort_ptr|@>@; 4500 else @<Split the list at |sort_ptr| into further lists@>; 4501} 4502 4503@ @<Split the list...@>= { 4504 eight_bits c; 4505 next_name=sort_ptr->head; 4506 do { 4507 cur_name=next_name; next_name=blink[cur_name-name_dir]; 4508 cur_byte=cur_name->byte_start+cur_depth; 4509 if (cur_byte==(cur_name+1)->byte_start) c=0; /* hit end of the name */ 4510 else { 4511 c=(eight_bits) *cur_byte; 4512 if (xisupper(c)) c=tolower(c); 4513 } 4514 blink[cur_name-name_dir]=bucket[c]; bucket[c]=cur_name; 4515 } while (next_name); 4516 --sort_ptr; unbucket(cur_depth+1); 4517} 4518 4519@ @<Output index...@>= { 4520 cur_name=sort_ptr->head; 4521 do { 4522 out_str("\\I"); 4523@.\\I@> 4524 @<Output the name at |cur_name|@>; 4525 @<Output the cross-references at |cur_name|@>; 4526 cur_name=blink[cur_name-name_dir]; 4527 } while (cur_name); 4528 --sort_ptr; 4529} 4530 4531@ @<Output the name...@>= 4532switch (cur_name->ilk) { 4533 case normal: case func_template: if (is_tiny(cur_name)) out_str("\\|"); 4534 else {char *j; 4535 for (j=cur_name->byte_start;j<(cur_name+1)->byte_start;j++) 4536 if (xislower(*j)) goto lowcase; 4537 out_str("\\."); break; 4538lowcase: out_str("\\\\"); 4539 } 4540 break; 4541@.\\|@> 4542@.\\.@> 4543@.\\\\@> 4544 case wildcard: out_str("\\9");@+ goto not_an_identifier; 4545@.\\9@> 4546 case typewriter: out_str("\\."); 4547@.\\.@> 4548 case roman: not_an_identifier: out_name(cur_name,0); goto name_done; 4549 case custom: {char *j; out_str("$\\"); 4550 for (j=cur_name->byte_start;j<(cur_name+1)->byte_start;j++) 4551 out(*j=='_'? 'x': *j=='$'? 'X': *j); 4552 out('$'); 4553 goto name_done; 4554 } 4555 default: out_str("\\&"); 4556@.\\\&@> 4557} 4558out_name(cur_name,1); 4559name_done:@; 4560 4561@ Section numbers that are to be underlined are enclosed in 4562`\.{\\[}$\,\ldots\,$\.]'. 4563 4564@<Output the cross-references...@>= 4565@<Invert the cross-reference list at |cur_name|, making |cur_xref| the head@>; 4566do { 4567 out_str(", "); cur_val=cur_xref->num; 4568 if (cur_val<def_flag) out_section(cur_val); 4569 else {out_str("\\["); out_section(cur_val-def_flag); out(']');} 4570@.\\[@> 4571 cur_xref=cur_xref->xlink; 4572} while (cur_xref!=xmem); 4573out('.'); finish_line(); 4574 4575@ List inversion is best thought of as popping elements off one stack and 4576pushing them onto another. In this case |cur_xref| will be the head of 4577the stack that we push things onto. 4578@<Global...@>= 4579xref_pointer next_xref, this_xref; 4580 /* pointer variables for rearranging a list */ 4581 4582@ @<Invert the cross-reference list at |cur_name|, making |cur_xref| the head@>= 4583this_xref=(xref_pointer)cur_name->xref; cur_xref=xmem; 4584do { 4585 next_xref=this_xref->xlink; this_xref->xlink=cur_xref; 4586 cur_xref=this_xref; this_xref=next_xref; 4587} while (this_xref!=xmem); 4588 4589@ The following recursive procedure walks through the tree of section names and 4590prints them. 4591@^recursion@> 4592 4593@<Predecl...@>= 4594void section_print(); 4595 4596@ @c 4597void 4598section_print(p) /* print all section names in subtree |p| */ 4599name_pointer p; 4600{ 4601 if (p) { 4602 section_print(p->llink); out_str("\\I"); 4603@.\\I@> 4604 tok_ptr=tok_mem+1; text_ptr=tok_start+1; scrap_ptr=scrap_info; init_stack; 4605 app(p-name_dir+section_flag); make_output(); 4606 footnote(cite_flag); 4607 footnote(0); /* |cur_xref| was set by |make_output| */ 4608 finish_line();@/ 4609 section_print(p->rlink); 4610 } 4611} 4612 4613@ @<Output all the section names@>=section_print(root) 4614 4615@ Because on some systems the difference between two pointers is a |long| 4616rather than an |int|, we use \.{\%ld} to print these quantities. 4617 4618@c 4619void 4620print_stats() { 4621 printf("\nMemory usage statistics:\n"); 4622@.Memory usage statistics:@> 4623 printf("%ld names (out of %ld)\n", 4624 (long)(name_ptr-name_dir),(long)max_names); 4625 printf("%ld cross-references (out of %ld)\n", 4626 (long)(xref_ptr-xmem),(long)max_refs); 4627 printf("%ld bytes (out of %ld)\n", 4628 (long)(byte_ptr-byte_mem),(long)max_bytes); 4629 printf("Parsing:\n"); 4630 printf("%ld scraps (out of %ld)\n", 4631 (long)(max_scr_ptr-scrap_info),(long)max_scraps); 4632 printf("%ld texts (out of %ld)\n", 4633 (long)(max_text_ptr-tok_start),(long)max_texts); 4634 printf("%ld tokens (out of %ld)\n", 4635 (long)(max_tok_ptr-tok_mem),(long)max_toks); 4636 printf("%ld levels (out of %ld)\n", 4637 (long)(max_stack_ptr-stack),(long)stack_size); 4638 printf("Sorting:\n"); 4639 printf("%ld levels (out of %ld)\n", 4640 (long)(max_sort_ptr-scrap_info),(long)max_scraps); 4641} 4642 4643@** Index. 4644If you have read and understood the code for Phase III above, you know what 4645is in this index and how it got here. All sections in which an identifier is 4646used are listed with that identifier, except that reserved words are 4647indexed only when they appear in format definitions, and the appearances 4648of identifiers in section names are not indexed. Underlined entries 4649correspond to where the identifier was declared. Error messages, control 4650sequences put into the output, and a few 4651other things like ``recursion'' are indexed here too. 4652