1% This file is part of CWEB.
2% This program by Silvio Levy and Donald E. Knuth
3% is based on a program by Knuth.
4% It is distributed WITHOUT ANY WARRANTY, express or implied.
5% Version 3.64 --- February 2002
6% (essentially the same as version 3.6, which added
7%  recently introduced features of standard C++ to version 3.4)
8
9% Copyright (C) 1987,1990,1993,2000 Silvio Levy and Donald E. Knuth
10
11% Permission is granted to make and distribute verbatim copies of this
12% document provided that the copyright notice and this permission notice
13% are preserved on all copies.
14
15% Permission is granted to copy and distribute modified versions of this
16% document under the conditions for verbatim copying, provided that the
17% entire resulting derived work is given a different name and distributed
18% under the terms of a permission notice identical to this one.
19
20% Here is TeX material that gets inserted after \input cwebmac
21\def\hang{\hangindent 3em\indent\ignorespaces}
22\def\pb{$\.|\ldots\.|$} % C brackets (|...|)
23\def\v{\char'174} % vertical (|) in typewriter font
24\def\dleft{[\![} \def\dright{]\!]} % double brackets
25\mathchardef\RA="3221 % right arrow
26\mathchardef\BA="3224 % double arrow
27\def\({} % ) kludge for alphabetizing certain section names
28\def\TeXxstring{\\{\TEX/\_string}}
29\def\skipxTeX{\\{skip\_\TEX/}}
30\def\copyxTeX{\\{copy\_\TEX/}}
31
32\def\title{CWEAVE (Version 3.64)}
33\def\topofcontents{\null\vfill
34  \centerline{\titlefont The {\ttitlefont CWEAVE} processor}
35  \vskip 15pt
36  \centerline{(Version 3.64)}
37  \vfill}
38\def\botofcontents{\vfill
39\noindent
40Copyright \copyright\ 1987, 1990, 1993, 2000 Silvio Levy and Donald E. Knuth
41\bigskip\noindent
42Permission is granted to make and distribute verbatim copies of this
43document provided that the copyright notice and this permission notice
44are preserved on all copies.
45
46\smallskip\noindent
47Permission is granted to copy and distribute modified versions of this
48document under the conditions for verbatim copying, provided that the
49entire resulting derived work is given a different name and distributed
50under the terms of a permission notice identical to this one.
51}
52\pageno=\contentspagenumber \advance\pageno by 1
53\let\maybe=\iftrue
54@s not_eq normal @q unreserve a C++ keyword @>
55
56@** Introduction.
57This is the \.{CWEAVE} program by Silvio Levy and Donald E. Knuth,
58based on \.{WEAVE} by Knuth.
59We are thankful to Steve Avery,
60Nelson Beebe, Hans-Hermann Bode (to whom the original \CPLUSPLUS/ adaptation
61is due), Klaus Guntermann, Norman Ramsey, Tomas Rokicki, Joachim Schnitter,
62Joachim Schrod, Lee Wittenberg, Saroj Mahapatra, Cesar Augusto Rorato
63Crusius, and others who have contributed improvements.
64
65The ``banner line'' defined here should be changed whenever \.{CWEAVE}
66is modified.
67
68@d banner "This is CWEAVE (Version 3.64)\n"
69
70@c @<Include files@>@/
71@h
72@<Common code for \.{CWEAVE} and \.{CTANGLE}@>@/
73@<Typedef declarations@>@/
74@<Global variables@>@/
75@<Predeclaration of procedures@>
76
77@ We predeclare several standard system functions here instead of including
78their system header files, because the names of the header files are not as
79standard as the names of the functions. (For example, some \CEE/ environments
80have \.{<string.h>} where others have \.{<strings.h>}.)
81
82@<Predecl...@>=
83extern int strlen(); /* length of string */
84extern int strcmp(); /* compare strings lexicographically */
85extern char* strcpy(); /* copy one string to another */
86extern int strncmp(); /* compare up to $n$ string characters */
87extern char* strncpy(); /* copy up to $n$ string characters */
88
89@ \.{CWEAVE} has a fairly straightforward outline.  It operates in
90three phases: First it inputs the source file and stores cross-reference
91data, then it inputs the source once again and produces the \TEX/ output
92file, finally it sorts and outputs the index.
93
94Please read the documentation for \.{common}, the set of routines common
95to \.{CTANGLE} and \.{CWEAVE}, before proceeding further.
96
97@c
98int main (ac, av)
99int ac; /* argument count */
100char **av; /* argument values */
101{
102  argc=ac; argv=av;
103  program=cweave;
104  make_xrefs=force_lines=make_pb=1; /* controlled by command-line options */
105  common_init();
106  @<Set initial values@>;
107  if (show_banner) printf(banner); /* print a ``banner line'' */
108  @<Store all the reserved words@>;
109  phase_one(); /* read all the user's text and store the cross-references */
110  phase_two(); /* read all the text again and translate it to \TEX/ form */
111  phase_three(); /* output the cross-reference index */
112  return wrap_up(); /* and exit gracefully */
113}
114
115@ The following parameters were sufficient in the original \.{WEAVE} to
116handle \TEX/, so they should be sufficient for most applications of \.{CWEAVE}.
117If you change |max_bytes|, |max_names|, |hash_size|, or |buf_size|
118you have to change them also in the file |"common.w"|.
119
120@d max_bytes 90000 /* the number of bytes in identifiers,
121  index entries, and section names */
122@d max_names 4000 /* number of identifiers, strings, section names;
123  must be less than 10240; used in |"common.w"| */
124@d max_sections 2000 /* greater than the total number of sections */
125@d hash_size 353 /* should be prime */
126@d buf_size 100 /* maximum length of input line, plus one */
127@d longest_name 10000 /* section names and strings shouldn't be longer than this */
128@d long_buf_size (buf_size+longest_name)
129@d line_length 80 /* lines of \TEX/ output have at most this many characters;
130  should be less than 256 */
131@d max_refs 20000 /* number of cross-references; must be less than 65536 */
132@d max_toks 20000 /* number of symbols in \CEE/ texts being parsed;
133  must be less than 65536 */
134@d max_texts 4000 /* number of phrases in \CEE/ texts being parsed;
135  must be less than 10240 */
136@d max_scraps 2000 /* number of tokens in \CEE/ texts being parsed */
137@d stack_size 400 /* number of simultaneous output levels */
138
139@ The next few sections contain stuff from the file |"common.w"| that must
140be included in both |"ctangle.w"| and |"cweave.w"|. It appears in
141file |"common.h"|, which needs to be updated when |"common.w"| changes.
142
143@i common.h
144
145@* Data structures exclusive to {\tt CWEAVE}.
146As explained in \.{common.w}, the field of a |name_info| structure
147that contains the |rlink| of a section name is used for a completely
148different purpose in the case of identifiers.  It is then called the
149|ilk| of the identifier, and it is used to
150distinguish between various types of identifiers, as follows:
151
152\yskip\hang |normal| and |func_template| identifiers are part of the
153\CEE/ program that will  appear in italic type (or in typewriter type
154if all uppercase).
155
156\yskip\hang |custom| identifiers are part of the \CEE/ program that
157will be typeset in special ways.
158
159\yskip\hang |roman| identifiers are index entries that appear after
160\.{@@\^} in the \.{CWEB} file.
161
162\yskip\hang |wildcard| identifiers are index entries that appear after
163\.{@@:} in the \.{CWEB} file.
164
165\yskip\hang |typewriter| identifiers are index entries that appear after
166\.{@@.} in the \.{CWEB} file.
167
168\yskip\hang |alfop|, \dots, |template_like|
169identifiers are \CEE/ or \CPLUSPLUS/ reserved words whose |ilk|
170explains how they are to be treated when \CEE/ code is being
171formatted.
172
173@d ilk dummy.Ilk
174@d normal 0 /* ordinary identifiers have |normal| ilk */
175@d roman 1 /* normal index entries have |roman| ilk */
176@d wildcard 2 /* user-formatted index entries have |wildcard| ilk */
177@d typewriter 3 /* `typewriter type' entries have |typewriter| ilk */
178@d abnormal(a) (a->ilk>typewriter) /* tells if a name is special */
179@d func_template 4 /* identifiers that can be followed by optional template */
180@d custom 5 /* identifiers with user-given control sequence */
181@d alfop 22 /* alphabetic operators like \&{and} or \&{not\_eq} */
182@d else_like 26 /* \&{else} */
183@d public_like 40 /* \&{public}, \&{private}, \&{protected} */
184@d operator_like 41 /* \&{operator} */
185@d new_like 42 /* \&{new} */
186@d catch_like 43 /* \&{catch} */
187@d for_like 45 /* \&{for}, \&{switch}, \&{while} */
188@d do_like 46 /* \&{do} */
189@d if_like 47 /* \&{if}, \&{ifdef}, \&{endif}, \&{pragma}, \dots */
190@d delete_like 48 /* \&{delete} */
191@d raw_ubin 49 /* `\.\&' or `\.*' when looking for \&{const} following */
192@d const_like 50 /* \&{const}, \&{volatile} */
193@d raw_int 51 /* \&{int}, \&{char}, \dots; also structure and class names  */
194@d int_like 52 /* same, when not followed by left parenthesis or \DC\ */
195@d case_like 53 /* \&{case}, \&{return}, \&{goto}, \&{break}, \&{continue} */
196@d sizeof_like 54 /* \&{sizeof} */
197@d struct_like 55 /* \&{struct}, \&{union}, \&{enum}, \&{class} */
198@d typedef_like 56 /* \&{typedef} */
199@d define_like 57 /* \&{define} */
200@d template_like 58 /* \&{template} */
201
202@ We keep track of the current section number in |section_count|, which
203is the total number of sections that have started.  Sections which have
204been altered by a change file entry have their |changed_section| flag
205turned on during the first phase.
206
207@<Global...@>=
208boolean change_exists; /* has any section changed? */
209
210@ The other large memory area in \.{CWEAVE} keeps the cross-reference data.
211All uses of the name |p| are recorded in a linked list beginning at
212|p->xref|, which points into the |xmem| array. The elements of |xmem|
213are structures consisting of an integer, |num|, and a pointer |xlink|
214to another element of |xmem|.  If |x=p->xref| is a pointer into |xmem|,
215the value of |x->num| is either a section number where |p| is used,
216or |cite_flag| plus a section number where |p| is mentioned,
217or |def_flag| plus a section number where |p| is defined;
218and |x->xlink| points to the next such cross-reference for |p|,
219if any. This list of cross-references is in decreasing order by
220section number. The next unused slot in |xmem| is |xref_ptr|.
221The linked list ends at |&xmem[0]|.
222
223The global variable |xref_switch| is set either to |def_flag| or to zero,
224depending on whether the next cross-reference to an identifier is to be
225underlined or not in the index. This switch is set to |def_flag| when
226\.{@@!} or \.{@@d} is scanned, and it is cleared to zero when
227the next identifier or index entry cross-reference has been made.
228Similarly, the global variable |section_xref_switch| is either
229|def_flag| or |cite_flag| or zero, depending
230on whether a section name is being defined, cited or used in \CEE/ text.
231
232@<Type...@>=
233typedef struct xref_info {
234  sixteen_bits num; /* section number plus zero or |def_flag| */
235  struct xref_info *xlink; /* pointer to the previous cross-reference */
236} xref_info;
237typedef xref_info *xref_pointer;
238
239@ @<Global...@>=
240xref_info xmem[max_refs]; /* contains cross-reference information */
241xref_pointer xmem_end = xmem+max_refs-1;
242xref_pointer xref_ptr; /* the largest occupied position in |xmem| */
243sixteen_bits xref_switch,section_xref_switch; /* either zero or |def_flag| */
244
245@ A section that is used for multi-file output (with the \.{@@(} feature)
246has a special first cross-reference whose |num| field is |file_flag|.
247
248@d file_flag (3*cite_flag)
249@d def_flag (2*cite_flag)
250@d cite_flag 10240 /* must be strictly larger than |max_sections| */
251@d xref equiv_or_xref
252
253@<Set init...@>=
254xref_ptr=xmem; name_dir->xref=(char*)xmem; xref_switch=0; section_xref_switch=0;
255xmem->num=0; /* sentinel value */
256
257@ A new cross-reference for an identifier is formed by calling |new_xref|,
258which discards duplicate entries and ignores non-underlined references
259to one-letter identifiers or \CEE/'s reserved words.
260
261If the user has sent the |no_xref| flag (the \.{-x} option of the command line),
262it is unnecessary to keep track of cross-references for identifiers.
263If one were careful, one could probably make more changes around section
264100 to avoid a lot of identifier looking up.
265
266@d append_xref(c) if (xref_ptr==xmem_end) overflow("cross-reference");
267  else (++xref_ptr)->num=c;
268@d no_xref (flags['x']==0)
269@d make_xrefs flags['x'] /* should cross references be output? */
270@d is_tiny(p) ((p+1)->byte_start==(p)->byte_start+1)
271@d unindexed(a) (a<res_wd_end && a->ilk>=custom)
272      /* tells if uses of a name are to be indexed */
273
274@c
275void
276new_xref(p)
277name_pointer p;
278{
279  xref_pointer q; /* pointer to previous cross-reference */
280  sixteen_bits m, n; /* new and previous cross-reference value */
281  if (no_xref) return;
282  if ((unindexed(p) || is_tiny(p)) && xref_switch==0) return;
283  m=section_count+xref_switch; xref_switch=0; q=(xref_pointer)p->xref;
284  if (q != xmem) {
285    n=q->num;
286    if (n==m || n==m+def_flag) return;
287    else if (m==n+def_flag) {
288        q->num=m; return;
289    }
290  }
291  append_xref(m); xref_ptr->xlink=q; p->xref=(char*)xref_ptr;
292}
293
294@ The cross-reference lists for section names are slightly different.
295Suppose that a section name is defined in sections $m_1$, \dots,
296$m_k$, cited in sections $n_1$, \dots, $n_l$, and used in sections
297$p_1$, \dots, $p_j$.  Then its list will contain $m_1+|def_flag|$,
298\dots, $m_k+|def_flag|$, $n_1+|cite_flag|$, \dots,
299$n_l+|cite_flag|$, $p_1$, \dots, $p_j$, in this order.
300
301Although this method of storage takes quadratic time with respect to
302the length of the list, under foreseeable uses of \.{CWEAVE} this inefficiency
303is insignificant.
304
305@c
306void
307new_section_xref(p)
308name_pointer p;
309{
310  xref_pointer q,r; /* pointers to previous cross-references */
311  q=(xref_pointer)p->xref; r=xmem;
312  if (q>xmem)
313        while (q->num>section_xref_switch) {r=q; q=q->xlink;}
314  if (r->num==section_count+section_xref_switch)
315        return; /* don't duplicate entries */
316  append_xref(section_count+section_xref_switch);
317  xref_ptr->xlink=q; section_xref_switch=0;
318  if (r==xmem) p->xref=(char*)xref_ptr;
319  else r->xlink=xref_ptr;
320}
321
322@ The cross-reference list for a section name may also begin with
323|file_flag|. Here's how that flag gets put~in.
324
325@c
326void
327set_file_flag(p)
328name_pointer p;
329{
330  xref_pointer q;
331  q=(xref_pointer)p->xref;
332  if (q->num==file_flag) return;
333  append_xref(file_flag);
334  xref_ptr->xlink = q;
335  p->xref = (char *)xref_ptr;
336}
337
338@ A third large area of memory is used for sixteen-bit `tokens', which appear
339in short lists similar to the strings of characters in |byte_mem|. Token lists
340are used to contain the result of \CEE/ code translated into \TEX/ form;
341further details about them will be explained later. A |text_pointer| variable
342is an index into |tok_start|.
343
344@<Typed...@>=
345typedef sixteen_bits token;
346typedef token *token_pointer;
347typedef token_pointer *text_pointer;
348
349@ The first position of |tok_mem|
350that is unoccupied by replacement text is called |tok_ptr|, and the first
351unused location of |tok_start| is called |text_ptr|.
352Thus, we usually have |*text_ptr==tok_ptr|.
353
354@<Global...@>=
355token tok_mem[max_toks]; /* tokens */
356token_pointer tok_mem_end = tok_mem+max_toks-1; /* end of |tok_mem| */
357token_pointer tok_start[max_texts]; /* directory into |tok_mem| */
358token_pointer tok_ptr; /* first unused position in |tok_mem| */
359text_pointer text_ptr; /* first unused position in |tok_start| */
360text_pointer tok_start_end = tok_start+max_texts-1; /* end of |tok_start| */
361token_pointer max_tok_ptr; /* largest value of |tok_ptr| */
362text_pointer max_text_ptr; /* largest value of |text_ptr| */
363
364@ @<Set init...@>=
365tok_ptr=tok_mem+1; text_ptr=tok_start+1; tok_start[0]=tok_mem+1;
366tok_start[1]=tok_mem+1;
367max_tok_ptr=tok_mem+1; max_text_ptr=tok_start+1;
368
369@ Here are the three procedures needed to complete |id_lookup|:
370@c
371int names_match(p,first,l,t)
372name_pointer p; /* points to the proposed match */
373char *first; /* position of first character of string */
374int l; /* length of identifier */
375eight_bits t; /* desired ilk */
376{
377  if (length(p)!=l) return 0;
378  if (p->ilk!=t && !(t==normal && abnormal(p))) return 0;
379  return !strncmp(first,p->byte_start,l);
380}
381
382void
383init_p(p,t)
384name_pointer p;
385eight_bits t;
386{
387  p->ilk=t; p->xref=(char*)xmem;
388}
389
390void
391init_node(p)
392name_pointer p;
393{
394  p->xref=(char*)xmem;
395}
396
397@ We have to get \CEE/'s
398reserved words into the hash table, and the simplest way to do this is
399to insert them every time \.{CWEAVE} is run.  Fortunately there are relatively
400few reserved words. (Some of these are not strictly ``reserved,'' but
401are defined in header files of the ISO Standard \CEE/ Library.)
402@^reserved words@>
403
404@<Store all the reserved words@>=
405id_lookup("and",NULL,alfop);
406id_lookup("and_eq",NULL,alfop);
407id_lookup("asm",NULL,sizeof_like);
408id_lookup("auto",NULL,int_like);
409id_lookup("bitand",NULL,alfop);
410id_lookup("bitor",NULL,alfop);
411id_lookup("bool",NULL,raw_int);
412id_lookup("break",NULL,case_like);
413id_lookup("case",NULL,case_like);
414id_lookup("catch",NULL,catch_like);
415id_lookup("char",NULL,raw_int);
416id_lookup("class",NULL,struct_like);
417id_lookup("clock_t",NULL,raw_int);
418id_lookup("compl",NULL,alfop);
419id_lookup("const",NULL,const_like);
420id_lookup("const_cast",NULL,raw_int);
421id_lookup("continue",NULL,case_like);
422id_lookup("default",NULL,case_like);
423id_lookup("define",NULL,define_like);
424id_lookup("defined",NULL,sizeof_like);
425id_lookup("delete",NULL,delete_like);
426id_lookup("div_t",NULL,raw_int);
427id_lookup("do",NULL,do_like);
428id_lookup("double",NULL,raw_int);
429id_lookup("dynamic_cast",NULL,raw_int);
430id_lookup("elif",NULL,if_like);
431id_lookup("else",NULL,else_like);
432id_lookup("endif",NULL,if_like);
433id_lookup("enum",NULL,struct_like);
434id_lookup("error",NULL,if_like);
435id_lookup("explicit",NULL,int_like);
436id_lookup("export",NULL,int_like);
437id_lookup("extern",NULL,int_like);
438id_lookup("FILE",NULL,raw_int);
439id_lookup("float",NULL,raw_int);
440id_lookup("for",NULL,for_like);
441id_lookup("fpos_t",NULL,raw_int);
442id_lookup("friend",NULL,int_like);
443id_lookup("goto",NULL,case_like);
444id_lookup("if",NULL,if_like);
445id_lookup("ifdef",NULL,if_like);
446id_lookup("ifndef",NULL,if_like);
447id_lookup("include",NULL,if_like);
448id_lookup("inline",NULL,int_like);
449id_lookup("int",NULL,raw_int);
450id_lookup("jmp_buf",NULL,raw_int);
451id_lookup("ldiv_t",NULL,raw_int);
452id_lookup("line",NULL,if_like);
453id_lookup("long",NULL,raw_int);
454id_lookup("mutable",NULL,int_like);
455id_lookup("namespace",NULL,struct_like);
456id_lookup("new",NULL,new_like);
457id_lookup("not",NULL,alfop);
458id_lookup("not_eq",NULL,alfop);
459id_lookup("NULL",NULL,custom);
460id_lookup("offsetof",NULL,raw_int);
461id_lookup("operator",NULL,operator_like);
462id_lookup("or",NULL,alfop);
463id_lookup("or_eq",NULL,alfop);
464id_lookup("pragma",NULL,if_like);
465id_lookup("private",NULL,public_like);
466id_lookup("protected",NULL,public_like);
467id_lookup("ptrdiff_t",NULL,raw_int);
468id_lookup("public",NULL,public_like);
469id_lookup("register",NULL,int_like);
470id_lookup("reinterpret_cast",NULL,raw_int);
471id_lookup("return",NULL,case_like);
472id_lookup("short",NULL,raw_int);
473id_lookup("sig_atomic_t",NULL,raw_int);
474id_lookup("signed",NULL,raw_int);
475id_lookup("size_t",NULL,raw_int);
476id_lookup("sizeof",NULL,sizeof_like);
477id_lookup("static",NULL,int_like);
478id_lookup("static_cast",NULL,raw_int);
479id_lookup("struct",NULL,struct_like);
480id_lookup("switch",NULL,for_like);
481id_lookup("template",NULL,template_like);
482id_lookup("this",NULL,custom);
483id_lookup("throw",NULL,case_like);
484id_lookup("time_t",NULL,raw_int);
485id_lookup("try",NULL,else_like);
486id_lookup("typedef",NULL,typedef_like);
487id_lookup("typeid",NULL,raw_int);
488id_lookup("typename",NULL,struct_like);
489id_lookup("undef",NULL,if_like);
490id_lookup("union",NULL,struct_like);
491id_lookup("unsigned",NULL,raw_int);
492id_lookup("using",NULL,int_like);
493id_lookup("va_dcl",NULL,decl); /* Berkeley's variable-arg-list convention */
494id_lookup("va_list",NULL,raw_int); /* ditto */
495id_lookup("virtual",NULL,int_like);
496id_lookup("void",NULL,raw_int);
497id_lookup("volatile",NULL,const_like);
498id_lookup("wchar_t",NULL,raw_int);
499id_lookup("while",NULL,for_like);
500id_lookup("xor",NULL,alfop);
501id_lookup("xor_eq",NULL,alfop);
502res_wd_end=name_ptr;
503id_lookup("TeX",NULL,custom);
504id_lookup("make_pair",NULL,func_template);
505
506@* Lexical scanning.
507Let us now consider the subroutines that read the \.{CWEB} source file
508and break it into meaningful units. There are four such procedures:
509One simply skips to the next `\.{@@\ }' or `\.{@@*}' that begins a
510section; another passes over the \TEX/ text at the beginning of a
511section; the third passes over the \TEX/ text in a \CEE/ comment;
512and the last, which is the most interesting, gets the next token of
513a \CEE/ text.  They all use the pointers |limit| and |loc| into
514the line of input currently being studied.
515
516@ Control codes in \.{CWEB}, which begin with `\.{@@}', are converted
517into a numeric code designed to simplify \.{CWEAVE}'s logic; for example,
518larger numbers are given to the control codes that denote more significant
519milestones, and the code of |new_section| should be the largest of
520all. Some of these numeric control codes take the place of |char|
521control codes that will not otherwise appear in the output of the
522scanning routines.
523@^ASCII code dependencies@>
524
525@d ignore 00 /* control code of no interest to \.{CWEAVE} */
526@d verbatim 02 /* takes the place of extended ASCII \.{\char2} */
527@d begin_short_comment 03 /* \CPLUSPLUS/ short comment */
528@d begin_comment '\t' /* tab marks will not appear */
529@d underline '\n' /* this code will be intercepted without confusion */
530@d noop 0177 /* takes the place of ASCII delete */
531@d xref_roman 0203 /* control code for `\.{@@\^}' */
532@d xref_wildcard 0204 /* control code for `\.{@@:}' */
533@d xref_typewriter 0205 /* control code for `\.{@@.}' */
534@d TeX_string 0206 /* control code for `\.{@@t}' */
535@f TeX_string TeX
536@d ord 0207 /* control code for `\.{@@'}' */
537@d join 0210 /* control code for `\.{@@\&}' */
538@d thin_space 0211 /* control code for `\.{@@,}' */
539@d math_break 0212 /* control code for `\.{@@\v}' */
540@d line_break 0213 /* control code for `\.{@@/}' */
541@d big_line_break 0214 /* control code for `\.{@@\#}' */
542@d no_line_break 0215 /* control code for `\.{@@+}' */
543@d pseudo_semi 0216 /* control code for `\.{@@;}' */
544@d macro_arg_open 0220 /* control code for `\.{@@[}' */
545@d macro_arg_close 0221 /* control code for `\.{@@]}' */
546@d trace 0222 /* control code for `\.{@@0}', `\.{@@1}' and `\.{@@2}' */
547@d translit_code 0223 /* control code for `\.{@@l}' */
548@d output_defs_code 0224 /* control code for `\.{@@h}' */
549@d format_code 0225 /* control code for `\.{@@f}' and `\.{@@s}' */
550@d definition 0226 /* control code for `\.{@@d}' */
551@d begin_C 0227 /* control code for `\.{@@c}' */
552@d section_name 0230 /* control code for `\.{@@<}' */
553@d new_section 0231 /* control code for `\.{@@\ }' and `\.{@@*}' */
554
555@ Control codes are converted to \.{CWEAVE}'s internal
556representation by means of the table |ccode|.
557
558@<Global...@>=
559eight_bits ccode[256]; /* meaning of a char following \.{@@} */
560
561@ @<Set ini...@>=
562{int c; for (c=0; c<256; c++) ccode[c]=0;}
563ccode[' ']=ccode['\t']=ccode['\n']=ccode['\v']=ccode['\r']=ccode['\f']
564   =ccode['*']=new_section;
565ccode['@@']='@@'; /* `quoted' at sign */
566ccode['=']=verbatim;
567ccode['d']=ccode['D']=definition;
568ccode['f']=ccode['F']=ccode['s']=ccode['S']=format_code;
569ccode['c']=ccode['C']=ccode['p']=ccode['P']=begin_C;
570ccode['t']=ccode['T']=TeX_string;
571ccode['l']=ccode['L']=translit_code;
572ccode['q']=ccode['Q']=noop;
573ccode['h']=ccode['H']=output_defs_code;
574ccode['&']=join; ccode['<']=ccode['(']=section_name;
575ccode['!']=underline; ccode['^']=xref_roman;
576ccode[':']=xref_wildcard; ccode['.']=xref_typewriter; ccode[',']=thin_space;
577ccode['|']=math_break; ccode['/']=line_break; ccode['#']=big_line_break;
578ccode['+']=no_line_break; ccode[';']=pseudo_semi;
579ccode['[']=macro_arg_open; ccode[']']=macro_arg_close;
580ccode['\'']=ord;
581@<Special control codes for debugging@>@;
582
583@ Users can write
584\.{@@2}, \.{@@1}, and \.{@@0} to turn tracing fully on, partly on,
585and off, respectively.
586
587@<Special control codes...@>=
588ccode['0']=ccode['1']=ccode['2']=trace;
589
590@ The |skip_limbo| routine is used on the first pass to skip through
591portions of the input that are not in any sections, i.e., that precede
592the first section. After this procedure has been called, the value of
593|input_has_ended| will tell whether or not a section has actually been found.
594
595There's a complication that we will postpone until later: If the \.{@@s}
596operation appears in limbo, we want to use it to adjust the default
597interpretation of identifiers.
598
599@<Predec...@>=
600void   skip_limbo();
601
602@ @c
603void
604skip_limbo() {
605  while(1) {
606    if (loc>limit && get_line()==0) return;
607    *(limit+1)='@@';
608    while (*loc!='@@') loc++; /* look for '@@', then skip two chars */
609    if (loc++ <=limit) { int c=ccode[(eight_bits)*loc++];
610      if (c==new_section) return;
611      if (c==noop) skip_restricted();
612      else if (c==format_code) @<Process simple format in limbo@>;
613    }
614  }
615}
616
617@ The |skip_TeX| routine is used on the first pass to skip through
618the \TEX/ code at the beginning of a section. It returns the next
619control code or `\.{\v}' found in the input. A |new_section| is
620assumed to exist at the very end of the file.
621
622@f skip_TeX TeX
623
624@c
625unsigned
626skip_TeX() /* skip past pure \TEX/ code */
627{
628  while (1) {
629    if (loc>limit && get_line()==0) return(new_section);
630    *(limit+1)='@@';
631    while (*loc!='@@' && *loc!='|') loc++;
632    if (*loc++ =='|') return('|');
633    if (loc<=limit) return(ccode[(eight_bits)*(loc++)]);
634  }
635}
636
637@*1 Inputting the next token.
638As stated above, \.{CWEAVE}'s most interesting lexical scanning routine is the
639|get_next| function that inputs the next token of \CEE/ input. However,
640|get_next| is not especially complicated.
641
642The result of |get_next| is either a |char| code for some special character,
643or it is a special code representing a pair of characters (e.g., `\.{!=}'),
644or it is the numeric value computed by the |ccode|
645table, or it is one of the following special codes:
646
647\yskip\hang |identifier|: In this case the global variables |id_first| and
648|id_loc| will have been set to the beginning and ending-plus-one locations
649in the buffer, as required by the |id_lookup| routine.
650
651\yskip\hang |string|: The string will have been copied into the array
652|section_text|; |id_first| and |id_loc| are set as above (now they are
653pointers into |section_text|).
654
655\yskip\hang |constant|: The constant is copied into |section_text|, with
656slight modifications; |id_first| and |id_loc| are set.
657
658\yskip\noindent Furthermore, some of the control codes cause
659|get_next| to take additional actions:
660
661\yskip\hang |xref_roman|, |xref_wildcard|, |xref_typewriter|, |TeX_string|,
662|verbatim|: The values of |id_first| and |id_loc| will have been set to
663the beginning and ending-plus-one locations in the buffer.
664
665\yskip\hang |section_name|: In this case the global variable |cur_section| will
666point to the |byte_start| entry for the section name that has just been scanned.
667The value of |cur_section_char| will be |'('| if the section name was
668preceded by \.{@@(} instead of \.{@@<}.
669
670\yskip\noindent If |get_next| sees `\.{@@!}'
671it sets |xref_switch| to |def_flag| and goes on to the next token.
672
673@d constant 0200 /* \CEE/ constant */
674@d string 0201 /* \CEE/ string */
675@d identifier 0202 /* \CEE/ identifier or reserved word */
676
677@<Global...@>=
678name_pointer cur_section; /* name of section just scanned */
679char cur_section_char; /* the character just before that name */
680
681@ @<Include...@>=
682#include <ctype.h> /* definition of |isalpha|, |isdigit| and so on */
683#include <stdlib.h> /* definition of |exit| */
684
685@ As one might expect, |get_next| consists mostly of a big switch
686that branches to the various special cases that can arise.
687\CEE/ allows underscores to appear in identifiers, and some \CEE/
688compilers even allow the dollar sign.
689
690@d isxalpha(c) ((c)=='_' || (c)=='$')
691   /* non-alpha characters allowed in identifier */
692@d ishigh(c) ((eight_bits)(c)>0177)
693@^high-bit character handling@>
694
695@<Predecl...@>=
696eight_bits get_next();
697
698@ @c
699eight_bits
700get_next() /* produces the next input token */
701{@+eight_bits c; /* the current character */
702  while (1) {
703    @<Check if we're at the end of a preprocessor command@>;
704    if (loc>limit && get_line()==0) return(new_section);
705    c=*(loc++);
706    if (xisdigit(c) || c=='.') @<Get a constant@>@;
707    else if (c=='\'' || c=='"' || (c=='L'&&(*loc=='\'' || *loc=='"'))@|
708           || (c=='<' && sharp_include_line==1))
709        @<Get a string@>@;
710    else if (xisalpha(c) || isxalpha(c) || ishigh(c))
711      @<Get an identifier@>@;
712    else if (c=='@@') @<Get control code and possible section name@>@;
713    else if (xisspace(c)) continue; /* ignore spaces and tabs */
714    if (c=='#' && loc==buffer+1) @<Raise preprocessor flag@>;
715    mistake: @<Compress two-symbol operator@>@;
716    return(c);
717  }
718}
719
720@ Because preprocessor commands do not fit in with the rest of the syntax
721of \CEE/,
722we have to deal with them separately.  One solution is to enclose such
723commands between special markers.  Thus, when a \.\# is seen as the
724first character of a line, |get_next| returns a special code
725|left_preproc| and raises a flag |preprocessing|.
726
727We can use the same internal code number for |left_preproc| as we do
728for |ord|, since |get_next| changes |ord| into a string.
729
730@d left_preproc ord /* begins a preprocessor command */
731@d right_preproc 0217 /* ends a preprocessor command */
732
733@<Glob...@>=
734boolean preprocessing=0; /* are we scanning a preprocessor command? */
735
736@ @<Raise prep...@>= {
737  preprocessing=1;
738  @<Check if next token is |include|@>;
739  return (left_preproc);
740}
741
742@ An additional complication is the freakish use of \.< and \.> to delimit
743a file name in lines that start with \.{\#include}.  We must treat this file
744name as a string.
745
746@<Glob...@>=
747boolean sharp_include_line=0; /* are we scanning a |#include| line? */
748
749@ @<Check if next token is |include|@>=
750while (loc<=buffer_end-7 && xisspace(*loc)) loc++;
751if (loc<=buffer_end-6 && strncmp(loc,"include",7)==0) sharp_include_line=1;
752
753@ When we get to the end of a preprocessor line,
754we lower the flag and send a code |right_preproc|, unless
755the last character was a \.\\.
756
757@<Check if we're at...@>=
758  while (loc==limit-1 && preprocessing && *loc=='\\')
759    if (get_line()==0) return(new_section); /* still in preprocessor mode */
760  if (loc>=limit && preprocessing) {
761    preprocessing=sharp_include_line=0;
762    return(right_preproc);
763  }
764
765@ The following code assigns values to the combinations \.{++},
766\.{--}, \.{->}, \.{>=}, \.{<=}, \.{==}, \.{<<}, \.{>>}, \.{!=}, \.{\v\v}, and
767\.{\&\&}, and to the \CPLUSPLUS/
768combinations \.{...}, \.{::}, \.{.*} and \.{->*}.
769The compound assignment operators (e.g., \.{+=}) are
770treated as separate tokens.
771
772@d compress(c) if (loc++<=limit) return(c)
773
774@<Compress tw...@>=
775switch(c) {
776  case '/': if (*loc=='*') {compress(begin_comment);}
777    else if (*loc=='/') compress(begin_short_comment); break;
778  case '+': if (*loc=='+') compress(plus_plus); break;
779  case '-': if (*loc=='-') {compress(minus_minus);}
780    else if (*loc=='>') if (*(loc+1)=='*') {loc++; compress(minus_gt_ast);}
781                        else compress(minus_gt); break;
782  case '.': if (*loc=='*') {compress(period_ast);}
783            else if (*loc=='.' && *(loc+1)=='.') {
784              loc++; compress(dot_dot_dot);
785            }
786            break;
787  case ':': if (*loc==':') compress(colon_colon); break;
788  case '=': if (*loc=='=') compress(eq_eq); break;
789  case '>': if (*loc=='=') {compress(gt_eq);}
790    else if (*loc=='>') compress(gt_gt); break;
791  case '<': if (*loc=='=') {compress(lt_eq);}
792    else if (*loc=='<') compress(lt_lt); break;
793  case '&': if (*loc=='&') compress(and_and); break;
794  case '|': if (*loc=='|') compress(or_or); break;
795  case '!': if (*loc=='=') compress(not_eq); break;
796}
797
798@ @<Get an identifier@>= {
799  id_first=--loc;
800  while (isalpha(*++loc) || isdigit(*loc) || isxalpha(*loc) || ishigh(*loc));
801  id_loc=loc; return(identifier);
802}
803
804@ Different conventions are followed by \TEX/ and \CEE/ to express octal
805and hexadecimal numbers; it is reasonable to stick to each convention
806within its realm.  Thus the \CEE/ part of a \.{CWEB} file has octals
807introduced by \.0 and hexadecimals by \.{0x}, but \.{CWEAVE} will print
808with \TEX/ macros that the user can redefine to fit the context.
809In order to simplify such macros, we replace some of the characters.
810
811Notice that in this section and the next, |id_first| and |id_loc|
812are pointers into the array |section_text|, not into |buffer|.
813
814@<Get a constant@>= {
815  id_first=id_loc=section_text+1;
816  if (*(loc-1)=='0') {
817    if (*loc=='x' || *loc=='X') {*id_loc++='^'; loc++;
818      while (xisxdigit(*loc)) *id_loc++=*loc++;} /* hex constant */
819    else if (xisdigit(*loc)) {*id_loc++='~';
820      while (xisdigit(*loc)) *id_loc++=*loc++;} /* octal constant */
821    else goto dec; /* decimal constant */
822  }
823  else { /* decimal constant */
824    if (*(loc-1)=='.' && !xisdigit(*loc)) goto mistake; /* not a constant */
825    dec: *id_loc++=*(loc-1);
826    while (xisdigit(*loc) || *loc=='.') *id_loc++=*loc++;
827    if (*loc=='e' || *loc=='E') { /* float constant */
828      *id_loc++='_'; loc++;
829      if (*loc=='+' || *loc=='-') *id_loc++=*loc++;
830      while (xisdigit(*loc)) *id_loc++=*loc++;
831    }
832  }
833  while (*loc=='u' || *loc=='U' || *loc=='l' || *loc=='L'
834         || *loc=='f' || *loc=='F') {
835    *id_loc++='$'; *id_loc++=toupper(*loc); loc++;
836  }
837  return(constant);
838}
839
840@ \CEE/ strings and character constants, delimited by double and single
841quotes, respectively, can contain newlines or instances of their own
842delimiters if they are protected by a backslash.  We follow this
843convention, but do not allow the string to be longer than |longest_name|.
844
845@<Get a string@>= {
846  char delim = c; /* what started the string */
847  id_first = section_text+1;
848  id_loc = section_text;
849  if (delim=='\'' && *(loc-2)=='@@') {*++id_loc='@@'; *++id_loc='@@';}
850  *++id_loc=delim;
851  if (delim=='L') { /* wide character constant */
852    delim=*loc++; *++id_loc=delim;
853  }
854  if (delim=='<') delim='>'; /* for file names in |#include| lines */
855  while (1) {
856    if (loc>=limit) {
857      if(*(limit-1)!='\\') {
858        err_print("! String didn't end"); loc=limit; break;
859@.String didn't end@>
860      }
861      if(get_line()==0) {
862        err_print("! Input ended in middle of string"); loc=buffer; break;
863@.Input ended in middle of string@>
864      }
865    }
866    if ((c=*loc++)==delim) {
867      if (++id_loc<=section_text_end) *id_loc=c;
868      break;
869    }
870    if (c=='\\') if (loc>=limit) continue;
871      else if (++id_loc<=section_text_end) {
872        *id_loc = '\\'; c=*loc++;
873      }
874    if (++id_loc<=section_text_end) *id_loc=c;
875  }
876  if (id_loc>=section_text_end) {
877    printf("\n! String too long: ");
878@.String too long@>
879    term_write(section_text+1,25);
880    printf("..."); mark_error;
881  }
882  id_loc++;
883  return(string);
884}
885
886@ After an \.{@@} sign has been scanned, the next character tells us
887whether there is more work to do.
888
889@<Get control code and possible section name@>= {
890  c=*loc++;
891  switch(ccode[(eight_bits)c]) {
892    case translit_code: err_print("! Use @@l in limbo only"); continue;
893@.Use @@l in limbo...@>
894    case underline: xref_switch=def_flag; continue;
895    case trace: tracing=c-'0'; continue;
896    case xref_roman: case xref_wildcard: case xref_typewriter:
897    case noop: case TeX_string: c=ccode[c]; skip_restricted(); return(c);
898    case section_name:
899      @<Scan the section name and make |cur_section| point to it@>;
900    case verbatim: @<Scan a verbatim string@>;
901    case ord: @<Get a string@>;
902    default: return(ccode[(eight_bits)c]);
903  }
904}
905
906@ The occurrence of a section name sets |xref_switch| to zero,
907because the section name might (for example) follow \&{int}.
908
909@<Scan the section name...@>= {
910  char *k; /* pointer into |section_text| */
911  cur_section_char=*(loc-1);
912  @<Put section name into |section_text|@>;
913  if (k-section_text>3 && strncmp(k-2,"...",3)==0)
914        cur_section=section_lookup(section_text+1,k-3,1); /* 1 indicates a prefix */
915  else cur_section=section_lookup(section_text+1,k,0);
916  xref_switch=0; return(section_name);
917}
918
919@ Section names are placed into the |section_text| array with consecutive spaces,
920tabs, and carriage-returns replaced by single spaces. There will be no
921spaces at the beginning or the end. (We set |section_text[0]=' '| to facilitate
922this, since the |section_lookup| routine uses |section_text[1]| as the first
923character of the name.)
924
925@<Set init...@>=section_text[0]=' ';
926
927@ @<Put section name...@>=
928k=section_text;
929while (1) {
930  if (loc>limit && get_line()==0) {
931    err_print("! Input ended in section name");
932@.Input ended in section name@>
933    loc=buffer+1; break;
934  }
935  c=*loc;
936  @<If end of name or erroneous control code, |break|@>;
937  loc++; if (k<section_text_end) k++;
938  if (xisspace(c)) {
939    c=' '; if (*(k-1)==' ') k--;
940  }
941*k=c;
942}
943if (k>=section_text_end) {
944  printf("\n! Section name too long: ");
945@.Section name too long@>
946  term_write(section_text+1,25);
947  printf("..."); mark_harmless;
948}
949if (*k==' ' && k>section_text) k--;
950
951@ @<If end of name...@>=
952if (c=='@@') {
953  c=*(loc+1);
954  if (c=='>') {
955    loc+=2; break;
956  }
957  if (ccode[(eight_bits)c]==new_section) {
958    err_print("! Section name didn't end"); break;
959@.Section name didn't end@>
960  }
961  if (c!='@@') {
962    err_print("! Control codes are forbidden in section name"); break;
963@.Control codes are forbidden...@>
964  }
965  *(++k)='@@'; loc++; /* now |c==*loc| again */
966}
967
968@ This function skips over a restricted context at relatively high speed.
969
970@<Predecl...@>=
971void skip_restricted();
972
973@ @c
974void
975skip_restricted()
976{
977  id_first=loc; *(limit+1)='@@';
978false_alarm:
979  while (*loc!='@@') loc++;
980  id_loc=loc;
981  if (loc++>limit) {
982    err_print("! Control text didn't end"); loc=limit;
983@.Control text didn't end@>
984  }
985  else {
986    if (*loc=='@@'&&loc<=limit) {loc++; goto false_alarm;}
987    if (*loc++!='>')
988      err_print("! Control codes are forbidden in control text");
989@.Control codes are forbidden...@>
990  }
991}
992
993@ At the present point in the program we
994have |*(loc-1)==verbatim|; we set |id_first| to the beginning
995of the string itself, and |id_loc| to its ending-plus-one location in the
996buffer.  We also set |loc| to the position just after the ending delimiter.
997
998@<Scan a verbatim string@>= {
999  id_first=loc++; *(limit+1)='@@'; *(limit+2)='>';
1000  while (*loc!='@@' || *(loc+1)!='>') loc++;
1001  if (loc>=limit) err_print("! Verbatim string didn't end");
1002@.Verbatim string didn't end@>
1003  id_loc=loc; loc+=2;
1004  return (verbatim);
1005}
1006
1007@** Phase one processing.
1008We now have accumulated enough subroutines to make it possible to carry out
1009\.{CWEAVE}'s first pass over the source file. If everything works right,
1010both phase one and phase two of \.{CWEAVE} will assign the same numbers to
1011sections, and these numbers will agree with what \.{CTANGLE} does.
1012
1013The global variable |next_control| often contains the most recent output of
1014|get_next|; in interesting cases, this will be the control code that
1015ended a section or part of a section.
1016
1017@<Global...@>=
1018eight_bits next_control; /* control code waiting to be acting upon */
1019
1020@ The overall processing strategy in phase one has the following
1021straightforward outline.
1022
1023@<Predecl...@>=
1024void phase_one();
1025
1026@ @c
1027void
1028phase_one() {
1029  phase=1; reset_input(); section_count=0;
1030  skip_limbo(); change_exists=0;
1031  while (!input_has_ended)
1032    @<Store cross-reference data for the current section@>;
1033  changed_section[section_count]=change_exists;
1034    /* the index changes if anything does */
1035  phase=2; /* prepare for second phase */
1036  @<Print error messages about unused or undefined section names@>;
1037}
1038
1039@ @<Store cross-reference data...@>=
1040{
1041  if (++section_count==max_sections) overflow("section number");
1042  changed_section[section_count]=changing;
1043     /* it will become 1 if any line changes */
1044  if (*(loc-1)=='*' && show_progress) {
1045    printf("*%d",section_count);
1046    update_terminal; /* print a progress report */
1047  }
1048  @<Store cross-references in the \TEX/ part of a section@>;
1049  @<Store cross-references in the definition part of a section@>;
1050  @<Store cross-references in the \CEE/ part of a section@>;
1051  if (changed_section[section_count]) change_exists=1;
1052}
1053
1054@ The |C_xref| subroutine stores references to identifiers in
1055\CEE/ text material beginning with the current value of |next_control|
1056and continuing until |next_control| is `\.\{' or `\.{\v}', or until the next
1057``milestone'' is passed (i.e., |next_control>=format_code|). If
1058|next_control>=format_code| when |C_xref| is called, nothing will happen;
1059but if |next_control=='|'| upon entry, the procedure assumes that this is
1060the `\.{\v}' preceding \CEE/ text that is to be processed.
1061
1062The parameter |spec_ctrl| is used to change this behavior. In most cases
1063|C_xref| is called with |spec_ctrl==ignore|, which triggers the default
1064processing described above. If |spec_ctrl==section_name|, section names will
1065be gobbled. This is used when \CEE/ text in the \TEX/ part or inside comments
1066is parsed: It allows for section names to appear in \pb, but these
1067strings will not be entered into the cross reference lists since they are not
1068definitions of section names.
1069
1070The program uses the fact that our internal code numbers satisfy
1071the relations |xref_roman==identifier+roman| and |xref_wildcard==identifier
1072+wildcard| and |xref_typewriter==identifier+typewriter|,
1073as well as |normal==0|.
1074
1075@<Predecl...@>=
1076void C_xref();
1077
1078@ @c
1079void
1080C_xref( spec_ctrl ) /* makes cross-references for \CEE/ identifiers */
1081  eight_bits spec_ctrl;
1082{
1083  name_pointer p; /* a referenced name */
1084  while (next_control<format_code || next_control==spec_ctrl) {
1085    if (next_control>=identifier && next_control<=xref_typewriter) {
1086      if (next_control>identifier) @<Replace |"@@@@"| by |"@@"| @>@;
1087      p=id_lookup(id_first, id_loc,next_control-identifier); new_xref(p);
1088    }
1089    if (next_control==section_name) {
1090      section_xref_switch=cite_flag;
1091      new_section_xref(cur_section);
1092    }
1093    next_control=get_next();
1094    if (next_control=='|' || next_control==begin_comment ||
1095        next_control==begin_short_comment) return;
1096  }
1097}
1098
1099@ The |outer_xref| subroutine is like |C_xref| except that it begins
1100with |next_control!='|'| and ends with |next_control>=format_code|. Thus, it
1101handles \CEE/ text with embedded comments.
1102
1103@<Predecl...@>=
1104void outer_xref();
1105
1106@ @c
1107void
1108outer_xref() /* extension of |C_xref| */
1109{
1110  int bal; /* brace level in comment */
1111  while (next_control<format_code)
1112    if (next_control!=begin_comment && next_control!=begin_short_comment)
1113      C_xref(ignore);
1114    else {
1115      boolean is_long_comment=(next_control==begin_comment);
1116      bal=copy_comment(is_long_comment,1); next_control='|';
1117      while (bal>0) {
1118        C_xref(section_name); /* do not reference section names in comments */
1119        if (next_control=='|') bal=copy_comment(is_long_comment,bal);
1120        else bal=0; /* an error message will occur in phase two */
1121      }
1122    }
1123}
1124
1125@ In the \TEX/ part of a section, cross-reference entries are made only for
1126the identifiers in \CEE/ texts enclosed in \pb, or for control texts
1127enclosed in \.{@@\^}$\,\ldots\,$\.{@@>} or \.{@@.}$\,\ldots\,$\.{@@>}
1128or \.{@@:}$\,\ldots\,$\.{@@>}.
1129
1130@<Store cross-references in the \T...@>=
1131while (1) {
1132  switch (next_control=skip_TeX()) {
1133    case translit_code: err_print("! Use @@l in limbo only"); continue;
1134@.Use @@l in limbo...@>
1135    case underline: xref_switch=def_flag; continue;
1136    case trace: tracing=*(loc-1)-'0'; continue;
1137    case '|': C_xref(section_name); break;
1138    case xref_roman: case xref_wildcard: case xref_typewriter:
1139    case noop: case section_name:
1140      loc-=2; next_control=get_next(); /* scan to \.{@@>} */
1141      if (next_control>=xref_roman && next_control<=xref_typewriter) {
1142        @<Replace |"@@@@"| by |"@@"| @>@;
1143        new_xref(id_lookup(id_first, id_loc,next_control-identifier));
1144      }
1145      break;
1146  }
1147  if (next_control>=format_code) break;
1148}
1149
1150@ @<Replace |"@@@@"| by |"@@"| @>=
1151{
1152  char *src=id_first,*dst=id_first;
1153  while(src<id_loc){
1154    if(*src=='@@') src++;
1155    *dst++=*src++;
1156  }
1157  id_loc=dst;
1158  while (dst<src) *dst++=' '; /* clean up in case of error message display */
1159}
1160
1161@ During the definition and \CEE/ parts of a section, cross-references
1162are made for all identifiers except reserved words. However, the right
1163identifier in a format definition is not referenced, and the left
1164identifier is referenced only if it has been explicitly
1165underlined (preceded by \.{@@!}).
1166The \TEX/ code in comments is, of course, ignored, except for
1167\CEE/ portions enclosed in \pb; the text of a section name is skipped
1168entirely, even if it contains \pb\ constructions.
1169
1170The variables |lhs| and |rhs| point to the respective identifiers involved
1171in a format definition.
1172
1173@<Global...@>=
1174name_pointer lhs, rhs; /* pointers to |byte_start| for format identifiers */
1175name_pointer res_wd_end; /* pointer to the first nonreserved identifier */
1176
1177@ When we get to the following code we have |next_control>=format_code|.
1178
1179@<Store cross-references in the d...@>=
1180while (next_control<=definition) { /* |format_code| or |definition| */
1181  if (next_control==definition) {
1182    xref_switch=def_flag; /* implied \.{@@!} */
1183    next_control=get_next();
1184  } else @<Process a format definition@>;
1185  outer_xref();
1186}
1187
1188@ Error messages for improper format definitions will be issued in phase
1189two. Our job in phase one is to define the |ilk| of a properly formatted
1190identifier, and to remove cross-references to identifiers that we now
1191discover should be unindexed.
1192
1193@<Process a form...@>= {
1194  next_control=get_next();
1195  if (next_control==identifier) {
1196    lhs=id_lookup(id_first, id_loc,normal); lhs->ilk=normal;
1197    if (xref_switch) new_xref(lhs);
1198    next_control=get_next();
1199    if (next_control==identifier) {
1200      rhs=id_lookup(id_first, id_loc,normal);
1201      lhs->ilk=rhs->ilk;
1202      if (unindexed(lhs)) { /* retain only underlined entries */
1203        xref_pointer q,r=NULL;
1204        for (q=(xref_pointer)lhs->xref;q>xmem;q=q->xlink)
1205          if (q->num<def_flag)
1206            if (r) r->xlink=q->xlink;
1207            else lhs->xref=(char*)q->xlink;
1208          else r=q;
1209      }
1210      next_control=get_next();
1211    }
1212  }
1213}
1214
1215@ A much simpler processing of format definitions occurs when the
1216definition is found in limbo.
1217
1218@<Process simple format in limbo@>=
1219{
1220  if (get_next()!=identifier)
1221    err_print("! Missing left identifier of @@s");
1222@.Missing left identifier...@>
1223  else {
1224    lhs=id_lookup(id_first,id_loc,normal);
1225    if (get_next()!=identifier)
1226      err_print("! Missing right identifier of @@s");
1227@.Missing right identifier...@>
1228    else {
1229      rhs=id_lookup(id_first,id_loc,normal);
1230      lhs->ilk=rhs->ilk;
1231    }
1232  }
1233}
1234
1235@ Finally, when the \TEX/ and definition parts have been treated, we have
1236|next_control>=begin_C|.
1237
1238@<Store cross-references in the \CEE/...@>=
1239if (next_control<=section_name) {  /* |begin_C| or |section_name| */
1240  if (next_control==begin_C) section_xref_switch=0;
1241  else {
1242    section_xref_switch=def_flag;
1243    if(cur_section_char=='(' && cur_section!=name_dir)
1244      set_file_flag(cur_section);
1245  }
1246  do {
1247    if (next_control==section_name && cur_section!=name_dir)
1248      new_section_xref(cur_section);
1249    next_control=get_next(); outer_xref();
1250  } while ( next_control<=section_name);
1251}
1252
1253@ After phase one has looked at everything, we want to check that each
1254section name was both defined and used.  The variable |cur_xref| will point
1255to cross-references for the current section name of interest.
1256
1257@<Global...@>=
1258xref_pointer cur_xref; /* temporary cross-reference pointer */
1259boolean an_output; /* did |file_flag| precede |cur_xref|? */
1260
1261@ The following recursive procedure
1262walks through the tree of section names and prints out anomalies.
1263@^recursion@>
1264
1265@<Predecl...@>=
1266void section_check();
1267
1268@ @c
1269void
1270section_check(p)
1271name_pointer p; /* print anomalies in subtree |p| */
1272{
1273  if (p) {
1274    section_check(p->llink);
1275    cur_xref=(xref_pointer)p->xref;
1276    if (cur_xref->num==file_flag) {an_output=1; cur_xref=cur_xref->xlink;}
1277    else an_output=0;
1278    if (cur_xref->num <def_flag) {
1279      printf("\n! Never defined: <"); print_section_name(p); putchar('>'); mark_harmless;
1280@.Never defined: <section name>@>
1281    }
1282    while (cur_xref->num >=cite_flag) cur_xref=cur_xref->xlink;
1283    if (cur_xref==xmem && !an_output) {
1284      printf("\n! Never used: <"); print_section_name(p); putchar('>'); mark_harmless;
1285@.Never used: <section name>@>
1286    }
1287    section_check(p->rlink);
1288  }
1289}
1290
1291@ @<Print error messages about un...@>=section_check(root)
1292
1293@* Low-level output routines.
1294The \TEX/ output is supposed to appear in lines at most |line_length|
1295characters long, so we place it into an output buffer. During the output
1296process, |out_line| will hold the current line number of the line about to
1297be output.
1298
1299@<Global...@>=
1300char out_buf[line_length+1]; /* assembled characters */
1301char *out_ptr; /* just after last character in |out_buf| */
1302char *out_buf_end = out_buf+line_length; /* end of |out_buf| */
1303int out_line; /* number of next line to be output */
1304
1305@ The |flush_buffer| routine empties the buffer up to a given breakpoint,
1306and moves any remaining characters to the beginning of the next line.
1307If the |per_cent| parameter is 1 a |'%'| is appended to the line
1308that is being output; in this case the breakpoint |b| should be strictly
1309less than |out_buf_end|. If the |per_cent| parameter is |0|,
1310trailing blanks are suppressed.
1311The characters emptied from the buffer form a new line of output;
1312if the |carryover| parameter is true, a |"%"| in that line will be
1313carried over to the next line (so that \TEX/ will ignore the completion
1314of commented-out text).
1315
1316@d c_line_write(c) fflush(active_file),fwrite(out_buf+1,sizeof(char),c,active_file)
1317@d tex_putc(c) putc(c,active_file)
1318@d tex_new_line putc('\n',active_file)
1319@d tex_printf(c) fprintf(active_file,c)
1320
1321@c
1322void
1323flush_buffer(b,per_cent,carryover)
1324char *b;  /* outputs from |out_buf+1| to |b|,where |b<=out_ptr| */
1325boolean per_cent,carryover;
1326{
1327  char *j; j=b; /* pointer into |out_buf| */
1328  if (! per_cent) /* remove trailing blanks */
1329    while (j>out_buf && *j==' ') j--;
1330  c_line_write(j-out_buf);
1331  if (per_cent) tex_putc('%');
1332  tex_new_line; out_line++;
1333  if (carryover)
1334    while (j>out_buf)
1335      if (*j--=='%' && (j==out_buf || *j!='\\')) {
1336        *b--='%'; break;
1337      }
1338  if (b<out_ptr) strncpy(out_buf+1,b+1,out_ptr-b);
1339  out_ptr-=b-out_buf;
1340}
1341
1342@ When we are copying \TEX/ source material, we retain line breaks
1343that occur in the input, except that an empty line is not
1344output when the \TEX/ source line was nonempty. For example, a line
1345of the \TEX/ file that contains only an index cross-reference entry
1346will not be copied. The |finish_line| routine is called just before
1347|get_line| inputs a new line, and just after a line break token has
1348been emitted during the output of translated \CEE/ text.
1349
1350@c
1351void
1352finish_line() /* do this at the end of a line */
1353{
1354  char *k; /* pointer into |buffer| */
1355  if (out_ptr>out_buf) flush_buffer(out_ptr,0,0);
1356  else {
1357    for (k=buffer; k<=limit; k++)
1358      if (!(xisspace(*k))) return;
1359    flush_buffer(out_buf,0,0);
1360  }
1361}
1362
1363@ In particular, the |finish_line| procedure is called near the very
1364beginning of phase two. We initialize the output variables in a slightly
1365tricky way so that the first line of the output file will be
1366`\.{\\input cwebmac}'.
1367
1368@<Set init...@>=
1369out_ptr=out_buf+1; out_line=1; active_file=tex_file;
1370*out_ptr='c'; tex_printf("\\input cwebma");
1371
1372@ When we wish to append one character |c| to the output buffer, we write
1373`|out(c)|'; this will cause the buffer to be emptied if it was already
1374full.  If we want to append more than one character at once, we say
1375|out_str(s)|, where |s| is a string containing the characters.
1376
1377A line break will occur at a space or after a single-nonletter
1378\TEX/ control sequence.
1379
1380@d out(c) {if (out_ptr>=out_buf_end) break_out(); *(++out_ptr)=c;}
1381
1382@c
1383void
1384out_str(s) /* output characters from |s| to end of string */
1385char *s;
1386{
1387  while (*s) out(*s++);
1388}
1389
1390@ The |break_out| routine is called just before the output buffer is about
1391to overflow. To make this routine a little faster, we initialize position
13920 of the output buffer to `\.\\'; this character isn't really output.
1393
1394@<Set init...@>=
1395out_buf[0]='\\';
1396
1397@ A long line is broken at a blank space or just before a backslash that isn't
1398preceded by another backslash. In the latter case, a |'%'| is output at
1399the break.
1400
1401@<Predecl...@>=
1402void break_out();
1403
1404@ @c
1405void
1406break_out() /* finds a way to break the output line */
1407{
1408  char *k=out_ptr; /* pointer into |out_buf| */
1409  while (1) {
1410    if (k==out_buf) @<Print warning message, break the line, |return|@>;
1411    if (*k==' ') {
1412      flush_buffer(k,0,1); return;
1413    }
1414    if (*(k--)=='\\' && *k!='\\') { /* we've decreased |k| */
1415      flush_buffer(k,1,1); return;
1416    }
1417  }
1418}
1419
1420@ We get to this section only in the unusual case that the entire output line
1421consists of a string of backslashes followed by a string of nonblank
1422non-backslashes. In such cases it is almost always safe to break the
1423line by putting a |'%'| just before the last character.
1424
1425@<Print warning message...@>=
1426{
1427  printf("\n! Line had to be broken (output l. %d):\n",out_line);
1428@.Line had to be broken@>
1429  term_write(out_buf+1, out_ptr-out_buf-1);
1430  new_line; mark_harmless;
1431  flush_buffer(out_ptr-1,1,1); return;
1432}
1433
1434@ Here is a macro that outputs a section number in decimal notation.
1435The number to be converted by |out_section| is known to be less than
1436|def_flag|, so it cannot have more than five decimal digits.  If
1437the section is changed, we output `\.{\\*}' just after the number.
1438
1439@c
1440void
1441out_section(n)
1442sixteen_bits n;
1443{
1444  char s[6];
1445  sprintf(s,"%d",n); out_str(s);
1446  if(changed_section[n]) out_str ("\\*");
1447@.\\*@>
1448}
1449
1450@ The |out_name| procedure is used to output an identifier or index
1451entry, enclosing it in braces.
1452
1453@c
1454void
1455out_name(p,quote_xalpha)
1456name_pointer p;
1457boolean quote_xalpha;
1458{
1459  char *k, *k_end=(p+1)->byte_start; /* pointers into |byte_mem| */
1460  out('{');
1461  for (k=p->byte_start; k<k_end; k++) {
1462    if (isxalpha(*k) && quote_xalpha) out('\\');
1463@.\\\$@>
1464@.\\\_@>
1465    out(*k);
1466  }
1467  out('}');
1468}
1469
1470@* Routines that copy \TEX/ material.
1471During phase two, we use subroutines |copy_limbo|, |copy_TeX|, and
1472|copy_comment| in place of the analogous |skip_limbo|, |skip_TeX|, and
1473|skip_comment| that were used in phase one. (Well, |copy_comment|
1474was actually written in such a way that it functions as |skip_comment|
1475in phase one.)
1476
1477The |copy_limbo| routine, for example, takes \TEX/ material that is not
1478part of any section and transcribes it almost verbatim to the output file.
1479The use of `\.{@@}' signs is severely restricted in such material:
1480`\.{@@@@}' pairs are replaced by singletons; `\.{@@l}' and `\.{@@q}' and
1481`\.{@@s}' are interpreted.
1482
1483@c
1484void
1485copy_limbo()
1486{
1487  char c;
1488  while (1) {
1489    if (loc>limit && (finish_line(), get_line()==0)) return;
1490    *(limit+1)='@@';
1491    while (*loc!='@@') out(*(loc++));
1492    if (loc++<=limit) {
1493      c=*loc++;
1494      if (ccode[(eight_bits)c]==new_section) break;
1495      switch (ccode[(eight_bits)c]) {
1496        case translit_code: out_str("\\ATL"); break;
1497@.\\ATL@>
1498        case '@@': out('@@'); break;
1499        case noop: skip_restricted(); break;
1500        case format_code: if (get_next()==identifier) get_next();
1501          if (loc>=limit) get_line(); /* avoid blank lines in output */
1502          break; /* the operands of \.{@@s} are ignored on this pass */
1503        default: err_print("! Double @@ should be used in limbo");
1504@.Double @@ should be used...@>
1505        out('@@');
1506      }
1507    }
1508  }
1509}
1510
1511@ The |copy_TeX| routine processes the \TEX/ code at the beginning of a
1512section; for example, the words you are now reading were copied in this
1513way. It returns the next control code or `\.{\v}' found in the input.
1514We don't copy spaces or tab marks into the beginning of a line. This
1515makes the test for empty lines in |finish_line| work.
1516
1517@ @f copy_TeX TeX
1518@c
1519eight_bits
1520copy_TeX()
1521{
1522  char c; /* current character being copied */
1523  while (1) {
1524    if (loc>limit && (finish_line(), get_line()==0)) return(new_section);
1525    *(limit+1)='@@';
1526    while ((c=*(loc++))!='|' && c!='@@') {
1527      out(c);
1528      if (out_ptr==out_buf+1 && (xisspace(c))) out_ptr--;
1529    }
1530    if (c=='|') return('|');
1531    if (loc<=limit) return(ccode[(eight_bits)*(loc++)]);
1532  }
1533}
1534
1535@ The |copy_comment| function issues a warning if more braces are opened than
1536closed, and in the case of a more serious error it supplies enough
1537braces to keep \TEX/ from complaining about unbalanced braces.
1538Instead of copying the \TEX/ material
1539into the output buffer, this function copies it into the token memory
1540(in phase two only).
1541The abbreviation |app_tok(t)| is used to append token |t| to the current
1542token list, and it also makes sure that it is possible to append at least
1543one further token without overflow.
1544
1545@d app_tok(c) {if (tok_ptr+2>tok_mem_end) overflow("token"); *(tok_ptr++)=c;}
1546
1547@<Predec...@>=
1548int copy_comment();
1549
1550@ @c
1551int copy_comment(is_long_comment,bal) /* copies \TEX/ code in comments */
1552boolean is_long_comment; /* is this a traditional \CEE/ comment? */
1553int bal; /* brace balance */
1554{
1555  char c; /* current character being copied */
1556  while (1) {
1557    if (loc>limit) {
1558      if (is_long_comment) {
1559        if (get_line()==0) {
1560          err_print("! Input ended in mid-comment");
1561@.Input ended in mid-comment@>
1562          loc=buffer+1; goto done;
1563        }
1564      }
1565      else {
1566        if (bal>1) err_print("! Missing } in comment");
1567@.Missing \} in comment@>
1568        goto done;
1569      }
1570    }
1571    c=*(loc++);
1572    if (c=='|') return(bal);
1573    if (is_long_comment) @<Check for end of comment@>;
1574    if (phase==2) {
1575      if (ishigh(c)) app_tok(quoted_char);
1576      app_tok(c);
1577    }
1578    @<Copy special things when |c=='@@', '\\'|@>;
1579    if (c=='{') bal++;
1580    else if (c=='}') {
1581      if(bal>1) bal--;
1582      else {err_print("! Extra } in comment");
1583@.Extra \} in comment@>
1584        if (phase==2) tok_ptr--;
1585      }
1586    }
1587  }
1588done:@<Clear |bal| and |return|@>;
1589}
1590
1591@ @<Check for end of comment@>=
1592if (c=='*' && *loc=='/') {
1593  loc++;
1594  if (bal>1) err_print("! Missing } in comment");
1595@.Missing \} in comment@>
1596  goto done;
1597}
1598
1599@ @<Copy special things when |c=='@@'...@>=
1600if (c=='@@') {
1601  if (*(loc++)!='@@') {
1602    err_print("! Illegal use of @@ in comment");
1603@.Illegal use of @@...@>
1604    loc-=2; if (phase==2) *(tok_ptr-1)=' '; goto done;
1605  }
1606}
1607else if (c=='\\' && *loc!='@@')
1608  if (phase==2) app_tok(*(loc++)) else loc++;
1609
1610@ We output
1611enough right braces to keep \TEX/ happy.
1612
1613@<Clear |bal|...@>=
1614if (phase==2) while (bal-- >0) app_tok('}');
1615return(0);
1616
1617@** Parsing.
1618The most intricate part of \.{CWEAVE} is its mechanism for converting
1619\CEE/-like code into \TEX/ code, and we might as well plunge into this
1620aspect of the program now. A ``bottom up'' approach is used to parse the
1621\CEE/-like material, since \.{CWEAVE} must deal with fragmentary
1622constructions whose overall ``part of speech'' is not known.
1623
1624At the lowest level, the input is represented as a sequence of entities
1625that we shall call {\it scraps}, where each scrap of information consists
1626of two parts, its {\it category} and its {\it translation}. The category
1627is essentially a syntactic class, and the translation is a token list that
1628represents \TEX/ code. Rules of syntax and semantics tell us how to
1629combine adjacent scraps into larger ones, and if we are lucky an entire
1630\CEE/ text that starts out as hundreds of small scraps will join
1631together into one gigantic scrap whose translation is the desired \TEX/
1632code. If we are unlucky, we will be left with several scraps that don't
1633combine; their translations will simply be output, one by one.
1634
1635The combination rules are given as context-sensitive productions that are
1636applied from left to right. Suppose that we are currently working on the
1637sequence of scraps $s_1\,s_2\ldots s_n$. We try first to find the longest
1638production that applies to an initial substring $s_1\,s_2\ldots\,$; but if
1639no such productions exist, we try to find the longest production
1640applicable to the next substring $s_2\,s_3\ldots\,$; and if that fails, we
1641try to match $s_3\,s_4\ldots\,$, etc.
1642
1643A production applies if the category codes have a given pattern. For
1644example, one of the productions (see rule~3) is
1645$$\hbox{|exp| }\left\{\matrix{\hbox{|binop|}\cr\hbox{|ubinop|}}\right\}
1646\hbox{ |exp| }\RA\hbox{ |exp|}$$
1647and it means that three consecutive scraps whose respective categories are
1648|exp|, |binop| (or |ubinop|),
1649and |exp| are converted to one scrap whose category
1650is |exp|.  The translations of the original
1651scraps are simply concatenated.  The case of
1652$$\hbox{|exp| |comma| |exp| $\RA$ |exp|} \hskip4emE_1C\,\\{opt}9\,E_2$$
1653(rule 4) is only slightly more complicated:
1654Here the resulting |exp| translation
1655consists not only of the three original translations, but also of the
1656tokens |opt| and 9 between the translations of the
1657|comma| and the following |exp|.
1658In the \TEX/ file, this will specify an optional line break after the
1659comma, with penalty 90.
1660
1661At each opportunity the longest possible production is applied.  For
1662example, if the current sequence of scraps is |int_like| |cast|
1663|lbrace|, rule 31 is applied; but if the sequence is |int_like| |cast|
1664followed by anything other than |lbrace|, rule 32 takes effect.
1665
1666Translation rules such as `$E_1C\,\\{opt}9\,E_2$' above use subscripts
1667to distinguish between translations of scraps whose categories have the
1668same initial letter; these subscripts are assigned from left to right.
1669
1670@ Here is a list of the category codes that scraps can have.
1671(A few others, like |int_like|, have already been defined; the
1672|cat_name| array contains a complete list.)
1673
1674@d exp 1 /* denotes an expression, including perhaps a single identifier */
1675@d unop 2 /* denotes a unary operator */
1676@d binop 3 /* denotes a binary operator */
1677@d ubinop 4
1678  /* denotes an operator that can be unary or binary, depending on context */
1679@d cast 5 /* denotes a cast */
1680@d question 6 /* denotes a question mark and possibly the expressions flanking it */
1681@d lbrace 7 /* denotes a left brace */
1682@d rbrace 8 /* denotes a right brace */
1683@d decl_head 9 /* denotes an incomplete declaration */
1684@d comma 10 /* denotes a comma */
1685@d lpar 11 /* denotes a left parenthesis or left bracket */
1686@d rpar 12 /* denotes a right parenthesis or right bracket */
1687@d prelangle 13 /* denotes `$<$' before we know what it is */
1688@d prerangle 14 /* denotes `$>$' before we know what it is */
1689@d langle 15 /* denotes `$<$' when it's used as angle bracket in a template */
1690@d colcol 18 /* denotes `::' */
1691@d base 19 /* denotes a colon that introduces a base specifier */
1692@d decl 20 /* denotes a complete declaration */
1693@d struct_head 21 /* denotes the beginning of a structure specifier */
1694@d stmt 23 /* denotes a complete statement */
1695@d function 24 /* denotes a complete function */
1696@d fn_decl 25 /* denotes a function declarator */
1697@d semi 27 /* denotes a semicolon */
1698@d colon 28 /* denotes a colon */
1699@d tag 29 /* denotes a statement label */
1700@d if_head 30 /* denotes the beginning of a compound conditional */
1701@d else_head 31 /* denotes a prefix for a compound statement */
1702@d if_clause 32 /* pending \.{if} together with a condition */
1703@d lproc 35 /* begins a preprocessor command */
1704@d rproc 36 /* ends a preprocessor command */
1705@d insert 37 /* a scrap that gets combined with its neighbor */
1706@d section_scrap 38 /* section name */
1707@d dead 39 /* scrap that won't combine */
1708@d ftemplate 59 /* \\{make\_pair} */
1709@d new_exp 60 /* \&{new} and a following type identifier */
1710@d begin_arg 61 /* \.{@@[} */
1711@d end_arg 62 /* \.{@@]} */
1712
1713@<Glo...@>=
1714char cat_name[256][12];
1715eight_bits cat_index;
1716
1717@ @<Set in...@>=
1718    for (cat_index=0;cat_index<255;cat_index++)
1719      strcpy(cat_name[cat_index],"UNKNOWN");
1720@.UNKNOWN@>
1721    strcpy(cat_name[exp],"exp");
1722    strcpy(cat_name[unop],"unop");
1723    strcpy(cat_name[binop],"binop");
1724    strcpy(cat_name[ubinop],"ubinop");
1725    strcpy(cat_name[cast],"cast");
1726    strcpy(cat_name[question],"?");
1727    strcpy(cat_name[lbrace],"{"@q}@>);
1728    strcpy(cat_name[rbrace],@q{@>"}");
1729    strcpy(cat_name[decl_head],"decl_head");
1730    strcpy(cat_name[comma],",");
1731    strcpy(cat_name[lpar],"(");
1732    strcpy(cat_name[rpar],")");
1733    strcpy(cat_name[prelangle],"<");
1734    strcpy(cat_name[prerangle],">");
1735    strcpy(cat_name[langle],"\\<");
1736    strcpy(cat_name[colcol],"::");
1737    strcpy(cat_name[base],"\\:");
1738    strcpy(cat_name[decl],"decl");
1739    strcpy(cat_name[struct_head],"struct_head");
1740    strcpy(cat_name[alfop],"alfop");
1741    strcpy(cat_name[stmt],"stmt");
1742    strcpy(cat_name[function],"function");
1743    strcpy(cat_name[fn_decl],"fn_decl");
1744    strcpy(cat_name[else_like],"else_like");
1745    strcpy(cat_name[semi],";");
1746    strcpy(cat_name[colon],":");
1747    strcpy(cat_name[tag],"tag");
1748    strcpy(cat_name[if_head],"if_head");
1749    strcpy(cat_name[else_head],"else_head");
1750    strcpy(cat_name[if_clause],"if()");
1751    strcpy(cat_name[lproc],"#{"@q}@>);
1752    strcpy(cat_name[rproc],@q{@>"#}");
1753    strcpy(cat_name[insert],"insert");
1754    strcpy(cat_name[section_scrap],"section");
1755    strcpy(cat_name[dead],"@@d");
1756    strcpy(cat_name[public_like],"public");
1757    strcpy(cat_name[operator_like],"operator");
1758    strcpy(cat_name[new_like],"new");
1759    strcpy(cat_name[catch_like],"catch");
1760    strcpy(cat_name[for_like],"for");
1761    strcpy(cat_name[do_like],"do");
1762    strcpy(cat_name[if_like],"if");
1763    strcpy(cat_name[delete_like],"delete");
1764    strcpy(cat_name[raw_ubin],"ubinop?");
1765    strcpy(cat_name[const_like],"const");
1766    strcpy(cat_name[raw_int],"raw");
1767    strcpy(cat_name[int_like],"int");
1768    strcpy(cat_name[case_like],"case");
1769    strcpy(cat_name[sizeof_like],"sizeof");
1770    strcpy(cat_name[struct_like],"struct");
1771    strcpy(cat_name[typedef_like],"typedef");
1772    strcpy(cat_name[define_like],"define");
1773    strcpy(cat_name[template_like],"template");
1774    strcpy(cat_name[ftemplate],"ftemplate");
1775    strcpy(cat_name[new_exp],"new_exp");
1776    strcpy(cat_name[begin_arg],"@@["@q]@>);
1777    strcpy(cat_name[end_arg],@q[@>"@@]");
1778    strcpy(cat_name[0],"zero");
1779
1780@ This code allows \.{CWEAVE} to display its parsing steps.
1781
1782@c
1783void
1784print_cat(c) /* symbolic printout of a category */
1785eight_bits c;
1786{
1787  printf(cat_name[c]);
1788}
1789
1790@ The token lists for translated \TEX/ output contain some special control
1791symbols as well as ordinary characters. These control symbols are
1792interpreted by \.{CWEAVE} before they are written to the output file.
1793
1794\yskip\hang |break_space| denotes an optional line break or an en space;
1795
1796\yskip\hang |force| denotes a line break;
1797
1798\yskip\hang |big_force| denotes a line break with additional vertical space;
1799
1800\yskip\hang |preproc_line| denotes that the line will be printed flush left;
1801
1802\yskip\hang |opt| denotes an optional line break (with the continuation
1803line indented two ems with respect to the normal starting position)---this
1804code is followed by an integer |n|, and the break will occur with penalty
1805$10n$;
1806
1807\yskip\hang |backup| denotes a backspace of one em;
1808
1809\yskip\hang |cancel| obliterates any |break_space|, |opt|, |force|, or
1810|big_force| tokens that immediately precede or follow it and also cancels any
1811|backup| tokens that follow it;
1812
1813\yskip\hang |indent| causes future lines to be indented one more em;
1814
1815\yskip\hang |outdent| causes future lines to be indented one less em.
1816
1817\yskip\noindent All of these tokens are removed from the \TEX/ output that
1818comes from \CEE/ text between \pb\ signs; |break_space| and |force| and
1819|big_force| become single spaces in this mode. The translation of other
1820\CEE/ texts results in \TEX/ control sequences \.{\\1}, \.{\\2},
1821\.{\\3}, \.{\\4}, \.{\\5}, \.{\\6}, \.{\\7}, \.{\\8}
1822corresponding respectively to
1823|indent|, |outdent|, |opt|, |backup|, |break_space|, |force|,
1824|big_force| and |preproc_line|.
1825However, a sequence of consecutive `\.\ ', |break_space|,
1826|force|, and/or |big_force| tokens is first replaced by a single token
1827(the maximum of the given ones).
1828
1829The token |math_rel| will be translated into
1830\.{\\MRL\{}, and it will get a matching \.\} later.
1831Other control sequences in the \TEX/ output will be
1832`\.{\\\\\{}$\,\ldots\,$\.\}'
1833surrounding identifiers, `\.{\\\&\{}$\,\ldots\,$\.\}' surrounding
1834reserved words, `\.{\\.\{}$\,\ldots\,$\.\}' surrounding strings,
1835`\.{\\C\{}$\,\ldots\,$\.\}$\,$|force|' surrounding comments, and
1836`\.{\\X$n$:}$\,\ldots\,$\.{\\X}' surrounding section names, where
1837|n| is the section number.
1838
1839@d math_rel 0206
1840@d big_cancel 0210 /* like |cancel|, also overrides spaces */
1841@d cancel 0211 /* overrides |backup|, |break_space|, |force|, |big_force| */
1842@d indent 0212 /* one more tab (\.{\\1}) */
1843@d outdent 0213 /* one less tab (\.{\\2}) */
1844@d opt 0214 /* optional break in mid-statement (\.{\\3}) */
1845@d backup 0215 /* stick out one unit to the left (\.{\\4}) */
1846@d break_space 0216 /* optional break between statements (\.{\\5}) */
1847@d force 0217 /* forced break between statements (\.{\\6}) */
1848@d big_force 0220 /* forced break with additional space (\.{\\7}) */
1849@d preproc_line 0221 /* begin line without indentation (\.{\\8}) */
1850@^high-bit character handling@>
1851@d quoted_char 0222
1852        /* introduces a character token in the range |0200|--|0377| */
1853@d end_translation 0223 /* special sentinel token at end of list */
1854@d inserted 0224 /* sentinel to mark translations of inserts */
1855@d qualifier 0225 /* introduces an explicit namespace qualifier */
1856
1857@ The raw input is converted into scraps according to the following table,
1858which gives category codes followed by the translations.
1859\def\stars {\.{**}}%
1860The symbol `\stars' stands for `\.{\\\&\{{\rm identifier}\}}',
1861i.e., the identifier itself treated as a reserved word.
1862The right-hand column is the so-called |mathness|, which is explained
1863further below.
1864
1865An identifier |c| of length 1 is translated as \.{\\\v c} instead of
1866as \.{\\\\\{c\}}. An identifier \.{CAPS} in all caps is translated as
1867\.{\\.\{CAPS\}} instead of as \.{\\\\\{CAPS\}}. An identifier that has
1868become a reserved word via |typedef| is translated with \.{\\\&} replacing
1869\.{\\\\} and |raw_int| replacing |exp|.
1870
1871A string of length greater than 20 is broken into pieces of size at most~20
1872with discretionary breaks in between.
1873
1874\yskip\halign{\quad#\hfil&\quad#\hfil&\quad\hfil#\hfil\cr
1875\.{!=}&|binop|: \.{\\I}&yes\cr
1876\.{<=}&|binop|: \.{\\Z}&yes\cr
1877\.{>=}&|binop|: \.{\\G}&yes\cr
1878\.{==}&|binop|: \.{\\E}&yes\cr
1879\.{\&\&}&|binop|: \.{\\W}&yes\cr
1880\.{\v\v}&|binop|: \.{\\V}&yes\cr
1881\.{++}&|unop|: \.{\\PP}&yes\cr
1882\.{--}&|unop|: \.{\\MM}&yes\cr
1883\.{->}&|binop|: \.{\\MG}&yes\cr
1884\.{>>}&|binop|: \.{\\GG}&yes\cr
1885\.{<<}&|binop|: \.{\\LL}&yes\cr
1886\.{::}&|colcol|: \.{\\DC}&maybe\cr
1887\.{.*}&|binop|: \.{\\PA}&yes\cr
1888\.{->*}&|binop|: \.{\\MGA}&yes\cr
1889\.{...}&|raw_int|: \.{\\,\\ldots\\,}&yes\cr
1890\."string\."&|exp|: \.{\\.\{}string with special characters quoted\.\}&maybe\cr
1891\.{@@=}string\.{@@>}&|exp|: \.{\\vb\{}string with special characters
1892  quoted\.\}&maybe\cr
1893\.{@@'7'}&|exp|: \.{\\.\{@@'7'\}}&maybe\cr
1894\.{077} or \.{\\77}&|exp|: \.{\\T\{\\\~77\}}&maybe\cr
1895\.{0x7f}&|exp|: \.{\\T\{\\\^7f\}}&maybe\cr
1896\.{77}&|exp|: \.{\\T\{77\}}&maybe\cr
1897\.{77L}&|exp|: \.{\\T\{77\\\$L\}}&maybe\cr
1898\.{0.1E5}&|exp|: \.{\\T\{0.1\\\_5\}}&maybe\cr
1899\.+&|ubinop|: \.+&yes\cr
1900\.-&|ubinop|: \.-&yes\cr
1901\.*&|raw_ubin|: \.*&yes\cr
1902\./&|binop|: \./&yes\cr
1903\.<&|prelangle|: \.{\\langle}&yes\cr
1904\.=&|binop|: \.{\\K}&yes\cr
1905\.>&|prerangle|: \.{\\rangle}&yes\cr
1906\..&|binop|: \..&yes\cr
1907\.{\v}&|binop|: \.{\\OR}&yes\cr
1908\.\^&|binop|: \.{\\XOR}&yes\cr
1909\.\%&|binop|: \.{\\MOD}&yes\cr
1910\.?&|question|: \.{\\?}&yes\cr
1911\.!&|unop|: \.{\\R}&yes\cr
1912\.\~&|unop|: \.{\\CM}&yes\cr
1913\.\&&|raw_ubin|: \.{\\AND}&yes\cr
1914\.(&|lpar|: \.(&maybe\cr
1915\.[&|lpar|: \.[&maybe\cr
1916\.)&|rpar|: \.)&maybe\cr
1917\.]&|rpar|: \.]&maybe\cr
1918\.\{&|lbrace|: \.\{&yes\cr
1919\.\}&|lbrace|: \.\}&yes\cr
1920\.,&|comma|: \.,&yes\cr
1921\.;&|semi|: \.;&maybe\cr
1922\.:&|colon|: \.:&no\cr
1923\.\# (within line)&|ubinop|: \.{\\\#}&yes\cr
1924\.\# (at beginning)&|lproc|:  |force| |preproc_line| \.{\\\#}&no\cr
1925end of \.\# line&|rproc|:  |force|&no\cr
1926identifier&|exp|: \.{\\\\\{}identifier with underlines and
1927             dollar signs quoted\.\}&maybe\cr
1928\.{and}&|alfop|: \stars&yes\cr
1929\.{and\_eq}&|alfop|: \stars&yes\cr
1930\.{asm}&|sizeof_like|: \stars&maybe\cr
1931\.{auto}&|int_like|: \stars&maybe\cr
1932\.{bitand}&|alfop|: \stars&yes\cr
1933\.{bitor}&|alfop|: \stars&yes\cr
1934\.{bool}&|raw_int|: \stars&maybe\cr
1935\.{break}&|case_like|: \stars&maybe\cr
1936\.{case}&|case_like|: \stars&maybe\cr
1937\.{catch}&|catch_like|: \stars&maybe\cr
1938\.{char}&|raw_int|: \stars&maybe\cr
1939\.{class}&|struct_like|: \stars&maybe\cr
1940\.{clock\_t}&|raw_int|: \stars&maybe\cr
1941\.{compl}&|alfop|: \stars&yes\cr
1942\.{const}&|const_like|: \stars&maybe\cr
1943\.{const\_cast}&|raw_int|: \stars&maybe\cr
1944\.{continue}&|case_like|: \stars&maybe\cr
1945\.{default}&|case_like|: \stars&maybe\cr
1946\.{define}&|define_like|: \stars&maybe\cr
1947\.{defined}&|sizeof_like|: \stars&maybe\cr
1948\.{delete}&|delete_like|: \stars&maybe\cr
1949\.{div\_t}&|raw_int|: \stars&maybe\cr
1950\.{do}&|do_like|: \stars&maybe\cr
1951\.{double}&|raw_int|: \stars&maybe\cr
1952\.{dynamic\_cast}&|raw_int|: \stars&maybe\cr
1953\.{elif}&|if_like|: \stars&maybe\cr
1954\.{else}&|else_like|: \stars&maybe\cr
1955\.{endif}&|if_like|: \stars&maybe\cr
1956\.{enum}&|struct_like|: \stars&maybe\cr
1957\.{error}&|if_like|: \stars&maybe\cr
1958\.{explicit}&|int_like|: \stars&maybe\cr
1959\.{export}&|int_like|: \stars&maybe\cr
1960\.{extern}&|int_like|: \stars&maybe\cr
1961\.{FILE}&|raw_int|: \stars&maybe\cr
1962\.{float}&|raw_int|: \stars&maybe\cr
1963\.{for}&|for_like|: \stars&maybe\cr
1964\.{fpos\_t}&|raw_int|: \stars&maybe\cr
1965\.{friend}&|int_like|: \stars&maybe\cr
1966\.{goto}&|case_like|: \stars&maybe\cr
1967\.{if}&|if_like|: \stars&maybe\cr
1968\.{ifdef}&|if_like|: \stars&maybe\cr
1969\.{ifndef}&|if_like|: \stars&maybe\cr
1970\.{include}&|if_like|: \stars&maybe\cr
1971\.{inline}&|int_like|: \stars&maybe\cr
1972\.{int}&|raw_int|: \stars&maybe\cr
1973\.{jmp\_buf}&|raw_int|: \stars&maybe\cr
1974\.{ldiv\_t}&|raw_int|: \stars&maybe\cr
1975\.{line}&|if_like|: \stars&maybe\cr
1976\.{long}&|raw_int|: \stars&maybe\cr
1977\.{make\_pair}&|ftemplate|: \.{\\\\\{make\\\_pair\}}&maybe\cr
1978\.{mutable}&|int_like|: \stars&maybe\cr
1979\.{namespace}&|struct_like|: \stars&maybe\cr
1980\.{new}&|new_like|: \stars&maybe\cr
1981\.{not}&|alfop|: \stars&yes\cr
1982\.{not\_eq}&|alfop|: \stars&yes\cr
1983\.{NULL}&|exp|: \.{\\NULL}&yes\cr
1984\.{offsetof}&|raw_int|: \stars&maybe\cr
1985\.{operator}&|operator_like|: \stars&maybe\cr
1986\.{or}&|alfop|: \stars&yes\cr
1987\.{or\_eq}&|alfop|: \stars&yes\cr
1988\.{pragma}&|if_like|: \stars&maybe\cr
1989\.{private}&|public_like|: \stars&maybe\cr
1990\.{protected}&|public_like|: \stars&maybe\cr
1991\.{ptrdiff\_t}&|raw_int|: \stars&maybe\cr
1992\.{public}&|public_like|: \stars&maybe\cr
1993\.{register}&|int_like|: \stars&maybe\cr
1994\.{reinterpret\_cast}&|raw_int|: \stars&maybe\cr
1995\.{return}&|case_like|: \stars&maybe\cr
1996\.{short}&|raw_int|: \stars&maybe\cr
1997\.{sig\_atomic\_t}&|raw_int|: \stars&maybe\cr
1998\.{signed}&|raw_int|: \stars&maybe\cr
1999\.{size\_t}&|raw_int|: \stars&maybe\cr
2000\.{sizeof}&|sizeof_like|: \stars&maybe\cr
2001\.{static}&|int_like|: \stars&maybe\cr
2002\.{static\_cast}&|raw_int|: \stars&maybe\cr
2003\.{struct}&|struct_like|: \stars&maybe\cr
2004\.{switch}&|for_like|: \stars&maybe\cr
2005\.{template}&|template_like|: \stars&maybe\cr
2006\.{TeX}&|exp|: \.{\\TeX}&yes\cr
2007\.{this}&|exp|: \.{\\this}&yes\cr
2008\.{throw}&|case_like|: \stars&maybe\cr
2009\.{time\_t}&|raw_int|: \stars&maybe\cr
2010\.{try}&|else_like|: \stars&maybe\cr
2011\.{typedef}&|typedef_like|: \stars&maybe\cr
2012\.{typeid}&|raw_int|: \stars&maybe\cr
2013\.{typename}&|struct_like|: \stars&maybe\cr
2014\.{undef}&|if_like|: \stars&maybe\cr
2015\.{union}&|struct_like|: \stars&maybe\cr
2016\.{unsigned}&|raw_int|: \stars&maybe\cr
2017\.{using}&|int_like|: \stars&maybe\cr
2018\.{va\_dcl}&|decl|: \stars&maybe\cr
2019\.{va\_list}&|raw_int|: \stars&maybe\cr
2020\.{virtual}&|int_like|: \stars&maybe\cr
2021\.{void}&|raw_int|: \stars&maybe\cr
2022\.{volatile}&|const_like|: \stars&maybe\cr
2023\.{wchar\_t}&|raw_int|: \stars&maybe\cr
2024\.{while}&|for_like|: \stars&maybe\cr
2025\.{xor}&|alfop|: \stars&yes\cr
2026\.{xor\_eq}&|alfop|: \stars&yes\cr
2027\.{@@,}&|insert|: \.{\\,}&maybe\cr
2028\.{@@\v}&|insert|:  |opt| \.0&maybe\cr
2029\.{@@/}&|insert|:  |force|&no\cr
2030\.{@@\#}&|insert|:  |big_force|&no\cr
2031\.{@@+}&|insert|:  |big_cancel| \.{\{\}} |break_space|
2032  \.{\{\}} |big_cancel|&no\cr
2033\.{@@;}&|semi|: &maybe\cr
2034\.{@@[@q]@>}&|begin_arg|: &maybe\cr
2035\.{@q[@>@@]}&|end_arg|: &maybe\cr
2036\.{@@\&}&|insert|: \.{\\J}&maybe\cr
2037\.{@@h}&|insert|: |force| \.{\\ATH} |force|&no\cr
2038\.{@@<}\thinspace section name\thinspace\.{@@>}&|section_scrap|:
2039 \.{\\X}$n$\.:translated section name\.{\\X}&maybe\cr
2040\.{@@(@q)@>}\thinspace section name\thinspace\.{@@>}&|section_scrap|:
2041 \.{\\X}$n$\.{:\\.\{}section name with special characters
2042      quoted\.{\ \}\\X}&maybe\cr
2043\.{/*}comment\.{*/}&|insert|: |cancel|
2044      \.{\\C\{}translated comment\.\} |force|&no\cr
2045\.{//}comment&|insert|: |cancel|
2046      \.{\\SHC\{}translated comment\.\} |force|&no\cr
2047}
2048
2049\smallskip
2050The construction \.{@@t}\thinspace stuff\/\thinspace\.{@@>} contributes
2051\.{\\hbox\{}\thinspace  stuff\/\thinspace\.\} to the following scrap.
2052
2053@i prod.w
2054
2055@* Implementing the productions.
2056More specifically, a scrap is a structure consisting of a category
2057|cat| and a |text_pointer| |trans|, which points to the translation in
2058|tok_start|.  When \CEE/ text is to be processed with the grammar above,
2059we form an array |scrap_info| containing the initial scraps.
2060Our production rules have the nice property that the right-hand side is never
2061longer than the left-hand side. Therefore it is convenient to use sequential
2062allocation for the current sequence of scraps. Five pointers are used to
2063manage the parsing:
2064
2065\yskip\hang |pp| is a pointer into |scrap_info|.  We will try to match
2066the category codes |pp->cat,@,@,(pp+1)->cat|$,\,\,\ldots\,$
2067to the left-hand sides of productions.
2068
2069\yskip\hang |scrap_base|, |lo_ptr|, |hi_ptr|, and |scrap_ptr| are such that
2070the current sequence of scraps appears in positions |scrap_base| through
2071|lo_ptr| and |hi_ptr| through |scrap_ptr|, inclusive, in the |cat| and
2072|trans| arrays. Scraps located between |scrap_base| and |lo_ptr| have
2073been examined, while those in positions |>=hi_ptr| have not yet been
2074looked at by the parsing process.
2075
2076\yskip\noindent Initially |scrap_ptr| is set to the position of the final
2077scrap to be parsed, and it doesn't change its value. The parsing process
2078makes sure that |lo_ptr>=pp+3|, since productions have as many as four terms,
2079by moving scraps from |hi_ptr| to |lo_ptr|. If there are
2080fewer than |pp+3| scraps left, the positions up to |pp+3| are filled with
2081blanks that will not match in any productions. Parsing stops when
2082|pp==lo_ptr+1| and |hi_ptr==scrap_ptr+1|.
2083
2084Since the |scrap| structure will later be used for other purposes, we
2085declare its second element as a union.
2086
2087@<Type...@>=
2088typedef struct {
2089  eight_bits cat;
2090  eight_bits mathness;
2091  union {
2092    text_pointer Trans;
2093    @<Rest of |trans_plus| union@>@;
2094  } trans_plus;
2095} scrap;
2096typedef scrap *scrap_pointer;
2097
2098@ @d trans trans_plus.Trans /* translation texts of scraps */
2099
2100@<Global...@>=
2101scrap scrap_info[max_scraps]; /* memory array for scraps */
2102scrap_pointer scrap_info_end=scrap_info+max_scraps -1; /* end of |scrap_info| */
2103scrap_pointer pp; /* current position for reducing productions */
2104scrap_pointer scrap_base; /* beginning of the current scrap sequence */
2105scrap_pointer scrap_ptr; /* ending of the current scrap sequence */
2106scrap_pointer lo_ptr; /* last scrap that has been examined */
2107scrap_pointer hi_ptr; /* first scrap that has not been examined */
2108scrap_pointer max_scr_ptr; /* largest value assumed by |scrap_ptr| */
2109
2110@ @<Set init...@>=
2111scrap_base=scrap_info+1;
2112max_scr_ptr=scrap_ptr=scrap_info;
2113
2114@ Token lists in |@!tok_mem| are composed of the following kinds of
2115items for \TEX/ output.
2116
2117\yskip\item{$\bullet$}Character codes and special codes like |force| and
2118|math_rel| represent themselves;
2119
2120\item{$\bullet$}|id_flag+p| represents \.{\\\\\{{\rm identifier $p$}\}};
2121
2122\item{$\bullet$}|res_flag+p| represents \.{\\\&\{{\rm identifier $p$}\}};
2123
2124\item{$\bullet$}|section_flag+p| represents section name |p|;
2125
2126\item{$\bullet$}|tok_flag+p| represents token list number |p|;
2127
2128\item{$\bullet$}|inner_tok_flag+p| represents token list number |p|, to be
2129translated without line-break controls.
2130
2131@d id_flag 10240 /* signifies an identifier */
2132@d res_flag 2*id_flag /* signifies a reserved word */
2133@d section_flag 3*id_flag /* signifies a section name */
2134@d tok_flag 4*id_flag /* signifies a token list */
2135@d inner_tok_flag 5*id_flag /* signifies a token list in `\pb' */
2136
2137@c
2138void
2139print_text(p) /* prints a token list for debugging; not used in |main| */
2140text_pointer p;
2141{
2142  token_pointer j; /* index into |tok_mem| */
2143  sixteen_bits r; /* remainder of token after the flag has been stripped off */
2144  if (p>=text_ptr) printf("BAD");
2145  else for (j=*p; j<*(p+1); j++) {
2146    r=*j%id_flag;
2147    switch (*j/id_flag) {
2148      case 1: printf("\\\\{"@q}@>); print_id((name_dir+r)); printf(@q{@>"}");
2149        break; /* |id_flag| */
2150      case 2: printf("\\&{"@q}@>); print_id((name_dir+r)); printf(@q{@>"}");
2151        break; /* |res_flag| */
2152      case 3: printf("<"); print_section_name((name_dir+r)); printf(">");
2153        break; /* |section_flag| */
2154      case 4: printf("[[%d]]",r); break; /* |tok_flag| */
2155      case 5: printf("|[[%d]]|",r); break; /* |inner_tok_flag| */
2156      default: @<Print token |r| in symbolic form@>;
2157    }
2158  }
2159  fflush(stdout);
2160}
2161
2162@ @<Print token |r|...@>=
2163switch (r) {
2164  case math_rel: printf("\\mathrel{"@q}@>); break;
2165  case big_cancel: printf("[ccancel]"); break;
2166  case cancel: printf("[cancel]"); break;
2167  case indent: printf("[indent]"); break;
2168  case outdent: printf("[outdent]"); break;
2169  case backup: printf("[backup]"); break;
2170  case opt: printf("[opt]"); break;
2171  case break_space: printf("[break]"); break;
2172  case force: printf("[force]"); break;
2173  case big_force: printf("[fforce]"); break;
2174  case preproc_line: printf("[preproc]"); break;
2175  case quoted_char: j++; printf("[%o]",(unsigned)*j); break;
2176  case end_translation: printf("[quit]"); break;
2177  case inserted: printf("[inserted]"); break;
2178  default: putxchar(r);
2179}
2180
2181@ The production rules listed above are embedded directly into \.{CWEAVE},
2182since it is easier to do this than to write an interpretive system
2183that would handle production systems in general. Several macros are defined
2184here so that the program for each production is fairly short.
2185
2186All of our productions conform to the general notion that some |k|
2187consecutive scraps starting at some position |j| are to be replaced by a
2188single scrap of some category |c| whose translation is composed from the
2189translations of the disappearing scraps. After this production has been
2190applied, the production pointer |pp| should change by an amount |d|. Such
2191a production can be represented by the quadruple |(j,k,c,d)|. For example,
2192the production `|exp@,comma@,exp| $\RA$ |exp|' would be represented by
2193`|(pp,3,exp,-2)|'; in this case the pointer |pp| should decrease by 2
2194after the production has been applied, because some productions with
2195|exp| in their second or third positions might now match,
2196but no productions have
2197|exp| in the fourth position of their left-hand sides. Note that
2198the value of |d| is determined by the whole collection of productions, not
2199by an individual one.
2200The determination of |d| has been
2201done by hand in each case, based on the full set of productions but not on
2202the grammar of \CEE/ or on the rules for constructing the initial
2203scraps.
2204
2205We also attach a serial number to each production, so that additional
2206information is available when debugging. For example, the program below
2207contains the statement `|reduce(pp,3,exp,-2,4)|' when it implements
2208the production just mentioned.
2209
2210Before calling |reduce|, the program should have appended the tokens of
2211the new translation to the |tok_mem| array. We commonly want to append
2212copies of several existing translations, and macros are defined to
2213simplify these common cases. For example, \\{app2}|(pp)| will append the
2214translations of two consecutive scraps, |pp->trans| and |(pp+1)->trans|, to
2215the current token list. If the entire new translation is formed in this
2216way, we write `|squash(j,k,c,d,n)|' instead of `|reduce(j,k,c,d,n)|'. For
2217example, `|squash(pp,3,exp,-2,3)|' is an abbreviation for `\\{app3}|(pp);
2218reduce(pp,3,exp,-2,3)|'.
2219
2220A couple more words of explanation:
2221Both |big_app| and |app| append a token (while |big_app1| to |big_app4|
2222append the specified number of scrap translations) to the current token list.
2223The difference between |big_app| and |app| is simply that |big_app|
2224checks whether there can be a conflict between math and non-math
2225tokens, and intercalates a `\.{\$}' token if necessary.  When in
2226doubt what to use, use |big_app|.
2227
2228The |mathness| is an attribute of scraps that says whether they are
2229to be printed in a math mode context or not.  It is separate from the
2230``part of speech'' (the |cat|) because to make each |cat| have
2231a fixed |mathness| (as in the original \.{WEAVE}) would multiply the
2232number of necessary production rules.
2233
2234The low two bits (i.e. |mathness % 4|) control the left boundary.
2235(We need two bits because we allow cases |yes_math|, |no_math| and
2236|maybe_math|, which can go either way.)
2237The next two bits (i.e. |mathness / 4|) control the right boundary.
2238If we combine two scraps and the right boundary of the first has
2239a different mathness from the left boundary of the second, we
2240insert a \.{\$} in between.  Similarly, if at printing time some
2241irreducible scrap has a |yes_math| boundary the scrap gets preceded
2242or followed by a \.{\$}. The left boundary is |maybe_math| if and
2243only if the right boundary is.
2244
2245The code below is an exact translation of the production rules into
2246\CEE/, using such macros, and the reader should have no difficulty
2247understanding the format by comparing the code with the symbolic
2248productions as they were listed earlier.
2249
2250@d no_math 2 /* should be in horizontal mode */
2251@d yes_math 1 /* should be in math mode */
2252@d maybe_math 0 /* works in either horizontal or math mode */
2253@d big_app2(a) big_app1(a);big_app1(a+1)
2254@d big_app3(a) big_app2(a);big_app1(a+2)
2255@d big_app4(a) big_app3(a);big_app1(a+3)
2256@d app(a) *(tok_ptr++)=a
2257@d app1(a) *(tok_ptr++)=tok_flag+(int)((a)->trans-tok_start)
2258
2259@<Global...@>=
2260int cur_mathness, init_mathness;
2261
2262@ @c
2263void
2264app_str(s)
2265char *s;
2266{
2267  while (*s) app_tok(*(s++));
2268}
2269
2270void
2271big_app(a)
2272token a;
2273{
2274        if (a==' ' || (a>=big_cancel && a<=big_force)) /* non-math token */ {
2275                if (cur_mathness==maybe_math) init_mathness=no_math;
2276                else if (cur_mathness==yes_math) app_str("{}$");
2277                cur_mathness=no_math;
2278        }
2279        else {
2280                if (cur_mathness==maybe_math) init_mathness=yes_math;
2281                else if (cur_mathness==no_math) app_str("${}");
2282                cur_mathness=yes_math;
2283        }
2284        app(a);
2285}
2286
2287void
2288big_app1(a)
2289scrap_pointer a;
2290{
2291  switch (a->mathness % 4) { /* left boundary */
2292  case (no_math):
2293    if (cur_mathness==maybe_math) init_mathness=no_math;
2294    else if (cur_mathness==yes_math) app_str("{}$");
2295    cur_mathness=a->mathness / 4; /* right boundary */
2296    break;
2297  case (yes_math):
2298    if (cur_mathness==maybe_math) init_mathness=yes_math;
2299    else if (cur_mathness==no_math) app_str("${}");
2300    cur_mathness=a->mathness / 4; /* right boundary */
2301    break;
2302  case (maybe_math): /* no changes */ break;
2303  }
2304  app(tok_flag+(int)((a)->trans-tok_start));
2305}
2306
2307@ Let us consider the big switch for productions now, before looking
2308at its context. We want to design the program so that this switch
2309works, so we might as well not keep ourselves in suspense about exactly what
2310code needs to be provided with a proper environment.
2311
2312@d cat1 (pp+1)->cat
2313@d cat2 (pp+2)->cat
2314@d cat3 (pp+3)->cat
2315@d lhs_not_simple (pp->cat!=public_like
2316        && pp->cat!=semi
2317        && pp->cat!=prelangle
2318        && pp->cat!=prerangle
2319        && pp->cat!=template_like
2320        && pp->cat!=new_like
2321        && pp->cat!=new_exp
2322        && pp->cat!=ftemplate
2323        && pp->cat!=raw_ubin
2324        && pp->cat!=const_like
2325        && pp->cat!=raw_int
2326        && pp->cat!=operator_like)
2327 /* not a production with left side length 1 */
2328
2329@<Match a production at |pp|, or increase |pp| if there is no match@>= {
2330  if (cat1==end_arg && lhs_not_simple)
2331    if (pp->cat==begin_arg) squash(pp,2,exp,-2,124);
2332    else squash(pp,2,end_arg,-1,125);
2333  else if (cat1==insert) squash(pp,2,pp->cat,-2,0);
2334  else if (cat2==insert) squash(pp+1,2,(pp+1)->cat,-1,0);
2335  else if (cat3==insert) squash(pp+2,2,(pp+2)->cat,0,0);
2336  else
2337  switch (pp->cat) {
2338    case exp: @<Cases for |exp|@>; @+break;
2339    case lpar: @<Cases for |lpar|@>; @+break;
2340    case unop: @<Cases for |unop|@>; @+break;
2341    case ubinop: @<Cases for |ubinop|@>; @+break;
2342    case binop: @<Cases for |binop|@>; @+break;
2343    case cast: @<Cases for |cast|@>; @+break;
2344    case sizeof_like: @<Cases for |sizeof_like|@>; @+break;
2345    case int_like: @<Cases for |int_like|@>; @+break;
2346    case public_like: @<Cases for |public_like|@>; @+break;
2347    case colcol: @<Cases for |colcol|@>; @+break;
2348    case decl_head: @<Cases for |decl_head|@>; @+break;
2349    case decl: @<Cases for |decl|@>; @+break;
2350    case base: @<Cases for |base|@>; @+break;
2351    case struct_like: @<Cases for |struct_like|@>; @+break;
2352    case struct_head: @<Cases for |struct_head|@>; @+break;
2353    case fn_decl: @<Cases for |fn_decl|@>; @+break;
2354    case function: @<Cases for |function|@>; @+break;
2355    case lbrace: @<Cases for |lbrace|@>; @+break;
2356    case if_like: @<Cases for |if_like|@>; @+break;
2357    case else_like: @<Cases for |else_like|@>; @+break;
2358    case else_head: @<Cases for |else_head|@>; @+break;
2359    case if_clause: @<Cases for |if_clause|@>; @+break;
2360    case if_head: @<Cases for |if_head|@>; @+break;
2361    case do_like: @<Cases for |do_like|@>; @+break;
2362    case case_like: @<Cases for |case_like|@>; @+break;
2363    case catch_like: @<Cases for |catch_like|@>; @+break;
2364    case tag: @<Cases for |tag|@>; @+break;
2365    case stmt: @<Cases for |stmt|@>; @+break;
2366    case semi: @<Cases for |semi|@>; @+break;
2367    case lproc: @<Cases for |lproc|@>; @+break;
2368    case section_scrap: @<Cases for |section_scrap|@>; @+break;
2369    case insert: @<Cases for |insert|@>; @+break;
2370    case prelangle: @<Cases for |prelangle|@>; @+break;
2371    case prerangle: @<Cases for |prerangle|@>; @+break;
2372    case langle: @<Cases for |langle|@>; @+break;
2373    case template_like: @<Cases for |template_like|@>; @+break;
2374    case new_like: @<Cases for |new_like|@>; @+break;
2375    case new_exp: @<Cases for |new_exp|@>; @+break;
2376    case ftemplate: @<Cases for |ftemplate|@>; @+break;
2377    case for_like: @<Cases for |for_like|@>; @+break;
2378    case raw_ubin: @<Cases for |raw_ubin|@>; @+break;
2379    case const_like: @<Cases for |const_like|@>; @+break;
2380    case raw_int: @<Cases for |raw_int|@>; @+break;
2381    case operator_like: @<Cases for |operator_like|@>; @+break;
2382    case typedef_like: @<Cases for |typedef_like|@>; @+break;
2383    case delete_like: @<Cases for |delete_like|@>; @+break;
2384    case question: @<Cases for |question|@>; @+break;
2385  }
2386  pp++; /* if no match was found, we move to the right */
2387}
2388
2389@ In \CEE/, new specifier names can be defined via |typedef|, and we want
2390to make the parser recognize future occurrences of the identifier thus
2391defined as specifiers.  This is done by the procedure |make_reserved|,
2392which changes the |ilk| of the relevant identifier.
2393
2394We first need a procedure to recursively seek the first
2395identifier in a token list, because the identifier might
2396be enclosed in parentheses, as when one defines a function
2397returning a pointer.
2398
2399If the first identifier found is a keyword like `\&{case}', we
2400return the special value |case_found|; this prevents underlining
2401of identifiers in case labels.
2402
2403If the first identifier is the keyword `\&{operator}', we give up;
2404users who want to index definitions of overloaded \CPLUSPLUS/ operators
2405should say, for example, `\.{@@!@@\^\\\&\{operator\} \$+\{=\}\$@@>}' (or,
2406more properly alphebetized,
2407`\.{@@!@@:operator+=\}\{\\\&\{operator\} \$+\{=\}\$@@>}').
2408
2409@d no_ident_found (token_pointer)0 /* distinct from any identifier token */
2410@d case_found (token_pointer)1 /* likewise */
2411@d operator_found (token_pointer)2 /* likewise */
2412
2413@c
2414token_pointer
2415find_first_ident(p)
2416text_pointer p;
2417{
2418  token_pointer q; /* token to be returned */
2419  token_pointer j; /* token being looked at */
2420  sixteen_bits r; /* remainder of token after the flag has been stripped off */
2421  if (p>=text_ptr) confusion("find_first_ident");
2422  for (j=*p; j<*(p+1); j++) {
2423    r=*j%id_flag;
2424    switch (*j/id_flag) {
2425      case 2: /* |res_flag| */
2426        if (name_dir[r].ilk==case_like) return case_found;
2427        if (name_dir[r].ilk==operator_like) return operator_found;
2428        if (name_dir[r].ilk!=raw_int) break;
2429      case 1: return j;
2430      case 4: case 5: /* |tok_flag| or |inner_tok_flag| */
2431        if ((q=find_first_ident(tok_start+r))!=no_ident_found)
2432          return q;
2433      default: ; /* char, |section_flag|, fall thru: move on to next token */
2434        if (*j==inserted) return no_ident_found; /* ignore inserts */
2435        else if (*j==qualifier) j++; /* bypass namespace qualifier */
2436    }
2437  }
2438  return no_ident_found;
2439}
2440
2441@ The scraps currently being parsed must be inspected for any
2442occurrence of the identifier that we're making reserved; hence
2443the |for| loop below.
2444
2445@c
2446void
2447make_reserved(p) /* make the first identifier in |p->trans| like |int| */
2448scrap_pointer p;
2449{
2450  sixteen_bits tok_value; /* the name of this identifier, plus its flag*/
2451  token_pointer tok_loc; /* pointer to |tok_value| */
2452  if ((tok_loc=find_first_ident(p->trans))<=operator_found)
2453    return; /* this should not happen */
2454  tok_value=*tok_loc;
2455  for (;p<=scrap_ptr; p==lo_ptr? p=hi_ptr: p++) {
2456    if (p->cat==exp) {
2457      if (**(p->trans)==tok_value) {
2458        p->cat=raw_int;
2459        **(p->trans)=tok_value%id_flag+res_flag;
2460      }
2461    }
2462  }
2463  (name_dir+(sixteen_bits)(tok_value%id_flag))->ilk=raw_int;
2464  *tok_loc=tok_value%id_flag+res_flag;
2465}
2466
2467@ In the following situations we want to mark the occurrence of
2468an identifier as a definition: when |make_reserved| is just about to be
2469used; after a specifier, as in |char **argv|;
2470before a colon, as in \\{found}:; and in the declaration of a function,
2471as in \\{main}()$\{\ldots;\}$.  This is accomplished by the invocation
2472of |make_underlined| at appropriate times.  Notice that, in the declaration
2473of a function, we find out that the identifier is being defined only after
2474it has been swallowed up by an |exp|.
2475
2476@c
2477void
2478make_underlined(p)
2479/* underline the entry for the first identifier in |p->trans| */
2480scrap_pointer p;
2481{
2482  token_pointer tok_loc; /* where the first identifier appears */
2483  if ((tok_loc=find_first_ident(p->trans))<=operator_found)
2484    return; /* this happens, for example, in |case found:| */
2485  xref_switch=def_flag;
2486  underline_xref(*tok_loc%id_flag+name_dir);
2487}
2488
2489@ We cannot use |new_xref| to underline a cross-reference at this point
2490because this would just make a new cross-reference at the end of the list.
2491We actually have to search through the list for the existing
2492cross-reference.
2493
2494@<Predecl...@>=
2495void  underline_xref();
2496
2497@ @c
2498void
2499underline_xref(p)
2500name_pointer p;
2501{
2502  xref_pointer q=(xref_pointer)p->xref; /* pointer to cross-reference being examined */
2503  xref_pointer r; /* temporary pointer for permuting cross-references */
2504  sixteen_bits m; /* cross-reference value to be installed */
2505  sixteen_bits n; /* cross-reference value being examined */
2506  if (no_xref) return;
2507  m=section_count+xref_switch;
2508  while (q != xmem) {
2509    n=q->num;
2510    if (n==m) return;
2511    else if (m==n+def_flag) {
2512        q->num=m; return;
2513    }
2514    else if (n>=def_flag && n<m) break;
2515    q=q->xlink;
2516  }
2517  @<Insert new cross-reference at |q|, not at beginning of list@>;
2518}
2519
2520@ We get to this section only when the identifier is one letter long,
2521so it didn't get a non-underlined entry during phase one.  But it may
2522have got some explicitly underlined entries in later sections, so in order
2523to preserve the numerical order of the entries in the index, we have
2524to insert the new cross-reference not at the beginning of the list
2525(namely, at |p->xref|), but rather right before |q|.
2526
2527@<Insert new cross-reference at |q|...@>=
2528  append_xref(0); /* this number doesn't matter */
2529  xref_ptr->xlink=(xref_pointer)p->xref; r=xref_ptr;
2530  p->xref=(char*)xref_ptr;
2531  while (r->xlink!=q) {r->num=r->xlink->num; r=r->xlink;}
2532  r->num=m; /* everything from |q| on is left undisturbed */
2533
2534@ Now comes the code that tries to match each production starting
2535with a particular type of scrap. Whenever a match is discovered,
2536the |squash| or |reduce| macro will cause the appropriate action
2537to be performed, followed by |goto found|.
2538
2539@<Cases for |exp|@>=
2540if (cat1==lbrace || cat1==int_like || cat1==decl) {
2541  make_underlined(pp); big_app1(pp); big_app(indent); app(indent);
2542  reduce(pp,1,fn_decl,0,1);
2543}
2544else if (cat1==unop) squash(pp,2,exp,-2,2);
2545else if ((cat1==binop || cat1==ubinop) && cat2==exp)
2546        squash(pp,3,exp,-2,3);
2547else if (cat1==comma && cat2==exp) {
2548  big_app2(pp);
2549  app(opt); app('9'); big_app1(pp+2); reduce(pp,3,exp,-2,4);
2550}
2551else if (cat1==lpar && cat2==rpar && cat3==colon) squash(pp+3,1,base,0,5);
2552else if (cat1==cast && cat2==colon) squash(pp+2,1,base,0,5);
2553else if (cat1==semi) squash(pp,2,stmt,-1,6);
2554else if (cat1==colon) {
2555  make_underlined (pp);  squash(pp,2,tag,-1,7);
2556}
2557else if (cat1==rbrace) squash(pp,1,stmt,-1,8);
2558else if (cat1==lpar && cat2==rpar && (cat3==const_like || cat3==case_like)) {
2559  big_app1(pp+2); big_app(' '); big_app1(pp+3); reduce(pp+2,2,rpar,0,9);
2560}
2561else if (cat1==cast && (cat2==const_like || cat2==case_like)) {
2562  big_app1(pp+1); big_app(' '); big_app1(pp+2); reduce(pp+1,2,cast,0,9);
2563}
2564else if (cat1==exp || cat1==cast) squash(pp,2,exp,-2,10);
2565
2566@ @<Cases for |lpar|@>=
2567if ((cat1==exp||cat1==ubinop) && cat2==rpar) squash(pp,3,exp,-2,11);
2568else if (cat1==rpar) {
2569  big_app1(pp); app('\\'); app(','); big_app1(pp+1);
2570@.\\,@>
2571  reduce(pp,2,exp,-2,12);
2572}
2573else if ((cat1==decl_head || cat1==int_like || cat1==cast) && cat2==rpar)
2574 squash(pp,3,cast,-2,13);
2575else if ((cat1==decl_head || cat1==int_like || cat1==exp) && cat2==comma) {
2576  big_app3(pp); app(opt); app('9'); reduce(pp,3,lpar,-1,14);
2577}
2578else if (cat1==stmt || cat1==decl) {
2579  big_app2(pp); big_app(' '); reduce(pp,2,lpar,-1,15);
2580}
2581
2582@ @<Cases for |unop|@>=
2583if (cat1==exp || cat1==int_like) squash(pp,2,exp,-2,16);
2584
2585@ @<Cases for |ubinop|@>=
2586if (cat1==cast && cat2==rpar) {
2587  big_app('{'); big_app1(pp); big_app('}'); big_app1(pp+1);
2588  reduce(pp,2,cast,-2,17);
2589}
2590else if (cat1==exp || cat1==int_like) {
2591  big_app('{'); big_app1(pp); big_app('}'); big_app1(pp+1);
2592  reduce(pp,2,cat1,-2,18);
2593}
2594else if (cat1==binop) {
2595  big_app(math_rel); big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}');
2596  big_app('}'); reduce(pp,2,binop,-1,19);
2597}
2598
2599@ @<Cases for |binop|@>=
2600if (cat1==binop) {
2601  big_app(math_rel); big_app('{'); big_app1(pp); big_app('}');
2602  big_app('{'); big_app1(pp+1); big_app('}');
2603  big_app('}'); reduce(pp,2,binop,-1,20);
2604}
2605
2606@ @<Cases for |cast|@>=
2607if (cat1==lpar) squash(pp,2,lpar,-1,21);
2608else if (cat1==exp) {
2609  big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,21);
2610}
2611else if (cat1==semi) squash(pp,1,exp,-2,22);
2612
2613@ @<Cases for |sizeof_like|@>=
2614if (cat1==cast) squash(pp,2,exp,-2,23);
2615else if (cat1==exp) {
2616  big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,24);
2617}
2618
2619@ @<Cases for |int_like|@>=
2620if (cat1==int_like|| cat1==struct_like) {
2621  big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,cat1,-2,25);
2622}
2623else if (cat1==exp && (cat2==raw_int||cat2==struct_like))
2624  squash(pp,2,int_like,-2,26);
2625else if (cat1==exp || cat1==ubinop || cat1==colon) {
2626  big_app1(pp); big_app(' '); reduce(pp,1,decl_head,-1,27);
2627}
2628else if (cat1==semi || cat1==binop) squash(pp,1,decl_head,0,28);
2629
2630@ @<Cases for |public_like|@>=
2631if (cat1==colon) squash(pp,2,tag,-1,29);
2632else squash(pp,1,int_like,-2,30);
2633
2634@ @<Cases for |colcol|@>=
2635if (cat1==exp||cat1==int_like) {
2636  app(qualifier); squash(pp,2,cat1,-2,31);
2637}@+else if (cat1==colcol) squash(pp,2,colcol,-1,32);
2638
2639@ @<Cases for |decl_head|@>=
2640if (cat1==comma) {
2641  big_app2(pp); big_app(' '); reduce(pp,2,decl_head,-1,33);
2642}
2643else if (cat1==ubinop) {
2644  big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}');
2645  reduce(pp,2,decl_head,-1,34);
2646}
2647else if (cat1==exp && cat2!=lpar && cat2!=exp && cat2!=cast) {
2648  make_underlined(pp+1); squash(pp,2,decl_head,-1,35);
2649}
2650else if ((cat1==binop||cat1==colon) && cat2==exp && (cat3==comma ||
2651    cat3==semi || cat3==rpar))
2652  squash(pp,3,decl_head,-1,36);
2653else if (cat1==cast) squash(pp,2,decl_head,-1,37);
2654else if (cat1==lbrace || cat1==int_like || cat1==decl) {
2655  big_app1(pp); big_app(indent); app(indent); reduce(pp,1,fn_decl,0,38);
2656}
2657else if (cat1==semi) squash(pp,2,decl,-1,39);
2658
2659@ @<Cases for |decl|@>=
2660if (cat1==decl) {
2661  big_app1(pp); big_app(force); big_app1(pp+1);
2662  reduce(pp,2,decl,-1,40);
2663}
2664else if (cat1==stmt || cat1==function) {
2665  big_app1(pp); big_app(big_force);
2666  big_app1(pp+1); reduce(pp,2,cat1,-1,41);
2667}
2668
2669@ @<Cases for |base|@>=
2670if (cat1==int_like || cat1==exp) {
2671  if (cat2==comma) {
2672    big_app1(pp); big_app(' '); big_app2(pp+1);
2673    app(opt); app('9'); reduce(pp,3,base,0,42);
2674  }
2675  else if (cat2==lbrace) {
2676    big_app1(pp); big_app(' '); big_app1(pp+1); big_app(' '); big_app1(pp+2);
2677    reduce(pp,3,lbrace,-2,43);
2678  }
2679}
2680
2681@ @<Cases for |struct_like|@>=
2682if (cat1==lbrace) {
2683  big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,struct_head,0,44);
2684}
2685else if (cat1==exp||cat1==int_like) {
2686  if (cat2==lbrace || cat2==semi) {
2687    make_underlined(pp+1); make_reserved(pp+1);
2688    big_app1(pp); big_app(' '); big_app1(pp+1);
2689    if (cat2==semi) reduce(pp,2,decl_head,0,45);
2690    else {
2691      big_app(' '); big_app1(pp+2);reduce(pp,3,struct_head,0,46);
2692    }
2693  }
2694  else if (cat2==colon) squash(pp+2,1,base,2,47);
2695  else if (cat2!=base) {
2696    big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,int_like,-2,48);
2697  }
2698}
2699
2700@ @<Cases for |struct_head|@>=
2701if ((cat1==decl || cat1==stmt || cat1==function) && cat2==rbrace) {
2702  big_app1(pp); big_app(indent); big_app(force); big_app1(pp+1);
2703  big_app(outdent); big_app(force);  big_app1(pp+2);
2704  reduce(pp,3,int_like,-2,49);
2705}
2706else if (cat1==rbrace) {
2707  big_app1(pp); app_str("\\,"); big_app1(pp+1);
2708@.\\,@>
2709  reduce(pp,2,int_like,-2,50);
2710}
2711
2712@ @<Cases for |fn_decl|@>=
2713if (cat1==decl) {
2714  big_app1(pp); big_app(force); big_app1(pp+1); reduce(pp,2,fn_decl,0,51);
2715}
2716else if (cat1==stmt) {
2717  big_app1(pp); app(outdent); app(outdent); big_app(force);
2718  big_app1(pp+1); reduce(pp,2,function,-1,52);
2719}
2720
2721@ @<Cases for |function|@>=
2722if (cat1==function || cat1==decl || cat1==stmt) {
2723  big_app1(pp); big_app(big_force); big_app1(pp+1); reduce(pp,2,cat1,-1,53);
2724}
2725
2726@ @<Cases for |lbrace|@>=
2727if (cat1==rbrace) {
2728  big_app1(pp); app('\\'); app(','); big_app1(pp+1);
2729@.\\,@>
2730  reduce(pp,2,stmt,-1,54);
2731}
2732else if ((cat1==stmt||cat1==decl||cat1==function) && cat2==rbrace) {
2733  big_app(force); big_app1(pp);  big_app(indent); big_app(force);
2734  big_app1(pp+1); big_app(force); big_app(backup);  big_app1(pp+2);
2735  big_app(outdent); big_app(force); reduce(pp,3,stmt,-1,55);
2736}
2737else if (cat1==exp) {
2738  if (cat2==rbrace) squash(pp,3,exp,-2,56);
2739  else if (cat2==comma && cat3==rbrace) squash(pp,4,exp,-2,56);
2740}
2741
2742@ @<Cases for |if_like|@>=
2743if (cat1==exp) {
2744  big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,if_clause,0,57);
2745}
2746
2747@ @<Cases for |else_like|@>=
2748if (cat1==colon) squash(pp+1,1,base,1,58);
2749else if (cat1==lbrace) squash(pp,1,else_head,0,59);
2750else if (cat1==stmt) {
2751  big_app(force); big_app1(pp); big_app(indent); big_app(break_space);
2752  big_app1(pp+1); big_app(outdent); big_app(force);
2753  reduce(pp,2,stmt,-1,60);
2754}
2755
2756@ @<Cases for |else_head|@>=
2757if (cat1==stmt || cat1==exp) {
2758  big_app(force); big_app1(pp); big_app(break_space); app(noop);
2759  big_app(cancel); big_app1(pp+1); big_app(force);
2760  reduce(pp,2,stmt,-1,61);
2761}
2762
2763@ @<Cases for |if_clause|@>=
2764if (cat1==lbrace) squash(pp,1,if_head,0,62);
2765else if (cat1==stmt) {
2766  if (cat2==else_like) {
2767    big_app(force); big_app1(pp); big_app(indent); big_app(break_space);
2768    big_app1(pp+1); big_app(outdent); big_app(force); big_app1(pp+2);
2769    if (cat3==if_like) {
2770      big_app(' '); big_app1(pp+3); reduce(pp,4,if_like,0,63);
2771    }@+else reduce(pp,3,else_like,0,64);
2772  }
2773  else squash(pp,1,else_like,0,65);
2774}
2775
2776@ @<Cases for |if_head|@>=
2777if (cat1==stmt || cat1==exp) {
2778  if (cat2==else_like) {
2779    big_app(force); big_app1(pp); big_app(break_space); app(noop);
2780    big_app(cancel); big_app1(pp+1); big_app(force); big_app1(pp+2);
2781    if (cat3==if_like) {
2782      big_app(' '); big_app1(pp+3); reduce(pp,4,if_like,0,66);
2783    }@+else reduce(pp,3,else_like,0,67);
2784  }
2785  else squash(pp,1,else_head,0,68);
2786}
2787
2788@ @<Cases for |do_like|@>=
2789if (cat1==stmt && cat2==else_like && cat3==semi) {
2790  big_app1(pp); big_app(break_space); app(noop); big_app(cancel);
2791  big_app1(pp+1); big_app(cancel); app(noop); big_app(break_space);
2792  big_app2(pp+2); reduce(pp,4,stmt,-1,69);
2793}
2794
2795@ @<Cases for |case_like|@>=
2796if (cat1==semi) squash(pp,2,stmt,-1,70);
2797else if (cat1==colon) squash(pp,2,tag,-1,71);
2798else if (cat1==exp) {
2799  big_app1(pp); big_app(' ');  big_app1(pp+1);  reduce(pp,2,exp,-2,72);
2800}
2801
2802@ @<Cases for |catch_like|@>=
2803if (cat1==cast || cat1==exp) {
2804  big_app2(pp); big_app(indent); big_app(indent); reduce(pp,2,fn_decl,0,73);
2805}
2806
2807@ @<Cases for |tag|@>=
2808if (cat1==tag) {
2809  big_app1(pp); big_app(break_space); big_app1(pp+1); reduce(pp,2,tag,-1,74);
2810}
2811else if (cat1==stmt||cat1==decl||cat1==function) {
2812  big_app(force); big_app(backup); big_app1(pp); big_app(break_space);
2813  big_app1(pp+1); reduce(pp,2,cat1,-1,75);
2814}
2815
2816@ The user can decide at run-time whether short statements should be
2817grouped together on the same line.
2818
2819@d force_lines flags['f'] /* should each statement be on its own line? */
2820@<Cases for |stmt|@>=
2821if (cat1==stmt||cat1==decl||cat1==function) {
2822  big_app1(pp);
2823  if (cat1==function) big_app(big_force);
2824  else if (cat1==decl) big_app(big_force);
2825  else if (force_lines) big_app(force);
2826  else big_app(break_space);
2827  big_app1(pp+1); reduce(pp,2,cat1,-1,76);
2828}
2829
2830@ @<Cases for |semi|@>=
2831big_app(' '); big_app1(pp); reduce(pp,1,stmt,-1,77);
2832
2833@ @<Cases for |lproc|@>=
2834if (cat1==define_like) make_underlined(pp+2);
2835if (cat1==else_like || cat1==if_like ||cat1==define_like)
2836  squash(pp,2,lproc,0,78);
2837else if (cat1==rproc) {
2838  app(inserted); big_app2(pp); reduce(pp,2,insert,-1,79);
2839} else if (cat1==exp || cat1==function) {
2840  if (cat2==rproc) {
2841    app(inserted); big_app1(pp); big_app(' '); big_app2(pp+1);
2842    reduce(pp,3,insert,-1,80);
2843  }
2844  else if (cat2==exp && cat3==rproc && cat1==exp) {
2845    app(inserted); big_app1(pp); big_app(' '); big_app1(pp+1); app_str(" \\5");
2846@.\\5@>
2847    big_app2(pp+2); reduce(pp,4,insert,-1,80);
2848  }
2849}
2850
2851@ @<Cases for |section_scrap|@>=
2852if (cat1==semi) {
2853  big_app2(pp); big_app(force); reduce(pp,2,stmt,-2,81);
2854}
2855else squash(pp,1,exp,-2,82);
2856
2857@ @<Cases for |insert|@>=
2858if (cat1)
2859  squash(pp,2,cat1,0,83);
2860
2861@ @<Cases for |prelangle|@>=
2862init_mathness=cur_mathness=yes_math;
2863app('<'); reduce(pp,1,binop,-2,84);
2864
2865@ @<Cases for |prerangle|@>=
2866init_mathness=cur_mathness=yes_math;
2867app('>'); reduce(pp,1,binop,-2,85);
2868
2869@ @<Cases for |langle|@>=
2870if (cat1==prerangle) {
2871  big_app1(pp); app('\\'); app(','); big_app1(pp+1);
2872@.\\,@>
2873  reduce(pp,2,cast,-1,86);
2874}
2875else if (cat1==decl_head || cat1==int_like || cat1==exp) {
2876  if (cat2==prerangle) squash(pp,3,cast,-1,87);
2877  else if (cat2==comma) {
2878    big_app3(pp); app(opt); app('9'); reduce(pp,3,langle,0,88);
2879  }
2880}
2881
2882@ @<Cases for |template_like|@>=
2883if (cat1==exp && cat2==prelangle) squash(pp+2,1,langle,2,89);
2884else if (cat1==exp || cat1==raw_int) {
2885  big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,cat1,-2,90);
2886}@+ else squash(pp,1,raw_int,0,91);
2887
2888@ @<Cases for |new_like|@>=
2889if (cat1==lpar && cat2==exp && cat3==rpar) squash(pp,4,new_like,0,92);
2890else if (cat1==cast) {
2891  big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,93);
2892}
2893else if (cat1!=lpar) squash(pp,1,new_exp,0,94);
2894
2895@ @<Cases for |new_exp|@>=
2896if (cat1==int_like || cat1==const_like) {
2897  big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,new_exp,0,95);
2898}
2899else if (cat1==struct_like && (cat2==exp || cat2==int_like)) {
2900  big_app1(pp); big_app(' '); big_app1(pp+1); big_app(' ');
2901  big_app1(pp+2); reduce(pp,3,new_exp,0,96);
2902}
2903else if (cat1==raw_ubin) {
2904  big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}');
2905  reduce(pp,2,new_exp,0,97);
2906}
2907else if (cat1==lpar) squash(pp,1,exp,-2,98);
2908else if (cat1==exp) {
2909  big_app1(pp); big_app(' '); reduce(pp,1,exp,-2,98);
2910}
2911else if (cat1!=raw_int && cat1!=struct_like && cat1!=colcol)
2912  squash(pp,1,exp,-2,99);
2913
2914@ @<Cases for |ftemplate|@>=
2915if (cat1==prelangle) squash(pp+1,1,langle,1,100);
2916else squash(pp,1,exp,-2,101);
2917
2918@ @<Cases for |for_like|@>=
2919if (cat1==exp) {
2920  big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,else_like,-2,102);
2921}
2922
2923@ @<Cases for |raw_ubin|@>=
2924if (cat1==const_like) {
2925  big_app2(pp); app_str("\\ "); reduce(pp,2,raw_ubin,0,103);
2926@.\\\ @>
2927} else squash(pp,1,ubinop,-2,104);
2928
2929@ @<Cases for |const_like|@>=
2930squash(pp,1,int_like,-2,105);
2931
2932@ @<Cases for |raw_int|@>=
2933if (cat1==prelangle) squash(pp+1,1,langle,1,106);
2934else if (cat1==colcol) squash(pp,2,colcol,-1,107);
2935else if (cat1==cast) squash(pp,2,raw_int,0,108);
2936else if (cat1==lpar) squash(pp,1,exp,-2,109);
2937else if (cat1!=langle) squash(pp,1,int_like,-3,110);
2938
2939@ @<Cases for |operator_like|@>=
2940if (cat1==binop || cat1==unop || cat1==ubinop) {
2941  if (cat2==binop) break;
2942  big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}');
2943  reduce(pp,2,exp,-2,111);
2944}
2945else if (cat1==new_like || cat1==delete_like) {
2946  big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,112);
2947}
2948else if (cat1==comma) squash(pp,2,exp,-2,113);
2949else if (cat1!=raw_ubin) squash(pp,1,new_exp,0,114);
2950
2951@ @<Cases for |typedef_like|@>=
2952if ((cat1==int_like || cat1==cast) && (cat2==comma || cat2==semi))
2953  squash(pp+1,1,exp,-1,115);
2954else if (cat1==int_like) {
2955  big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,typedef_like,0,116);
2956}
2957else if (cat1==exp && cat2!=lpar && cat2!=exp && cat2!=cast) {
2958  make_underlined(pp+1); make_reserved(pp+1);
2959  big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,typedef_like,0,117);
2960}
2961else if (cat1==comma) {
2962  big_app2(pp); big_app(' '); reduce(pp,2,typedef_like,0,118);
2963}
2964else if (cat1==semi) squash(pp,2,decl,-1,119);
2965else if (cat1==ubinop && (cat2==ubinop || cat2==cast)) {
2966  big_app('{'); big_app1(pp+1); big_app('}'); big_app1(pp+2);
2967  reduce(pp+1,2,cat2,0,120);
2968}
2969
2970@ @<Cases for |delete_like|@>=
2971if (cat1==lpar && cat2==rpar) {
2972  big_app2(pp); app('\\'); app(','); big_app1(pp+2);
2973@.\\,@>
2974  reduce(pp,3,delete_like,0,121);
2975}
2976else if (cat1==exp) {
2977  big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,122);
2978}
2979
2980@ @<Cases for |question|@>=
2981if (cat1==exp && (cat2==colon || cat2==base)) {
2982  (pp+2)->mathness=5*yes_math; /* this colon should be in math mode */
2983  squash(pp,3,binop,-2,123);
2984}
2985
2986@ Now here's the |reduce| procedure used in our code for productions.
2987
2988The `|freeze_text|' macro is used to give official status to a token list.
2989Before saying |freeze_text|, items are appended to the current token list,
2990and we know that the eventual number of this token list will be the current
2991value of |text_ptr|. But no list of that number really exists as yet,
2992because no ending point for the current list has been
2993stored in the |tok_start| array. After saying |freeze_text|, the
2994old current token list becomes legitimate, and its number is the current
2995value of |text_ptr-1| since |text_ptr| has been increased. The new
2996current token list is empty and ready to be appended to.
2997Note that |freeze_text| does not check to see that |text_ptr| hasn't gotten
2998too large, since it is assumed that this test was done beforehand.
2999
3000@d freeze_text *(++text_ptr)=tok_ptr
3001
3002@c
3003void
3004reduce(j,k,c,d,n)
3005scrap_pointer j;
3006eight_bits c;
3007short k, d, n;
3008{
3009  scrap_pointer i, i1; /* pointers into scrap memory */
3010  j->cat=c; j->trans=text_ptr;
3011  j->mathness=4*cur_mathness+init_mathness;
3012  freeze_text;
3013  if (k>1) {
3014    for (i=j+k, i1=j+1; i<=lo_ptr; i++, i1++) {
3015      i1->cat=i->cat; i1->trans=i->trans;
3016      i1->mathness=i->mathness;
3017    }
3018    lo_ptr=lo_ptr-k+1;
3019  }
3020  pp=(pp+d<scrap_base? scrap_base: pp+d);
3021  @<Print a snapshot of the scrap list if debugging @>;
3022  pp--; /* we next say |pp++| */
3023}
3024
3025@ Here's the |squash| procedure, which
3026takes advantage of the simplification that occurs when |k==1|.
3027
3028@c
3029void
3030squash(j,k,c,d,n)
3031scrap_pointer j;
3032eight_bits c;
3033short k, d, n;
3034{
3035  scrap_pointer i; /* pointers into scrap memory */
3036  if (k==1) {
3037    j->cat=c; pp=(pp+d<scrap_base? scrap_base: pp+d);
3038    @<Print a snapshot...@>;
3039    pp--; /* we next say |pp++| */
3040    return;
3041  }
3042  for (i=j; i<j+k; i++) big_app1(i);
3043  reduce(j,k,c,d,n);
3044}
3045
3046@ And here now is the code that applies productions as long as possible.
3047Before applying the production mechanism, we must make sure
3048it has good input (at least four scraps, the length of the lhs of the
3049longest rules), and that there is enough room in the memory arrays
3050to hold the appended tokens and texts.  Here we use a very
3051conservative test; it's more important to make sure the program
3052will still work if we change the production rules (within reason)
3053than to squeeze the last bit of space from the memory arrays.
3054
3055@d safe_tok_incr 20
3056@d safe_text_incr 10
3057@d safe_scrap_incr 10
3058
3059@<Reduce the scraps using the productions until no more rules apply@>=
3060while (1) {
3061  @<Make sure the entries |pp| through |pp+3| of |cat| are defined@>;
3062  if (tok_ptr+safe_tok_incr>tok_mem_end) {
3063    if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
3064    overflow("token");
3065  }
3066  if (text_ptr+safe_text_incr>tok_start_end) {
3067    if (text_ptr>max_text_ptr) max_text_ptr=text_ptr;
3068    overflow("text");
3069  }
3070  if (pp>lo_ptr) break;
3071  init_mathness=cur_mathness=maybe_math;
3072  @<Match a production...@>;
3073}
3074
3075@ If we get to the end of the scrap list, category codes equal to zero are
3076stored, since zero does not match anything in a production.
3077
3078@<Make sure the entries...@>=
3079if (lo_ptr<pp+3) {
3080  while (hi_ptr<=scrap_ptr && lo_ptr!=pp+3) {
3081    (++lo_ptr)->cat=hi_ptr->cat; lo_ptr->mathness=(hi_ptr)->mathness;
3082    lo_ptr->trans=(hi_ptr++)->trans;
3083  }
3084  for (i=lo_ptr+1;i<=pp+3;i++) i->cat=0;
3085}
3086
3087@ If \.{CWEAVE} is being run in debugging mode, the production numbers and
3088current stack categories will be printed out when |tracing| is set to 2;
3089a sequence of two or more irreducible scraps will be printed out when
3090|tracing| is set to 1.
3091
3092@<Global...@>=
3093int tracing; /* can be used to show parsing details */
3094
3095@ @<Print a snapsh...@>=
3096{ scrap_pointer k; /* pointer into |scrap_info| */
3097  if (tracing==2) {
3098    printf("\n%d:",n);
3099    for (k=scrap_base; k<=lo_ptr; k++) {
3100      if (k==pp) putxchar('*'); else putxchar(' ');
3101      if (k->mathness %4 ==  yes_math) putchar('+');
3102      else if (k->mathness %4 ==  no_math) putchar('-');
3103      print_cat(k->cat);
3104      if (k->mathness /4 ==  yes_math) putchar('+');
3105      else if (k->mathness /4 ==  no_math) putchar('-');
3106    }
3107    if (hi_ptr<=scrap_ptr) printf("..."); /* indicate that more is coming */
3108  }
3109}
3110
3111@ The |translate| function assumes that scraps have been stored in
3112positions |scrap_base| through |scrap_ptr| of |cat| and |trans|. It
3113applies productions as much as
3114possible. The result is a token list containing the translation of
3115the given sequence of scraps.
3116
3117After calling |translate|, we will have |text_ptr+3<=max_texts| and
3118|tok_ptr+6<=max_toks|, so it will be possible to create up to three token
3119lists with up to six tokens without checking for overflow. Before calling
3120|translate|, we should have |text_ptr<max_texts| and |scrap_ptr<max_scraps|,
3121since |translate| might add a new text and a new scrap before it checks
3122for overflow.
3123
3124@c
3125text_pointer
3126translate() /* converts a sequence of scraps */
3127{
3128  scrap_pointer i, /* index into |cat| */
3129  j; /* runs through final scraps */
3130  pp=scrap_base; lo_ptr=pp-1; hi_ptr=pp;
3131  @<If tracing, print an indication of where we are@>;
3132  @<Reduce the scraps...@>;
3133  @<Combine the irreducible scraps that remain@>;
3134}
3135
3136@ If the initial sequence of scraps does not reduce to a single scrap,
3137we concatenate the translations of all remaining scraps, separated by
3138blank spaces, with dollar signs surrounding the translations of scraps
3139where appropriate.
3140
3141@<Combine the irreducible...@>= {
3142  @<If semi-tracing, show the irreducible scraps@>;
3143  for (j=scrap_base; j<=lo_ptr; j++) {
3144    if (j!=scrap_base) app(' ');
3145    if (j->mathness % 4 == yes_math) app('$');
3146    app1(j);
3147    if (j->mathness / 4 == yes_math) app('$');
3148    if (tok_ptr+6>tok_mem_end) overflow("token");
3149  }
3150  freeze_text; return(text_ptr-1);
3151}
3152
3153@ @<If semi-tracing, show the irreducible scraps@>=
3154if (lo_ptr>scrap_base && tracing==1) {
3155  printf("\nIrreducible scrap sequence in section %d:",section_count);
3156@.Irreducible scrap sequence...@>
3157  mark_harmless;
3158  for (j=scrap_base; j<=lo_ptr; j++) {
3159    printf(" "); print_cat(j->cat);
3160  }
3161}
3162
3163@ @<If tracing,...@>=
3164if (tracing==2) {
3165  printf("\nTracing after l. %d:\n",cur_line); mark_harmless;
3166@.Tracing after...@>
3167  if (loc>buffer+50) {
3168    printf("...");
3169    term_write(loc-51,51);
3170  }
3171  else term_write(buffer,loc-buffer);
3172}
3173
3174@* Initializing the scraps.
3175If we are going to use the powerful production mechanism just developed, we
3176must get the scraps set up in the first place, given a \CEE/ text. A table
3177of the initial scraps corresponding to \CEE/ tokens appeared above in the
3178section on parsing; our goal now is to implement that table. We shall do this
3179by implementing a subroutine called |C_parse| that is analogous to the
3180|C_xref| routine used during phase one.
3181
3182Like |C_xref|, the |C_parse| procedure starts with the current
3183value of |next_control| and it uses the operation |next_control=get_next()|
3184repeatedly to read \CEE/ text until encountering the next `\.{\v}' or
3185`\.{/*}', or until |next_control>=format_code|. The scraps corresponding to
3186what it reads are appended into the |cat| and |trans| arrays, and |scrap_ptr|
3187is advanced.
3188
3189@c
3190void
3191C_parse(spec_ctrl) /* creates scraps from \CEE/ tokens */
3192  eight_bits spec_ctrl;
3193{
3194  int count; /* characters remaining before string break */
3195  while (next_control<format_code || next_control==spec_ctrl) {
3196    @<Append the scrap appropriate to |next_control|@>;
3197    next_control=get_next();
3198    if (next_control=='|' || next_control==begin_comment ||
3199        next_control==begin_short_comment) return;
3200  }
3201}
3202
3203@ The following macro is used to append a scrap whose tokens have just
3204been appended:
3205
3206@d app_scrap(c,b) {
3207  (++scrap_ptr)->cat=(c); scrap_ptr->trans=text_ptr;
3208  scrap_ptr->mathness=5*(b); /* no no, yes yes, or maybe maybe */
3209  freeze_text;
3210}
3211
3212@ @<Append the scr...@>=
3213@<Make sure that there is room for the new scraps, tokens, and texts@>;
3214switch (next_control) {
3215  case section_name:
3216    app(section_flag+(int)(cur_section-name_dir));
3217    app_scrap(section_scrap,maybe_math);
3218    app_scrap(exp,yes_math);@+break;
3219  case string: case constant: case verbatim: @<Append a string or constant@>;
3220   @+break;
3221  case identifier: app_cur_id(1);@+break;
3222  case TeX_string: @<Append a \TEX/ string, without forming a scrap@>;@+break;
3223  case '/': case '.':
3224    app(next_control); app_scrap(binop,yes_math);@+break;
3225  case '<': app_str("\\langle");@+app_scrap(prelangle,yes_math);@+break;
3226@.\\langle@>
3227  case '>': app_str("\\rangle");@+app_scrap(prerangle,yes_math);@+break;
3228@.\\rangle@>
3229  case '=': app_str("\\K"); app_scrap(binop,yes_math);@+break;
3230@.\\K@>
3231  case '|': app_str("\\OR"); app_scrap(binop,yes_math);@+break;
3232@.\\OR@>
3233  case '^': app_str("\\XOR"); app_scrap(binop,yes_math);@+break;
3234@.\\XOR@>
3235  case '%': app_str("\\MOD"); app_scrap(binop,yes_math);@+break;
3236@.\\MOD@>
3237  case '!': app_str("\\R"); app_scrap(unop,yes_math);@+break;
3238@.\\R@>
3239  case '~': app_str("\\CM"); app_scrap(unop,yes_math);@+break;
3240@.\\CM@>
3241  case '+': case '-': app(next_control); app_scrap(ubinop,yes_math);@+break;
3242  case '*': app(next_control); app_scrap(raw_ubin,yes_math);@+break;
3243  case '&': app_str("\\AND"); app_scrap(raw_ubin,yes_math);@+break;
3244@.\\AND@>
3245  case '?': app_str("\\?"); app_scrap(question,yes_math);@+break;
3246@.\\?@>
3247  case '#': app_str("\\#"); app_scrap(ubinop,yes_math);@+break;
3248@.\\\#@>
3249  case ignore: case xref_roman: case xref_wildcard:
3250  case xref_typewriter: case noop:@+break;
3251  case '(': case '[': app(next_control); app_scrap(lpar,maybe_math);@+break;
3252  case ')': case ']': app(next_control); app_scrap(rpar,maybe_math);@+break;
3253  case '{': app_str("\\{"@q}@>); app_scrap(lbrace,yes_math);@+break;
3254@.\\\{@>@q}@>
3255  case '}': app_str(@q{@>"\\}"); app_scrap(rbrace,yes_math);@+break;
3256@q{@>@.\\\}@>
3257  case ',': app(','); app_scrap(comma,yes_math);@+break;
3258  case ';': app(';'); app_scrap(semi,maybe_math);@+break;
3259  case ':': app(':'); app_scrap(colon,no_math);@+break;@/
3260  @t\4@>  @<Cases involving nonstandard characters@>@;
3261  case thin_space: app_str("\\,"); app_scrap(insert,maybe_math);@+break;
3262@.\\,@>
3263  case math_break: app(opt); app_str("0");
3264    app_scrap(insert,maybe_math);@+break;
3265  case line_break: app(force); app_scrap(insert,no_math);@+break;
3266  case left_preproc: app(force); app(preproc_line);
3267    app_str("\\#"); app_scrap(lproc,no_math);@+break;
3268@.\\\#@>
3269  case right_preproc: app(force); app_scrap(rproc,no_math);@+break;
3270  case big_line_break: app(big_force); app_scrap(insert,no_math);@+break;
3271  case no_line_break: app(big_cancel); app(noop); app(break_space);
3272    app(noop); app(big_cancel);
3273    app_scrap(insert,no_math);@+break;
3274  case pseudo_semi: app_scrap(semi,maybe_math);@+break;
3275  case macro_arg_open: app_scrap(begin_arg,maybe_math);@+break;
3276  case macro_arg_close: app_scrap(end_arg,maybe_math);@+break;
3277  case join: app_str("\\J"); app_scrap(insert,no_math);@+break;
3278@.\\J@>
3279  case output_defs_code: app(force); app_str("\\ATH"); app(force);
3280    app_scrap(insert,no_math);@+break;
3281@.\\ATH@>
3282  default: app(inserted); app(next_control);
3283    app_scrap(insert,maybe_math);@+break;
3284}
3285
3286@ @<Make sure that there is room for the new...@>=
3287if (scrap_ptr+safe_scrap_incr>scrap_info_end ||
3288  tok_ptr+safe_tok_incr>tok_mem_end @| ||
3289  text_ptr+safe_text_incr>tok_start_end) {
3290  if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr;
3291  if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
3292  if (text_ptr>max_text_ptr) max_text_ptr=text_ptr;
3293  overflow("scrap/token/text");
3294}
3295
3296@ Some nonstandard characters may have entered \.{CWEAVE} by means of
3297standard ones. They are converted to \TEX/ control sequences so that it is
3298possible to keep \.{CWEAVE} from outputting unusual |char| codes.
3299
3300@<Cases involving nonstandard...@>=
3301case not_eq: app_str("\\I");@+app_scrap(binop,yes_math);@+break;
3302@.\\I@>
3303case lt_eq: app_str("\\Z");@+app_scrap(binop,yes_math);@+break;
3304@.\\Z@>
3305case gt_eq: app_str("\\G");@+app_scrap(binop,yes_math);@+break;
3306@.\\G@>
3307case eq_eq: app_str("\\E");@+app_scrap(binop,yes_math);@+break;
3308@.\\E@>
3309case and_and: app_str("\\W");@+app_scrap(binop,yes_math);@+break;
3310@.\\W@>
3311case or_or: app_str("\\V");@+app_scrap(binop,yes_math);@+break;
3312@.\\V@>
3313case plus_plus: app_str("\\PP");@+app_scrap(unop,yes_math);@+break;
3314@.\\PP@>
3315case minus_minus: app_str("\\MM");@+app_scrap(unop,yes_math);@+break;
3316@.\\MM@>
3317case minus_gt: app_str("\\MG");@+app_scrap(binop,yes_math);@+break;
3318@.\\MG@>
3319case gt_gt: app_str("\\GG");@+app_scrap(binop,yes_math);@+break;
3320@.\\GG@>
3321case lt_lt: app_str("\\LL");@+app_scrap(binop,yes_math);@+break;
3322@.\\LL@>
3323case dot_dot_dot: app_str("\\,\\ldots\\,");@+app_scrap(raw_int,yes_math);
3324  @+break;
3325@.\\,@>
3326@.\\ldots@>
3327case colon_colon: app_str("\\DC");@+app_scrap(colcol,maybe_math);@+break;
3328@.\\DC@>
3329case period_ast: app_str("\\PA");@+app_scrap(binop,yes_math);@+break;
3330@.\\PA@>
3331case minus_gt_ast: app_str("\\MGA");@+app_scrap(binop,yes_math);@+break;
3332@.\\MGA@>
3333
3334@ The following code must use |app_tok| instead of |app| in order to
3335protect against overflow. Note that |tok_ptr+1<=max_toks| after |app_tok|
3336has been used, so another |app| is legitimate before testing again.
3337
3338Many of the special characters in a string must be prefixed by `\.\\' so that
3339\TEX/ will print them properly.
3340@^special string characters@>
3341
3342@<Append a string or...@>=
3343count= -1;
3344if (next_control==constant) app_str("\\T{"@q}@>);
3345@.\\T@>
3346else if (next_control==string) {
3347  count=20; app_str("\\.{"@q}@>);
3348}
3349@.\\.@>
3350else app_str("\\vb{"@q}@>);
3351@.\\vb@>
3352while (id_first<id_loc) {
3353  if (count==0) { /* insert a discretionary break in a long string */
3354     app_str(@q(@>@q{@>"}\\)\\.{"@q}@>); count=20;
3355@q(@>@.\\)@>
3356  }
3357@^high-bit character handling@>
3358  if((eight_bits)(*id_first)>0177) {
3359    app_tok(quoted_char);
3360    app_tok((eight_bits)(*id_first++));
3361  }
3362  else {
3363    switch (*id_first) {
3364      case  ' ':case '\\':case '#':case '%':case '$':case '^':
3365      case '{': case '}': case '~': case '&': case '_': app('\\'); break;
3366@.\\\ @>
3367@.\\\\@>
3368@.\\\#@>
3369@.\\\%@>
3370@.\\\$@>
3371@.\\\^@>
3372@.\\\{@>@q}@>
3373@q{@>@.\\\}@>
3374@.\\\~@>
3375@.\\\&@>
3376@.\\\_@>
3377      case '@@': if (*(id_first+1)=='@@') id_first++;
3378        else err_print("! Double @@ should be used in strings");
3379@.Double @@ should be used...@>
3380    }
3381    app_tok(*id_first++);
3382  }
3383  count--;
3384}
3385app(@q{@>'}');
3386app_scrap(exp,maybe_math);
3387
3388@ We do not make the \TEX/ string into a scrap, because there is no
3389telling what the user will be putting into it; instead we leave it
3390open, to be picked up by the next scrap. If it comes at the end of a
3391section, it will be made into a scrap when |finish_C| is called.
3392
3393There's a known bug here, in cases where an adjacent scrap is
3394|prelangle| or |prerangle|. Then the \TEX/ string can disappear
3395when the \.{\\langle} or \.{\\rangle} becomes \.{<} or \.{>}.
3396For example, if the user writes \.{\v x<@@ty@@>\v}, the \TEX/ string
3397\.{\\hbox\{y\}} eventually becomes part of an |insert| scrap, which is combined
3398with a |prelangle| scrap and eventually lost. The best way to work around
3399this bug is probably to enclose the \.{@@t...@@>} in \.{@@[...@@]} so that
3400the \TEX/ string is treated as an expression.
3401@^bug, known@>
3402
3403@<Append a \TEX/ string, without forming a scrap@>=
3404app_str("\\hbox{"@q}@>);
3405@^high-bit character handling@>
3406while (id_first<id_loc)
3407  if((eight_bits)(*id_first)>0177) {
3408    app_tok(quoted_char);
3409    app_tok((eight_bits)(*id_first++));
3410  }
3411  else {
3412    if (*id_first=='@@') id_first++;
3413    app_tok(*id_first++);
3414  }
3415app(@q{@>'}');
3416
3417@ The function |app_cur_id| appends the current identifier to the
3418token list; it also builds a new scrap if |scrapping==1|.
3419
3420@<Predec...@>=
3421void app_cur_id();
3422
3423@ @c
3424void
3425app_cur_id(scrapping)
3426boolean scrapping; /* are we making this into a scrap? */
3427{
3428  name_pointer p=id_lookup(id_first,id_loc,normal);
3429  if (p->ilk<=custom) { /* not a reserved word */
3430    app(id_flag+(int)(p-name_dir));
3431    if (scrapping) app_scrap(p->ilk==func_template? ftemplate: exp,
3432                             p->ilk==custom? yes_math: maybe_math);
3433@.\\NULL@>
3434  } else {
3435    app(res_flag+(int)(p-name_dir));
3436    if (scrapping) {
3437      if (p->ilk==alfop) app_scrap(ubinop,yes_math)@;
3438      else app_scrap(p->ilk,maybe_math);
3439    }
3440  }
3441}
3442
3443@ When the `\.{\v}' that introduces \CEE/ text is sensed, a call on
3444|C_translate| will return a pointer to the \TEX/ translation of
3445that text. If scraps exist in |scrap_info|, they are
3446unaffected by this translation process.
3447
3448@c
3449text_pointer
3450C_translate()
3451{
3452  text_pointer p; /* points to the translation */
3453  scrap_pointer save_base; /* holds original value of |scrap_base| */
3454  save_base=scrap_base; scrap_base=scrap_ptr+1;
3455  C_parse(section_name); /* get the scraps together */
3456  if (next_control!='|') err_print("! Missing '|' after C text");
3457@.Missing '|'...@>
3458  app_tok(cancel); app_scrap(insert,maybe_math);
3459        /* place a |cancel| token as a final ``comment'' */
3460  p=translate(); /* make the translation */
3461  if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr;
3462  scrap_ptr=scrap_base-1; scrap_base=save_base; /* scrap the scraps */
3463  return(p);
3464}
3465
3466@ The |outer_parse| routine is to |C_parse| as |outer_xref|
3467is to |C_xref|: It constructs a sequence of scraps for \CEE/ text
3468until |next_control>=format_code|. Thus, it takes care of embedded comments.
3469
3470The token list created from within `\pb' brackets is output as an argument
3471to \.{\\PB}, if the user has invoked \.{CWEAVE} with the \.{+e} flag.
3472Although \.{cwebmac} ignores \.{\\PB}, other macro packages
3473might use it to localize the special meaning of the macros that mark up
3474program text.
3475
3476@d make_pb flags['e']
3477
3478@c
3479void
3480outer_parse() /* makes scraps from \CEE/ tokens and comments */
3481{
3482  int bal; /* brace level in comment */
3483  text_pointer p, q; /* partial comments */
3484  while (next_control<format_code)
3485    if (next_control!=begin_comment && next_control!=begin_short_comment)
3486      C_parse(ignore);
3487    else {
3488      boolean is_long_comment=(next_control==begin_comment);
3489      @<Make sure that there is room for the new...@>;
3490      app(cancel); app(inserted);
3491      if (is_long_comment) app_str("\\C{"@q}@>);
3492@.\\C@>
3493      else app_str("\\SHC{"@q}@>);
3494@.\\SHC@>
3495      bal=copy_comment(is_long_comment,1); next_control=ignore;
3496      while (bal>0) {
3497        p=text_ptr; freeze_text; q=C_translate();
3498         /* at this point we have |tok_ptr+6<=max_toks| */
3499        app(tok_flag+(int)(p-tok_start));
3500        if (make_pb) app_str("\\PB{");
3501@.\\PB@>
3502        app(inner_tok_flag+(int)(q-tok_start));
3503        if (make_pb)  app_tok('}');
3504        if (next_control=='|') {
3505          bal=copy_comment(is_long_comment,bal);
3506          next_control=ignore;
3507        }
3508        else bal=0; /* an error has been reported */
3509      }
3510      app(force); app_scrap(insert,no_math);
3511        /* the full comment becomes a scrap */
3512    }
3513}
3514
3515@* Output of tokens.
3516So far our programs have only built up multi-layered token lists in
3517\.{CWEAVE}'s internal memory; we have to figure out how to get them into
3518the desired final form. The job of converting token lists to characters in
3519the \TEX/ output file is not difficult, although it is an implicitly
3520recursive process. Four main considerations had to be kept in mind when
3521this part of \.{CWEAVE} was designed.  (a) There are two modes of output:
3522|outer| mode, which translates tokens like |force| into line-breaking
3523control sequences, and |inner| mode, which ignores them except that blank
3524spaces take the place of line breaks. (b) The |cancel| instruction applies
3525to adjacent token or tokens that are output, and this cuts across levels
3526of recursion since `|cancel|' occurs at the beginning or end of a token
3527list on one level. (c) The \TEX/ output file will be semi-readable if line
3528breaks are inserted after the result of tokens like |break_space| and
3529|force|.  (d) The final line break should be suppressed, and there should
3530be no |force| token output immediately after `\.{\\Y\\B}'.
3531
3532@ The output process uses a stack to keep track of what is going on at
3533different ``levels'' as the token lists are being written out. Entries on
3534this stack have three parts:
3535
3536\yskip\hang |end_field| is the |tok_mem| location where the token list of a
3537particular level will end;
3538
3539\yskip\hang |tok_field| is the |tok_mem| location from which the next token
3540on a particular level will be read;
3541
3542\yskip\hang |mode_field| is the current mode, either |inner| or |outer|.
3543
3544\yskip\noindent The current values of these quantities are referred to
3545quite frequently, so they are stored in a separate place instead of in the
3546|stack| array. We call the current values |cur_end|, |cur_tok|, and
3547|cur_mode|.
3548
3549The global variable |stack_ptr| tells how many levels of output are
3550currently in progress. The end of output occurs when an |end_translation|
3551token is found, so the stack is never empty except when we first begin the
3552output process.
3553
3554@d inner 0 /* value of |mode| for \CEE/ texts within \TEX/ texts */
3555@d outer 1 /* value of |mode| for \CEE/ texts in sections */
3556
3557@<Typed...@>= typedef int mode;
3558typedef struct {
3559  token_pointer end_field; /* ending location of token list */
3560  token_pointer tok_field; /* present location within token list */
3561  boolean mode_field; /* interpretation of control tokens */
3562} output_state;
3563typedef output_state *stack_pointer;
3564
3565@ @d cur_end cur_state.end_field /* current ending location in |tok_mem| */
3566@d cur_tok cur_state.tok_field /* location of next output token in |tok_mem| */
3567@d cur_mode cur_state.mode_field /* current mode of interpretation */
3568@d init_stack stack_ptr=stack;cur_mode=outer /* initialize the stack */
3569
3570@<Global...@>=
3571output_state cur_state; /* |cur_end|, |cur_tok|, |cur_mode| */
3572output_state stack[stack_size]; /* info for non-current levels */
3573stack_pointer stack_ptr; /* first unused location in the output state stack */
3574stack_pointer stack_end=stack+stack_size-1; /* end of |stack| */
3575stack_pointer max_stack_ptr; /* largest value assumed by |stack_ptr| */
3576
3577@ @<Set init...@>=
3578max_stack_ptr=stack;
3579
3580@ To insert token-list |p| into the output, the |push_level| subroutine
3581is called; it saves the old level of output and gets a new one going.
3582The value of |cur_mode| is not changed.
3583
3584@c
3585void
3586push_level(p) /* suspends the current level */
3587text_pointer p;
3588{
3589  if (stack_ptr==stack_end) overflow("stack");
3590  if (stack_ptr>stack) { /* save current state */
3591    stack_ptr->end_field=cur_end;
3592    stack_ptr->tok_field=cur_tok;
3593    stack_ptr->mode_field=cur_mode;
3594  }
3595  stack_ptr++;
3596  if (stack_ptr>max_stack_ptr) max_stack_ptr=stack_ptr;
3597  cur_tok=*p; cur_end=*(p+1);
3598}
3599
3600@ Conversely, the |pop_level| routine restores the conditions that were in
3601force when the current level was begun. This subroutine will never be
3602called when |stack_ptr==1|.
3603
3604@c
3605void
3606pop_level()
3607{
3608  cur_end=(--stack_ptr)->end_field;
3609  cur_tok=stack_ptr->tok_field; cur_mode=stack_ptr->mode_field;
3610}
3611
3612@ The |get_output| function returns the next byte of output that is not a
3613reference to a token list. It returns the values |identifier| or |res_word|
3614or |section_code| if the next token is to be an identifier (typeset in
3615italics), a reserved word (typeset in boldface), or a section name (typeset
3616by a complex routine that might generate additional levels of output).
3617In these cases |cur_name| points to the identifier or section name in
3618question.
3619
3620@<Global...@>=
3621name_pointer cur_name;
3622
3623@ @d res_word 0201 /* returned by |get_output| for reserved words */
3624@d section_code 0200 /* returned by |get_output| for section names */
3625
3626@c
3627eight_bits
3628get_output() /* returns the next token of output */
3629{
3630  sixteen_bits a; /* current item read from |tok_mem| */
3631  restart: while (cur_tok==cur_end) pop_level();
3632  a=*(cur_tok++);
3633  if (a>=0400) {
3634    cur_name=a % id_flag + name_dir;
3635    switch (a / id_flag) {
3636      case 2: return(res_word); /* |a==res_flag+cur_name| */
3637      case 3: return(section_code); /* |a==section_flag+cur_name| */
3638      case 4: push_level(a % id_flag + tok_start); goto restart;
3639        /* |a==tok_flag+cur_name| */
3640      case 5: push_level(a % id_flag + tok_start); cur_mode=inner; goto restart;
3641        /* |a==inner_tok_flag+cur_name| */
3642      default: return(identifier); /* |a==id_flag+cur_name| */
3643    }
3644  }
3645  return(a);
3646}
3647
3648@ The real work associated with token output is done by |make_output|.
3649This procedure appends an |end_translation| token to the current token list,
3650and then it repeatedly calls |get_output| and feeds characters to the output
3651buffer until reaching the |end_translation| sentinel. It is possible for
3652|make_output| to be called recursively, since a section name may include
3653embedded \CEE/ text; however, the depth of recursion never exceeds one
3654level, since section names cannot be inside of section names.
3655
3656A procedure called |output_C| does the scanning, translation, and
3657output of \CEE/ text within `\pb' brackets, and this procedure uses
3658|make_output| to output the current token list. Thus, the recursive call
3659of |make_output| actually occurs when |make_output| calls |output_C|
3660while outputting the name of a section.
3661@^recursion@>
3662
3663@c
3664void
3665output_C() /* outputs the current token list */
3666{
3667  token_pointer save_tok_ptr;
3668  text_pointer save_text_ptr;
3669  sixteen_bits save_next_control; /* values to be restored */
3670  text_pointer p; /* translation of the \CEE/ text */
3671  save_tok_ptr=tok_ptr; save_text_ptr=text_ptr;
3672  save_next_control=next_control; next_control=ignore; p=C_translate();
3673  app(inner_tok_flag+(int)(p-tok_start));
3674  if (make_pb) {
3675    out_str("\\PB{"); make_output(); out('}');
3676@.\\PB@>
3677  }@+else make_output(); /* output the list */
3678  if (text_ptr>max_text_ptr) max_text_ptr=text_ptr;
3679  if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
3680  text_ptr=save_text_ptr; tok_ptr=save_tok_ptr; /* forget the tokens */
3681  next_control=save_next_control; /* restore |next_control| to original state */
3682}
3683
3684@ Here is \.{CWEAVE}'s major output handler.
3685
3686@<Predecl...@>=
3687void make_output();
3688
3689@ @c
3690void
3691make_output() /* outputs the equivalents of tokens */
3692{
3693  eight_bits a, /* current output byte */
3694  b; /* next output byte */
3695  int c; /* count of |indent| and |outdent| tokens */
3696  char scratch[longest_name]; /* scratch area for section names */
3697  char *k, *k_limit; /* indices into |scratch| */
3698  char *j; /* index into |buffer| */
3699  char *p; /* index into |byte_mem| */
3700  char delim; /* first and last character of string being copied */
3701  char *save_loc, *save_limit; /* |loc| and |limit| to be restored */
3702  name_pointer cur_section_name; /* name of section being output */
3703  boolean save_mode; /* value of |cur_mode| before a sequence of breaks */
3704  app(end_translation); /* append a sentinel */
3705  freeze_text; push_level(text_ptr-1);
3706  while (1) {
3707    a=get_output();
3708    reswitch: switch(a) {
3709      case end_translation: return;
3710      case identifier: case res_word: @<Output an identifier@>; break;
3711      case section_code: @<Output a section name@>; break;
3712      case math_rel: out_str("\\MRL{"@q}@>);
3713@.\\MRL@>
3714      case noop: case inserted: break;
3715      case cancel: case big_cancel: c=0; b=a;
3716        while (1) {
3717          a=get_output();
3718          if (a==inserted) continue;
3719          if ((a<indent && !(b==big_cancel&&a==' ')) || a>big_force) break;
3720          if (a==indent) c++; else if (a==outdent) c--;
3721          else if (a==opt) a=get_output();
3722        }
3723        @<Output saved |indent| or |outdent| tokens@>;
3724        goto reswitch;
3725      case indent: case outdent: case opt: case backup: case break_space:
3726      case force: case big_force: case preproc_line: @<Output a control,
3727        look ahead in case of line breaks, possibly |goto reswitch|@>; break;
3728      case quoted_char: out(*(cur_tok++));
3729      case qualifier: break;
3730      default: out(a); /* otherwise |a| is an ordinary character */
3731    }
3732  }
3733}
3734
3735@ An identifier of length one does not have to be enclosed in braces, and it
3736looks slightly better if set in a math-italic font instead of a (slightly
3737narrower) text-italic font. Thus we output `\.{\\\v}\.{a}' but
3738`\.{\\\\\{aa\}}'.
3739
3740@<Output an identifier@>=
3741out('\\');
3742if (a==identifier) {
3743  if (cur_name->ilk==custom && !doing_format) {
3744 custom_out:
3745    for (p=cur_name->byte_start;p<(cur_name+1)->byte_start;p++)
3746      out(*p=='_'? 'x': *p=='$'? 'X': *p);
3747    break;
3748  } else if (is_tiny(cur_name)) out('|')@;
3749@.\\|@>
3750  else { delim='.';
3751    for (p=cur_name->byte_start;p<(cur_name+1)->byte_start;p++)
3752      if (xislower(*p)) { /* not entirely uppercase */
3753         delim='\\'; break;
3754      }
3755  out(delim);
3756  }
3757@.\\\\@>
3758@.\\.@>
3759}@+else if (cur_name->ilk==alfop) {
3760  out('X');
3761  goto custom_out;
3762}@+else out('&'); /* |a==res_word| */
3763@.\\\&@>
3764if (is_tiny(cur_name)) {
3765  if (isxalpha((cur_name->byte_start)[0]))
3766    out('\\');
3767  out((cur_name->byte_start)[0]);
3768}
3769else out_name(cur_name,1);
3770
3771@ The current mode does not affect the behavior of \.{CWEAVE}'s output routine
3772except when we are outputting control tokens.
3773
3774@<Output a control...@>=
3775if (a<break_space || a==preproc_line) {
3776  if (cur_mode==outer) {
3777    out('\\'); out(a-cancel+'0');
3778@.\\1@>
3779@.\\2@>
3780@.\\3@>
3781@.\\4@>
3782@.\\8@>
3783    if (a==opt) {
3784      b=get_output(); /* |opt| is followed by a digit */
3785      if (b!='0' || force_lines==0) out(b)@;
3786      else out_str("{-1}"); /* |force_lines| encourages more \.{@@\v} breaks */
3787    }
3788  } else if (a==opt) b=get_output(); /* ignore digit following |opt| */
3789  }
3790else @<Look ahead for strongest line break, |goto reswitch|@>
3791
3792@ If several of the tokens |break_space|, |force|, |big_force| occur in a
3793row, possibly mixed with blank spaces (which are ignored),
3794the largest one is used. A line break also occurs in the output file,
3795except at the very end of the translation. The very first line break
3796is suppressed (i.e., a line break that follows `\.{\\Y\\B}').
3797
3798@<Look ahead for st...@>= {
3799  b=a; save_mode=cur_mode; c=0;
3800  while (1) {
3801    a=get_output();
3802    if (a==inserted) continue;
3803    if (a==cancel || a==big_cancel) {
3804      @<Output saved |indent| or |outdent| tokens@>;
3805      goto reswitch; /* |cancel| overrides everything */
3806    }
3807    if ((a!=' ' && a<indent) || a==backup || a>big_force) {
3808      if (save_mode==outer) {
3809        if (out_ptr>out_buf+3 && strncmp(out_ptr-3,"\\Y\\B",4)==0)
3810          goto reswitch;
3811        @<Output saved |indent| or |outdent| tokens@>;
3812        out('\\'); out(b-cancel+'0');
3813@.\\5@>
3814@.\\6@>
3815@.\\7@>
3816        if (a!=end_translation) finish_line();
3817      }
3818      else if (a!=end_translation && cur_mode==inner) out(' ');
3819      goto reswitch;
3820    }
3821    if (a==indent) c++;
3822    else if (a==outdent) c--;
3823    else if (a==opt) a=get_output();
3824    else if (a>b) b=a; /* if |a==' '| we have |a<b| */
3825  }
3826}
3827
3828@ @<Output saved...@>=
3829  for (;c>0;c--) out_str("\\1");
3830@.\\1@>
3831  for (;c<0;c++) out_str("\\2");
3832@.\\2@>
3833
3834@ The remaining part of |make_output| is somewhat more complicated. When we
3835output a section name, we may need to enter the parsing and translation
3836routines, since the name may contain \CEE/ code embedded in
3837\pb\ constructions. This \CEE/ code is placed at the end of the active
3838input buffer and the translation process uses the end of the active
3839|tok_mem| area.
3840
3841@<Output a section name@>= {
3842  out_str("\\X");
3843@.\\X@>
3844  cur_xref=(xref_pointer)cur_name->xref;
3845  if (cur_xref->num==file_flag) {an_output=1; cur_xref=cur_xref->xlink;}
3846  else an_output=0;
3847  if (cur_xref->num>=def_flag) {
3848    out_section(cur_xref->num-def_flag);
3849    if (phase==3) {
3850      cur_xref=cur_xref->xlink;
3851      while (cur_xref->num>=def_flag) {
3852        out_str(", ");
3853        out_section(cur_xref->num-def_flag);
3854      cur_xref=cur_xref->xlink;
3855      }
3856    }
3857  }
3858  else out('0'); /* output the section number, or zero if it was undefined */
3859  out(':');
3860  if (an_output) out_str("\\.{"@q}@>);
3861@.\\.@>
3862  @<Output the text of the section name@>;
3863  if (an_output) out_str(@q{@>" }");
3864  out_str("\\X");
3865}
3866
3867@ @<Output the text...@>=
3868sprint_section_name(scratch,cur_name);
3869k=scratch;
3870k_limit=scratch+strlen(scratch);
3871cur_section_name=cur_name;
3872while (k<k_limit) {
3873  b=*(k++);
3874  if (b=='@@') @<Skip next character, give error if not `\.{@@}'@>;
3875  if (an_output)
3876    switch (b) {
3877 case  ' ':case '\\':case '#':case '%':case '$':case '^':
3878 case '{': case '}': case '~': case '&': case '_':
3879    out('\\'); /* falls through */
3880@.\\\ @>
3881@.\\\\@>
3882@.\\\#@>
3883@.\\\%@>
3884@.\\\$@>
3885@.\\\^@>
3886@.\\\{@>@q}@>
3887@q{@>@.\\\}@>
3888@.\\\~@>
3889@.\\\&@>
3890@.\\\_@>
3891 default: out(b);
3892    }
3893  else if (b!='|') out(b)
3894  else {
3895    @<Copy the \CEE/ text into the |buffer| array@>;
3896    save_loc=loc; save_limit=limit; loc=limit+2; limit=j+1;
3897    *limit='|'; output_C();
3898    loc=save_loc; limit=save_limit;
3899  }
3900}
3901
3902@ @<Skip next char...@>=
3903if (*k++!='@@') {
3904  printf("\n! Illegal control code in section name: <");
3905@.Illegal control code...@>
3906  print_section_name(cur_section_name); printf("> "); mark_error;
3907}
3908
3909@ The \CEE/ text enclosed in \pb\ should not contain `\.{\v}' characters,
3910except within strings. We put a `\.{\v}' at the front of the buffer, so that an
3911error message that displays the whole buffer will look a little bit sensible.
3912The variable |delim| is zero outside of strings, otherwise it
3913equals the delimiter that began the string being copied.
3914
3915@<Copy the \CEE/ text into...@>=
3916j=limit+1; *j='|'; delim=0;
3917while (1) {
3918  if (k>=k_limit) {
3919    printf("\n! C text in section name didn't end: <");
3920@.C text...didn't end@>
3921    print_section_name(cur_section_name); printf("> "); mark_error; break;
3922  }
3923  b=*(k++);
3924  if (b=='@@' || (b=='\\' && delim!=0))
3925     @<Copy a quoted character into the buffer@>
3926  else {
3927    if (b=='\'' || b=='"')
3928      if (delim==0) delim=b;
3929      else if (delim==b) delim=0;
3930    if (b!='|' || delim!=0) {
3931      if (j>buffer+long_buf_size-3) overflow("buffer");
3932      *(++j)=b;
3933    }
3934    else break;
3935  }
3936}
3937
3938@ @<Copy a quoted char...@>= {
3939  if (j>buffer+long_buf_size-4) overflow("buffer");
3940  *(++j)=b; *(++j)=*(k++);
3941}
3942
3943@** Phase two processing.
3944We have assembled enough pieces of the puzzle in order to be ready to specify
3945the processing in \.{CWEAVE}'s main pass over the source file. Phase two
3946is analogous to phase one, except that more work is involved because we must
3947actually output the \TEX/ material instead of merely looking at the
3948\.{CWEB} specifications.
3949
3950@<Predecl...@>=
3951void phase_two();
3952
3953@ @c
3954void
3955phase_two() {
3956reset_input(); if (show_progress) printf("\nWriting the output file...");
3957@.Writing the output file...@>
3958section_count=0; format_visible=1; copy_limbo();
3959finish_line(); flush_buffer(out_buf,0,0); /* insert a blank line, it looks nice */
3960while (!input_has_ended) @<Translate the current section@>;
3961}
3962
3963@ The output file will contain the control sequence \.{\\Y} between non-null
3964sections of a section, e.g., between the \TEX/ and definition parts if both
3965are nonempty. This puts a little white space between the parts when they are
3966printed. However, we don't want \.{\\Y} to occur between two definitions
3967within a single section. The variables |out_line| or |out_ptr| will
3968change if a section is non-null, so the following macros `|save_position|'
3969and `|emit_space_if_needed|' are able to handle the situation:
3970
3971@d save_position save_line=out_line; save_place=out_ptr
3972@d emit_space_if_needed if (save_line!=out_line || save_place!=out_ptr)
3973  out_str("\\Y");
3974  space_checked=1
3975@.\\Y@>
3976
3977@<Global...@>=
3978int save_line; /* former value of |out_line| */
3979char *save_place; /* former value of |out_ptr| */
3980int sec_depth; /* the integer, if any, following \.{@@*} */
3981boolean space_checked; /* have we done |emit_space_if_needed|? */
3982boolean format_visible; /* should the next format declaration be output? */
3983boolean doing_format=0; /* are we outputting a format declaration? */
3984boolean group_found=0; /* has a starred section occurred? */
3985
3986@ @<Translate the current section@>= {
3987  section_count++;
3988  @<Output the code for the beginning of a new section@>;
3989  save_position;
3990  @<Translate the \TEX/ part of the current section@>;
3991  @<Translate the definition part of the current section@>;
3992  @<Translate the \CEE/ part of the current section@>;
3993  @<Show cross-references to this section@>;
3994  @<Output the code for the end of a section@>;
3995}
3996
3997@ Sections beginning with the \.{CWEB} control sequence `\.{@@\ }' start in the
3998output with the \TEX/ control sequence `\.{\\M}', followed by the section
3999number. Similarly, `\.{@@*}' sections lead to the control sequence `\.{\\N}'.
4000In this case there's an additional parameter, representing one plus the
4001specified depth, immediately after the \.{\\N}.
4002If the section has changed, we put \.{\\*} just after the section number.
4003
4004@<Output the code for the beginning...@>=
4005if (*(loc-1)!='*') out_str("\\M");
4006@.\\M@>
4007else {
4008  while (*loc == ' ') loc++;
4009  if (*loc=='*') { /* ``top'' level */
4010    sec_depth = -1;
4011    loc++;
4012  }
4013  else {
4014    for (sec_depth=0; xisdigit(*loc);loc++)
4015      sec_depth = sec_depth*10 + (*loc) -'0';
4016  }
4017  while (*loc == ' ') loc++; /* remove spaces before group title */
4018  group_found=1;
4019  out_str("\\N");
4020@.\\N@>
4021  {@+ char s[32];@+sprintf(s,"{%d}",sec_depth+1);@+out_str(s);@+}
4022  if (show_progress)
4023  printf("*%d",section_count); update_terminal; /* print a progress report */
4024}
4025out_str("{");out_section(section_count); out_str("}");
4026
4027@ In the \TEX/ part of a section, we simply copy the source text, except that
4028index entries are not copied and \CEE/ text within \pb\ is translated.
4029
4030@<Translate the \T...@>= do {
4031  next_control=copy_TeX();
4032  switch (next_control) {
4033    case '|': init_stack; output_C(); break;
4034    case '@@': out('@@'); break;
4035    case TeX_string: case noop:
4036    case xref_roman: case xref_wildcard: case xref_typewriter:
4037    case section_name: loc-=2; next_control=get_next(); /* skip to \.{@@>} */
4038      if (next_control==TeX_string)
4039        err_print("! TeX string should be in C text only"); break;
4040@.TeX string should be...@>
4041    case thin_space: case math_break: case ord:
4042    case line_break: case big_line_break: case no_line_break: case join:
4043    case pseudo_semi: case macro_arg_open: case macro_arg_close:
4044    case output_defs_code:
4045        err_print("! You can't do that in TeX text"); break;
4046@.You can't do that...@>
4047  }
4048} while (next_control<format_code);
4049
4050@ When we get to the following code we have |next_control>=format_code|, and
4051the token memory is in its initial empty state.
4052
4053@<Translate the d...@>=
4054space_checked=0;
4055while (next_control<=definition) { /* |format_code| or |definition| */
4056  init_stack;
4057  if (next_control==definition) @<Start a macro definition@>@;
4058  else @<Start a format definition@>;
4059  outer_parse(); finish_C(format_visible); format_visible=1;
4060  doing_format=0;
4061}
4062
4063@ The |finish_C| procedure outputs the translation of the current
4064scraps, preceded by the control sequence `\.{\\B}' and followed by the
4065control sequence `\.{\\par}'. It also restores the token and scrap
4066memories to their initial empty state.
4067
4068A |force| token is appended to the current scraps before translation
4069takes place, so that the translation will normally end with \.{\\6} or
4070\.{\\7} (the \TEX/ macros for |force| and |big_force|). This \.{\\6} or
4071\.{\\7} is replaced by the concluding \.{\\par} or by \.{\\Y\\par}.
4072
4073@<Predecl...@>=
4074void finish_C();
4075
4076@ @c
4077void
4078finish_C(visible) /* finishes a definition or a \CEE/ part */
4079  boolean visible; /* nonzero if we should produce \TEX/ output */
4080{
4081  text_pointer p; /* translation of the scraps */
4082  if (visible) {
4083    out_str("\\B"); app_tok(force); app_scrap(insert,no_math);
4084    p=translate();
4085@.\\B@>
4086    app(tok_flag+(int)(p-tok_start)); make_output(); /* output the list */
4087    if (out_ptr>out_buf+1)
4088      if (*(out_ptr-1)=='\\')
4089@.\\6@>
4090@.\\7@>
4091@.\\Y@>
4092        if (*out_ptr=='6') out_ptr-=2;
4093        else if (*out_ptr=='7') *out_ptr='Y';
4094    out_str("\\par"); finish_line();
4095  }
4096  if (text_ptr>max_text_ptr) max_text_ptr=text_ptr;
4097  if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
4098  if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr;
4099  tok_ptr=tok_mem+1; text_ptr=tok_start+1; scrap_ptr=scrap_info;
4100    /* forget the tokens and the scraps */
4101}
4102
4103@ Keeping in line with the conventions of the \CEE/ preprocessor (and
4104otherwise contrary to the rules of \.{CWEB}) we distinguish here
4105between the case that `\.(' immediately follows an identifier and the
4106case that the two are separated by a space.  In the latter case, and
4107if the identifier is not followed by `\.(' at all, the replacement
4108text starts immediately after the identifier.  In the former case,
4109it starts after we scan the matching `\.)'.
4110
4111@<Start a macro...@>= {
4112  if (save_line!=out_line || save_place!=out_ptr || space_checked) app(backup);
4113  if(!space_checked){emit_space_if_needed;save_position;}
4114  app_str("\\D"); /* this will produce `\&{define }' */
4115@.\\D@>
4116  if ((next_control=get_next())!=identifier)
4117    err_print("! Improper macro definition");
4118@.Improper macro definition@>
4119  else {
4120    app('$'); app_cur_id(0);
4121    if (*loc=='(')
4122  reswitch: switch (next_control=get_next()) {
4123      case '(': case ',': app(next_control); goto reswitch;
4124      case identifier: app_cur_id(0); goto reswitch;
4125      case ')': app(next_control); next_control=get_next(); break;
4126      default: err_print("! Improper macro definition"); break;
4127    }
4128    else next_control=get_next();
4129    app_str("$ "); app(break_space);
4130    app_scrap(dead,no_math); /* scrap won't take part in the parsing */
4131  }
4132}
4133
4134@ @<Start a format...@>= {
4135  doing_format=1;
4136  if(*(loc-1)=='s' || *(loc-1)=='S') format_visible=0;
4137  if(!space_checked){emit_space_if_needed;save_position;}
4138  app_str("\\F"); /* this will produce `\&{format }' */
4139@.\\F@>
4140  next_control=get_next();
4141  if (next_control==identifier) {
4142    app(id_flag+(int)(id_lookup(id_first, id_loc,normal)-name_dir));
4143    app(' ');
4144    app(break_space); /* this is syntactically separate from what follows */
4145    next_control=get_next();
4146    if (next_control==identifier) {
4147      app(id_flag+(int)(id_lookup(id_first, id_loc,normal)-name_dir));
4148      app_scrap(exp,maybe_math); app_scrap(semi,maybe_math);
4149      next_control=get_next();
4150    }
4151  }
4152  if (scrap_ptr!=scrap_info+2) err_print("! Improper format definition");
4153@.Improper format definition@>
4154}
4155
4156@ Finally, when the \TEX/ and definition parts have been treated, we have
4157|next_control>=begin_C|. We will make the global variable |this_section|
4158point to the current section name, if it has a name.
4159
4160@<Global...@>=
4161name_pointer this_section; /* the current section name, or zero */
4162
4163@ @<Translate the \CEE/...@>=
4164this_section=name_dir;
4165if (next_control<=section_name) {
4166  emit_space_if_needed; init_stack;
4167  if (next_control==begin_C) next_control=get_next();
4168  else {
4169    this_section=cur_section;
4170    @<Check that '=' or '==' follows this section name, and
4171      emit the scraps to start the section definition@>;
4172  }
4173  while  (next_control<=section_name) {
4174    outer_parse();
4175    @<Emit the scrap for a section name if present@>;
4176  }
4177  finish_C(1);
4178}
4179
4180@ The title of the section and an $\E$ or $\mathrel+\E$ are made
4181into a scrap that should not take part in the parsing.
4182
4183@<Check that '='...@>=
4184do next_control=get_next();
4185  while (next_control=='+'); /* allow optional `\.{+=}' */
4186if (next_control!='=' && next_control!=eq_eq)
4187  err_print("! You need an = sign after the section name");
4188@.You need an = sign...@>
4189  else next_control=get_next();
4190if (out_ptr>out_buf+1 && *out_ptr=='Y' && *(out_ptr-1)=='\\') app(backup);
4191    /* the section name will be flush left */
4192@.\\Y@>
4193app(section_flag+(int)(this_section-name_dir));
4194cur_xref=(xref_pointer)this_section->xref;
4195if(cur_xref->num==file_flag) cur_xref=cur_xref->xlink;
4196app_str("${}");
4197if (cur_xref->num!=section_count+def_flag) {
4198  app_str("\\mathrel+"); /*section name is multiply defined*/
4199  this_section=name_dir; /*so we won't give cross-reference info here*/
4200}
4201app_str("\\E"); /* output an equivalence sign */
4202@.\\E@>
4203app_str("{}$");
4204app(force); app_scrap(dead,no_math);
4205        /* this forces a line break unless `\.{@@+}' follows */
4206
4207@ @<Emit the scrap...@>=
4208if (next_control<section_name) {
4209  err_print("! You can't do that in C text");
4210@.You can't do that...@>
4211  next_control=get_next();
4212}
4213else if (next_control==section_name) {
4214  app(section_flag+(int)(cur_section-name_dir));
4215  app_scrap(section_scrap,maybe_math);
4216  next_control=get_next();
4217}
4218
4219@ Cross references relating to a named section are given
4220after the section ends.
4221
4222@<Show cross...@>=
4223if (this_section>name_dir) {
4224  cur_xref=(xref_pointer)this_section->xref;
4225  if (cur_xref->num==file_flag){an_output=1;cur_xref=cur_xref->xlink;}
4226  else an_output=0;
4227  if (cur_xref->num>def_flag)
4228    cur_xref=cur_xref->xlink; /* bypass current section number */
4229  footnote(def_flag); footnote(cite_flag); footnote(0);
4230}
4231
4232@ The |footnote| procedure gives cross-reference information about
4233multiply defined section names (if the |flag| parameter is
4234|def_flag|), or about references to a section name
4235(if |flag==cite_flag|), or to its uses (if |flag==0|). It assumes that
4236|cur_xref| points to the first cross-reference entry of interest, and it
4237leaves |cur_xref| pointing to the first element not printed.  Typical outputs:
4238`\.{\\A101.}'; `\.{\\Us 370\\ET1009.}';
4239`\.{\\As 8, 27\\*\\ETs64.}'.
4240
4241Note that the output of \.{CWEAVE} is not English-specific; users may
4242supply new definitions for the macros \.{\\A}, \.{\\As}, etc.
4243
4244@<Predecl...@>=
4245void footnote();
4246
4247@ @c
4248void
4249footnote(flag) /* outputs section cross-references */
4250sixteen_bits flag;
4251{
4252  xref_pointer q; /* cross-reference pointer variable */
4253  if (cur_xref->num<=flag) return;
4254  finish_line(); out('\\');
4255@.\\A@>
4256@.\\Q@>
4257@.\\U@>
4258  out(flag==0? 'U': flag==cite_flag? 'Q': 'A');
4259  @<Output all the section numbers on the reference list |cur_xref|@>;
4260  out('.');
4261}
4262
4263@ The following code distinguishes three cases, according as the number
4264of cross-references is one, two, or more than two. Variable |q| points
4265to the first cross-reference, and the last link is a zero.
4266
4267@<Output all the section numbers...@>=
4268q=cur_xref; if (q->xlink->num>flag) out('s'); /* plural */
4269while (1) {
4270  out_section(cur_xref->num-flag);
4271  cur_xref=cur_xref->xlink; /* point to the next cross-reference to output */
4272  if (cur_xref->num<=flag) break;
4273  if (cur_xref->xlink->num>flag) out_str(", "); /* not the last */
4274  else {out_str("\\ET"); /* the last */
4275@.\\ET@>
4276  if (cur_xref != q->xlink) out('s'); /* the last of more than two */
4277  }
4278}
4279
4280@ @<Output the code for the end of a section@>=
4281out_str("\\fi"); finish_line();
4282@.\\fi@>
4283flush_buffer(out_buf,0,0); /* insert a blank line, it looks nice */
4284
4285@** Phase three processing.
4286We are nearly finished! \.{CWEAVE}'s only remaining task is to write out the
4287index, after sorting the identifiers and index entries.
4288
4289If the user has set the |no_xref| flag (the \.{-x} option on the command line),
4290just finish off the page, omitting the index, section name list, and table of
4291contents.
4292
4293@<Predecl...@>=
4294void phase_three();
4295
4296@ @c
4297void
4298phase_three() {
4299if (no_xref) {
4300  finish_line();
4301  out_str("\\end");
4302@.\\end@>
4303  finish_line();
4304}
4305else {
4306  phase=3; if (show_progress) printf("\nWriting the index...");
4307@.Writing the index...@>
4308  finish_line();
4309  if ((idx_file=fopen(idx_file_name,"w"))==NULL)
4310    fatal("! Cannot open index file ",idx_file_name);
4311@.Cannot open index file@>
4312  if (change_exists) {
4313    @<Tell about changed sections@>; finish_line(); finish_line();
4314  }
4315  out_str("\\inx"); finish_line();
4316@.\\inx@>
4317  active_file=idx_file; /* change active file to the index file */
4318  @<Do the first pass of sorting@>;
4319  @<Sort and output the index@>;
4320  finish_line(); fclose(active_file); /* finished with |idx_file| */
4321  active_file=tex_file; /* switch back to |tex_file| for a tic */
4322  out_str("\\fin"); finish_line();
4323@.\\fin@>
4324  if ((scn_file=fopen(scn_file_name,"w"))==NULL)
4325    fatal("! Cannot open section file ",scn_file_name);
4326@.Cannot open section file@>
4327  active_file=scn_file; /* change active file to section listing file */
4328  @<Output all the section names@>;
4329  finish_line(); fclose(active_file); /* finished with |scn_file| */
4330  active_file=tex_file;
4331  if (group_found) out_str("\\con");@+else out_str("\\end");
4332@.\\con@>
4333@.\\end@>
4334  finish_line();
4335  fclose(active_file);
4336}
4337if (show_happiness) printf("\nDone.");
4338check_complete(); /* was all of the change file used? */
4339}
4340
4341@ Just before the index comes a list of all the changed sections, including
4342the index section itself.
4343
4344@<Global...@>=
4345sixteen_bits k_section; /* runs through the sections */
4346
4347@ @<Tell about changed sections@>= {
4348  /* remember that the index is already marked as changed */
4349  k_section=0;
4350  while (!changed_section[++k_section]);
4351  out_str("\\ch ");
4352@.\\ch@>
4353  out_section(k_section);
4354  while (k_section<section_count) {
4355    while (!changed_section[++k_section]);
4356    out_str(", "); out_section(k_section);
4357  }
4358  out('.');
4359}
4360
4361@ A left-to-right radix sorting method is used, since this makes it easy to
4362adjust the collating sequence and since the running time will be at worst
4363proportional to the total length of all entries in the index. We put the
4364identifiers into 102 different lists based on their first characters.
4365(Uppercase letters are put into the same list as the corresponding lowercase
4366letters, since we want to have `$t<\\{TeX}<\&{to}$'.) The
4367list for character |c| begins at location |bucket[c]| and continues through
4368the |blink| array.
4369
4370@<Global...@>=
4371name_pointer bucket[256];
4372name_pointer next_name; /* successor of |cur_name| when sorting */
4373name_pointer blink[max_names]; /* links in the buckets */
4374
4375@ To begin the sorting, we go through all the hash lists and put each entry
4376having a nonempty cross-reference list into the proper bucket.
4377
4378@<Do the first pass...@>= {
4379int c;
4380for (c=0; c<=255; c++) bucket[c]=NULL;
4381for (h=hash; h<=hash_end; h++) {
4382  next_name=*h;
4383  while (next_name) {
4384    cur_name=next_name; next_name=cur_name->link;
4385    if (cur_name->xref!=(char*)xmem) {
4386      c=(eight_bits)((cur_name->byte_start)[0]);
4387      if (xisupper(c)) c=tolower(c);
4388      blink[cur_name-name_dir]=bucket[c]; bucket[c]=cur_name;
4389    }
4390  }
4391}
4392}
4393
4394@ During the sorting phase we shall use the |cat| and |trans| arrays from
4395\.{CWEAVE}'s parsing algorithm and rename them |depth| and |head|. They now
4396represent a stack of identifier lists for all the index entries that have
4397not yet been output. The variable |sort_ptr| tells how many such lists are
4398present; the lists are output in reverse order (first |sort_ptr|, then
4399|sort_ptr-1|, etc.). The |j|th list starts at |head[j]|, and if the first
4400|k| characters of all entries on this list are known to be equal we have
4401|depth[j]==k|.
4402
4403@ @<Rest of |trans_plus| union@>=
4404name_pointer Head;
4405
4406@ @d depth cat /* reclaims memory that is no longer needed for parsing */
4407@d head trans_plus.Head /* ditto */
4408@f sort_pointer int
4409@d sort_pointer scrap_pointer /* ditto */
4410@d sort_ptr scrap_ptr /* ditto */
4411@d max_sorts max_scraps /* ditto */
4412
4413@<Global...@>=
4414eight_bits cur_depth; /* depth of current buckets */
4415char *cur_byte; /* index into |byte_mem| */
4416sixteen_bits cur_val; /* current cross-reference number */
4417sort_pointer max_sort_ptr; /* largest value of |sort_ptr| */
4418
4419@ @<Set init...@>=
4420max_sort_ptr=scrap_info;
4421
4422@ The desired alphabetic order is specified by the |collate| array; namely,
4423$|collate|[0]<|collate|[1]<\cdots<|collate|[100]$.
4424
4425@<Global...@>=
4426eight_bits collate[102+128]; /* collation order */
4427@^high-bit character handling@>
4428
4429@ We use the order $\hbox{null}<\.\ <\hbox{other characters}<{}$\.\_${}<
4430\.A=\.a<\cdots<\.Z=\.z<\.0<\cdots<\.9.$ Warning: The collation mapping
4431needs to be changed if ASCII code is not being used.
4432@^ASCII code dependencies@>
4433@^high-bit character handling@>
4434
4435We initialize |collate| by copying a few characters at a time, because
4436some \CEE/ compilers choke on long strings.
4437
4438@<Set init...@>=
4439collate[0]=0;
4440strcpy(collate+1," \1\2\3\4\5\6\7\10\11\12\13\14\15\16\17");
4441/* 16 characters + 1 = 17 */
4442strcpy(collate+17,"\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37");
4443/* 16 characters + 17 = 33 */
4444strcpy(collate+33,"!\42#$%&'()*+,-./:;<=>?@@[\\]^`{|}~_");
4445/* 32 characters + 33 = 65 */
4446strcpy(collate+65,"abcdefghijklmnopqrstuvwxyz0123456789");
4447/* (26 + 10) characters + 65 = 101 */
4448strcpy(collate+101,"\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217");
4449/* 16 characters + 101 = 117 */
4450strcpy(collate+117,"\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237");
4451/* 16 characters + 117 = 133 */
4452strcpy(collate+133,"\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257");
4453/* 16 characters + 133 = 149 */
4454strcpy(collate+149,"\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277");
4455/* 16 characters + 149 = 165 */
4456strcpy(collate+165,"\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317");
4457/* 16 characters + 165 = 181 */
4458strcpy(collate+181,"\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337");
4459/* 16 characters + 181 = 197 */
4460strcpy(collate+197,"\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357");
4461/* 16 characters + 197 = 213 */
4462strcpy(collate+213,"\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377");
4463/* 16 characters + 213 = 229 */
4464
4465@ Procedure |unbucket| goes through the buckets and adds nonempty lists
4466to the stack, using the collating sequence specified in the |collate| array.
4467The parameter to |unbucket| tells the current depth in the buckets.
4468Any two sequences that agree in their first 255 character positions are
4469regarded as identical.
4470
4471@d infinity 255 /* $\infty$ (approximately) */
4472
4473@<Predecl...@>=
4474void  unbucket();
4475
4476@ @c
4477void
4478unbucket(d) /* empties buckets having depth |d| */
4479eight_bits d;
4480{
4481  int c;  /* index into |bucket|; cannot be a simple |char| because of sign
4482    comparison below*/
4483  for (c=100+128; c>= 0; c--) if (bucket[collate[c]]) {
4484@^high-bit character handling@>
4485    if (sort_ptr>=scrap_info_end) overflow("sorting");
4486    sort_ptr++;
4487    if (sort_ptr>max_sort_ptr) max_sort_ptr=sort_ptr;
4488    if (c==0) sort_ptr->depth=infinity;
4489    else sort_ptr->depth=d;
4490    sort_ptr->head=bucket[collate[c]]; bucket[collate[c]]=NULL;
4491  }
4492}
4493
4494@ @<Sort and output...@>=
4495sort_ptr=scrap_info; unbucket(1);
4496while (sort_ptr>scrap_info) {
4497  cur_depth=sort_ptr->depth;
4498  if (blink[sort_ptr->head-name_dir]==0 || cur_depth==infinity)
4499    @<Output index entries for the list at |sort_ptr|@>@;
4500  else @<Split the list at |sort_ptr| into further lists@>;
4501}
4502
4503@ @<Split the list...@>= {
4504  eight_bits c;
4505  next_name=sort_ptr->head;
4506  do {
4507    cur_name=next_name; next_name=blink[cur_name-name_dir];
4508    cur_byte=cur_name->byte_start+cur_depth;
4509    if (cur_byte==(cur_name+1)->byte_start) c=0; /* hit end of the name */
4510    else {
4511      c=(eight_bits) *cur_byte;
4512      if (xisupper(c)) c=tolower(c);
4513    }
4514  blink[cur_name-name_dir]=bucket[c]; bucket[c]=cur_name;
4515  } while (next_name);
4516  --sort_ptr; unbucket(cur_depth+1);
4517}
4518
4519@ @<Output index...@>= {
4520  cur_name=sort_ptr->head;
4521  do {
4522    out_str("\\I");
4523@.\\I@>
4524    @<Output the name at |cur_name|@>;
4525    @<Output the cross-references at |cur_name|@>;
4526    cur_name=blink[cur_name-name_dir];
4527  } while (cur_name);
4528  --sort_ptr;
4529}
4530
4531@ @<Output the name...@>=
4532switch (cur_name->ilk) {
4533  case normal: case func_template: if (is_tiny(cur_name)) out_str("\\|");
4534    else {char *j;
4535      for (j=cur_name->byte_start;j<(cur_name+1)->byte_start;j++)
4536        if (xislower(*j)) goto lowcase;
4537      out_str("\\."); break;
4538lowcase: out_str("\\\\");
4539    }
4540  break;
4541@.\\|@>
4542@.\\.@>
4543@.\\\\@>
4544  case wildcard: out_str("\\9");@+ goto not_an_identifier;
4545@.\\9@>
4546  case typewriter: out_str("\\.");
4547@.\\.@>
4548  case roman: not_an_identifier: out_name(cur_name,0); goto name_done;
4549  case custom: {char *j; out_str("$\\");
4550    for (j=cur_name->byte_start;j<(cur_name+1)->byte_start;j++)
4551      out(*j=='_'? 'x': *j=='$'? 'X': *j);
4552    out('$');
4553    goto name_done;
4554    }
4555  default: out_str("\\&");
4556@.\\\&@>
4557}
4558out_name(cur_name,1);
4559name_done:@;
4560
4561@ Section numbers that are to be underlined are enclosed in
4562`\.{\\[}$\,\ldots\,$\.]'.
4563
4564@<Output the cross-references...@>=
4565@<Invert the cross-reference list at |cur_name|, making |cur_xref| the head@>;
4566do {
4567  out_str(", "); cur_val=cur_xref->num;
4568  if (cur_val<def_flag) out_section(cur_val);
4569  else {out_str("\\["); out_section(cur_val-def_flag); out(']');}
4570@.\\[@>
4571  cur_xref=cur_xref->xlink;
4572} while (cur_xref!=xmem);
4573out('.'); finish_line();
4574
4575@ List inversion is best thought of as popping elements off one stack and
4576pushing them onto another. In this case |cur_xref| will be the head of
4577the stack that we push things onto.
4578@<Global...@>=
4579xref_pointer next_xref, this_xref;
4580  /* pointer variables for rearranging a list */
4581
4582@ @<Invert the cross-reference list at |cur_name|, making |cur_xref| the head@>=
4583this_xref=(xref_pointer)cur_name->xref; cur_xref=xmem;
4584do {
4585  next_xref=this_xref->xlink; this_xref->xlink=cur_xref;
4586  cur_xref=this_xref; this_xref=next_xref;
4587} while (this_xref!=xmem);
4588
4589@ The following recursive procedure walks through the tree of section names and
4590prints them.
4591@^recursion@>
4592
4593@<Predecl...@>=
4594void section_print();
4595
4596@ @c
4597void
4598section_print(p) /* print all section names in subtree |p| */
4599name_pointer p;
4600{
4601  if (p) {
4602    section_print(p->llink); out_str("\\I");
4603@.\\I@>
4604    tok_ptr=tok_mem+1; text_ptr=tok_start+1; scrap_ptr=scrap_info; init_stack;
4605    app(p-name_dir+section_flag); make_output();
4606    footnote(cite_flag);
4607    footnote(0); /* |cur_xref| was set by |make_output| */
4608    finish_line();@/
4609    section_print(p->rlink);
4610  }
4611}
4612
4613@ @<Output all the section names@>=section_print(root)
4614
4615@ Because on some systems the difference between two pointers is a |long|
4616rather than an |int|, we use \.{\%ld} to print these quantities.
4617
4618@c
4619void
4620print_stats() {
4621  printf("\nMemory usage statistics:\n");
4622@.Memory usage statistics:@>
4623  printf("%ld names (out of %ld)\n",
4624            (long)(name_ptr-name_dir),(long)max_names);
4625  printf("%ld cross-references (out of %ld)\n",
4626            (long)(xref_ptr-xmem),(long)max_refs);
4627  printf("%ld bytes (out of %ld)\n",
4628            (long)(byte_ptr-byte_mem),(long)max_bytes);
4629  printf("Parsing:\n");
4630  printf("%ld scraps (out of %ld)\n",
4631            (long)(max_scr_ptr-scrap_info),(long)max_scraps);
4632  printf("%ld texts (out of %ld)\n",
4633            (long)(max_text_ptr-tok_start),(long)max_texts);
4634  printf("%ld tokens (out of %ld)\n",
4635            (long)(max_tok_ptr-tok_mem),(long)max_toks);
4636  printf("%ld levels (out of %ld)\n",
4637            (long)(max_stack_ptr-stack),(long)stack_size);
4638  printf("Sorting:\n");
4639  printf("%ld levels (out of %ld)\n",
4640            (long)(max_sort_ptr-scrap_info),(long)max_scraps);
4641}
4642
4643@** Index.
4644If you have read and understood the code for Phase III above, you know what
4645is in this index and how it got here. All sections in which an identifier is
4646used are listed with that identifier, except that reserved words are
4647indexed only when they appear in format definitions, and the appearances
4648of identifiers in section names are not indexed. Underlined entries
4649correspond to where the identifier was declared. Error messages, control
4650sequences put into the output, and a few
4651other things like ``recursion'' are indexed here too.
4652