1% This file is part of CWEB.
2% This program by Silvio Levy and Donald E. Knuth
3% is based on a program by Knuth.
4% It is distributed WITHOUT ANY WARRANTY, express or implied.
5% Version 3.64 --- February 2002
6% (same as Version 3.5 except for minor corrections)
7% (also quotes backslashes in file names of #line directives)
8
9% Copyright (C) 1987,1990,1993,2000 Silvio Levy and Donald E. Knuth
10
11% Permission is granted to make and distribute verbatim copies of this
12% document provided that the copyright notice and this permission notice
13% are preserved on all copies.
14
15% Permission is granted to copy and distribute modified versions of this
16% document under the conditions for verbatim copying, provided that the
17% entire resulting derived work is given a different name and distributed
18% under the terms of a permission notice identical to this one.
19
20% Here is TeX material that gets inserted after \input cwebmac
21\def\hang{\hangindent 3em\indent\ignorespaces}
22\def\pb{$\.|\ldots\.|$} % C brackets (|...|)
23\def\v{\char'174} % vertical (|) in typewriter font
24\mathchardef\RA="3221 % right arrow
25\mathchardef\BA="3224 % double arrow
26
27\def\title{CTANGLE (Version 3.64)}
28\def\topofcontents{\null\vfill
29  \centerline{\titlefont The {\ttitlefont CTANGLE} processor}
30  \vskip 15pt
31  \centerline{(Version 3.64)}
32  \vfill}
33\def\botofcontents{\vfill
34\noindent
35Copyright \copyright\ 1987, 1990, 1993, 2000 Silvio Levy and Donald E. Knuth
36\bigskip\noindent
37Permission is granted to make and distribute verbatim copies of this
38document provided that the copyright notice and this permission notice
39are preserved on all copies.
40
41\smallskip\noindent
42Permission is granted to copy and distribute modified versions of this
43document under the conditions for verbatim copying, provided that the
44entire resulting derived work is given a different name and distributed
45under the terms of a permission notice identical to this one.
46}
47\pageno=\contentspagenumber \advance\pageno by 1
48\let\maybe=\iftrue
49@s not_eq normal @q unreserve a C++ keyword @>
50
51@** Introduction.
52This is the \.{CTANGLE} program by Silvio Levy and Donald E. Knuth,
53based on \.{TANGLE} by Knuth.
54We are thankful to
55Nelson Beebe, Hans-Hermann Bode (to whom the \CPLUSPLUS/ adaptation is due),
56Klaus Guntermann, Norman Ramsey, Tomas Rokicki, Joachim Schnitter,
57Joachim Schrod, Lee Wittenberg, and others who have contributed improvements.
58
59The ``banner line'' defined here should be changed whenever \.{CTANGLE}
60is modified.
61
62@d banner "This is CTANGLE (Version 3.64)\n"
63
64@c
65@<Include files@>@/
66@h
67@<Common code for \.{CWEAVE} and \.{CTANGLE}@>@/
68@<Typedef declarations@>@/
69@<Global variables@>@/
70@<Predeclaration of procedures@>@/
71
72@ We predeclare several standard system functions here instead of including
73their system header files, because the names of the header files are not as
74standard as the names of the functions. (For example, some \CEE/ environments
75have \.{<string.h>} where others have \.{<strings.h>}.)
76
77@<Predecl...@>=
extern int strlen(); /* length of string */
extern char *strcpy(); /* copy one string to another */
extern char *strncpy(); /* copy up to $n$ string characters */
extern int strcmp(); /* compare strings lexicographically */
extern int strncmp(); /* compare up to $n$ string characters */
83
84@ \.{CTANGLE} has a fairly straightforward outline.  It operates in
85two phases: First it reads the source file, saving the \CEE/ code in
86compressed form; then it shuffles and outputs the code.
87
88Please read the documentation for \.{common}, the set of routines common
89to \.{CTANGLE} and \.{CWEAVE}, before proceeding further.
90
91@c
92int main (ac, av)
93int ac;
94char **av;
95{
96  argc=ac; argv=av;
97  program=ctangle;
98  @<Set initial values@>;
99  common_init();
100  if (show_banner) printf(banner); /* print a ``banner line'' */
101  phase_one(); /* read all the user's text and compress it into |tok_mem| */
102  phase_two(); /* output the contents of the compressed tables */
103  return wrap_up(); /* and exit gracefully */
104}
105
106@ The following parameters were sufficient in the original \.{TANGLE} to
107handle \TEX/,
108so they should be sufficient for most applications of \.{CTANGLE}.
109If you change |max_bytes|, |max_names|, or |hash_size| you should also
110change them in the file |"common.w"|.
111
112@d max_bytes 90000 /* the number of bytes in identifiers,
113  index entries, and section names; used in |"common.w"| */
114@d max_toks 270000 /* number of bytes in compressed \CEE/ code */
115@d max_names 4000 /* number of identifiers, strings, section names;
116  must be less than 10240; used in |"common.w"| */
117@d max_texts 2500 /* number of replacement texts, must be less than 10240 */
118@d hash_size 353 /* should be prime; used in |"common.w"| */
119@d longest_name 10000 /* section names shouldn't be longer than this */
120@d stack_size 50 /* number of simultaneous levels of macro expansion */
121@d buf_size 100 /* for \.{CWEAVE} and \.{CTANGLE} */
122
123@ The next few sections contain stuff from the file |"common.w"| that must
124be included in both |"ctangle.w"| and |"cweave.w"|. It appears in
125file |"common.h"|, which needs to be updated when |"common.w"| changes.
126
127@i common.h
128
129@* Data structures exclusive to {\tt CTANGLE}.
130We've already seen that the |byte_mem| array holds the names of identifiers,
131strings, and sections;
132the |tok_mem| array holds the replacement texts
133for sections. Allocation is sequential, since things are deleted only
134during Phase II, and only in a last-in-first-out manner.
135
136A \&{text} variable is a structure containing a pointer into
137|tok_mem|, which tells where the corresponding text starts, and an
138integer |text_link|, which, as we shall see later, is used to connect
139pieces of text that have the same name.  All the \&{text}s are stored in
140the array |text_info|, and we use a |text_pointer| variable to refer
141to them.
142
143The first position of |tok_mem| that is unoccupied by
144replacement text is called |tok_ptr|, and the first unused location of
145|text_info| is called |text_ptr|.  Thus we usually have the identity
146|text_ptr->tok_start==tok_ptr|.
147
148If your machine does not support |unsigned char| you should change
149the definition of \&{eight\_bits} to |unsigned short|.
150@^system dependencies@>
151
152@<Typed...@>=
153typedef struct {
154  eight_bits *tok_start; /* pointer into |tok_mem| */
155  sixteen_bits text_link; /* relates replacement texts */
156} text;
157typedef text *text_pointer;
158
159@ @<Glob...@>=
160text text_info[max_texts];
161text_pointer text_info_end=text_info+max_texts-1;
162text_pointer text_ptr; /* first unused position in |text_info| */
163eight_bits tok_mem[max_toks];
164eight_bits *tok_mem_end=tok_mem+max_toks-1;
165eight_bits *tok_ptr; /* first unused position in |tok_mem| */
166
167@ @<Set init...@>=
168text_info->tok_start=tok_ptr=tok_mem;
169text_ptr=text_info+1; text_ptr->tok_start=tok_mem;
170  /* this makes replacement text 0 of length zero */
171
172@ If |p| is a pointer to a section name, |p->equiv| is a pointer to its
173replacement text, an element of the array |text_info|.
174
175@d equiv equiv_or_xref /* info corresponding to names */
176
177@ @<Set init...@>=
178name_dir->equiv=(char *)text_info; /* the undefined section has no replacement text */
179
180@ Here's the procedure that decides whether a name of length |l|
181starting at position |first| equals the identifier pointed to by |p|:
182
183@c
184int names_match(p,first,l)
185name_pointer p; /* points to the proposed match */
186char *first; /* position of first character of string */
187int l; /* length of identifier */
188{
189  if (length(p)!=l) return 0;
190  return !strncmp(first,p->byte_start,l);
191}
192
193@ The common lookup routine refers to separate routines |init_node| and
194|init_p| when the data structure grows. Actually |init_p| is called only by
195\.{CWEAVE}, but we need to declare a dummy version so that
196the loader won't complain of its absence.
197
198@c
199void
200init_node(node)
201name_pointer node;
202{
203    node->equiv=(char *)text_info;
204}
205void
206init_p() {}
207
208@* Tokens.
209Replacement texts, which represent \CEE/ code in a compressed format,
210appear in |tok_mem| as mentioned above. The codes in
211these texts are called `tokens'; some tokens occupy two consecutive
212eight-bit byte positions, and the others take just one byte.
213
214If $p$ points to a replacement text, |p->tok_start| is the |tok_mem| position
215of the first eight-bit code of that text. If |p->text_link==0|,
216this is the replacement text for a macro, otherwise it is the replacement
217text for a section. In the latter case |p->text_link| is either equal to
218|section_flag|, which means that there is no further text for this section, or
219|p->text_link| points to a continuation of this replacement text; such
220links are created when several sections have \CEE/ texts with the same
221name, and they also tie together all the \CEE/ texts of unnamed sections.
222The replacement text pointer for the first unnamed section appears in
223|text_info->text_link|, and the most recent such pointer is |last_unnamed|.
224
225@d section_flag max_texts /* final |text_link| in section replacement texts */
226
227@<Glob...@>=
228text_pointer last_unnamed; /* most recent replacement text of unnamed section */
229
230@ @<Set init...@>= last_unnamed=text_info; text_info->text_link=0;
231
232@ If the first byte of a token is less than |0200|, the token occupies a
233single byte. Otherwise we make a sixteen-bit token by combining two consecutive
234bytes |a| and |b|. If |0200<=a<0250|, then |(a-0200)@t${}\times2^8$@>+b|
235points to an identifier; if |0250<=a<0320|, then
236|(a-0250)@t${}\times2^8$@>+b| points to a section name
237(or, if it has the special value |output_defs_flag|,
238to the area where the preprocessor definitions are stored); and if
239|0320<=a<0400|, then |(a-0320)@t${}\times2^8$@>+b| is the number of the section
240in which the current replacement text appears.
241
242Codes less than |0200| are 7-bit |char| codes that represent themselves.
243Some of the 7-bit codes will not be present, however, so we can
244use them for special purposes. The following symbolic names are used:
245
246\yskip \hang |join| denotes the concatenation of adjacent items with no
247space or line breaks allowed between them (the \.{@@\&} operation of \.{CWEB}).
248
249\hang |string| denotes the beginning or end of a string, verbatim
250construction or numerical constant.
251@^ASCII code dependencies@>
252
253@d string 02 /* takes the place of extended ASCII \.{\char2} */
254@d join 0177 /* takes the place of ASCII delete */
255@d output_defs_flag (2*024000-1)
256
257@ The following procedure is used to enter a two-byte value into
258|tok_mem| when a replacement text is being generated.
259
260@c
261void
262store_two_bytes(x)
263sixteen_bits x;
264{
265  if (tok_ptr+2>tok_mem_end) overflow("token");
266  *tok_ptr++=x>>8; /* store high byte */
267  *tok_ptr++=x&0377; /* store low byte */
268}
269
270@** Stacks for output.  The output process uses a stack to keep track
271of what is going on at different ``levels'' as the sections are being
272written out.  Entries on this stack have five parts:
273
274\yskip\hang |end_field| is the |tok_mem| location where the replacement
275text of a particular level will end;
276
277\hang |byte_field| is the |tok_mem| location from which the next token
278on a particular level will be read;
279
280\hang |name_field| points to the name corresponding to a particular level;
281
282\hang |repl_field| points to the replacement text currently being read
283at a particular level;
284
285\hang |section_field| is the section number, or zero if this is a macro.
286
287\yskip\noindent The current values of these five quantities are referred to
288quite frequently, so they are stored in a separate place instead of in
289the |stack| array. We call the current values |cur_end|, |cur_byte|,
290|cur_name|, |cur_repl|, and |cur_section|.
291
292The global variable |stack_ptr| tells how many levels of output are
293currently in progress. The end of all output occurs when the stack is
294empty, i.e., when |stack_ptr==stack|.
295
296@<Typed...@>=
297typedef struct {
298  eight_bits *end_field; /* ending location of replacement text */
299  eight_bits *byte_field; /* present location within replacement text */
300  name_pointer name_field; /* |byte_start| index for text being output */
301  text_pointer repl_field; /* |tok_start| index for text being output */
302  sixteen_bits section_field; /* section number or zero if not a section */
303} output_state;
304typedef output_state *stack_pointer;
305
306@ @d cur_end cur_state.end_field /* current ending location in |tok_mem| */
307@d cur_byte cur_state.byte_field /* location of next output byte in |tok_mem|*/
308@d cur_name cur_state.name_field /* pointer to current name being expanded */
309@d cur_repl cur_state.repl_field /* pointer to current replacement text */
310@d cur_section cur_state.section_field /* current section number being expanded */
311
312@<Global...@>=
313output_state cur_state; /* |cur_end|, |cur_byte|, |cur_name|, |cur_repl|,
314  and |cur_section| */
315output_state stack[stack_size+1]; /* info for non-current levels */
316stack_pointer stack_ptr; /* first unused location in the output state stack */
317stack_pointer stack_end=stack+stack_size; /* end of |stack| */
318
319@ To get the output process started, we will perform the following
320initialization steps. We may assume that |text_info->text_link| is nonzero,
321since it points to the \CEE/ text in the first unnamed section that generates
322code; if there are no such sections, there is nothing to output, and an
323error message will have been generated before we do any of the initialization.
324
325@<Initialize the output stacks@>=
326stack_ptr=stack+1; cur_name=name_dir; cur_repl=text_info->text_link+text_info;
327cur_byte=cur_repl->tok_start; cur_end=(cur_repl+1)->tok_start; cur_section=0;
328
329@ When the replacement text for name |p| is to be inserted into the output,
330the following subroutine is called to save the old level of output and get
331the new one going.
332
333We assume that the \CEE/ compiler can copy structures.
334@^system dependencies@>
335
336@c
337void
338push_level(p) /* suspends the current level */
339name_pointer p;
340{
341  if (stack_ptr==stack_end) overflow("stack");
342  *stack_ptr=cur_state;
343  stack_ptr++;
344  if (p!=NULL) { /* |p==NULL| means we are in |output_defs| */
345    cur_name=p; cur_repl=(text_pointer)p->equiv;
346    cur_byte=cur_repl->tok_start; cur_end=(cur_repl+1)->tok_start;
347    cur_section=0;
348  }
349}
350
351@ When we come to the end of a replacement text, the |pop_level| subroutine
352does the right thing: It either moves to the continuation of this replacement
353text or returns the state to the most recently stacked level.
354
355@c
356void
357pop_level(flag) /* do this when |cur_byte| reaches |cur_end| */
358int flag; /* |flag==0| means we are in |output_defs| */
359{
360  if (flag && cur_repl->text_link<section_flag) { /* link to a continuation */
361    cur_repl=cur_repl->text_link+text_info; /* stay on the same level */
362    cur_byte=cur_repl->tok_start; cur_end=(cur_repl+1)->tok_start;
363    return;
364  }
365  stack_ptr--; /* go down to the previous level */
366  if (stack_ptr>stack) cur_state=*stack_ptr;
367}
368
369@ The heart of the output procedure is the function |get_output|,
370which produces the next token of output and sends it on to the lower-level
371function |out_char|. The main purpose of |get_output| is to handle the
372necessary stacking and unstacking. It sends the value |section_number|
373if the next output begins or ends the replacement text of some section,
374in which case |cur_val| is that section's number (if beginning) or the
375negative of that value (if ending). (A section number of 0 indicates
376not the beginning or ending of a section, but a \&{\#line} command.)
377And it sends the value |identifier|
378if the next output is an identifier, in which case
379|cur_val| points to that identifier name.
380
381@d section_number 0201 /* code returned by |get_output| for section numbers */
382@d identifier 0202 /* code returned by |get_output| for identifiers */
383
384@<Global...@>=
385int cur_val; /* additional information corresponding to output token */
386
387@ If |get_output| finds that no more output remains, it returns with
388|stack_ptr==stack|.
389@^high-bit character handling@>
390
391@c
392void
393get_output() /* sends next token to |out_char| */
394{
395  sixteen_bits a; /* value of current byte */
396  restart: if (stack_ptr==stack) return;
397  if (cur_byte==cur_end) {
398    cur_val=-((int)cur_section); /* cast needed because of sign extension */
399    pop_level(1);
400    if (cur_val==0) goto restart;
401    out_char(section_number); return;
402  }
403  a=*cur_byte++;
404  if (out_state==verbatim && a!=string && a!=constant && a!='\n')
405    C_putc(a); /* a high-bit character can occur in a string */
406  else if (a<0200) out_char(a); /* one-byte token */
407  else {
408    a=(a-0200)*0400+*cur_byte++;
409    switch (a/024000) { /* |024000==(0250-0200)*0400| */
410      case 0: cur_val=a; out_char(identifier); break;
411      case 1: if (a==output_defs_flag) output_defs();
412        else @<Expand section |a-024000|, |goto restart|@>;
413        break;
414      default: cur_val=a-050000; if (cur_val>0) cur_section=cur_val;
415        out_char(section_number);
416    }
417  }
418}
419
420@ The user may have forgotten to give any \CEE/ text for a section name,
421or the \CEE/ text may have been associated with a different name by mistake.
422
423@<Expand section |a-...@>=
424{
425  a-=024000;
426  if ((a+name_dir)->equiv!=(char *)text_info) push_level(a+name_dir);
427  else if (a!=0) {
428    printf("\n! Not present: <");
429    print_section_name(a+name_dir); err_print(">");
430@.Not present: <section name>@>
431  }
432  goto restart;
433}
434
435@* Producing the output.
436The |get_output| routine above handles most of the complexity of output
437generation, but there are two further considerations that have a nontrivial
438effect on \.{CTANGLE}'s algorithms.
439
440@ First,
441we want to make sure that the output has spaces and line breaks in
442the right places (e.g., not in the middle of a string or a constant or an
443identifier, not at a `\.{@@\&}' position
444where quantities are being joined together, and certainly after an \.=
445because the \CEE/ compiler thinks \.{=-} is ambiguous).
446
The output process can be in one of the following states:
448
449\yskip\hang |num_or_id| means that the last item in the buffer is a number or
450identifier, hence a blank space or line break must be inserted if the next
451item is also a number or identifier.
452
453\yskip\hang |unbreakable| means that the last item in the buffer was followed
454by the \.{@@\&} operation that inhibits spaces between it and the next item.
455
456\yskip\hang |verbatim| means we're copying only character tokens, and
457that they are to be output exactly as stored.  This is the case during
458strings, verbatim constructions and numerical constants.
459
460\yskip\hang |post_slash| means we've just output a slash.
461
462\yskip\hang |normal| means none of the above.
463
464\yskip\noindent Furthermore, if the variable |protect| is positive, newlines
465are preceded by a `\.\\'.
466
467@d normal 0 /* non-unusual state */
468@d num_or_id 1 /* state associated with numbers and identifiers */
469@d post_slash 2 /* state following a \./ */
470@d unbreakable 3 /* state associated with \.{@@\&} */
471@d verbatim 4 /* state in the middle of a string */
472
473@<Global...@>=
474eight_bits out_state; /* current status of partial output */
475boolean protect; /* should newline characters be quoted? */
476
477@ Here is a routine that is invoked when we want to output the current line.
478During the output process, |cur_line| equals the number of the next line
479to be output.
480
481@c
482void
483flush_buffer() /* writes one line to output file */
484{
485  C_putc('\n');
486  if (cur_line % 100 == 0 && show_progress) {
487    printf(".");
488    if (cur_line % 500 == 0) printf("%d",cur_line);
489    update_terminal; /* progress report */
490  }
491  cur_line++;
492}
493
494@ Second, we have modified the original \.{TANGLE} so that it will write output
495on multiple files.
496If a section name is introduced in at least one place by \.{@@(}
497instead of \.{@@<}, we treat it as the name of a file.
498All these special sections are saved on a stack, |output_files|.
499We write them out after we've done the unnamed section.
500
501@d max_files 256
502@<Glob...@>=
503name_pointer output_files[max_files];
504name_pointer *cur_out_file, *end_output_files, *an_output_file;
505char cur_section_name_char; /* is it |'<'| or |'('| */
506char output_file_name[longest_name]; /* name of the file */
507
508@ We make |end_output_files| point just beyond the end of
509|output_files|. The stack pointer
510|cur_out_file| starts out there. Every time we see a new file, we
511decrement |cur_out_file| and then write it in.
512@<Set initial...@>=
513cur_out_file=end_output_files=output_files+max_files;
514
515@ @<If it's not there, add |cur_section_name| to the output file stack, or
516complain we're out of room@>=
517{
518  for (an_output_file=cur_out_file;
519        an_output_file<end_output_files; an_output_file++)
520            if (*an_output_file==cur_section_name) break;
521  if (an_output_file==end_output_files) {
522    if (cur_out_file>output_files)
523        *--cur_out_file=cur_section_name;
524    else {
525      overflow("output files");
526    }
527  }
528}
529
530@* The big output switch.  Here then is the routine that does the
531output.
532
533@<Predecl...@>=
534void phase_two();
535
536@ @c
537void
538phase_two () {
539  web_file_open=0;
540  cur_line=1;
541  @<Initialize the output stacks@>;
542  @<Output macro definitions if appropriate@>;
543  if (text_info->text_link==0 && cur_out_file==end_output_files) {
544    printf("\n! No program text was specified."); mark_harmless;
545@.No program text...@>
546  }
547  else {
548    if(cur_out_file==end_output_files) {
549      if(show_progress)
550        printf("\nWriting the output file (%s):",C_file_name);
551    }
552    else {
553      if (show_progress) {
554        printf("\nWriting the output files:");
555@.Writing the output...@>
556        printf(" (%s)",C_file_name);
557        update_terminal;
558      }
559      if (text_info->text_link==0) goto writeloop;
560    }
561    while (stack_ptr>stack) get_output();
562    flush_buffer();
563writeloop:   @<Write all the named output files@>;
564    if(show_happiness) printf("\nDone.");
565  }
566}
567
568@ To write the named output files, we proceed as for the unnamed
569section.
570The only subtlety is that we have to open each one.
571
572@<Write all the named output files@>=
573for (an_output_file=end_output_files; an_output_file>cur_out_file;) {
574    an_output_file--;
575    sprint_section_name(output_file_name,*an_output_file);
576    fclose(C_file);
577    C_file=fopen(output_file_name,"w");
578    if (C_file ==0) fatal("! Cannot open output file:",output_file_name);
579@.Cannot open output file@>
580    printf("\n(%s)",output_file_name); update_terminal;
581    cur_line=1;
582    stack_ptr=stack+1;
583    cur_name= (*an_output_file);
584    cur_repl= (text_pointer)cur_name->equiv;
585    cur_byte=cur_repl->tok_start;
586    cur_end=(cur_repl+1)->tok_start;
587    while (stack_ptr > stack) get_output();
588    flush_buffer();
589}
590
591@ If a \.{@@h} was not encountered in the input,
592we go through the list of replacement texts and copy the ones
593that refer to macros, preceded by the \.{\#define} preprocessor command.
594
595@<Output macro definitions if appropriate@>=
596  if (!output_defs_seen)
597    output_defs();
598
599@ @<Glob...@>=
600boolean output_defs_seen=0;
601
602@ @<Predecl...@>=
603void output_defs();
604
605@ @c
606void
607output_defs()
608{
609  sixteen_bits a;
610  push_level(NULL);
611  for (cur_text=text_info+1; cur_text<text_ptr; cur_text++)
612    if (cur_text->text_link==0) { /* |cur_text| is the text for a macro */
613      cur_byte=cur_text->tok_start;
614      cur_end=(cur_text+1)->tok_start;
615      C_printf("%s","#define ");
616      out_state=normal;
617      protect=1; /* newlines should be preceded by |'\\'| */
618      while (cur_byte<cur_end) {
619        a=*cur_byte++;
620        if (cur_byte==cur_end && a=='\n') break; /* disregard a final newline */
621        if (out_state==verbatim && a!=string && a!=constant && a!='\n')
622          C_putc(a); /* a high-bit character can occur in a string */
623@^high-bit character handling@>
624        else if (a<0200) out_char(a); /* one-byte token */
625        else {
626          a=(a-0200)*0400+*cur_byte++;
627          if (a<024000) { /* |024000==(0250-0200)*0400| */
628            cur_val=a; out_char(identifier);
629          }
630          else if (a<050000) { confusion("macro defs have strange char");}
631          else {
632            cur_val=a-050000; cur_section=cur_val; out_char(section_number);
633          }
634      /* no other cases */
635        }
636      }
637      protect=0;
638      flush_buffer();
639    }
640  pop_level(0);
641}
642
643@ A many-way switch is used to send the output.  Note that this function
644is not called if |out_state==verbatim|, except perhaps with arguments
645|'\n'| (protect the newline), |string| (end the string), or |constant|
646(end the constant).
647
648@<Predecl...@>=
649static void out_char();
650
651@ @c
652static void
653out_char(cur_char)
654eight_bits cur_char;
655{
656  char *j, *k; /* pointer into |byte_mem| */
657restart:
658    switch (cur_char) {
659      case '\n': if (protect && out_state!=verbatim) C_putc(' ');
660        if (protect || out_state==verbatim) C_putc('\\');
661        flush_buffer(); if (out_state!=verbatim) out_state=normal; break;
662      @/@t\4@>@<Case of an identifier@>;
663      @/@t\4@>@<Case of a section number@>;
664      @/@t\4@>@<Cases like \.{!=}@>;
665      case '=': case '>': C_putc(cur_char); C_putc(' ');
666        out_state=normal; break;
667      case join: out_state=unbreakable; break;
668      case constant: if (out_state==verbatim) {
669          out_state=num_or_id; break;
670        }
671        if(out_state==num_or_id) C_putc(' '); out_state=verbatim; break;
672      case string: if (out_state==verbatim) out_state=normal;
673        else out_state=verbatim; break;
674      case '/': C_putc('/'); out_state=post_slash; break;
675      case '*': if (out_state==post_slash) C_putc(' ');
676        /* fall through */
677      default: C_putc(cur_char); out_state=normal; break;
678    }
679}
680
681@ @<Cases like \.{!=}@>=
682case plus_plus: C_putc('+'); C_putc('+'); out_state=normal; break;
683case minus_minus: C_putc('-'); C_putc('-'); out_state=normal; break;
684case minus_gt: C_putc('-'); C_putc('>'); out_state=normal; break;
685case gt_gt: C_putc('>'); C_putc('>'); out_state=normal; break;
686case eq_eq: C_putc('='); C_putc('='); out_state=normal; break;
687case lt_lt: C_putc('<'); C_putc('<'); out_state=normal; break;
688case gt_eq: C_putc('>'); C_putc('='); out_state=normal; break;
689case lt_eq: C_putc('<'); C_putc('='); out_state=normal; break;
690case not_eq: C_putc('!'); C_putc('='); out_state=normal; break;
691case and_and: C_putc('&'); C_putc('&'); out_state=normal; break;
692case or_or: C_putc('|'); C_putc('|'); out_state=normal; break;
693case dot_dot_dot: C_putc('.'); C_putc('.'); C_putc('.'); out_state=normal;
694    break;
695case colon_colon: C_putc(':'); C_putc(':'); out_state=normal; break;
696case period_ast: C_putc('.'); C_putc('*'); out_state=normal; break;
697case minus_gt_ast: C_putc('-'); C_putc('>'); C_putc('*'); out_state=normal;
698    break;
699
700@ When an identifier is output to the \CEE/ file, characters in the
701range 128--255 must be changed into something else, so the \CEE/
702compiler won't complain.  By default, \.{CTANGLE} converts the
703character with code $16 x+y$ to the three characters `\.X$xy$', but
704a different transliteration table can be specified.  Thus a German
705might want {\it gr\"un\/} to appear as a still readable \.{gruen}.
706This makes debugging a lot less confusing.
707
708@d translit_length 10
709
710@<Glo...@>=
711char translit[128][translit_length];
712
713@ @<Set init...@>=
714{
715  int i;
716  for (i=0;i<128;i++) sprintf(translit[i],"X%02X",(unsigned)(128+i));
717}
718
719@ @<Case of an identifier@>=
720case identifier:
721  if (out_state==num_or_id) C_putc(' ');
722  j=(cur_val+name_dir)->byte_start;
723  k=(cur_val+name_dir+1)->byte_start;
724  while (j<k) {
725    if ((unsigned char)(*j)<0200) C_putc(*j);
726@^high-bit character handling@>
727    else C_printf("%s",translit[(unsigned char)(*j)-0200]);
728    j++;
729  }
730  out_state=num_or_id; break;
731
732@ @<Case of a sec...@>=
733case section_number:
734  if (cur_val>0) C_printf("/*%d:*/",cur_val);
735  else if(cur_val<0) C_printf("/*:%d*/",-cur_val);
736  else if (protect) {
737    cur_byte +=4; /* skip line number and file name */
738    cur_char = '\n';
739    goto restart;
740  } else {
741    sixteen_bits a;
742    a=0400* *cur_byte++;
743    a+=*cur_byte++; /* gets the line number */
744    C_printf("\n#line %d \"",a);
745@:line}{\.{\#line}@>
746    cur_val=*cur_byte++;
747    cur_val=0400*(cur_val-0200)+ *cur_byte++; /* points to the file name */
748    for (j=(cur_val+name_dir)->byte_start, k=(cur_val+name_dir+1)->byte_start;
749         j<k; j++) {
750      if (*j=='\\' || *j=='"') C_putc('\\');
751      C_putc(*j);
752    }
753    C_printf("%s","\"\n");
754  }
755  break;
756
757@** Introduction to the input phase.
758We have now seen that \.{CTANGLE} will be able to output the full
759\CEE/ program, if we can only get that program into the byte memory in
760the proper format. The input process is something like the output process
761in reverse, since we compress the text as we read it in and we expand it
762as we write it out.
763
764There are three main input routines. The most interesting is the one that gets
765the next token of a \CEE/ text; the other two are used to scan rapidly past
766\TEX/ text in the \.{CWEB} source code. One of the latter routines will jump to
767the next token that starts with `\.{@@}', and the other skips to the end
768of a \CEE/ comment.
769
770@ Control codes in \.{CWEB} begin with `\.{@@}', and the next character
771identifies the code. Some of these are of interest only to \.{CWEAVE},
772so \.{CTANGLE} ignores them; the others are converted by \.{CTANGLE} into
773internal code numbers by the |ccode| table below. The ordering
774of these internal code numbers has been chosen to simplify the program logic;
775larger numbers are given to the control codes that denote more significant
776milestones.
777
778@d ignore 0 /* control code of no interest to \.{CTANGLE} */
779@d ord 0302 /* control code for `\.{@@'}' */
780@d control_text 0303 /* control code for `\.{@@t}', `\.{@@\^}', etc. */
781@d translit_code 0304 /* control code for `\.{@@l}' */
782@d output_defs_code 0305 /* control code for `\.{@@h}' */
783@d format_code 0306 /* control code for `\.{@@f}' */
784@d definition 0307 /* control code for `\.{@@d}' */
785@d begin_C 0310 /* control code for `\.{@@c}' */
786@d section_name 0311 /* control code for `\.{@@<}' */
787@d new_section 0312 /* control code for `\.{@@\ }' and `\.{@@*}' */
788
789@<Global...@>=
790eight_bits ccode[256]; /* meaning of a char following \.{@@} */
791
@ @<Set ini...@>= {
  int c; /* must be |int| so the |for| loop will end */
  char *p; /* runs through a list of characters that share one code */
  for (c=0; c<256; c++) ccode[c]=ignore; /* the default for every character */
  for (p=" \t\n\v\r\f*"; *p!='\0'; p++)
    ccode[(eight_bits)*p]=new_section; /* white space and \.{*} */
  for (p="^:.tTqQ"; *p!='\0'; p++)
    ccode[(eight_bits)*p]=control_text;
  ccode['@@']='@@'; /* the code for \.{@@@@} is the character itself */
  ccode['=']=string;
  ccode['&']=join;
  ccode['\'']=ord;
  ccode['d']=ccode['D']=definition;
  ccode['h']=ccode['H']=output_defs_code;
  ccode['l']=ccode['L']=translit_code;
  ccode['<']=ccode['(']=section_name;
  ccode['f']=ccode['F']=ccode['s']=ccode['S']=format_code;
  ccode['c']=ccode['C']=ccode['p']=ccode['P']=begin_C;
}
809
810@ The |skip_ahead| procedure reads through the input at fairly high speed
811until finding the next non-ignorable control code, which it returns.
812
813@c
eight_bits
skip_ahead() /* skip to next control code */
{
  eight_bits code; /* control code found */
  for (;;) {
    if (loc>limit && get_line()==0) return(new_section); /* no more input */
    *(limit+1)='@@'; /* a sentinel, so that the scan below must stop */
    while (*loc!='@@') loc++;
    if (loc>limit) continue; /* only the sentinel was found; try the next line */
    code=ccode[(eight_bits)*++loc]; /* classify the character after \.{@@} */
    loc++;
    if (code!=ignore || *(loc-1)=='>') return(code);
  }
}
828
829@ The |skip_comment| procedure reads through the input at somewhat high
830speed in order to pass over comments, which \.{CTANGLE} does not transmit
831to the output. If the comment is introduced by \.{/*}, |skip_comment|
832proceeds until finding the end-comment token \.{*/} or a newline; in the
833latter case |skip_comment| will be called again by |get_next|, since the
834comment is not finished.  This is done so that each newline in the
835\CEE/ part of a section is copied to the output; otherwise the \&{\#line}
836commands inserted into the \CEE/ file by the output routines become useless.
837On the other hand, if the comment is introduced by \.{//} (i.e., if it
838is a \CPLUSPLUS/ ``short comment''), it always is simply delimited by the next
839newline. The boolean argument |is_long_comment| distinguishes between
840the two types of comments.
841
842If |skip_comment| comes to the end of the section, it prints an error message.
843No comment, long or short, is allowed to contain `\.{@@\ }' or `\.{@@*}'.
844
845@<Global...@>=
846boolean comment_continues=0; /* are we scanning a comment? */
847
848@ @c
int skip_comment(is_long_comment) /* skips over comments */
boolean is_long_comment;
{
  char ch; /* current character */
  for (;;) {
    if (loc>limit) { /* the current line is exhausted */
      if (!is_long_comment) return(comment_continues=0);
        /* a short comment ends with its line */
      if (get_line()) return(comment_continues=1);
        /* the caller will resume the comment on the new line */
      err_print("! Input ended in mid-comment");
@.Input ended in mid-comment@>
      return(comment_continues=0);
    }
    ch=*loc++;
    if (ch=='*' && is_long_comment && *loc=='/') { /* closing delimiter */
      loc++; return(comment_continues=0);
    }
    if (ch!='@@') continue;
    if (ccode[(eight_bits)*loc]!=new_section) {
      loc++; continue; /* pass over the character after \.{@@} */
    }
    err_print("! Section name ended in mid-comment"); loc--;
@.Section name ended in mid-comment@>
    return(comment_continues=0);
  }
}
879
880@* Inputting the next token.
881
882@d constant 03
883
884@<Global...@>=
885name_pointer cur_section_name; /* name of section just scanned */
886int no_where; /* suppress |print_where|? */
887
888@ @<Include...@>=
889#include <ctype.h> /* definition of |isalpha|, |isdigit| and so on */
890#include <stdlib.h> /* definition of |exit| */
891
892@ As one might expect, |get_next| consists mostly of a big switch
893that branches to the various special cases that can arise.
894
895@d isxalpha(c) ((c)=='_' || (c)=='$')
896  /* non-alpha characters allowed in identifier */
897@d ishigh(c) ((unsigned char)(c)>0177)
898@^high-bit character handling@>
899
900@c
eight_bits
get_next() /* produces the next input token */
{
  static int preprocessing=0; /* are we scanning a preprocessor line? */
  eight_bits c; /* the current character */
  while (1) {
    if (loc>limit) {
      if (preprocessing && (limit==buffer || *(limit-1)!='\\')) preprocessing=0;
        /* an empty line has no final backslash; never look before |buffer| */
      if (get_line()==0) return(new_section);
      else if (print_where && !no_where) {
          print_where=0;
          @<Insert the line number into |tok_mem|@>;
        }
        else return ('\n');
    }
    c=*loc;
    if (comment_continues || (c=='/' && (*(loc+1)=='*' || *(loc+1)=='/'))) {
      boolean is_long=comment_continues||*(loc+1)=='*';
        /* decided before |loc| moves */
      if (!comment_continues) loc+=2;
        /* pass the two-character opener, so that its second character
           cannot be taken for the first half of a closing delimiter */
      skip_comment(is_long); /* scan to end of comment or newline */
      if (comment_continues) return('\n');
      else continue;
    }
    loc++;
    if (xisdigit(c) || c=='.') @<Get a constant@>@;
    else if (c=='\'' || c=='"' || (c=='L'&&(*loc=='\'' || *loc=='"')))
        @<Get a string@>@;
    else if (isalpha(c) || isxalpha(c) || ishigh(c))
      @<Get an identifier@>@;
    else if (c=='@@') @<Get control code and possible section name@>@;
    else if (xisspace(c)) {
        if (!preprocessing || loc>limit) continue;
          /* we don't want a blank after a final backslash */
        else return(' '); /* ignore spaces and tabs, unless preprocessing */
    }
    else if (c=='#' && loc==buffer+1) preprocessing=1;
      /* the \.{\#} was the first character of its line */
    mistake: @<Compress two-symbol operator@>@;
    return(c);
  }
}
940
941@ The following code assigns values to the combinations \.{++},
942\.{--}, \.{->}, \.{>=}, \.{<=}, \.{==}, \.{<<}, \.{>>}, \.{!=}, \.{||} and
943\.{\&\&}, and to the \CPLUSPLUS/
944combinations \.{...}, \.{::}, \.{.*} and \.{->*}.
945The compound assignment operators (e.g., \.{+=}) are
946treated as separate tokens.
947
948@d compress(c) if (loc++<=limit) return(c)
949
@<Compress tw...@>=
switch(c) {
  case '+':
    if (*loc=='+') {compress(plus_plus);}
    break;
  case '-':
    if (*loc=='-') {compress(minus_minus);}
    else if (*loc=='>') {
      if (*(loc+1)=='*') {loc++; compress(minus_gt_ast);}
      else {compress(minus_gt);}
    }
    break;
  case '.':
    if (*loc=='*') {compress(period_ast);}
    else if (*loc=='.' && *(loc+1)=='.') {loc++; compress(dot_dot_dot);}
    break;
  case ':':
    if (*loc==':') {compress(colon_colon);}
    break;
  case '=':
    if (*loc=='=') {compress(eq_eq);}
    break;
  case '>':
    if (*loc=='=') {compress(gt_eq);}
    else if (*loc=='>') {compress(gt_gt);}
    break;
  case '<':
    if (*loc=='=') {compress(lt_eq);}
    else if (*loc=='<') {compress(lt_lt);}
    break;
  case '&':
    if (*loc=='&') {compress(and_and);}
    break;
  case '|':
    if (*loc=='|') {compress(or_or);}
    break;
  case '!':
    if (*loc=='=') {compress(not_eq);}
    break;
}
971
@ The characters are tested with |ishigh| first and are cast to |eight_bits|
before they reach |isalpha| and |isdigit|, because those library routines
are undefined for negative arguments and a plain |char| may be signed.

@<Get an identifier@>= {
  id_first=--loc; /* back to the first character, which has been passed */
  do loc++;
  while (ishigh(*loc) || isalpha((eight_bits)*loc) || isdigit((eight_bits)*loc)
         || isxalpha(*loc));
  id_loc=loc; return(identifier);
}
977
@ @<Get a constant@>= {
  id_first=loc-1; /* the first character has already been passed */
  if (*id_first=='.' && !xisdigit(*loc)) goto mistake; /* not a constant */
  if (*id_first=='0' && (*loc=='x' || *loc=='X')) { /* hex constant */
    for (loc++; xisxdigit(*loc); loc++) ;
    goto found;
  }
  for (; xisdigit(*loc); loc++) ; /* digits before a possible point */
  if (*loc=='.') {
    for (loc++; xisdigit(*loc); loc++) ; /* digits after the point */
  }
  if (*loc=='e' || *loc=='E') { /* float constant */
    loc++;
    if (*loc=='+' || *loc=='-') loc++; /* optional sign of the exponent */
    for (; xisdigit(*loc); loc++) ;
  }
 found: /* finally the suffix letters, if any */
  while (*loc=='u' || *loc=='U' || *loc=='l' || *loc=='L'
             || *loc=='f' || *loc=='F') loc++;
  id_loc=loc;
  return(constant);
}
1000
1001@ \CEE/ strings and character constants, delimited by double and single
1002quotes, respectively, can contain newlines or instances of their own
1003delimiters if they are protected by a backslash.  We follow this
1004convention, but do not allow the string to be longer than |longest_name|.
1005
@<Get a string@>= {
  char delim = c; /* what started the string */
  id_first = section_text+1;
  id_loc = section_text; *++id_loc=delim;
  if (delim=='L') { /* wide character constant */
    delim=*loc++; *++id_loc=delim;
  }
  while (1) {
    if (loc>=limit) { /* the line ended before the closing delimiter */
      if (limit==buffer || *(limit-1)!='\\') {
          /* an empty line has no final backslash; never look before |buffer| */
        err_print("! String didn't end"); loc=limit; break;
@.String didn't end@>
      }
      if(get_line()==0) {
        err_print("! Input ended in middle of string"); loc=buffer; break;
@.Input ended in middle of string@>
      }
      else if (++id_loc<=section_text_end) *id_loc='\n'; /* will print as
      \.{"\\\\\\n"} */
    }
    if ((c=*loc++)==delim) { /* the closing delimiter */
      if (++id_loc<=section_text_end) *id_loc=c;
      break;
    }
    if (c=='\\') {
      if (loc>=limit) continue; /* a final backslash continues the string */
      if (++id_loc<=section_text_end) *id_loc = '\\';
      c=*loc++; /* the escaped character is copied without inspection */
    }
    if (++id_loc<=section_text_end) *id_loc=c;
  }
  if (id_loc>=section_text_end) {
    printf("\n! String too long: ");
@.String too long@>
    term_write(section_text+1,25);
    err_print("...");
  }
  id_loc++; /* now |id_loc| is one past the last character stored */
  return(string);
}
1046
1047@ After an \.{@@} sign has been scanned, the next character tells us
1048whether there is more work to do.
1049
@<Get control code and possible section name@>= {
  c=ccode[(eight_bits)*loc]; loc++; /* classify the character after \.{@@} */
  switch(c) {
    case ignore: continue;
    case translit_code:
      err_print("! Use @@l in limbo only");
@.Use @@l in limbo...@>
      continue;
    case control_text:
      do c=skip_ahead(); while (c=='@@');
      /* only \.{@@@@} and \.{@@>} are expected */
      if (*(loc-1)!='>')
        err_print("! Double @@ should be used in control text");
@.Double @@ should be used...@>
      continue;
    case section_name:
      cur_section_name_char=*(loc-1); /* remember which character began the name */
      @<Scan the section name and make |cur_section_name| point to it@>;
    case string:
      @<Scan a verbatim string@>;
    case ord:
      @<Scan an ASCII constant@>;
    default: return(c);
  }
}
1070
1071@ After scanning a valid ASCII constant that follows
1072\.{@@'}, this code plows ahead until it finds the next single quote.
1073(Special care is taken if the quote is part of the constant.)
1074Anything after a valid ASCII constant is ignored;
1075thus, \.{@@'\\nopq'} gives the same result as \.{@@'\\n'}.
1076
@<Scan an ASCII constant@>=
  id_first=loc;
  if (*loc=='\\' && *++loc=='\'') loc++;
    /* an escaped quote is part of the constant, not its end */
  while (*loc!='\'') {
    if (*loc=='@@') {
      if (*(loc+1)=='@@') loc++; /* pass the first half of a doubled \.{@@} */
      else err_print("! Double @@ should be used in ASCII constant");
@.Double @@ should be used...@>
    }
    if (++loc>limit) {
        err_print("! String didn't end"); loc=limit-1; break;
@.String didn't end@>
    }
  }
  loc++; /* pass the closing quote */
  return(ord);
1097
@ @<Scan the section name...@>= {
  char *k; /* pointer into |section_text| */
  @<Put section name into |section_text|@>;
  if (k-section_text<=3 || strncmp(k-2,"...",3)!=0)
    cur_section_name=section_lookup(section_text+1,k,0); /* a complete name */
  else
    cur_section_name=section_lookup(section_text+1,k-3,1);
      /* 1 means is a prefix */
  if (cur_section_name_char=='(')
    @<If it's not there, add |cur_section_name| to the output file stack, or
          complain we're out of room@>;
  return(section_name);
}
1109
1110@ Section names are placed into the |section_text| array with consecutive spaces,
1111tabs, and carriage-returns replaced by single spaces. There will be no
1112spaces at the beginning or the end. (We set |section_text[0]=' '| to facilitate
1113this, since the |section_lookup| routine uses |section_text[1]| as the first
1114character of the name.)
1115
1116@<Set init...@>=section_text[0]=' ';
1117
@ @<Put section name...@>=
for (k=section_text;;) {
  if (loc>limit && get_line()==0) {
    err_print("! Input ended in section name");
@.Input ended in section name@>
    loc=buffer+1; break;
  }
  c=*loc;
  @<If end of name or erroneous nesting, |break|@>;
  loc++;
  if (k<section_text_end) k++; /* advance, but never beyond the last position */
  if (xisspace(c)) {
    c=' ';
    if (*(k-1)==' ') k--; /* a run of white space becomes a single space */
  }
  *k=c;
}
if (k>=section_text_end) {
  printf("\n! Section name too long: ");
@.Section name too long@>
  term_write(section_text+1,25);
  printf("..."); mark_harmless;
}
if (k>section_text && *k==' ') k--; /* drop a trailing space */
1141
@ The \.{@@} that is kept in the name is stored with the same bound check
that protects every other character, so that an overlong name that contains
doubled \.{@@} signs cannot push |k| beyond |section_text_end|.

@<If end of name or erroneous nesting,...@>=
if (c=='@@') {
  c=*(loc+1); /* the character that follows the \.{@@} */
  if (c=='>') { /* the name is complete */
    loc+=2; break;
  }
  if (ccode[(eight_bits)c]==new_section) {
    err_print("! Section name didn't end"); break;
@.Section name didn't end@>
  }
  if (ccode[(eight_bits)c]==section_name) {
    err_print("! Nesting of section names not allowed"); break;
@.Nesting of section names...@>
  }
  if (k<section_text_end) k++; /* never step past the end of |section_text| */
  *k='@@'; loc++; /* now |c==*loc| again */
}
1158
@ At the present point in the program we have just scanned `\.{@@=}', so
|*(loc-1)=='='| and |c==string|; we set |id_first| to the beginning
of the string itself, and |id_loc| to its ending-plus-one location in the
buffer.  We also set |loc| to the position just after the ending delimiter.
1163
@<Scan a verbatim string@>= {
  id_first=loc++;
  *(limit+1)='@@'; *(limit+2)='>'; /* a sentinel that stops the scan below */
  for (; *loc!='@@' || *(loc+1)!='>'; loc++) ;
  if (loc>=limit) err_print("! Verbatim string didn't end");
@.Verbatim string didn't end@>
  id_loc=loc;
  loc+=2; /* pass the closing \.{@@>} */
  return(string);
}
1172
1173@* Scanning a macro definition.
1174The rules for generating the replacement texts corresponding to macros and
1175\CEE/ texts of a section are almost identical; the only differences are that
1176
1177\yskip \item{a)}Section names are not allowed in macros;
1178in fact, the appearance of a section name terminates such macros and denotes
1179the name of the current section.
1180
1181\item{b)}The symbols \.{@@d} and \.{@@f} and \.{@@c} are not allowed after
1182section names, while they terminate macro definitions.
1183
1184\item{c)}Spaces are inserted after right parentheses in macros, because the
1185ANSI \CEE/ preprocessor sometimes requires it.
1186
1187\yskip Therefore there is a single procedure |scan_repl| whose parameter
1188|t| specifies either |macro| or |section_name|. After |scan_repl| has
1189acted, |cur_text| will point to the replacement text just generated, and
1190|next_control| will contain the control code that terminated the activity.
1191
1192@d macro  0
1193@d app_repl(c)  {if (tok_ptr==tok_mem_end) overflow("token"); *tok_ptr++=c;}
1194
1195@<Global...@>=
1196text_pointer cur_text; /* replacement text formed by |scan_repl| */
1197eight_bits next_control;
1198
1199@ @c
void
scan_repl(t) /* creates a replacement text */
eight_bits t;
{
  sixteen_bits a; /* the current token */
  if (t==section_name) {@<Insert the line number into |tok_mem|@>;}
  for (;;) {
    a=get_next();
    switch (a) {
      @<In cases that |a| is a non-|char| token (|identifier|,
        |section_name|, etc.), either process it and change |a| to a byte
        that should be stored, or |continue| if |a| should be ignored,
        or |goto done| if |a| signals the end of this replacement text@>@;
      case ')':
        app_repl(a);
        if (t==macro) app_repl(' '); /* macros get a space after the parenthesis */
        break;
      default:
        app_repl(a); /* store |a| in |tok_mem| */
    }
  }
  done: next_control=(eight_bits) a; /* the token that ended the text */
  if (text_ptr>text_info_end) overflow("text");
  cur_text=text_ptr;
  (++text_ptr)->tok_start=tok_ptr; /* the next text will begin here */
}
1220
1221@ Here is the code for the line number: first a |sixteen_bits| equal
1222to |0150000|; then the numeric line number; then a pointer to the
1223file name.
1224
@<Insert the line...@>=
store_two_bytes(0150000);
if (changing) { /* the line comes from the change file */
  id_first=change_file_name;
  id_loc=id_first+strlen(id_first);
  store_two_bytes((sixteen_bits)change_line);
}
else {
  id_first=cur_file_name;
  id_loc=id_first+strlen(id_first);
  store_two_bytes((sixteen_bits)cur_line);
}
{
  int a=id_lookup(id_first,id_loc,0)-name_dir; /* index of the file name */
  app_repl((a / 0400)+0200);
  app_repl(a % 0400);
}
1234
@ @<In cases that |a| is...@>=
case identifier:
  a=id_lookup(id_first,id_loc,0)-name_dir;
  app_repl((a / 0400)+0200);
  app_repl(a % 0400);
  break;
case section_name:
  if (t!=section_name) goto done; /* a section name ends a macro */
  @<Was an `@@' missed here?@>;
  a=cur_section_name-name_dir;
  app_repl((a / 0400)+0250);
  app_repl(a % 0400);
  @<Insert the line number into |tok_mem|@>;
  break;
case output_defs_code:
  if (t==section_name) {
    output_defs_seen=1;
    a=output_defs_flag;
    app_repl((a / 0400)+0200);
    app_repl(a % 0400);
    @<Insert the line number into |tok_mem|@>;
  }
  else err_print("! Misplaced @@h");
@.Misplaced @@h@>
  break;
case constant: case string:
  @<Copy a string or verbatim construction or numerical constant@>;
case ord:
  @<Copy an ASCII constant@>;
case definition: case format_code: case begin_C:
  if (t!=section_name) goto done; /* these codes end a macro */
  err_print("! @@d, @@f and @@c are ignored in C text");
@.@@d, @@f and @@c are ignored in C text@>
  continue;
case new_section: goto done;
1267
@ @<Was an `@@'...@>= {
  char *q; /* looks ahead on the current line without moving |loc| */
  for (q=loc; q<limit && *q==' '; q++) ;
  if (q<limit && *q=='+') q++; /* an optional \.+ */
  for (; q<limit && *q==' '; q++) ;
  if (*q=='=') err_print ("! Missing `@@ ' before a named section");
@.Missing `@@ '...@>
  /* user who isn't defining a section should put newline after the name,
     as explained in the manual */
}
1278
@ @<Copy a string...@>=
  app_repl(a); /* |string| or |constant| */
  for (; id_first<id_loc; id_first++) { /* simplify \.{@@@@} pairs */
    if (*id_first=='@@') {
      if (*(id_first+1)!='@@')
        err_print("! Double @@ should be used in string");
@.Double @@ should be used...@>
      else id_first++; /* keep only the second \.{@@} of the pair */
    }
    app_repl(*id_first);
  }
  app_repl(a); /* the same code marks the end */
  break;
1290
1291@ This section should be rewritten on machines that don't use ASCII
1292code internally.
1293@^ASCII code dependencies@>
1294
@<Copy an ASCII constant@>= {
  int c=(eight_bits) *id_first;
  if (c=='\\') {
    c=(eight_bits) *++id_first;
      /* the cast keeps a high-bit character from becoming negative */
    if (c>='0' && c<='7') { /* octal escape of up to three digits */
      c-='0';
      if (*(id_first+1)>='0' && *(id_first+1)<='7') {
        c=8*c+*(++id_first) - '0';
        if (*(id_first+1)>='0' && *(id_first+1)<='7' && c<32)
          c=8*c+*(++id_first)- '0'; /* |c<32| keeps the value below 256 */
      }
    }
    else switch (c) {
    case 't':c='\t';@+break;
    case 'n':c='\n';@+break;
    case 'b':c='\b';@+break;
    case 'f':c='\f';@+break;
    case 'v':c='\v';@+break;
    case 'r':c='\r';@+break;
    case 'a':c='\7';@+break;
    case '?':c='?';@+break;
    case 'x': /* hexadecimal escape of at most two digits */
      if (xisdigit(*(id_first+1))) c=*(++id_first)-'0';
      else if (xisxdigit(*(id_first+1))) {
        ++id_first;
        c=toupper(*id_first)-'A'+10;
      }
      if (xisdigit(*(id_first+1))) c=16*c+*(++id_first)-'0';
      else if (xisxdigit(*(id_first+1))) {
        ++id_first;
        c=16*c+toupper(*id_first)-'A'+10;
      }
      break;
    case '\\':c='\\';@+break;
    case '\'':c='\'';@+break;
    case '\"':c='\"';@+break;
    default: err_print("! Unrecognized escape sequence");
@.Unrecognized escape sequence@>
    }
  }@/
  /* at this point |c| should have been converted to its ASCII code number */
  app_repl(constant);
  if (c>=100) app_repl('0'+c/100); /* the code is stored as a decimal constant */
  if (c>=10) app_repl('0'+(c/10)%10);
  app_repl('0'+c%10);
  app_repl(constant);
}
break;
1343
1344@* Scanning a section.
1345The |scan_section| procedure starts when `\.{@@\ }' or `\.{@@*}' has been
1346sensed in the input, and it proceeds until the end of that section.  It
1347uses |section_count| to keep track of the current section number; with luck,
1348\.{CWEAVE} and \.{CTANGLE} will both assign the same numbers to sections.
1349
1350@<Global...@>=
1351extern sixteen_bits section_count; /* the current section number */
1352
@ The body of |scan_section| is a loop where we look for control codes
that are significant to \.{CTANGLE}: those
that delimit a definition, the \CEE/ part of a section, or a new section.
1356
1357@c
void
scan_section()
{
  name_pointer p; /* section name for the current section */
  text_pointer q; /* text for the current section */
  sixteen_bits a; /* token for left-hand side of definition */
  section_count++;
  no_where=1;
  if (show_progress && *(loc-1)=='*') { /* starred section */
    printf("*%d",section_count); update_terminal;
  }
  next_control=0;
  for (;;) {
    @<Skip ahead until |next_control| corresponds to \.{@@d}, \.{@@<},
      \.{@@\ } or the like@>;
    if (next_control == definition) {  /* \.{@@d} */
        @<Scan a definition@>@;
        continue;
    }
    else if (next_control == begin_C) {  /* \.{@@c} or \.{@@p} */
      p=name_dir; break;
    }
    else if (next_control == section_name) { /* \.{@@<} or \.{@@(} */
      p=cur_section_name;
      @<If section is not being defined, |continue| @>;
      break;
    }
    else return; /* \.{@@\ } or \.{@@*} */
  }
  print_where=0;
  no_where=0;
  @<Scan the \CEE/ part of the current section@>;
}
1389
1390@ At the top of this loop, if |next_control==section_name|, the
1391section name has already been scanned (see |@<Get control code
1392and...@>|).  Thus, if we encounter |next_control==section_name| in the
1393skip-ahead process, we should likewise scan the section name, so later
1394processing will be the same in both cases.
1395
@<Skip ahead until |next_control| ...@>=
while (next_control<definition) {
      /* |definition| is the lowest of the ``significant'' codes */
  next_control=skip_ahead();
  if (next_control==section_name) {
    loc-=2; /* back up to the \.{@@} that began the name */
    next_control=get_next(); /* and let |get_next| scan the name */
  }
}
1402
@ @<Scan a definition@>= {
  do next_control=get_next();
  while (next_control=='\n'); /* allow newline before definition */
  if (next_control!=identifier) {
    err_print("! Definition flushed, must start with identifier");
@.Definition flushed...@>
    continue;
  }
  a=id_lookup(id_first,id_loc,0)-name_dir;
  app_repl((a / 0400)+0200); /* append the lhs */
  app_repl(a % 0400);
  if (*loc!='(') { /* identifier must be separated from replacement text */
    app_repl(string);
    app_repl(' ');
    app_repl(string);
  }
  scan_repl(macro);
  cur_text->text_link=0; /* |text_link==0| characterizes a macro */
}
1419
1420@ If the section name is not followed by \.{=} or \.{+=}, no \CEE/
1421code is forthcoming: the section is being cited, not being
1422defined.  This use is illegal after the definition part of the
1423current section has started, except inside a comment, but
1424\.{CTANGLE} does not enforce this rule; it simply ignores the offending
1425section name and everything following it, up to the next significant
1426control code.
1427
@<If section is not being defined, |continue| @>=
do next_control=get_next();
while (next_control=='+'); /* allow optional \.{+=} */
if (!(next_control=='=' || next_control==eq_eq))
  continue; /* the name was only cited */
1432
1433@ @<Scan the \CEE/...@>=
1434@<Insert the section number into |tok_mem|@>;
1435scan_repl(section_name); /* now |cur_text| points to the replacement text */
1436@<Update the data structure so that the replacement text is accessible@>;
1437
1438@ @<Insert the section number...@>=
1439store_two_bytes((sixteen_bits)(0150000+section_count));
1440  /* |0150000==0320*0400| */
1441
@ @<Update the data...@>=
if (p==name_dir||p==0) { /* unnamed section, or bad section name */
  (last_unnamed)->text_link=cur_text-text_info;
  last_unnamed=cur_text;
}
else if (p->equiv==(char *)text_info) {
  p->equiv=(char *)cur_text; /* first section of this name */
}
else {
  for (q=(text_pointer)p->equiv; q->text_link<section_flag;
       q=q->text_link+text_info) ; /* find end of list */
  q->text_link=cur_text-text_info;
}
cur_text->text_link=section_flag;
  /* mark this replacement text as a nonmacro */
1456
1457@ @<Predec...@>=
1458void phase_one();
1459
1460@ @c
void
phase_one() {
  phase=1; section_count=0;
  reset_input(); skip_limbo(); /* get past the material before the first section */
  while (!input_has_ended)
    scan_section(); /* one call for each section */
  check_complete();
  phase=2;
}
1471
1472@ Only a small subset of the control codes is legal in limbo, so limbo
1473processing is straightforward.
1474
1475@<Predecl...@>=
1476void skip_limbo();
1477
1478@ @c
void
skip_limbo()
{
  char c; /* the character after \.{@@}, or a code from |skip_ahead| */
  for (;;) {
    if (loc>limit && get_line()==0) return;
    *(limit+1)='@@'; /* a sentinel, so that the scan below must stop */
    while (*loc!='@@') loc++;
    if (loc++>limit) continue; /* only the sentinel was found */
    c=*loc++;
    switch (ccode[(eight_bits)c]) {
      case new_section: return; /* limbo ends where the first section begins */
      case translit_code: @<Read in transliteration of a character@>; break;
      case format_code: case '@@': break;
      case control_text:
        if (c=='q' || c=='Q') {
          do c=skip_ahead(); while (c=='@@');
          if (*(loc-1)!='>')
            err_print("! Double @@ should be used in control text");
@.Double @@ should be used...@>
          break;
        } /* otherwise fall through */
      default: err_print("! Double @@ should be used in limbo");
@.Double @@ should be used...@>
    }
  }
}
1506
@ @<Read in transliteration of a character@>= {
  char *hex; /* where two hex digits and a space are expected */
  while(xisspace(*loc)&&loc<limit) loc++;
  hex=loc; loc+=3;
  if (loc>limit || !xisxdigit(hex[0]) || !xisxdigit(hex[1])
         || (hex[0]>='0' && hex[0]<='7') || !xisspace(hex[2]))
    err_print("! Improper hex number following @@l");
@.Improper hex number...@>
  else {
    unsigned i; /* the character code that was given in hex */
    char *beg; /* start of the replacement string */
    sscanf(hex,"%x",&i);
    while(xisspace(*loc)&&loc<limit) loc++;
    for (beg=loc; loc<limit&&(xisalpha(*loc)||xisdigit(*loc)||*loc=='_'); loc++) ;
    if (loc-beg>=translit_length)
      err_print("! Replacement string in @@l too long");
@.Replacement string in @@l...@>
    else{
      strncpy(translit[i-0200],beg,loc-beg);
      translit[i-0200][loc-beg]='\0'; /* |strncpy| did not terminate the copy */
    }
  }
}
1529
1530@ Because on some systems the difference between two pointers is a |long|
1531but not an |int|, we use \.{\%ld} to print these quantities.
1532
1533@c
void
print_stats() {
  long names_used=(long)(name_ptr-name_dir);
  long texts_used=(long)(text_ptr-text_info);
  long bytes_used=(long)(byte_ptr-byte_mem);
  long toks_used=(long)(tok_ptr-tok_mem);
  printf("\nMemory usage statistics:\n");
  printf("%ld names (out of %ld)\n",names_used,(long)max_names);
  printf("%ld replacement texts (out of %ld)\n",texts_used,(long)max_texts);
  printf("%ld bytes (out of %ld)\n",bytes_used,(long)max_bytes);
  printf("%ld tokens (out of %ld)\n",toks_used,(long)max_toks);
}
1546
1547@** Index.
1548Here is a cross-reference table for \.{CTANGLE}.
1549All sections in which an identifier is
1550used are listed with that identifier, except that reserved words are
1551indexed only when they appear in format definitions, and the appearances
1552of identifiers in section names are not indexed. Underlined entries
1553correspond to where the identifier was declared. Error messages and
1554a few other things like ``ASCII code dependencies'' are indexed here too.
1555