1% otangle.ch: web2c changes to file tangle.ch
2%
3% This file is part of the Omega project, which
4% is based in the web2c distribution of TeX.
5%
6% Copyright (c) 1994--1998 John Plaice and Yannis Haralambous
7% applies only to the changes to the original tangle.ch.
8%
9% otangle.ch for C compilation with web2c.
10%
11%  10/9/82 (HT) Original version
12%  11/29   (HT) New version, with conversion to lowercase handled properly
13%               Also, new control sequence:
14%                       @=...text...@>   Put ...text... verbatim on a line
15%                                        by itself in the Pascal output.
16%                                        (argument must fit on one line)
17%               This control sequence facilitates putting #include "gcons.h"
18%               (for example) in files meant for the pc compiler.
19%               Also, changed command line usage, so that the absence of a
20%               change file implies no change file, rather than one with the
21%               same name as the web file, with .ch at the end.
22%  1/15/83 (HT) Changed to work with version 1.2, which incorporates the
23%               above change (though unbundling the output line breaking),
24%               so mainly had to remove stuff.
25%  2/17    (HT) Fixed bug that caused 0-9 in identifiers to be converted to
26%               Q-Y on output.
27%  3/18    (HT) Brought up to work with Version 1.5.  Added -r command line
28%               flag to cause a .rpl file to be written with all the lines
29%               of the .web file that were replaced because of the .ch file
30%               (useful for comparing with previous .rpl files, to see if a
31%               change file will still work with a new version of a .web file)
32%               Also, made it write a newline just before exit.
33%  4/12    (PC) Merged with Pavel's version, including adding a call to exit()
34%               at the end depending upon the value of history.
35%  4/16    (PC) Brought up to date with version 1.5 released April, 1983.
36%  6/28   (HWT) Brought up to date with version 1.7 released June, 1983.
37%               With new change file format, the -r option is now unnecessary.
38%  7/17   (HWT) Brought up to date with version 2.0 released July, 1983.
39% 12/18/83 (ETM) Brought up to date with version 2.5 released November, 1983.
40% 11/07/84 (ETM) Brought up to date with version 2.6.
41% 12/15/85 (ETM) Brought up to date with version 2.8.
42% 03/07/88 (ETM) Converted for use with WEB2C
43% 01/02/89 (PAM) Cosmetic upgrade to version 2.9
44% 11/30/89 (KB)  Version 4.
45% (more recent changes in the ChangeLog)
46
47@x [0] Print only changes.
48\pageno=\contentspagenumber \advance\pageno by 1
49@y
50\pageno=\contentspagenumber \advance\pageno by 1
51\let\maybe=\iffalse
52\def\title{TANGLE changes for C}
53@z
54
55@x [1] Define my_name
56@d banner=='This is OTANGLE, Version 4.4'
57@y
58@d my_name=='otangle'
59@d banner=='This is OTANGLE, Version 4.4'
60@z
61
62@x [2] Eliminate the |end_of_TANGLE| label.
63@d end_of_TANGLE = 9999 {go here to wrap it up}
64
65@y
66@z
67@x
68label end_of_TANGLE; {go here to finish}
69@y
70@z
71
72@x [?] Define and call parse_arguments.
73procedure initialize;
74  var @<Local variables for initialization@>@/
75  begin @<Set initial values@>@/
76@y
77@<Define |parse_arguments|@>
78procedure initialize;
79  var @<Local variables for initialization@>@/
80  begin
81    kpse_set_program_name (argv[0], my_name);
82    parse_arguments;
83    @<Set initial values@>@/
84@z
85
86@x [8] Constants: increase id lengths, for TeX--XeT and tex2pdf.
87@!buf_size=100; {maximum length of input line}
88@y
89@!buf_size=3000; {maximum length of input line}
90@z
91@x
92@!max_names=10239; {number of identifiers, strings, module names;
93  must be less than 10240}
94@!max_texts=10239; {number of replacement texts, must be less than 10240}
95@y
96@!max_names=10239; {number of identifiers, strings, module names;
97  must be less than 10240}
98@!max_texts=10239; {number of replacement texts, must be less than 10240}
99@z
100
101@x
102@!stack_size=50; {number of simultaneous levels of macro expansion}
103@!max_id_length=12; {long identifiers are chopped to this length, which must
104  not exceed |line_length|}
105@!unambig_length=7; {identifiers must be unique if chopped to this length}
106  {note that 7 is more strict than \PASCAL's 8, but this can be varied}
107@y
108@!stack_size=100; {number of simultaneous levels of macro expansion}
109@!max_id_length=50; {long identifiers are chopped to this length, which must
110  not exceed |line_length|}
111@!unambig_length=25; {identifiers must be unique if chopped to this length}
112@z
113
114% [??] The text_char type is used as an array index into xord.  The
115% default type `char' produces signed integers, which are bad array
116% indices in C.
117%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
118@x
119@d text_char == char {the data type of characters in text files}
120@y
121@d text_char == ASCII_code {the data type of characters in text files}
122@z
123
124@x [17] enable maximum character set
125for i:=1 to @'37 do xchr[i]:=' ';
126for i:=@'200 to @'377 do xchr[i]:=' ';
127@y
128for i:=1 to @'37 do xchr[i]:=chr(i);
129for i:=@'200 to @'377 do xchr[i]:=chr(i);
130@z
131
132@x [20] terminal output: use standard i/o
133@d print(#)==write(term_out,#) {`|print|' means write on the terminal}
134@y
135@d term_out==stdout
136@d print(#)==write(term_out,#) {`|print|' means write on the terminal}
137@z
138
139@x
140@<Globals...@>=
141@!term_out:text_file; {the terminal as an output file}
142@y
143@z
144
145@x [21] init terminal
146@ Different systems have different ways of specifying that the output on a
147certain file will appear on the user's terminal. Here is one way to do this
148on the \PASCAL\ system that was used in \.{TANGLE}'s initial development:
149@^system dependencies@>
150
151@<Set init...@>=
152rewrite(term_out,'TTY:'); {send |term_out| output to the terminal}
153@y
154@ Different systems have different ways of specifying that the output on a
155certain file will appear on the user's terminal.
156@^system dependencies@>
157
158@<Set init...@>=
159 {Nothing need be done for C.}
160@z
161
162@x [22] flush terminal buffer
163@d update_terminal == break(term_out) {empty the terminal output buffer}
164@y
165@d update_terminal == fflush(term_out) {empty the terminal output buffer}
166@z
167
168@x [24] open input files
169begin reset(web_file); reset(change_file);
170@y
171begin web_file := kpse_open_file(web_name, kpse_web_format);
172if chg_name then change_file := kpse_open_file(chg_name, kpse_web_format);
173@z
174
175@x [26] Open output files (except for the pool file).
176rewrite(Pascal_file); rewrite(pool);
177@y
178rewrite (Pascal_file, pascal_name);
179@z
180
181@x [28] Fix f^.
182    begin buffer[limit]:=xord[f^]; get(f);
183    incr(limit);
184    if buffer[limit-1]<>" " then final_limit:=limit;
185    if limit=buf_size then
186      begin while not eoln(f) do get(f);
187@y
188    begin buffer[limit]:=xord[getc(f)];
189    incr(limit);
190    if buffer[limit-1]<>" " then final_limit:=limit;
191    if limit=buf_size then
192      begin while not eoln(f) do vgetc(f);
193@z
194
195@x [??] Fix `jump_out'.
196@d fatal_error(#)==begin new_line; print(#); error; mark_fatal; jump_out;
197  end
198
199@<Error handling...@>=
200procedure jump_out;
201begin goto end_of_TANGLE;
202end;
203@y
204@d jump_out==uexit(1)
205@d fatal_error(#)==begin new_line; write(stderr, #);
206     error; mark_fatal; uexit(1);
207  end
208@z
209
210@x [38] Provide for a larger `byte_mem' and `tok_mem'. Extra capacity:
211@d ww=2 {we multiply the byte capacity by approximately this amount}
212@d zz=3 {we multiply the token capacity by approximately this amount}
213@y
214@d ww=3 {we multiply the byte capacity by approximately this amount}
215@d zz=4 {we multiply the token capacity by approximately this amount}
216@z
217
218@x [58] Remove conversion to uppercase
219    begin if buffer[i]>="a" then chopped_id[s]:=buffer[i]-@'40
220    else chopped_id[s]:=buffer[i];
221@y
222    begin chopped_id[s]:=buffer[i];
223@z
224
225@x [63] Remove conversion to uppercase
226    begin if c>="a" then c:=c-@'40; {merge lowercase with uppercase}
227@y
228    begin
229@z
230
231@x [64] Delayed pool file opening.
232@<Define and output a new string...@>=
233begin ilk[p]:=numeric; {strings are like numeric macros}
234if l-double_chars=2 then {this string is for a single character}
235  equiv[p]:=buffer[id_first+1]+1073741824
236else  begin equiv[p]:=string_ptr+1073741824;
237  l:=l-double_chars-1;
238@y
239@<Define and output a new string...@>=
240begin ilk[p]:=numeric; {strings are like numeric macros}
241if l-double_chars=2 then {this string is for a single character}
242  equiv[p]:=buffer[id_first+1]+1073741824
243else  begin
244  {Avoid creating empty pool files.}
245  if string_ptr = 65536 then begin
246    {Change |".web"| to |".pool"| and use the current directory.}
247    pool_name := basename_change_suffix (web_name, '.web', '.pool');
248    rewrite (pool, pool_name);
249  end;
250  equiv[p]:=string_ptr+1073741824;
251  l:=l-double_chars-1;
252@z
253
254@x [105] Accept DIV, div, MOD, and mod
255 (((out_contrib[1]="D")and(out_contrib[2]="I")and(out_contrib[3]="V")) or@|
256 ((out_contrib[1]="M")and(out_contrib[2]="O")and(out_contrib[3]="D")) ))or@|
257@^uppercase@>
258@y
259  (((out_contrib[1]="D")and(out_contrib[2]="I")and(out_contrib[3]="V")) or@|
260  ((out_contrib[1]="d")and(out_contrib[2]="i")and(out_contrib[3]="v")) or@|
261  ((out_contrib[1]="M")and(out_contrib[2]="O")and(out_contrib[3]="D")) or@|
262  ((out_contrib[1]="m")and(out_contrib[2]="o")and(out_contrib[3]="d")) ))or@|
263@z
264
265@x [110] lowercase ids
266@^uppercase@>
267  if ((out_buf[out_ptr-3]="D")and(out_buf[out_ptr-2]="I")and
268    (out_buf[out_ptr-1]="V"))or @/
269     ((out_buf[out_ptr-3]="M")and(out_buf[out_ptr-2]="O")and
270    (out_buf[out_ptr-1]="D")) then@/ goto bad_case
271@y
272  if ((out_buf[out_ptr-3]="D")and(out_buf[out_ptr-2]="I")and
273    (out_buf[out_ptr-1]="V"))or @/
274     ((out_buf[out_ptr-3]="d")and(out_buf[out_ptr-2]="i")and
275    (out_buf[out_ptr-1]="v"))or @/
276     ((out_buf[out_ptr-3]="M")and(out_buf[out_ptr-2]="O")and
277    (out_buf[out_ptr-1]="D"))or @/
278     ((out_buf[out_ptr-3]="m")and(out_buf[out_ptr-2]="o")and
279    (out_buf[out_ptr-1]="d")) then@/ goto bad_case
280@z
281
282@x [114] lowercase operators (`and', `or', etc.)
283and_sign: begin out_contrib[1]:="A"; out_contrib[2]:="N"; out_contrib[3]:="D";
284@^uppercase@>
285  send_out(ident,3);
286  end;
287not_sign: begin out_contrib[1]:="N"; out_contrib[2]:="O"; out_contrib[3]:="T";
288  send_out(ident,3);
289  end;
290set_element_sign: begin out_contrib[1]:="I"; out_contrib[2]:="N";
291  send_out(ident,2);
292  end;
293or_sign: begin out_contrib[1]:="O"; out_contrib[2]:="R"; send_out(ident,2);
294@y
295and_sign: begin out_contrib[1]:="a"; out_contrib[2]:="n"; out_contrib[3]:="d";
296  send_out(ident,3);
297  end;
298not_sign: begin out_contrib[1]:="n"; out_contrib[2]:="o"; out_contrib[3]:="t";
299  send_out(ident,3);
300  end;
301set_element_sign: begin out_contrib[1]:="i"; out_contrib[2]:="n";
302  send_out(ident,2);
303  end;
304or_sign: begin out_contrib[1]:="o"; out_contrib[2]:="r"; send_out(ident,2);
305@z
306
307@x [116] Remove conversion to uppercase
308@ Single-character identifiers represent themselves, while longer ones
309appear in |byte_mem|. All must be converted to uppercase,
310with underlines removed. Extremely long identifiers must be chopped.
311
312(Some \PASCAL\ compilers work with lowercase letters instead of
313uppercase. If this module of \.{TANGLE} is changed, it's also necessary
314to change from uppercase to lowercase in the modules that are
315listed in the index under ``uppercase''.)
316@^system dependencies@>
317@^uppercase@>
318
319@d up_to(#)==#-24,#-23,#-22,#-21,#-20,#-19,#-18,#-17,#-16,#-15,#-14,
320  #-13,#-12,#-11,#-10,#-9,#-8,#-7,#-6,#-5,#-4,#-3,#-2,#-1,#
321
322@<Cases related to identifiers@>=
323"A",up_to("Z"): begin out_contrib[1]:=cur_char; send_out(ident,1);
324  end;
325"a",up_to("z"): begin out_contrib[1]:=cur_char-@'40; send_out(ident,1);
326  end;
327identifier: begin k:=0; j:=byte_start[cur_val]; w:=cur_val mod ww;
328  while (k<max_id_length)and(j<byte_start[cur_val+ww]) do
329    begin incr(k); out_contrib[k]:=byte_mem[w,j]; incr(j);
330    if out_contrib[k]>="a" then out_contrib[k]:=out_contrib[k]-@'40
331    else if out_contrib[k]="_" then decr(k);
332    end;
333  send_out(ident,k);
334  end;
335@y
336@ Single-character identifiers represent themselves, while longer ones
337appear in |byte_mem|. All must be converted to lowercase,
338with underlines removed. Extremely long identifiers must be chopped.
339@^system dependencies@>
340
341@d up_to(#)==#-24,#-23,#-22,#-21,#-20,#-19,#-18,#-17,#-16,#-15,#-14,
342  #-13,#-12,#-11,#-10,#-9,#-8,#-7,#-6,#-5,#-4,#-3,#-2,#-1,#
343
344@<Cases related to identifiers@>=
345"A",up_to("Z"),
346"a",up_to("z"): begin out_contrib[1]:=cur_char; send_out(ident,1);
347  end;
348identifier: begin k:=0; j:=byte_start[cur_val]; w:=cur_val mod ww;
349  while (k<max_id_length)and(j<byte_start[cur_val+ww]) do
350    begin incr(k); out_contrib[k]:=byte_mem[w,j]; incr(j);
351    if out_contrib[k]="_" then decr(k);
352    end;
353  send_out(ident,k);
354  end;
355@z
356
357@x [179] make term_in = input
358any error stop will set |debug_cycle| to zero.
359@y
360any error stop will set |debug_cycle| to zero.
361
362@d term_in==stdin
363@z
364
365@x
366@!term_in:text_file; {the user's terminal as an input file}
367@y
368@z
369
370@x [180] remove term_in reset
371reset(term_in,'TTY:','/I'); {open |term_in| as the terminal, don't do a |get|}
372@y
373@z
374
375@x [182] write newline just before exit; use value of |history|
376print_ln(banner); {print a ``banner line''}
377@y
378print (banner); {print a ``banner line''}
379print_ln (version_string);
380@z
381
382@x Eliminate the |end_of_TANGLE| label.
383end_of_TANGLE:
384@y
385@z
386
387@x
388@<Print the job |history|@>;
389@y
390@<Print the job |history|@>;
391new_line;
392if (history <> spotless) and (history <> harmless_message)
393then uexit (1)
394else uexit (0);
395@z
396
397@x [188] System-dependent changes.
398This module should be replaced, if necessary, by changes to the program
399that are necessary to make \.{TANGLE} work at a particular installation.
400It is usually best to design your change file so that all changes to
401previous modules preserve the module numbering; then everybody's version
402will be consistent with the printed program. More extensive changes,
403which introduce new modules, can be inserted here; then only the index
404itself will get a new module number.
405@^system dependencies@>
406@y
407Parse a Unix-style command line.
408
409@d argument_is (#) == (strcmp (long_options[option_index].name, #) = 0)
410
411@<Define |parse_arguments|@> =
412procedure parse_arguments;
413const n_options = 3; {Pascal won't count array lengths for us.}
414var @!long_options: array[0..n_options] of getopt_struct;
415    @!getopt_return_val: integer;
416    @!option_index: c_int_type;
417    @!current_option: 0..n_options;
418begin
419  @<Define the option table@>;
420  repeat
421    getopt_return_val := getopt_long_only (argc, argv, '', long_options,
422                                           address_of (option_index));
423    if getopt_return_val = -1 then begin
424      {End of arguments; we exit the loop below.} ;
425
426    end else if getopt_return_val = "?" then begin
427      usage (my_name);
428
429    end else if argument_is ('help') then begin
430      usage_help (OTANGLE_HELP, nil);
431
432    end else if argument_is ('version') then begin
433      print_version_and_exit
434        (banner, nil, 'J. Plaice, Y. Haralambous, D.E. Knuth', nil);
435
436    end; {Else it was a flag; |getopt| has already done the assignment.}
437  until getopt_return_val = -1;
438
439  {Now |optind| is the index of first non-option on the command line.}
440  if (optind + 1 <> argc) and (optind + 2 <> argc) then begin
441    write_ln (stderr, my_name, ': Need one or two file arguments.');
442    usage (my_name);
443  end;
444
445  {Supply |".web"| and |".ch"| extensions if necessary.}
446  web_name := extend_filename (cmdline (optind), 'web');
447  if optind + 2 = argc then begin
448    chg_name := extend_filename (cmdline (optind + 1), 'ch');
449  end;
450
451  {Change |".web"| to |".p"| and use the current directory.}
452  pascal_name := basename_change_suffix (web_name, '.web', '.p');
453end;
454
455@ Here are the options we allow.  The first is one of the standard GNU options.
456@.-help@>
457
458@<Define the option...@> =
459current_option := 0;
460long_options[current_option].name := 'help';
461long_options[current_option].has_arg := 0;
462long_options[current_option].flag := 0;
463long_options[current_option].val := 0;
464incr (current_option);
465
466@ Another of the standard options.
467@.-version@>
468
469@<Define the option...@> =
470long_options[current_option].name := 'version';
471long_options[current_option].has_arg := 0;
472long_options[current_option].flag := 0;
473long_options[current_option].val := 0;
474incr (current_option);
475
476@ An element with all zeros always ends the list.
477
478@<Define the option...@> =
479long_options[current_option].name := 0;
480long_options[current_option].has_arg := 0;
481long_options[current_option].flag := 0;
482long_options[current_option].val := 0;
483
484@ Global filenames.
485
486@<Globals...@>=
487@!web_name,@!chg_name,@!pascal_name,@!pool_name:const_c_string;
488@z
489