1% PKtoGF.web
2%
3%  PKtoGF creates a generic font file from a packed pixel file.
4%
5% Preliminary 0.0 version:  January, 1988
6% Fixed bug to include specials in character (1.0): January 1988
7% Cleaned up description (bitweight errors) no version change:  July 1990
8% Fixed bug with empty character setting min_n to 1 (1.1): 19 October 1990
9\def\versiondate{19 October 1990}
10%
11\font\ninerm=cmr9
12\let\mc=\ninerm % medium caps for names like PASCAL
13\font\logo=logo10 % font used for the METAFONT logo
14\def\MF{{\logo META}\-{\logo FONT}}
15\def\PASCAL{{\mc Pascal}}
16\def\tamu{Texas A\char38 M}
17\def\(#1){} % this is used to make section names sort themselves better
18\def\9#1{} % this is used for sort keys in the index
19\def\title{PKtoGF}
20\def\contentspagenumber{0}
21\def\topofcontents{\null
22  \def\titlepage{F} % include headline on the contents page
23  \def\rheader{\mainfont\hfil \contentspagenumber}
24  \vfill
25  \centerline{\titlefont The {\ttitlefont PKtoGF} processor}
26  \vskip 15pt
27  \centerline{(Version 1.1, \versiondate)}
28  \vfill}
29\def\botofcontents{\vfill
30  \centerline{\hsize 5in\baselineskip9pt
31    \vbox{\ninerm\noindent
32    The preparation of this report
33    was supported in part by the National Science
34    Foundation under grants IST-8201926 and MCS-8300984,
35    and by the System Development Foundation. `\TeX' is a
36    trademark of the American Mathematical Society.}}}
37\pageno=\contentspagenumber \advance\pageno by 1
38
39@* Introduction.
40This program takes a packed, or \.{PK} file, and converts it into the
41standard \.{GF} format.  The resulting \.{GF} file is standard in
42every way, and is essentially identical to the \.{GF} file from which
43the \.{PK} file was produced in the first place.  Note, however, that
44\.{GF} to \.{PK} to \.{GF} is not an exact identity transformation, as
45the new \.{GF} file will have a different preamble string and the actual
46minimum bounding box will be used, instead of a possibly larger bounding
47box in the original \.{GF} file.
48
49@ The |banner| string defined here should be changed whenever \.{PKtoGF}
50gets modified.  You should update the preamble comment as well.
51
52@d banner=='This is PKtoGF, Version 1.1'
53         {printed when the program starts}
54@d preamble_comment=='PKtoGF 1.1 output' {update together with |banner|}
55@d comm_length==17 {the number of characters in |preamble_comment|}
56
57@ This program is written in standard \PASCAL, except where it is necessary
58to use extensions; for example, \.{PKtoGF} must read files whose names
59are dynamically specified, and that would be impossible in pure \PASCAL.
60
61@d othercases == others: {default for cases not listed explicitly}
62@d endcases == @+end {follows the default case in an extended |case| statement}
63@f othercases == else {typeset |othercases| like the reserved word \&{else}}
64@f endcases == end {typeset |endcases| like the reserved word \&{end}}
65
66@ Both the input and output come from binary files.  On-line interaction
67is handled through \PASCAL's standard |input| and |output| files.
68
69@d print_ln(#)==write_ln(output,#) {write the arguments on |output|, then end the line}
70@d print(#)==write(output,#) {write the arguments on |output|, staying on the line}
71
72@p program PKtoGF(input, output); {the standard text files handle on-line interaction}
73label @<Labels in the outer block@>@/
74const @<Constants in the outer block@>@/
75type @<Types in the outer block@>@/
76var @<Globals in the outer block@>@/
77procedure initialize; {this procedure gets things started properly}
78  var i:integer; {loop index for initializations}
79  begin print_ln(banner); {show the banner on |output| before anything else}@/
80  @<Set initial values@>@/
81  end;
82
83@ If the program has to stop prematurely, it goes to the
84`|final_end|'.
85
86@d final_end=9999 {label for the end of it all}
87
88@<Labels...@>=final_end; {the target of the |goto| in |jump_out|}
89
90@ These constants determine the maximum length of a file name and the length
91of the terminal line, as well as the maximum number of run counts allowed
92per line of the \.{GF} file.  (We need this to implement repeat counts.)
93@^system dependencies@>
94
95@<Constants...@>=
96@!name_length=80; {maximum length of a file name}
97@!terminal_line_length=132; {maximum length of an input line}
98@!max_counts=400; {maximum number of run counts in a raster line}
99
100@ Here are some macros for common programming idioms.
101
102@d incr(#) == #:=#+1 {increase a variable by unity}
103@d decr(#) == #:=#-1 {decrease a variable by unity}
104@d do_nothing == {empty statement}
105
106@ It is possible that a malformed packed file (heaven forbid!) or some other
107error might be detected by this program.  Such errors might occur in a deeply
108nested procedure, so the procedure called |jump_out| has been added to transfer
109to the very end of the program with an error message.
110
111@d abort(#)==begin print_ln(' ',#); jump_out; end {print the message, then quit}
112
113@p procedure jump_out;
114begin goto final_end; {a jump out of this procedure to the outer-block label}
115end;
116
117@* The character set.
118Like all programs written with the  \.{WEB} system, \.{PKtoGF} can be
119used with any character set. But it uses ASCII code internally, because
120the programming for portable input-output is easier when a fixed internal
121code is used.
122
123The next few sections of \.{PKtoGF} have therefore been copied from the
124analogous ones in the \.{WEB} system routines. They have been considerably
125simplified, since \.{PKtoGF} need not deal with the controversial
126ASCII codes less than @'40.
127
128@<Types...@>=
129@!ASCII_code=" ".."~"; {a subrange of the integers}
130
131@ The original \PASCAL\ compiler was designed in the late 60s, when six-bit
132character sets were common, so it did not make provision for lower case
133letters. Nowadays, of course, we need to deal with both upper and lower case
134alphabets in a convenient way, especially in a program like \.{PKtoGF}.
135So we shall assume that the \PASCAL\ system being used for \.{PKtoGF}
136has a character set containing at least the standard visible characters
137of ASCII code (|"!"| through |"~"|).
138
139Some \PASCAL\ compilers use the original name |char| for the data type
140associated with the characters in text files, while other \PASCAL s
141consider |char| to be a 64-element subrange of a larger data type that has
142some other name.  In order to accommodate this difference, we shall use
143the name |text_char| to stand for the data type of the characters in the
144output file.  We shall also assume that |text_char| consists of
145the elements |chr(first_text_char)| through |chr(last_text_char)|,
146inclusive. The following definitions should be adjusted if necessary.
147@^system dependencies@>
148
149@d text_char == char {the data type of characters in text files}
150@d first_text_char=0 {ordinal number of the smallest element of |text_char|}
151@d last_text_char=127 {ordinal number of the largest element of |text_char|}
152
153@<Types...@>=
154@!text_file=packed file of text_char;
155
156@ The \.{PKtoGF} processor converts between ASCII code and
157the user's external character set by means of arrays |xord| and |xchr|
158that are analogous to \PASCAL's |ord| and |chr| functions.
159
160@<Globals...@>=
161@!xord: array [text_char] of ASCII_code;
162  {specifies conversion of input characters}
163@!xchr: array [0..255] of text_char;
164  {specifies conversion of output characters}
165
166@ Under our assumption that the visible characters of standard ASCII are
167all present, the following assignment statements initialize the
168|xchr| array properly, without needing any system-dependent changes.
169
170@<Set init...@>=
171for i:=0 to @'37 do xchr[i]:='?'; {codes below |@'40| become question marks}
172xchr[@'40]:=' ';
173xchr[@'41]:='!';
174xchr[@'42]:='"';
175xchr[@'43]:='#';
176xchr[@'44]:='$';
177xchr[@'45]:='%';
178xchr[@'46]:='&';
179xchr[@'47]:='''';@/
180xchr[@'50]:='(';
181xchr[@'51]:=')';
182xchr[@'52]:='*';
183xchr[@'53]:='+';
184xchr[@'54]:=',';
185xchr[@'55]:='-';
186xchr[@'56]:='.';
187xchr[@'57]:='/';@/
188xchr[@'60]:='0';
189xchr[@'61]:='1';
190xchr[@'62]:='2';
191xchr[@'63]:='3';
192xchr[@'64]:='4';
193xchr[@'65]:='5';
194xchr[@'66]:='6';
195xchr[@'67]:='7';@/
196xchr[@'70]:='8';
197xchr[@'71]:='9';
198xchr[@'72]:=':';
199xchr[@'73]:=';';
200xchr[@'74]:='<';
201xchr[@'75]:='=';
202xchr[@'76]:='>';
203xchr[@'77]:='?';@/
204xchr[@'100]:='@@';
205xchr[@'101]:='A';
206xchr[@'102]:='B';
207xchr[@'103]:='C';
208xchr[@'104]:='D';
209xchr[@'105]:='E';
210xchr[@'106]:='F';
211xchr[@'107]:='G';@/
212xchr[@'110]:='H';
213xchr[@'111]:='I';
214xchr[@'112]:='J';
215xchr[@'113]:='K';
216xchr[@'114]:='L';
217xchr[@'115]:='M';
218xchr[@'116]:='N';
219xchr[@'117]:='O';@/
220xchr[@'120]:='P';
221xchr[@'121]:='Q';
222xchr[@'122]:='R';
223xchr[@'123]:='S';
224xchr[@'124]:='T';
225xchr[@'125]:='U';
226xchr[@'126]:='V';
227xchr[@'127]:='W';@/
228xchr[@'130]:='X';
229xchr[@'131]:='Y';
230xchr[@'132]:='Z';
231xchr[@'133]:='[';
232xchr[@'134]:='\';
233xchr[@'135]:=']';
234xchr[@'136]:='^';
235xchr[@'137]:='_';@/
236xchr[@'140]:='`';
237xchr[@'141]:='a';
238xchr[@'142]:='b';
239xchr[@'143]:='c';
240xchr[@'144]:='d';
241xchr[@'145]:='e';
242xchr[@'146]:='f';
243xchr[@'147]:='g';@/
244xchr[@'150]:='h';
245xchr[@'151]:='i';
246xchr[@'152]:='j';
247xchr[@'153]:='k';
248xchr[@'154]:='l';
249xchr[@'155]:='m';
250xchr[@'156]:='n';
251xchr[@'157]:='o';@/
252xchr[@'160]:='p';
253xchr[@'161]:='q';
254xchr[@'162]:='r';
255xchr[@'163]:='s';
256xchr[@'164]:='t';
257xchr[@'165]:='u';
258xchr[@'166]:='v';
259xchr[@'167]:='w';@/
260xchr[@'170]:='x';
261xchr[@'171]:='y';
262xchr[@'172]:='z';
263xchr[@'173]:='{';
264xchr[@'174]:='|';
265xchr[@'175]:='}';
266xchr[@'176]:='~';
267for i:=@'177 to 255 do xchr[i]:='?'; {so do all codes from |@'177| upward}
268
269@ The following system-independent code makes the |xord| array contain a
270suitable inverse to the information in |xchr|.
271
272@<Set init...@>=
273for i:=first_text_char to last_text_char do xord[chr(i)]:=@'40; {default is a space}
274for i:=" " to "~" do xord[xchr[i]]:=i; {then invert |xchr| over the visible codes}
275
276@* Generic font file format.
277The most important output produced by a typical run of \MF\ is the
278``generic font'' (\.{GF}) file that specifies the bit patterns of the
279characters that have been drawn. The term {\sl generic\/} indicates that
280this file format doesn't match the conventions of any name-brand manufacturer;
281but it is easy to convert \.{GF} files to the special format required by
282almost all digital phototypesetting equipment. There's a strong analogy
283between the \.{DVI} files written by \TeX\ and the \.{GF} files written
284by \MF; and, in fact, the file formats have a lot in common.
285
286A \.{GF} file is a stream of 8-bit bytes that may be
287regarded as a series of commands in a machine-like language. The first
288byte of each command is the operation code, and this code is followed by
289zero or more bytes that provide parameters to the command. The parameters
290themselves may consist of several consecutive bytes; for example, the
291`|boc|' (beginning of character) command has six parameters, each of
292which is four bytes long. Parameters are usually regarded as nonnegative
293integers; but four-byte-long parameters can be either positive or
294negative, hence they range in value from $-2^{31}$ to $2^{31}-1$.
295As in \.{TFM} files, numbers that occupy
296more than one byte position appear in BigEndian order,
297and negative numbers appear in two's complement notation.
298
299A \.{GF} file consists of a ``preamble,'' followed by a sequence of one or
300more ``characters,'' followed by a ``postamble.'' The preamble is simply a
301|pre| command, with its parameters that introduce the file; this must come
302first.  Each ``character'' consists of a |boc| command, followed by any
303number of other commands that specify ``black'' pixels,
304followed by an |eoc| command. The characters appear in the order that \MF\
305generated them. If we ignore no-op commands (which are allowed between any
306two commands in the file), each |eoc| command is immediately followed by a
307|boc| command, or by a |post| command; in the latter case, there are no
308more characters in the file, and the remaining bytes form the postamble.
309Further details about the postamble will be explained later.
310
311Some parameters in \.{GF} commands are ``pointers.'' These are four-byte
312quantities that give the location number of some other byte in the file;
313the first file byte is number~0, then comes number~1, and so on.
314
315@ The \.{GF} format is intended to be both compact and easily interpreted
316by a machine. Compactness is achieved by making most of the information
317relative instead of absolute. When a \.{GF}-reading program reads the
318commands for a character, it keeps track of two quantities: (a)~the current
319column number,~|m|; and (b)~the current row number,~|n|.  These are 32-bit
320signed integers, although most actual font formats produced from \.{GF}
321files will need to curtail this vast range because of practical
322limitations. (\MF\ output will never allow $\vert m\vert$ or $\vert
323n\vert$ to get extremely large, but the \.{GF} format tries to be more
324general.)
325
326How do \.{GF}'s row and column numbers correspond to the conventions
327of \TeX\ and \MF? Well, the ``reference point'' of a character, in \TeX's
328view, is considered to be at the lower left corner of the pixel in row~0
329and column~0. This point is the intersection of the baseline with the left
330edge of the type; it corresponds to location $(0,0)$ in \MF\ programs.
331Thus the pixel in \.{GF} row~0 and column~0 is \MF's unit square, comprising
332the region of the plane whose coordinates both lie between 0 and~1. The
333pixel in \.{GF} row~|n| and column~|m| consists of the points whose \MF\
334coordinates |(x,y)| satisfy |m<=x<=m+1| and |n<=y<=n+1|.  Negative values of
335|m| and~|x| correspond to columns of pixels {\sl left\/} of the reference
336point; negative values of |n| and~|y| correspond to rows of pixels {\sl
337below\/} the baseline.
338
339Besides |m| and |n|, there's also a third aspect of the current
340state, namely the @!|paint_switch|, which is always either \\{black} or
341\\{white}. Each \\{paint} command advances |m| by a specified amount~|d|,
342and blackens the intervening pixels if |paint_switch=black|; then
343the |paint_switch| changes to the opposite state. \.{GF}'s commands are
344designed so that |m| will never decrease within a row, and |n| will never
345increase within a character; hence there is no way to whiten a pixel that
346has been blackened.
347
348@ Here is a list of all the commands that may appear in a \.{GF} file. Each
349command is specified by its symbolic name (e.g., |boc|), its opcode byte
350(e.g., 67), and its parameters (if any). The parameters are followed
351by a bracketed number telling how many bytes they occupy; for example,
352`|d[2]|' means that parameter |d| is two bytes long.
353
354\yskip\hang|paint_0| 0. This is a \\{paint} command with |d=0|; it does
355nothing but change the |paint_switch| from \\{black} to \\{white} or
356vice~versa.
357
358\yskip\hang\\{paint\_1} through \\{paint\_63} (opcodes 1 to 63).
359These are \\{paint} commands with |d=1| to~63, defined as follows: If
360|paint_switch=black|, blacken |d|~pixels of the current row~|n|,
361in columns |m| through |m+d-1| inclusive. Then, in any case,
362complement the |paint_switch| and advance |m| by~|d|.
363
364\yskip\hang|paint1| 64 |d[1]|. This is a \\{paint} command with a specified
365value of~|d|; \MF\ uses it to paint when |64<=d<256|.
366
367\yskip\hang|@!paint2| 65 |d[2]|. Same as |paint1|, but |d|~can be as high
368as~65535.
369
370\yskip\hang|@!paint3| 66 |d[3]|. Same as |paint1|, but |d|~can be as high
371as $2^{24}-1$. \MF\ never needs this command, and it is hard to imagine
372anybody making practical use of it; surely a more compact encoding will be
373desirable when characters can be this large. But the command is there,
374anyway, just in case.
375
376\yskip\hang|boc| 67 |c[4]| |p[4]| |min_m[4]| |max_m[4]| |min_n[4]|
377|max_n[4]|. Beginning of a character:  Here |c| is the character code, and
378|p| points to the previous character beginning (if any) for characters having
379this code number modulo 256.  (The pointer |p| is |-1| if there was no
380prior character with an equivalent code.) The values of registers |m| and |n|
381defined by the instructions that follow for this character must
382satisfy |min_m<=m<=max_m| and |min_n<=n<=max_n|.  (The values of |max_m| and
383|min_n| need not be the tightest bounds possible.)  When a \.{GF}-reading
384program sees a |boc|, it can use |min_m|, |max_m|, |min_n|, and |max_n| to
385initialize the bounds of an array. Then it sets |m:=min_m|, |n:=max_n|, and
386|paint_switch:=white|.
387
388\yskip\hang|boc1| 68 |c[1]| |@!del_m[1]| |max_m[1]| |@!del_n[1]| |max_n[1]|.
389Same as |boc|, but |p| is assumed to be~$-1$; also |del_m=max_m-min_m|
390and |del_n=max_n-min_n| are given instead of |min_m| and |min_n|.
391The one-byte parameters must be between 0 and 255, inclusive.
392\ (This abbreviated |boc| saves 19~bytes per character, in common cases.)
393
394\yskip\hang|eoc| 69. End of character: All pixels blackened so far
395constitute the pattern for this character. In particular, a completely
396blank character might have |eoc| immediately following |boc|.
397
398\yskip\hang|skip0| 70. Decrease |n| by 1 and set |m:=min_m|,
399|paint_switch:=white|. \ (This finishes one row and begins another,
400ready to whiten the leftmost pixel in the new row.)
401
402\yskip\hang|skip1| 71 |d[1]|. Decrease |n| by |d+1|, set |m:=min_m|, and set
403|paint_switch:=white|. This is a way to produce |d| all-white rows.
404
405\yskip\hang|@!skip2| 72 |d[2]|. Same as |skip1|, but |d| can be as large
406as 65535.
407
408\yskip\hang|@!skip3| 73 |d[3]|. Same as |skip1|, but |d| can be as large
409as $2^{24}-1$. \MF\ obviously never needs this command.
410
411\yskip\hang|new_row_0| 74. Decrease |n| by 1 and set |m:=min_m|,
412|paint_switch:=black|. \ (This finishes one row and begins another,
413ready to {\sl blacken\/} the leftmost pixel in the new row.)
414
415\yskip\hang|@!new_row_1| through |@!new_row_164| (opcodes 75 to 238). Same as
416|new_row_0|, but with |m:=min_m+1| through |min_m+164|, respectively.
417
418\yskip\hang|xxx1| 239 |k[1]| |x[k]|. This command is undefined in
419general; it functions as a $(k+2)$-byte |no_op| unless special \.{GF}-reading
420programs are being used. \MF\ generates \\{xxx} commands when encountering
421a \&{special} string; this occurs in the \.{GF} file only between
422characters, after the preamble, and before the postamble. However,
423\\{xxx} commands might appear anywhere in \.{GF} files generated by other
424processors. It is recommended that |x| be a string having the form of a
425keyword followed by possible parameters relevant to that keyword.
426
427\yskip\hang|@!xxx2| 240 |k[2]| |x[k]|. Like |xxx1|, but |0<=k<65536|.
428
429\yskip\hang|xxx3| 241 |k[3]| |x[k]|. Like |xxx1|, but |0<=k<@t$2^{24}$@>|.
430\MF\ uses this when sending a \&{special} string whose length exceeds~255.
431
432\yskip\hang|@!xxx4| 242 |k[4]| |x[k]|. Like |xxx1|, but |k| can be
433ridiculously large; |k| mustn't be negative.
434
435\yskip\hang|yyy| 243 |y[4]|. This command is undefined in general;
436it functions as a 5-byte |no_op| unless special \.{GF}-reading programs
437are being used. \MF\ puts |scaled| numbers into |yyy|'s, as a
438result of \&{numspecial} commands; the intent is to provide numeric
439parameters to \\{xxx} commands that immediately precede.
440
441\yskip\hang|no_op| 244. No operation, do nothing. Any number of |no_op|'s
442may occur between \.{GF} commands, but a |no_op| cannot be inserted between
443a command and its parameters or between two parameters.
444
445\yskip\hang|char_loc| 245 |c[1]| |dx[4]| |dy[4]| |w[4]| |p[4]|.
446This command will appear only in the postamble, which will be explained
447shortly.
448
449\yskip\hang|@!char_loc0| 246 |c[1]| |@!dm[1]| |w[4]| |p[4]|.
450Same as |char_loc|, except that |dy| is assumed to be zero, and the value
451of~|dx| is taken to be |65536*dm|, where |0<=dm<256|.
452
453\yskip\hang|pre| 247 |i[1]| |k[1]| |x[k]|.
454Beginning of the preamble; this must come at the very beginning of the
455file. Parameter |i| is an identifying number for \.{GF} format, currently
456131. The other information is merely commentary; it is not given
457special interpretation like \\{xxx} commands are. (Note that \\{xxx}
458commands may immediately follow the preamble, before the first |boc|.)
459
460\yskip\hang|post| 248. Beginning of the postamble, see below.
461
462\yskip\hang|post_post| 249. Ending of the postamble, see below.
463
464\yskip\noindent Commands 250--255 are undefined at the present time.
465
466@d gf_id_byte=131 {identifies the kind of \.{GF} files described here}
467
468@ Here are the opcodes that \.{PKtoGF} actually refers to.
469
470@d paint_0=0 {beginning of the \\{paint} commands}
471@d paint1=64 {move right a given number of columns, then
472  black${}\leftrightarrow{}$white}
473@d boc=67 {beginning of a character}
474@d boc1=68 {abbreviated |boc|}
475@d eoc=69 {end of a character}
476@d skip0=70 {skip no blank rows}
477@d skip1=71 {skip over blank rows}
478@d new_row_0=74 {move down one row and then right}
479@d max_new_row=238 {opcode of the largest \\{new\_row} command, |new_row_0+164|}
480@d no_op=244 {no operation; the same opcode as |nop|, distinct from |pre|}
481@d xxx1=239 {for \&{special} strings}
482@d yyy=243 {for \&{numspecial} numbers}
483@d nop=244 {no operation}
484@d char_loc=245 {character locators in the postamble}
485@d char_loc0=246 {character locators in the postamble}
486@d pre=247 {preamble}
487@d post=248 {postamble beginning}
488@d post_post=249 {postamble ending}
489@d undefined_commands==250,251,252,253,254,255
490
491@ The last character in a \.{GF} file is followed by `|post|'; this command
492introduces the postamble, which summarizes important facts that \MF\ has
493accumulated. The postamble has the form
494$$\vbox{\halign{\hbox{#\hfil}\cr
495  |post| |p[4]| |@!ds[4]| |@!cs[4]| |@!hppp[4]| |@!vppp[4]|
496   |@!min_m[4]| |@!max_m[4]| |@!min_n[4]| |@!max_n[4]|\cr
497  $\langle\,$character locators$\,\rangle$\cr
498  |post_post| |q[4]| |i[1]| 223's$[{\G}4]$\cr}}$$
499Here |p| is a pointer to the byte following the final |eoc| in the file
500(or to the byte following the preamble, if there are no characters);
501it can be used to locate the beginning of \\{xxx} commands
502that might have preceded the postamble. The |ds| and |cs| parameters
503@^design size@> @^check sum@>
504give the design size and check sum, respectively, which are exactly the
505values put into the header of any \.{TFM} file that shares information with
506this \.{GF} file. Parameters |hppp| and |vppp| are the ratios of
507pixels per point, horizontally and vertically, expressed as |scaled| integers
508(i.e., multiplied by $2^{16}$); they can be used to correlate the font
509with specific device resolutions, magnifications, and ``at sizes.''  Then
510come |min_m|, |max_m|, |min_n|, and |max_n|, which bound the values that
511registers |m| and~|n| assume in all characters in this \.{GF} file.
512(These bounds need not be the best possible; |max_m| and |min_n| may, on the
513other hand, be tighter than the similar bounds in |boc| commands. For
514example, some character may have |min_n=-100| in its |boc|, but it might
515turn out that |n| never gets lower than |-50| in any character; then
516|min_n| can have any value |<=-50|. If there are no characters in the file,
517it's possible to have |min_m>max_m| and/or |min_n>max_n|.)
518
519@ Character locators are introduced by |char_loc| commands,
520which specify a character residue~|c|, character escapements (|dx,dy|),
521a character width~|w|, and a pointer~|p|
522to the beginning of that character. (If two or more characters have the
523same code~|c| modulo 256, only the last will be indicated; the others can be
524located by following backpointers. Characters whose codes differ by a
525multiple of 256 are assumed to share the same font metric information,
526hence the \.{TFM} file contains only residues of character codes modulo~256.
527This convention is intended for oriental languages, when there are many
528character shapes but few distinct widths.)
529@^oriental characters@>@^Chinese characters@>@^Japanese characters@>
530
531The character escapements (|dx,dy|) are the values of \MF's \&{chardx}
532and \&{chardy} parameters; they are in units of |scaled| pixels;
533i.e., |dx| is in horizontal pixel units times $2^{16}$, and |dy| is in
534vertical pixel units times $2^{16}$.  This is the intended amount of
535displacement after typesetting the character; for \.{DVI} files, |dy|
536should be zero, but other document file formats allow nonzero vertical
537escapement.
538
539The character width~|w| duplicates the information in the \.{TFM} file; it
540is $2^{24}$ times the ratio of the true width to the font's design size.
541
542The backpointer |p| points to the character's |boc|, or to the first of
543a sequence of consecutive \\{xxx} or |yyy| or |no_op| commands that
544immediately precede the |boc|, if such commands exist; such ``special''
545commands essentially belong to the characters, while the special commands
546after the final character belong to the postamble (i.e., to the font
547as a whole). This convention about |p| applies also to the backpointers
548in |boc| commands, even though it wasn't explained in the description
549of~|boc|. @^backpointers@>
550
551Pointer |p| might be |-1| if the character exists in the \.{TFM} file
552but not in the \.{GF} file. This unusual situation can arise in \MF\ output
553if the user had |proofing<0| when the character was being shipped out,
554but then made |proofing>=0| in order to get a \.{GF} file.
555
556@ The last part of the postamble, following the |post_post| byte that
557signifies the end of the character locators, contains |q|, a pointer to the
558|post| command that started the postamble.  An identification byte, |i|,
559comes next; this currently equals~131, as in the preamble.
560
561The |i| byte is followed by four or more bytes that are all equal to
562the decimal number 223 (i.e., @'337 in octal). \MF\ puts out four to seven of
563these trailing bytes, until the total length of the file is a multiple of
564four bytes, since this works out best on machines that pack four bytes per
565word; but any number of 223's is allowed, as long as there are at least four
566of them. In effect, 223 is a sort of signature that is added at the very end.
567@^Fuchs, David Raymond@>
568
569This curious way to finish off a \.{GF} file makes it feasible for
570\.{GF}-reading programs to find the postamble first, on most computers,
571even though \MF\ wants to write the postamble last. Most operating
572systems permit random access to individual words or bytes of a file, so
573the \.{GF} reader can start at the end and skip backwards over the 223's
574until finding the identification byte. Then it can back up four bytes, read
575|q|, and move to byte |q| of the file. This byte should, of course,
576contain the value 248 (|post|); now the postamble can be read, so the
577\.{GF} reader can discover all the information needed for individual
578characters.
579
580Unfortunately, however, standard \PASCAL\ does not include the ability to
581@^system dependencies@>
582access a random position in a file, or even to determine the length of a file.
583Almost all systems nowadays provide the necessary capabilities, so \.{GF}
584format has been designed to work most efficiently with modern operating
585systems.  \.{GFtoPK} first reads the postamble, and then scans the file from
586front to back.
587
588@* Packed file format.
589The packed file format is a compact representation of the data contained in a
590\.{GF} file.  The information content is the same, but packed (\.{PK}) files
591are almost always less than half the size of their \.{GF} counterparts.  They
592are also easier to convert into a raster representation because they do not
593have a profusion of \\{paint}, \\{skip}, and \\{new\_row} commands to be
594separately interpreted.  In addition, the \.{PK} format expressly forbids
595\&{special} commands within a character.  The minimum bounding box for each
596character is explicit in the format, and does not need to be scanned for as in
597the \.{GF} format.  Finally, the width and escapement values are combined with
598the raster information into character ``packets'', making it simpler in many
599cases to process a character.
600
601A \.{PK} file is organized as a stream of 8-bit bytes.  At times, these bytes
602might be split into 4-bit nybbles or single bits, or combined into multiple
603byte parameters.  When bytes are split into smaller pieces, the `first' piece
604is always the most significant of the byte.  For instance, the first bit of
605a byte is the bit with value 128; the first nybble can be found by dividing
606a byte by 16.  Similarly, when bytes are combined into multiple byte
607parameters, the first byte is the most significant of the parameter.  If the
608parameter is signed, it is represented by two's-complement notation.
609
610The set of possible eight-bit values is separated into two sets, those that
611introduce a character definition, and those that do not.  The values that
612introduce a character definition comprise the range from 0 to 239; byte values
613above 239 are interpreted as commands.  Bytes which introduce character
614definitions are called flag bytes, and various fields within the byte indicate
615various things about how the character definition is encoded.  Command bytes
616have zero or more parameters, and can never appear within a character
617definition or between parameters of another command, where they would be
618interpreted as data.
619
620A \.{PK} file consists of a preamble, followed by a sequence of one or more
621character definitions, followed by a postamble.  The preamble command must
622be the first byte in the file, followed immediately by its parameters.
623Any number of character definitions may follow, and any command but the
624preamble command and the postamble command may occur between character
625definitions.  The very last command in the file must be the postamble.
626
627@ The packed file format is intended to be easy to read and interpret by
628device drivers.  The small size of the file reduces the input/output overhead
629each time a font is defined.  For those drivers that load and save each font
630file into memory, the small size also helps reduce the memory requirements.
631The length of each character packet is specified, allowing the character raster
632data to be loaded into memory by simply counting bytes, rather than
633interpreting each command; then, each character can be interpreted on a demand
634basis.  This also makes it possible for a driver to skip a particular
635character quickly if it knows that the character is unused.
636
637@ First, the command bytes shall be presented; then the format of the
638character definitions will be defined.  Eight of the possible sixteen
639commands (values 240 through 255) are currently defined; the others are
640reserved for future extensions.  The commands are listed below.  Each command
641is specified by its symbolic name (e.g., \\{pk\_no\_op}), its opcode byte,
642and any parameters.  The parameters are followed by a bracketed number
643telling how many bytes they occupy, with the number preceded by a plus sign if
644it is a signed quantity.  (Four byte quantities are always signed, however.)
645
646\yskip\hang|pk_xxx1| 240 |k[1]| |x[k]|.  This command is undefined in general;
647it functions as a $(k+2)$-byte \\{no\_op} unless special \.{PK}-reading
648programs are being used.  \MF\ generates \\{xxx} commands when encountering
649a \&{special} string.  It is recommended that |x| be a string having the form
650of a keyword followed by possible parameters relevant to that keyword.
651
652\yskip\hang\\{pk\_xxx2} 241 |k[2]| |x[k]|.  Like |pk_xxx1|, but |0<=k<65536|.
653
654\yskip\hang\\{pk\_xxx3} 242 |k[3]| |x[k]|.  Like |pk_xxx1|, but
655|0<=k<@t$2^{24}$@>|.  \MF\ uses this when sending a \&{special} string whose
656length exceeds~255.
657
658\yskip\hang\\{pk\_xxx4} 243 |k[4]| |x[k]|.  Like |pk_xxx1|, but |k| can be
659ridiculously large; |k| mustn't be negative.
660
661\yskip\hang|pk_yyy| 244 |y[4]|.  This command is undefined in general; it
662functions as a five-byte \\{no\_op} unless special \.{PK} reading programs
663are being used.  \MF\ puts |scaled| numbers into |yyy|'s, as a result of
664\&{numspecial} commands; the intent is to provide numeric parameters to
665\\{xxx} commands that immediately precede.
666
667\yskip\hang|pk_post| 245.  Beginning of the postamble.  This command is
668followed by enough |pk_no_op| commands to make the file a multiple
669of four bytes long.  Zero through three bytes are usual, but any number
670is allowed.
671This should make the file easy to read on machines which pack four bytes to
672a word.
673
674\yskip\hang|pk_no_op| 246.  No operation, do nothing.  Any number of
675|pk_no_op|'s may appear between \.{PK} commands, but a |pk_no_op| cannot be
676inserted between a command and its parameters, between two parameters, or
677inside a character definition.
678
679\yskip\hang|pk_pre| 247 |i[1]| |k[1]| |x[k]| |ds[4]| |cs[4]| |hppp[4]|
680|vppp[4]|.  Preamble command.  Here, |i| is the identification byte of the
681file, currently equal to 89.  The string |x| is merely a comment, usually
682indicating the source of the \.{PK} file.  The parameters |ds| and |cs| are
683the design size of the file in $1/2^{20}$ points, and the checksum of the
684file, respectively.  The checksum should match the \.{TFM} file and the
685\.{GF} files for this font.  Parameters |hppp| and |vppp| are the ratios
686of pixels per point, horizontally and vertically, multiplied by $2^{16}$; they
687can be used to correlate the font with specific device resolutions,
688magnifications, and ``at sizes''.  Usually, the name of the \.{PK} file is
689formed by concatenating the font name (e.g., cmr10) with the resolution at
690which the font is prepared in pixels per inch multiplied by the magnification
691factor, and the letters \.{PK}.  For instance, cmr10 at 300 dots per inch
692should be named CMR10.300PK; at one thousand dots per inch and magstephalf,
693it should be named CMR10.1095PK.
694
695@ We put a few of the above opcodes into definitions for symbolic use by
696this program.
697
698@d pk_id = 89 {identification byte of the \.{PK} version described here}
699@d pk_xxx1 = 240 {first of the four \&{special} opcodes, 240 through 243}
700@d pk_yyy = 244 {\&{numspecial} command, followed by a four-byte value}
701@d pk_post = 245 {beginning of the postamble}
702@d pk_no_op = 246 {no operation}
703@d pk_pre = 247 {preamble command, expected as the first byte of the file}
704
705@ The \.{PK} format has two conflicting goals; to pack character raster and
706size information as compactly as possible, while retaining ease of translation
707into raster and other forms.  A suitable compromise was found in the use of
708run-encoding of the raster information.  Instead of packing the individual
709bits of the character, we instead count the number of consecutive `black' or
710`white' pixels in a horizontal raster row, and then encode this number.  Run
711counts are found for each row, from the top of the character to the bottom.
712This is essentially the way the \.{GF} format works.
713Instead of presenting each row individually, however, let us concatenate all
714of the horizontal raster rows into one long string of pixels, and encode this
715row.  With knowledge of the width of the bit-map, the original character glyph
716can be easily reconstructed.  In addition, we do not need special commands to
717mark the end of one row and the beginning of the next.
718
719Next, let us put the burden of finding the minimum bounding box on the part
720of the font generator, since the characters will usually be used much more
721often than they are generated.  The minimum bounding box is the smallest
722rectangle which encloses all `black' pixels of a character.  Let us also
723eliminate the need for a special end of character marker, by supplying
724exactly as many bits as are required to fill the minimum bounding box, from
725which the end of the character is implicit.
726
727Let us next consider the distribution of the run counts.  Analysis of several
728dozen pixel files at 300 dots per inch yields a distribution peaking at four,
729falling off slowly until ten, then a bit more steeply until twenty, and then
730asymptotically approaching the horizontal.  Thus, the great majority of our
731run counts will fit in a four-bit nybble.  The eight-bit byte is attractive for
732our run-counts, as it is the standard on many systems; however, the wasted four
733bits in the majority of cases seems a high price to pay.  Another possibility
734is to use a Huffman-type encoding scheme with a variable number of bits for
735each run-count; this was rejected because of the overhead in fetching and
736examining individual bits in the file.  Thus, the character raster definitions
737in the \.{PK} file format are based on the four-bit nybble.
738
739@ The analysis of the pixel files yielded another interesting statistic: fully
74037\char`\%\
741of the raster rows were duplicates of the previous row.  Thus, the \.{PK}
742format allows the specification of repeat counts, which indicate how many times
743a horizontal raster row is to be repeated.  These repeated rows are taken out
744of the character glyph before individual rows are concatenated into the long
745string of pixels.
746
747For elegance, we disallow a run count of zero.  The case of a null raster
748description should be gleaned from the character width and height being equal
749to zero, and no raster data should be read.  No other zero counts are ever
750necessary.  Also, in the absence of repeat counts, the repeat value is set to
751be zero (only the original row is sent.)  If a repeat count is seen, it takes
752effect on the current row.  The current row is defined as the row on which the
753first pixel of the next run count will lie.  The repeat count is set back to
754zero when the last pixel in the current row is seen, and the row is sent out.
755
756This poses a problem for entirely black and entirely white rows, however.  Let
757us say that the current row ends with four white pixels, and then we have five
758entirely empty rows, followed by a black pixel at the beginning of the next
759row, and the character width is ten pixels.  We would like to use a repeat
760count, but there is no legal place to put it.  If we put it before the white
761run count, it will apply to the current row.  If we put it after, it applies
762to the row with the black pixel at the beginning.  Thus, entirely white or
763entirely black repeated rows are always packed as large run counts (in this
764case, a white run count of 54) rather than repeat counts.
765
766@ Now let us turn our attention to the actual packing of the run counts and
767repeat counts into nybbles.  There are only sixteen possible nybble values.
768We need to indicate run counts and repeat counts.  Since the run counts are
769much more common, we will devote the majority of the nybble values to them.
770We therefore indicate a repeat count by a nybble of 14 followed by a packed
771number, where a packed number will be explained later.  Since the repeat
772count value of one is so common, we indicate a repeat one command by a single
773nybble of 15.  A 14 followed by the packed number 1 is still legal for a
774repeat one count, however.  The run counts are coded directly as packed
775numbers.
776
777For packed numbers, therefore, we have the nybble values 0 through 13.  We
778need to represent the positive integers up to, say, $2^{31}-1$.  We would
779like the more common smaller numbers to take only one or two nybbles, and
780the infrequent large numbers to take three or more.  We could therefore
781allocate one nybble value to indicate a large run count taking three or more
782nybbles.  We do this with the value 0.
783
784@ We are left with the values 1 through 13.  We can allocate some of these, say
785|dyn_f|, to be one-nybble run counts.
786These will work for the run counts |1..dyn_f|.  For subsequent run
787counts, we will use a nybble greater than |dyn_f|, followed by a second nybble,
788whose value can run from 0 through 15.  Thus, the two-nybble values will
789run from |dyn_f+1..(13-dyn_f)*16+dyn_f|.  We have our definition of large run
790count values now, being all counts greater than |(13-dyn_f)*16+dyn_f|.
791
792We can analyze our several dozen pixel files and determine an optimal value of
793|dyn_f|, and use this value for all of the characters.  Unfortunately, values
794of |dyn_f| that pack small characters well tend to pack the large characters
795poorly, and values that pack large characters well are not efficient for the
796smaller characters.  Thus, we choose the optimal |dyn_f| on a character basis,
797picking the value which will pack each individual character in the smallest
798number of nybbles.  Legal values of |dyn_f| run from 0 (with no one-nybble run
799counts) to 13 (with no two-nybble run counts).
800
801@ Our only remaining task in the coding of packed numbers is the large run
802counts.  We use a scheme suggested by D.~E.~Knuth
803@^Knuth, D.~E.@>
804which will simply and elegantly represent arbitrarily large values.  The
805general scheme to represent an integer |i| is to write its hexadecimal
806representation, with leading zeros removed.  Then we count the number of
807digits, and prepend one less than that many zeros before the hexadecimal
808representation.  Thus, the values from one to fifteen occupy one nybble;
809the values sixteen through 255 occupy three, the values 256 through 4095
810require five, etc.
811
812For our purposes, however, we have already represented the numbers one
813through |(13-dyn_f)*16+dyn_f|.  In addition, the one-nybble values have
814already been taken by our other commands, which means that only the values
815from sixteen up are available to us for long run counts.  Thus, we simply
816normalize our long run counts, by subtracting |(13-dyn_f)*16+dyn_f+1| and
817adding 16, and then representing the result according to the scheme above.
818
819@ The final algorithm for decoding the run counts based on the above scheme
820might look like this, assuming a procedure called |get_nyb| is available
821to get the next nybble from the file, and assuming that the global
822|repeat_count| indicates whether a row needs to be repeated.  Note that this
823routine is recursive, but since a repeat count can never directly follow
824another repeat count, it can only be recursive to one level.
825
826@<Packed number procedure@>=
827function pk_packed_num : integer ; {next run count; may set |repeat_count|}
828var i, j : integer ; {|i|: first nybble, then digit counter; |j|: the value}
829begin
830   i := get_nyb ;
831   if i = 0 then begin {long form: zeros announce how many hex digits follow}
832      repeat j := get_nyb ; incr(i) ; until j <> 0 ; {|j| is the leading digit}
833      while i > 0 do begin j := j * 16 + get_nyb ; decr(i) ; end ;
834      pk_packed_num := j - 15 + (13-dyn_f)*16 + dyn_f ; {undo the normalization}
835   end else if i <= dyn_f then
836      pk_packed_num := i {one-nybble count}
837   else if i < 14 then
838      pk_packed_num := (i-dyn_f-1)*16+get_nyb+dyn_f+1 {two-nybble count}
839   else begin {14 or 15: a repeat count precedes the run count}
840      if i = 14 then
841         repeat_count := pk_packed_num {recursive call reads the packed count}
842      else
843         repeat_count := 1 ;
844      pk_packed_num := pk_packed_num ; {recursive call reads the run count}
845   end ;
846end ;
847
848@ For low resolution fonts, or characters with `gray' areas, run encoding can
849often make the character many times larger.  Therefore, for those characters
850that cannot be encoded efficiently with run counts, the \.{PK} format allows
851bit-mapping of the characters.  This is indicated by a |dyn_f| value of
85214.  The bits are packed tightly, by concatenating all of the horizontal raster
853rows into one long string, and then packing this string eight bits to a byte.
854The number of bytes required can be calculated by |(width*height+7) div 8|.
855This format should only be used when packing the character by run counts takes
856more bytes than this, although, of course, it is legal for any character.
857Any extra bits in the last byte should be set to zero.
858
859@ At this point, we are ready to introduce the format for a character
860descriptor.  It consists of three parts: a flag byte, a character preamble,
861and the raster data.  The most significant four bits of the flag byte
862yield the |dyn_f| value for that character.  (Notice that only values of
8630 through 14 are legal for |dyn_f|, with 14 indicating a bit mapped character;
864thus, the flag bytes do not conflict with the command bytes, whose upper nybble
865is always 15.)  The next bit (with weight 8) indicates whether the first run
866count is a black count or a white count, with a one indicating a black count.
867For bit-mapped characters, this bit should be set to a zero.  The next bit
868(with weight 4) indicates whether certain later parameters (referred to as size
869parameters) are given in one-byte or two-byte quantities, with a one indicating
870that they are in two-byte quantities.  The last two bits are concatenated on to
871the beginning of the length parameter in the character preamble, which will be
872explained below.
873
874However, if the last three bits of the flag byte are all set (normally
875indicating that the size parameters are two-byte values and that a 3 should be
876prepended to the length parameter), then a long format of the character
877preamble should be used instead of one of the short forms.
878
879Therefore, there are three formats for the character preamble, and which one
880is used depends on the least significant three bits of the flag byte.  If the
881least significant three bits are in the range zero through three, the short
882format is used.  If they are in the range four through six, the extended short
883format is used.  Otherwise, if the least significant bits are all set, then
884the long form of the character preamble is used.  The preamble formats are
885explained below.
886
887\yskip\hang Short form: |flag[1]| |pl[1]| |cc[1]| |tfm[3]| |dm[1]| |w[1]|
888|h[1]| |hoff[+1]| |voff[+1]|.
889If this format of the character preamble is used, the above
890parameters must all fit in the indicated number of bytes, signed or unsigned
891as indicated.  Almost all of the standard \TeX\ font characters fit; the few
892exceptions are fonts such as \.{aminch}.
893
894\yskip\hang Extended short form: |flag[1]| |pl[2]| |cc[1]| |tfm[3]| |dm[2]|
895|w[2]| |h[2]| |hoff[+2]| |voff[+2]|.  Larger characters use this extended
896format.
897
898\yskip\hang Long form: |flag[1]| |pl[4]| |cc[4]| |tfm[4]| |dx[4]| |dy[4]|
899|w[4]| |h[4]| |hoff[4]| |voff[4]|.  This is the general format which
900allows all of the
901parameters of the \.{GF} file format, including vertical escapement.
902\vskip\baselineskip
903The |flag| parameter is the flag byte.  The parameter |pl| (packet length)
904contains the offset
905of the byte following this character descriptor, with respect to the beginning
906of the |tfm| width parameter.  This is given so a \.{PK} reading program can,
907once it has read the flag byte, packet length, and character code (|cc|), skip
908over the character by simply reading this many more bytes.  For the two short
909forms of the character preamble, the last two bits of the flag byte should be
910considered the two most-significant bits of the packet length.  For the short
911format, the true packet length might be calculated as |(flag mod 4)*256+pl|;
912for the extended format, it might be calculated as |(flag mod 4)*65536+pl|.
913
914The |w| parameter is the width and the |h| parameter is the height in pixels
915of the minimum bounding box.  The |dx| and |dy| parameters are the horizontal
916and vertical escapements, respectively.  In the short formats, |dy| is assumed
917to be zero and |dm| is |dx| but in pixels;
918in the long format, |dx| and |dy| are both
919in pixels multiplied by $2^{16}$.  The |hoff| is the horizontal offset from the
920upper left pixel to the reference pixel; the |voff| is the vertical offset.
921They are both given in pixels, with right and down being positive.  The
922reference pixel is the pixel which occupies the unit square in \MF; the
923\MF\ reference point is the lower left hand corner of this pixel.  (See the
924example below.)
925
926@ \TeX\ requires that all characters which have the same character codes
927modulo 256 also have the same |tfm| widths, and escapement values.  The \.{PK}
928format does not itself make this a requirement, but in order for the font to
929work correctly with the \TeX\ software, this constraint should be observed.
930
931
932Following the character preamble is the raster information for the
933character, packed by run counts or by bits, as indicated by the flag byte.
934If the character is packed by run counts and the required number of nybbles
935is odd, then the last byte of the raster description should have a zero
936for its least significant nybble.
937
938@ As an illustration of the \.{PK} format, the character \char4\ from the font
939amr10 at 300 dots per inch will be encoded.  (Note: amr fonts are obsolete,
940and the reference to this character is retained from an older version of
941the Computer Modern fonts solely for illustration.) This character was chosen
942because it illustrates some
943of the borderline cases.  The raster for the character looks like this (the
944row numbers are chosen for convenience, and are not \MF's row numbers.)
945
946\vskip\baselineskip
947\centerline{\vbox{\baselineskip=10pt
948\halign{\hfil#\quad&&\hfil#\hfil\cr
9490& & &M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M\cr
9501& & &M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M\cr
9512& & &M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M\cr
9523& & &M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M\cr
9534& & &M&M& & & & & & & & & & & & & & & & &M&M\cr
9545& & &M&M& & & & & & & & & & & & & & & & &M&M\cr
9556& & &M&M& & & & & & & & & & & & & & & & &M&M\cr
9567\cr
9578\cr
9589& & & & &M&M& & & & & & & & & & & & &M&M& & \cr
95910& & & & &M&M& & & & & & & & & & & & &M&M& & \cr
96011& & & & &M&M& & & & & & & & & & & & &M&M& & \cr
96112& & & & &M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M& & \cr
96213& & & & &M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M& & \cr
96314& & & & &M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M& & \cr
96415& & & & &M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M& & \cr
96516& & & & &M&M& & & & & & & & & & & & &M&M& & \cr
96617& & & & &M&M& & & & & & & & & & & & &M&M& & \cr
96718& & & & &M&M& & & & & & & & & & & & &M&M& & \cr
96819\cr
96920\cr
97021\cr
97122& & &M&M& & & & & & & & & & & & & & & & &M&M\cr
97223& & &M&M& & & & & & & & & & & & & & & & &M&M\cr
97324& & &M&M& & & & & & & & & & & & & & & & &M&M\cr
97425& & &M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M\cr
97526& & &M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M\cr
97627& & &M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M\cr
97728&*& &M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M&M\cr
978&\hphantom{M}&\hphantom{M}\cr
979}}}
980The width of the minimum bounding box for this character is 20; its height
981is 29.  The `*' represents the reference pixel; notice how it lies outside the
982minimum bounding box.  The |hoff| value is $-2$, and the |voff| is~28.
983
984The first task is to calculate the run counts and repeat counts.  The repeat
985counts are placed at the first transition (black to white or white to black)
986in a row, and are enclosed in brackets.  White counts are enclosed in
987parentheses.  It is relatively easy to generate the counts list:
988\vskip\baselineskip
989\centerline{82 [2] (16) 2 (42) [2] 2 (12) 2 (4) [3]}
990\centerline{16 (4) [2] 2 (12) 2 (62) [2] 2 (16) 82}
991\vskip\baselineskip
992Note that any duplicated rows that are not all white or all black are removed
993before the repeat counts are calculated.  The rows thus removed are rows 5, 6,
99410, 11, 13, 14, 15, 17, 18, 23, and 24.
995
996@ The next step in the encoding of this character is to calculate the optimal
997value of |dyn_f|.  The details of how this calculation is done are not
998important here; suffice it to say that there is a simple algorithm which in one
999pass over the count list can determine the best value of |dyn_f|.  For this
1000character, the optimal value turns out to be 8 (atypically low).  Thus, all
1001count values less than or equal to 8 are packed in one nybble; those from
1002nine to $(13-8)*16+8$ or 88 are packed in two nybbles.  The run encoded values
1003now become (in hex, separated according to the above list):
1004\vskip\baselineskip
1005\centerline{\tt D9 E2 97 2 B1 E2 2 93 2 4 E3}
1006\centerline{\tt 97 4 E2 2 93 2 C5 E2 2 97 D9}
1007\vskip\baselineskip\noindent
1008which comes to 36 nybbles, or 18 bytes.  This is shorter than the 73 bytes
1009required for the bit map, so we use the run count packing.
1010
1011@ The short form of the character preamble is used because all of the
1012parameters fit in their respective lengths.  The packet length is therefore
101318 bytes for the raster, plus
1014eight bytes for the character preamble parameters following the character
1015code, or 26.  The |tfm| width for this character is 640796, or {\tt 9C71C} in
1016hexadecimal.  The horizontal escapement is 25 pixels.  The flag byte is
101788 hex, indicating the short preamble, the black first count, and the
1018|dyn_f| value of 8.  The final total character packet, in hexadecimal, is:
1019\vskip\baselineskip
1020$$\vbox{\halign{\hfil #\quad&&{\tt #\ }\cr
1021Flag byte&88\cr
1022Packet length&1A\cr
1023Character code&04\cr
1024|tfm| width&09&C7&1C\cr
1025Horizontal escapement (pixels)&19\cr
1026Width of bit map&14\cr
1027Height of bit map&1D\cr
1028Horizontal offset (signed)&FE\cr
1029Vertical offset&1C\cr
1030Raster data&D9&E2&97\cr
1031&2B&1E&22\cr
1032&93&24&E3\cr
1033&97&4E&22\cr
1034&93&2C&5E\cr
1035&22&97&D9\cr}}$$
1036
1037@ This format was written by Tomas Rokicki in August, 1985.
1038
1039@* Input and output.
1040There are two types of files that this program must deal with---standard
1041text files and files of bytes (packed files and generic font files.)
1042For our purposes, we shall consider an eight-bit byte to consist of the
1043values |0..255|.  If your system does not pack these values to a byte, it is
1044no major difficulty; you must only insure that the input function
1045|pk_byte| can read packed bytes, and that the output function |gf_byte|
1046packs the bytes to be shipped.
1047
1048@<Types...@>=
1049@!eight_bits=0..255; {packed file byte}
1050@!byte_file=packed file of eight_bits ; {a file of such bytes}
1051@^system dependencies@>
1052
1053@ @<Glob...@>=
1054@!gf_file,@!pk_file:byte_file;  {the \.{GF} output and \.{PK} input streams}
1055@^system dependencies@>
1056
1057@ To prepare these files for input, we |reset| them. An extension of
1058\PASCAL\ is needed in the case of |gf_file|, since we want to associate
1059it with external files whose names are specified dynamically (i.e., not
1060known at compile time). The following code assumes that `|reset(f,s)|'
1061does this, when |f| is a file variable and |s| is a string variable that
1062specifies the file name. If |eof(f)| is true immediately after
1063|reset(f,s)| has acted, we assume that no file named |s| is accessible.
1064@^system dependencies@>
1065
1066@p procedure open_gf_file; {prepares to write packed bytes in a |gf_file|}
1067begin rewrite(gf_file,gf_name); {assumes |rewrite(f,s)| names the file as |reset(f,s)| does}
1068gf_loc := 0 ; {no bytes have been written yet}
1069end;
1070@#
1071procedure open_pk_file; {prepares the input for reading}
1072begin reset(pk_file,pk_name);
1073pk_loc := 0 ; {no bytes have been read yet}
1074end;
1075
1076@ We need a place to store the names of the input and output files, as well
1077as a byte counter for each of the two files.
1078
1079@<Glob...@>=
1080@!gf_name,@!pk_name:packed array[1..name_length] of char; {names of output
1081    and input files}
1082@!gf_loc, @!pk_loc:integer; {bytes written to |gf_file| and read from |pk_file|}
1083
1084@ We need a procedure that will write a byte to the \.{GF} file.  If the
1085particular system
1086@^system dependencies@>
1087requires buffering, here is the place to do it.
1088
1089@p procedure gf_byte (i : integer) ; {|i| is expected to lie in |0..255|}
1090begin gf_file^ := i ;
1091put(gf_file) ;
1092incr(gf_loc) ; {keep the output byte count current}
1093end;
1094
1095@ We also need a function that will get a single byte from the \.{PK} file.
1096Again, buffering may be done in this procedure.
1097
1098@p function pk_byte : eight_bits ; {fetches the next byte of the \.{PK} file}
1099var temp : eight_bits ; {the byte waiting in the file buffer}
1100begin
1101   temp := pk_file^ ;
1102   get(pk_file) ;
1103   incr(pk_loc) ; {keep the input byte count current}
1104   pk_byte := temp ;
1105end ;
1106
1107@ Now we are ready to open the files and write the identification of the
1108pixel file.
1109
1110@<Open files@>=
1111open_pk_file ; {input: the packed file}
1112open_gf_file {output: the generic font file}
1113
1114@ As we are reading the packed file, we often need to fetch signed and
1115unsigned quantities of 8, 16, and 32 bits.  Here we have four functions to do this.
1116
1117@p function signed_byte : integer ; {one byte as a value in |-128..127|}
1118var b : integer ;
1119begin
1120   b := pk_byte ;
1121   if b >= 128 then
1122      signed_byte := b - 256
1123   else signed_byte := b ;
1124end ;
1125@#
1126function get_16 : integer ; {two bytes, unsigned, high-order byte first}
1127var hi : integer ;
1128begin
1129   hi := 256 * pk_byte ;
1130   get_16 := hi + pk_byte ;
1131end ;
1132@#
1133function signed_16 : integer ; {two bytes, two's complement}
1134var hi : integer ;
1135begin
1136   hi := 256 * signed_byte ; {the sign lives in the high-order byte}
1137   signed_16 := hi + pk_byte ;
1138end ;
1139@#
1140function get_32 : integer ; {four bytes, two's complement}
1141var hi : integer ;
1142begin
1143   hi := get_16 ;
1144   if hi >= 32768 then hi := hi - 65536 ; {sign-extend the upper half}
1145   get_32 := hi * 65536 + get_16 ;
1146end ;
1147
1148@ As we are writing the \.{GF} file, we often need to write signed and
1149unsigned, one, two, three, and four-byte values.  These routines give
1150us that capability.
1151
1152@p procedure gf_sbyte(i : integer) ; {one byte, two's complement}
1153begin
1154   if i >= 0 then
1155      gf_byte(i)
1156   else gf_byte(i + 256) ; {a negative |i| is sent as |i+256|}
1157end ;
1158@#
1159procedure gf_16(i : integer) ; {two bytes, high-order byte first}
1160begin
1161   gf_byte(i div 256) ;
1162   gf_byte(i mod 256) ;
1163end ;
1164@#
1165procedure  gf_24(i : integer) ; {three bytes, high-order byte first}
1166begin
1167   gf_byte(i div 65536) ;
1168   gf_16(i mod 65536) ; {the two low-order bytes}
1169end ;
1170@#
1171procedure gf_quad(i : integer) ; {four bytes, two's complement}
1172begin
1173   if i < 0 then begin {add $2^{31}$ in two steps, each within range}
1174      i := (i + 1073741824) + 1073741824 ;
1175      gf_byte((i div 16777216) + 128) ; {put the sign bit back}
1176   end else begin
1177      gf_byte(i div 16777216) ;
1178   end ;
1179   gf_24(i mod 16777216) ; {the three low-order bytes}
1180end ;
1181
1182@* Character unpacking.
1183Now we deal with unpacking characters into the \.{GF} representation.
1184
1185@<Unpack and write character@>=
1186dyn_f := flag_byte div 16 ; {upper nybble: the packing parameter}
1187turn_on := (flag_byte mod 16) >= 8 ; {weight-8 bit: first run count is black}
1188flag_byte := flag_byte mod 8 ; {what remains selects the preamble format}
1189if flag_byte <= 3 then
1190   @<Read short character preamble@>
1191else if flag_byte <> 7 then
1192   @<Read extended short character preamble@>
1193else
1194   @<Read long character preamble@> ;
1195@<Calculate and check |min_m|, |max_m|, |min_n|, and |max_n|@> ;
1196@<Save character locator@> ;
1197@<Write character preamble@> ;
1198@<Read and translate raster description@> ;
1199gf_byte(eoc) ;
1200last_eoc := gf_loc ; {remember where this character ended}
1201if pk_loc <> end_of_packet then {the raster must stop exactly at the packet end}
1202   abort('Bad pk file!  Bad packet length.')
1203
1204@ We need a whole lot of globals used but not defined up there.
1205
1206@<Glob...@>=
1207@!i, @!j : integer ; {general-purpose indices and scratch values}
1208@!end_of_packet : integer ; {value |pk_loc| should have once the packet is read}
1209@!dyn_f : integer ; {packing parameter from the flag byte; 14 means a bit map}
1210@!car : integer ; {code of the character we are reading}
1211@!tfm_width : integer ; {the TFM width of the current character}
1212@!x_off, @!y_off : integer ; {|hoff| and |voff| of the current character}
1213
1214@ Now we read and check the preamble of the \.{PK} file.  In the preamble, we
1215find the |hppp|, |design_size|, |checksum|.  We write the relevant parameters
1216to the \.{GF} file, including the preamble comment.
1217
1218@<Read preamble@>=
1219if pk_byte <> pk_pre then abort('Bad pk file!  pre command missing.') ;
1220gf_byte(pre) ;
1221if pk_byte <> pk_id then abort('Wrong version of packed file!.') ;
1222gf_byte(gf_id_byte) ;
1223j := pk_byte ; {length of the \.{PK} comment}
1224for i := 1 to j do hppp := pk_byte ; {skip it; |hppp| is only scratch here}
1225gf_byte(comm_length) ; {the \.{GF} file gets this program's own comment}
1226for i := 1 to comm_length do
1227   gf_byte(xord[comment[i]]) ;
1228design_size := get_32 ;
1229checksum := get_32 ;
1230hppp := get_32 ; vppp := get_32 ; {pixels per point, times $2^{16}$}
1231if hppp <> vppp then print_ln('Warning:  aspect ratio not 1:1!') ;
1232magnification := round(hppp * 72.27 * 5 / 65536) ; {five times pixels per inch}
1233last_eoc := gf_loc
1234
1235@ Of course, we need to define the above variables.
1236
1237@<Glob...@>=
1238@!comment : packed array[1..comm_length] of char ; {comment for the \.{GF} preamble}
1239@!magnification : integer ; {resolution at which pixel file is prepared}
1240@!design_size : integer ; {design size in \.{FIXes}}
1241@!checksum : integer ; {checksum of pixel file}
1242@!hppp, @!vppp : integer ; {horizontal and vertical pixels per point, times $2^{16}$}
1243
1244@ @<Set init...@>=
1245comment := preamble_comment ; {the text sent to the \.{GF} preamble}
1246
1247@ Now, the character preamble reading modules.  First, we have the general
1248case: the long character preamble format.
1249
1250@<Read long character preamble@>=
1251begin
1252   packet_length := get_32 ;
1253   car := get_32 ; {the character code takes four bytes here}
1254   end_of_packet := pk_loc + packet_length ; {|pl| counts from the |tfm| width}
1255   tfm_width := get_32 ;
1256   hor_esc := get_32 ; {|dx|}
1257   ver_esc := get_32 ; {|dy|}
1258   c_width := get_32 ;
1259   c_height := get_32 ;
1260   x_off := get_32 ; y_off := get_32 ;
1261   word_width := (c_width + 31) div 32 ; {32 pixels to a raster word}
1262end
1263
1264@ This module reads the character preamble with double byte parameters.
1265
1266@<Read extended short character preamble@>=
1267begin
1268   packet_length := get_16 + (flag_byte - 4) * 65536 ; {flag bits extend |pl|}
1269   car := pk_byte ;
1270   end_of_packet := pk_loc + packet_length ;
1271   i := pk_byte ; {high-order byte of the three-byte |tfm| width}
1272   tfm_width := get_16 + i * 65536 ;
1273   hor_esc := 65536 * get_16 ; {|dm| is in pixels; scale it like |dx|}
1274   ver_esc := 0 ;
1275   c_width := get_16 ;
1276   c_height := get_16 ;
1277   x_off := signed_16 ;
1278   y_off := signed_16 ;
1279   word_width := (c_width + 31) div 32 ; {32 pixels to a raster word}
1280end
1281
1282@ Here we read the most common character preamble, that with single byte
1283parameters.
1284
1285@<Read short character preamble@>=
1286begin
1287   packet_length := pk_byte + flag_byte * 256 ; {flag bits extend |pl|}
1288   car := pk_byte ;
1289   end_of_packet := pk_loc + packet_length ;
1290   i := pk_byte ; {high-order byte of the three-byte |tfm| width}
1291   tfm_width := get_16 + i * 65536 ;
1292   hor_esc := 65536 * pk_byte ; {|dm| is in pixels; scale it like |dx|}
1293   ver_esc := 0 ;
1294   c_width := pk_byte ;
1295   c_height := pk_byte ;
1296   x_off := signed_byte ;
1297   y_off := signed_byte ;
1298   word_width := (c_width + 31) div 32 ; {32 pixels to a raster word}
1299end
1300
1301@ Some more globals:
1302
1303@<Glob...@>=
1304@!c_height, @!c_width : integer ; {height and width of the current glyph in pixels}
1305@!word_width : integer ; {glyph width in 32-pixel raster words, rounded up}
1306@!hor_esc, @!ver_esc : integer ; {the character escapement}
1307@!packet_length : integer ; {the length of the packet in bytes}
1308@!last_eoc : integer ; {|gf_loc| after the most recent |eoc|, or after the preamble}
1309
1310@ The \.{GF} format requires the minimum and maximum |m| and |n|
1311values in the postamble, so we generate them here.  One thing
1312should be noted here:  the value |max_n-min_n+1| will be the
1313height of the character glyph, but for the width, you need to
1314use |max_m-min_m|, because of the peculiarities of the \.{GF}
1315format.
1316
1317@<Calculate and check |min_m|, |max_m|, |min_n|, and |max_n|@>=
1318if (c_width = 0) or (c_height = 0) then begin {treat the glyph as wholly empty}
1319   x_off := 0 ; y_off := 0 ; c_width := 0 ; c_height := 0 ;
1320end ;
1321min_m := - x_off ;
1322max_m := min_m + c_width ;
1323max_n := y_off ;
1324min_n := max_n - c_height + 1 ;
1325if max_n < min_n then {happens when |c_height<1|, e.g., for an empty glyph}
1326   min_n := max_n ;
1327if mmin_m > min_m then {now widen the extremes seen over all characters}
1328   mmin_m := min_m ;
1329if mmax_m < max_m then
1330   mmax_m := max_m ;
1331if mmin_n > min_n then
1332   mmin_n := min_n ;
1333if mmax_n < max_n then
1334   mmax_n := max_n
1335
1336@ We have to declare the variables which hold the bounding box.  We
1337also need the arrays that hold the back pointers to the characters,
1338the horizontal and vertical escapements, and the \.{TFM} widths.
1339
1340@<Glob...@>=
1341@!min_m, @!max_m, @!min_n, @!max_n : integer ; {bounds of the current character}
1342@!mmin_m, @!mmax_m, @!mmin_n, @!mmax_n : integer ; {extremes over all characters}
1343@!char_pointer, @!s_tfm_width : array [0..255] of integer ; {indexed by code mod 256}
1344@!s_hor_esc, @!s_ver_esc : array [0..255] of integer ; {saved escapements}
1345@!this_char_ptr : integer ; {value stored into |char_pointer| for this character}
1346
1347@ We initialize these bounding box values to be ridiculous, and say
1348that there were no characters seen yet.
1349
@<Set init...@>=
mmax_n := -999999 ; {any real maximum will exceed these}
mmax_m := -999999 ;
mmin_n := 999999 ; {and any real minimum will be below these}
mmin_m := 999999 ;
for i := 0 to 255 do
   char_pointer[i] := -1 ; {no character with this code has been seen}
1357
1358@ This module takes care of the simple job of writing the character
1359preamble, after picking one to fit.
1360
@<Write character preamble@>=
begin
   if (char_pointer[car mod 256] <> -1) or
      (car < 0) or (car > 255) or
      (max_m < 0) or (max_m > 255) or
      (max_n < 0) or (max_n > 255) or
      (min_m > max_m) or (min_n > max_n) or
      (max_m >= min_m + 256) or (max_n >= min_n + 256) then begin
      {long form: a back pointer is already recorded, or some value needs
       more than one byte}
      gf_byte(boc) ;
      gf_quad(car) ;
      gf_quad(char_pointer[car mod 256]) ; {the old back pointer goes out first}
      char_pointer[car mod 256] := this_char_ptr ;
      gf_quad(min_m) ;
      gf_quad(max_m) ;
      gf_quad(min_n) ;
      gf_quad(max_n) ;
   end else begin
      {short form: every field fits in a single byte}
      char_pointer[car mod 256] := this_char_ptr ;
      gf_byte(boc1) ;
      gf_byte(car) ;
      gf_byte(max_m - min_m) ;
      gf_byte(max_m) ;
      gf_byte(max_n - min_n) ;
      gf_byte(max_n) ;
   end ;
end
1387
1388@ In this routine we either save or check the current character
1389parameters.
1390
@<Save character locator@>=
begin
   i := car mod 256 ;
   if char_pointer[i] <> -1 then begin
      {this residue was seen before, so the saved parameters should agree}
      if (s_tfm_width[i] <> tfm_width) or
         (s_hor_esc[i] <> hor_esc) or
         (s_ver_esc[i] <> ver_esc) then
         print_ln('Two characters mod ', i:1,' have mismatched parameters') ;
   end else begin
      {first character with this residue: remember its parameters}
      s_tfm_width[i] := tfm_width ;
      s_hor_esc[i] := hor_esc ;
      s_ver_esc[i] := ver_esc ;
   end ;
end
1405
1406@ And another module to write out those character locators we have so
1407carefully saved up the information for.
1408
@<Write character locators@>=
for i := 0 to 255 do
   if char_pointer[i] <> -1 then begin
      if (s_ver_esc[i] <> 0) or (s_hor_esc[i] < 0) or
         (s_hor_esc[i] >= 16777216) or (s_hor_esc[i] mod 65536 <> 0) then begin
         {general form, with both escapements written in full}
         gf_byte(char_loc) ;
         gf_byte(i) ;
         gf_quad(s_hor_esc[i]) ;
         gf_quad(s_ver_esc[i]) ;
      end else begin
         {short form: no vertical escapement, and the horizontal one is a
          nonnegative multiple of 65536 below $2^{24}$}
         gf_byte(char_loc0) ;
         gf_byte(i) ;
         gf_byte(s_hor_esc[i] div 65536) ;
      end ;
      {both forms end with the width and the pointer to the character}
      gf_quad(s_tfm_width[i]) ;
      gf_quad(char_pointer[i]) ;
   end
1426
1427@ Now we have the most important part of the program, where we actually
1428interpret the commands in the raster description.  First of all, we need
1429a procedure to get a single nybble from the file, as well as one to get
1430a single bit.  We also use the |pk_packed_num| procedure defined in the
1431\.{PK} file description.
1432
@p function get_nyb : integer ;
var nyb : eight_bits ; {the nybble being extracted}
begin
   if bit_weight = 0 then begin {nothing is pending, so fetch another byte}
      input_byte := pk_byte ;
      bit_weight := 16 ;
   end ;
   nyb := input_byte div bit_weight ;
   input_byte := input_byte mod bit_weight ; {keep only what has not been used}
   bit_weight := bit_weight div 16 ;
   get_nyb := nyb ;
end ;
1445@#
function get_bit : boolean ;
begin
   bit_weight := bit_weight div 2 ; {step to the next lower bit}
   if bit_weight = 0 then begin {the byte is used up, so fetch another}
      input_byte := pk_byte ;
      bit_weight := 128 ;
   end ;
   if input_byte < bit_weight then
      get_bit := false
   else begin
      input_byte := input_byte - bit_weight ; {strip the bit just reported}
      get_bit := true ;
   end ;
end ;
1459@<Packed number procedure@>
1460
1461@ Now, the globals to help communication between these procedures, and a buffer
1462for the raster row counts.
1463
@<Glob...@>=
@!input_byte : eight_bits ; {what is left of the byte being taken apart}
@!bit_weight : eight_bits ; {weight of the current bit}
@!nybble : eight_bits ; {the current nybble}
@!row_counts : array [0..max_counts] of integer ;
     {the run counts of the row under construction}
@!rcp : integer ; {index of the next free entry of |row_counts|}
1471
1472@ Actually, if the character is a bit mapped character, then we
1473make it look like run counts by determining the appropriate
1474values ourselves.  Thus, we have a routine which gets the next
1475count value, below.
1476
@<Get next count value into |count|@>=
begin
   turn_on := not turn_on ; {each new run has the opposite color}
   if dyn_f <> 14 then
      count := pk_packed_num
   else begin
      {bitmap case: the run is measured by reading bits until the color
       changes or no bits remain}
      count := 1 ;
      done := false ;
      repeat
         if count_down <= 0 then
            done := true
         else if get_bit = turn_on then
            count := count + 1
         else
            done := true ;
         count_down := count_down - 1 ;
      until done ;
   end ;
end
1495
1496@ And the main procedure.
1497
@<Read and translate raster description@>=
if (c_width > 0) and (c_height > 0) then begin
   bit_weight := 0 ; {forces the next |get_bit| or |get_nyb| to fetch a fresh byte}
   count_down := c_height * c_width - 1 ; {pixel count less one; consulted only when |dyn_f=14|}
   if dyn_f = 14 then
      turn_on := get_bit ; {in the bitmap case the first bit gives the starting color}
   repeat_count := 0 ;
   x_to_go := c_width ;
   y_to_go := c_height ;
   cur_n := c_height ;
   count := 0 ;
   first_on := turn_on ;
   turn_on := not turn_on ; {fetching the first count toggles this back}
   rcp := 0 ;
   while y_to_go > 0 do begin
      if count = 0 then
         @<Get next count...@> ;
      if rcp = 0 then
         first_on := turn_on ; {remember the color of the first run of this row}
      while count >= x_to_go do begin {the current run reaches the end of the row}
         row_counts[rcp] := x_to_go ;
         count := count - x_to_go ;
         for i := 0 to repeat_count do begin {the row goes out |repeat_count+1| times}
            @<Output row@> ;
            y_to_go := y_to_go - 1 ;
         end ;
         repeat_count := 0 ;
         x_to_go := c_width ;
         rcp := 0 ;
         if (count > 0) then
            first_on := turn_on ; {what is left of the run opens the next row}
      end ;
      if count > 0 then begin {the run ends inside the row: store it and go on}
         row_counts[rcp] := count ;
         if rcp = 0 then
            first_on := turn_on ;
         rcp := rcp + 1 ;
         if rcp > max_counts then begin {the next store would fall outside |row_counts|}
            print_ln('A character had too many run counts') ;
            jump_out ;
         end ;
         x_to_go := x_to_go - count ;
         count := 0 ; {so that a new count is fetched on the next pass}
      end ;
   end ;
end
1544
1545@ This routine actually outputs a row to the \.{GF} file.
1546
@<Output row@>=
if (rcp > 0) or first_on then begin {a row that is a single white run writes nothing}
   j := 0 ; {index of the first count still to be painted}
   max := rcp ; {index of the last count to be painted}
   if not turn_on then
      max := max - 1 ; {the final run is white, so it is left out}
   if cur_n - y_to_go = 1 then begin {this row directly follows the last one written}
      if first_on then
         gf_byte(new_row_0)
      else if row_counts[0] < 165 then begin
         gf_byte(new_row_0 + row_counts[0]) ; {the leading white run rides along in the opcode}
         j := j + 1 ;
      end else
         gf_byte(skip0) ;
   end else if cur_n > y_to_go then begin {rows lie between; the operand is their number}
      if cur_n - y_to_go < 257 then begin
         gf_byte(skip1) ;
         gf_byte(cur_n - y_to_go - 1) ;
      end else begin
         gf_byte(skip1+1) ;
         gf_16(cur_n - y_to_go - 1) ;
      end ;
      if first_on then
         gf_byte(paint_0) ; {a zero count, since the row starts with black}
   end else if first_on then
      gf_byte(paint_0) ; {reached when |cur_n=y_to_go|}
   cur_n := y_to_go ; {remember which row was written last}
   while j <= max do begin {each remaining count uses the shortest paint form that holds it}
      if row_counts[j] < 64 then
         gf_byte(paint_0 + row_counts[j])
      else if row_counts[j] < 256 then begin
         gf_byte(paint1) ;
         gf_byte(row_counts[j]) ;
      end else begin
         gf_byte(paint1+1) ;
         gf_16(row_counts[j]) ;
      end ;
      j := j + 1 ;
   end ;
end
1587
@ Here we need the variable which counts down the number of bits, and
the current state flags.
1590
@<Glob...@>=
@!count_down : integer ; {bits of a bitmap still to be read}
@!done : boolean ; {has the current bitmap run ended?}
@!max : integer ; {index of the last run count to output for a row}
@!repeat_count : integer ; {how many extra times the next row is output}
@!x_to_go : integer ; {columns left in the current row}
@!y_to_go : integer ; {rows left in the current character}
@!turn_on : boolean ; {is the current run black?}
@!first_on : boolean ; {is the first run of the current row black?}
@!count : integer ; {how many bits of current color left?}
@!cur_n : integer ; {value of |y_to_go| when a row was last written}
1600
1601@ To finish the \.{GF} file, we write out a postamble, including the
1602character locators that we stored away.
1603
@<Write \.{GF} postamble@>=
j := gf_loc ; {where the |post| byte goes; written again after |post_post|}
gf_byte(post) ;
gf_quad(last_eoc) ;
gf_quad(design_size) ;
gf_quad(checksum) ;
gf_quad(hppp) ;
gf_quad(vppp) ;
gf_quad(mmin_m) ; {the extremes gathered over all characters}
gf_quad(mmax_m) ;
gf_quad(mmin_n) ;
gf_quad(mmax_n) ;
@<Write character locators@> ;
gf_byte(post_post) ;
gf_quad(j) ;
gf_byte(gf_id_byte) ;
for i := 0 to 3 do
   gf_byte(223) ; {four bytes of 223 are always written}
while gf_loc mod 4 <> 0 do
   gf_byte(223) {and more of them until |gf_loc| is a multiple of four}
1624
1625@ We need the |flag_byte| variable.
1626
@<Glob...@>=
@!flag_byte : integer ; {command or character flag byte, as last read by |skip_specials|}
1629
1630@ Another necessary procedure skips over any specials between characters
1631and before and after the postamble.  (It echoes the specials exactly.)
1632
@p procedure skip_specials ;
var spec_len, idx, b : integer ; {length of a special, loop index, current byte}
begin
   this_char_ptr := gf_loc ; {specials written here belong to the next character}
   repeat
      flag_byte := pk_byte ;
      if (flag_byte >= 240) and (flag_byte <= 243) then begin
         {a special whose length takes |flag_byte-239| bytes; the command,
          the length and the contents are all copied}
         gf_byte(flag_byte-1) ;
         spec_len := 0 ;
         for idx := 1 to flag_byte - 239 do begin
            b := pk_byte ;
            gf_byte(b) ;
            spec_len := 256 * spec_len + b ;
         end ;
         for idx := 1 to spec_len do gf_byte(pk_byte) ;
      end else if flag_byte = 244 then begin
         {a command with a four-byte parameter, copied under code 243}
         gf_byte(243) ;
         gf_quad(get_32) ;
      end else if (flag_byte >= 247) and (flag_byte <= 255) then
         abort('Unexpected ', flag_byte:1,'!') ;
      {codes 245 and 246 need no action here}
   until (flag_byte < 240) or (flag_byte = pk_post) ;
end ;
1664
1665@* Terminal communication.
We must get the names of the input and output files from the user.
To do this, we use the standard input path
name.  We need a procedure to flush the input buffer.  For most systems,
1669this will be an empty statement.  For other systems, a |print_ln| will
1670provide a quick fix.  We also need a routine to get a line of input from
1671the terminal.  On some systems, a simple |read_ln| will do.  Finally,
1672a macro to print a string to the first blank is required.
1673
@d flush_buffer == begin end
@d get_line(#) == if eoln(input) then read_ln(input) ;
   i := 1 ;
   while not (eoln(input) or eof(input)) do begin
      if i >= name_length then {position |name_length| is kept for the final blank}
         abort('File name too long!') ; {stop rather than store outside the array}
      #[i] := input^ ;
      incr(i) ;
      get(input) ;
   end ;
   #[i] := ' '
1683
@ @p procedure dialog ;
var i : integer ; {index into the name being cleared or read}
begin
   for i := 1 to name_length do begin {start with both names entirely blank}
      gf_name[i] := ' ' ;
      pk_name[i] := ' ' ;
   end;
   print('Input file name:  ') ;
   flush_buffer ;
   get_line(pk_name) ; {this macro uses the local |i|}
   print('Output file name:  ') ;
   flush_buffer ;
   get_line(gf_name) ;
end ;
1699
1700@* The main program.
1701Now that we have all the pieces written, let us put them together.
1702
@p begin
initialize ;
dialog ; {ask for the two file names}
@<Open files@> ;
@<Read preamble@> ;
skip_specials ; {leaves the next flag byte in |flag_byte|}
while flag_byte <> pk_post do begin {one character per pass}
   @<Unpack and write character@> ;
   skip_specials ;
end ;
while not eof(pk_file) do i := pk_byte ; {read and discard whatever follows}
@<Write \.{GF} postamble@> ;
print_ln(pk_loc:1,' bytes unpacked to ',gf_loc:1,' bytes.');
final_end :
end .
1718
1719@* System-dependent changes.
1720This section should be replaced, if necessary, by changes to the program
1721that are necessary to make \.{PKtoGF} work at a particular installation.
1722Any additional routines should be inserted here.
1723@^system dependencies@>
1724
1725@* Index.
1726Pointers to error messages appear here together with the section numbers
1727where each ident\-i\-fier is used.
1728