1 // serialize.cpp                                Copyright (C) 2021 Codemist
2 
3 /**************************************************************************
4  * Copyright (C) 2021, Codemist.                         A C Norman       *
5  *                                                                        *
6  * Redistribution and use in source and binary forms, with or without     *
7  * modification, are permitted provided that the following conditions are *
8  * met:                                                                   *
9  *                                                                        *
10  *     * Redistributions of source code must retain the relevant          *
11  *       copyright notice, this list of conditions and the following      *
12  *       disclaimer.                                                      *
13  *     * Redistributions in binary form must reproduce the above          *
14  *       copyright notice, this list of conditions and the following      *
15  *       disclaimer in the documentation and/or other materials provided  *
16  *       with the distribution.                                           *
17  *                                                                        *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS    *
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT      *
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS      *
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE         *
22  * COPYRIGHT OWNERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,   *
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,   *
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
25  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
26  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR  *
27  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF     *
28  * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH   *
29  * DAMAGE.                                                                *
30  *************************************************************************/
31 
32 // $Id: serialize.cpp 5745 2021-03-20 17:35:28Z arthurcnorman $
33 
34 
35 //=========================================================================
36 //=========================================================================
37 //
38 // This is code that serializes and deserializes Lisp data - up to and
39 // including a complete heap image.
40 // It represents the new code that supports preserve/restart and checkpoint
41 // facilities in Lisp, and I may in the future use it as a replacement
42 // for the current "fasl" scheme, so that a common serialized representation
43 // is used everywhere. It could then also provide users who wanted it with
44 // a way to write Lisp structures to disc (or transmit them across a
45 // network) such that structure sharing and cyclic structures were supported.
46 
47 // There are a collection of things in this code that represent various
48 // levels of planning and thought, so I will try to summarize them here
49 // so that one can then look to see how they interweave and one part of the
50 // code depends on another.
51 //
52 // I will serialize data into a bytestream that essentially describes it
53 // in prefix form. Thus a list (a b) will be treated as (a . (b . nil)) and
54 // sent as "CONS SYM 'a' CONS SYM 'b' SYM 'nil'". In the bytestream
55 // some codes are used with 3-bits of opcode and 5 of embedded data. For
56 // instance integers in the range -16 to +15 can be handled that way in
57 // a single byte. Looped and shared structures are handled by allowing the
58 // serialized form to keep a record of selected items as they are read and
59 // then refer back to them. References to the most recent 32 such items can
60 // be done especially efficiently. A form of "move to front" buffering is
61 // used in the table of re-used items. This is organised as a binary heap
62 // (as in heapsort). When any entry is is accessed it is moved to the top
63 //  of the heap (taking log(n) steps if there are n items in the table of
64 // old objects).
65 //
66 // When integers or offsets or length information is required in the byte
67 // stream it is coded in a variable length format, where each byte holds
68 // 7 bits from the number, and the final one has its top bit set. As a
69 // special case if there have been 8 non-final bytes the one after that is
70 // assumed to be final and all 8 of its bits can be used. This scheme
71 // allows full 64-bit values to be encoded in at most 9 bytes.
72 //
73 // There could be problems if either reader or writer was written using
74 // C++ recursion, because stack use could become unreasonable. To cope with
75 // this they both use pointer-reversing schemes so that almost all of
76 // the information needed to cope with nesting is stored within the data
77 // structure that is being traversed. For vectors that hold references to
78 // Lisp data some extra space is required while reading, and this is
79 // provided using fresh cons cells. They could be collected onto a freelist
80 // for use when they were no longer needed, but at present I do not do that.
81 // The writing code can be certain that garbage collection could not happen
82 // in the middle of its work, so it can afford to leave pointers into the
83 // middle of vectors as it goes, so it does not need any extra space at all.
84 //
85 // One might worry about "large" vectors that are stored internally as an
86 // index array and then the data. Well this serialization code just looks
87 // at the representation not at the higher level semantics, and so it will
88 // process the index vector much like any other vector and when data is back
89 // in memory again it should be in exactly the state that it needs to be!
90 //
91 // The writer works in two passes. The first pass just has to identify and
92 // record where there are objects referenced multiple times. It uses a sparse
93 // bitmap to record which objects it has visited. When it finds that it is
94 // re-visiting an address it enters that in a hash table which in due course
95 // arranges the back-reference scheme mentioned earlier.
96 // The second pass then does not need the bitmap - it can be aware of and
97 // avoid cycles by inspecting the hash table of multiply-referenced iteme.
98 // This means it checks this table for every object it visits, and almost
99 // always the lookup will fail. This is kept fast by using Cuckoo hashing
100 // where each item in the table has only a few locations where it could live.
101 // So lookup in the table, successful or unsuccessful, only costs four
102 // probes, with these being in two pairs of consecutive words (which is
103 // a fairly cache-friendly way or arranging the search).
104 //
105 // Everything has to be written in a byte-order neutral format and re-read
106 // so that if the machine receiving the data uses a different byte order
107 // from the one creating it all is well. That is perhaps especially
108 // messy for floating point values, and it is not clear that there is a
109 // sane reliable interchange format for floats that are wider then 64 bits,
110 // so at present "long double" is not really supported. If I did not care
111 // about reading in serialized data on a machine different from the one it
112 // was written on there would not be an issue!
113 //
114 // I provide two rather different schemes for serializing symbols. The first
115 // is for user-level code and it treats a symbol pretty much as just its
116 // name. When the data is read back the string forming the name is looked
117 // up in the object list. Attributes (property list, value, function
118 // definition etc) are not considered. The only special case there is that
119 // of gensyms. In CSL a gensym that has not printed has just a stub as
120 // its name (often just "G"). When a gensym is first printed it gets
121 // allocated a sequence number and its name is expanded to something like
122 // "G0751". In the serialization code the stub of the name is recovered and
123 // included in the bye-stream along with a marker indicating that the
124 // symbol is a gensym. The effect is then the version that is read back has
125 // "not yet printed" status.
126 //
127 // The other treatment of symbols is for use when this code is checkpointing
128 // a running system. It treats symbols as merely a variation on vector or
129 // record style data. That means that all the componenent fields used
130 // in their implementation get serialized. A special challenge there is
131 // the function cells. For all built-in functions that is handled via
132 // a table that tabulates all their addresses.
133 //
134 // Lisp hash tables represent a jolly special case! After they have been
135 // passed to a different system the hash values of many objects may have
136 // changed. This can arise if, for instance, hash codes are based on
137 // a memory address or are sensitive to byte-order.
138 // During garbage collection and when re-read from a
139 // serialized form ant object that might have a header saying TYPE_HASH
140 // has that updates to be TYPE_HASHX where this new marker indicates a
141 // hash table whose elements may not be in the correct locations. Then any
142 // operation on the hash table can check for TYPE_HASHX and if it sees
143 // it re-hash and reset the table to TYPE_HASH.
144 //
145 // There are a few types where I feel that serialization is probably never
146 // going to make sense. For instance open streams, references to foreign
147 // functions, ... and if at some stage a native compiler is created then
148 // native compiled code could transfer to machines of just the same
149 // flavour but not to others - but issues of position independence or
150 // code relocation tables would apply. To leave an opportunity to
151 // address some of those issues in the future, the serialization byte
152 // code has a number of unallocated codes left available for extension.
153 
154 //=========================================================================
155 //=========================================================================
156 
157 #include "headers.h"
158 
159 // Here is a bit of raw information. I looked at the length of the names
160 // of symbols in a Reduce image in mid 2016, and I find
161 //  length  count
162 //   1 -  5: 2028
163 //   6 - 10: 2928
164 //  11 - 15:  932
165 //  16 - 20:  319
166 //  21 - 24:   87
167 //  25 - 30:   32
168 //  31 - 35:   15
169 //  36+        18
170 //
171 // A different study looked at all the strings present at the end of a
172 // garbage collection where all Reduce code (from all packages) had been
173 // loaded. The strings there included many that are messages that are to be
174 // printed. The average string length in that situation was 22 bytes... but
175 // that will represent a somewhat bi-modal length distribution with short
176 // names for symbols and long(ish) ones for messages that get printed.
177 
178 
179 // I will have a byte-coded instruction set that describes a structure that
180 // is to be constructed. It will be based on having a 3-bit opcode in each
181 // byte
182 
183 static bool descend_symbols = true;
184 
185 #define SER_OPBITS         0xe0     // top 3 bits of byte are major opcode
186 
187 
188 // 64 most recent shared items in a single byte.
189 #define SER_BACKREF0       0x00     // reference to item 1 to 32 ago
190 #define SER_BACKREF1       0x20     // reference to item 33 to 64 ago
191 
192 // I expect strings to be important enough that at least short ones have
193 // special treatment. The length-code here will stand for 1-32 not 0-31.
194 #define SER_STRING         0x40     // a string with 1-32 bytes
195 
196 // very small integers perhaps also deserve help.
197 #define SER_FIXNUM         0x60     // integer -16 to +15
198 
199 // In CSL header words have a 7-bit field that identifies the type
200 // of the object. Two bits there discriminate between bit-vectors, vectors
201 // holding lists, vectors holding binary data and anything else. I can fit in
202 // 5 bits here so to simplify coding I will use that as just a copy of the
203 // remaining 5 bits from the type field in the two important classes of
204 // vector.  Bit-vectors will need to be dealt with otherwise. These cases
205 // will deal with simple lisp vectors, with bignums and with strings that are
206 // too long for the special SER_STRING case.
207 #define SER_LVECTOR        0x80     // vector holding lists
208 #define SER_BVECTOR        0xa0     // vector holding binary info
209 
210 // SER_LIST has 32 variants, and these are used to build lists of length
211 // 1, 2, 3 or 4 with an arbitrary mix of the cons cells involved being
212 // ones that will be referenced again.
213 // In the explanation here I will use CONS to stand for data that is not
214 // shared and DCONS for one that must be entered into the duplicates table.
215 // It is not clear to me that there is a really nice way to pick the opcode
216 // values here to make either writing or reading these really neat and
217 // clean.
218 // The naming convention I use for the actual opcodes is a prefix of SER_L_,
219 // then "a" for a unique CONS and "A" for one that will be shared, and if the
220 // opcode puts a NIL on the end of the list that is it, if it adds on more
221 // data (as for instance like (LIST* a b c tail) there will be a final _S.
222 //
223 // I introduce these because I expect that in many Lisp contexts that
224 // a substantial proportion of lists will be rather short, so for instance
225 // using these a functon call (f a b c) will be rendered as
226 //           L_aaaa f a b c                  (ideally 5 bytes)
227 // whereas with my original scheme it would have been
228 //           CONS f CONS a CONS b CONS c NIL (ideally 9 bytes)
229 // and (f) will now be L_a_S f (2 bytes) while it used to be CONS f nil (3).
230 // A serious fraction of the space used in a bootstrapreduce image file
231 // is saved function definitions, and they are certainly heavy on lists
232 // of length 1, 2, 3 and 4, and so should benefit. Of course the saving will
233 // in gebneral not be as good as the above best cases indicate, and the net
234 // saving aftger zlib has done some compression may be even less impressive!
235 
236 #define SER_LIST           0xc0
237 
238 // Cases that involve a single CONS cell, which may or may not be shared.
239 #define SER_L_a            0xc0     // (CONS a nil)
240 #define SER_CONS_NIL       SER_L_a  //   alternative name!
241 #define SER_L_A            0xc1     // (DCONS a nil)
242 #define SER_CONS_DUP_NIL   SER_L_A
243 #define SER_L_a_S          0xc2     // (CONS b a)
244 #define SER_CONS           SER_L_a_S
245 #define SER_L_A_S          0xc3     // (DCONS b a)
246 #define SER_CONS_DUP       SER_L_A_S
247 // Cases with two CONS cells, each of which may or may not be shared.
248 #define SER_L_aa           0xc4     // (CONS b (CONS a nil))
249 #define SER_L_Aa           0xc5     // (DCONS b (CONS a nil))
250 #define SER_L_aA           0xc6     // (CONS b (DCONS a nil))
251 #define SER_L_AA           0xc7     // (DCONS b (DCONS a nil))
252 #define SER_L_aa_S         0xc8     // (CONS c (CONS b a))
253 #define SER_L_Aa_S         0xc9     // (DCONS c (CONS b a))
254 #define SER_L_aA_S         0xca     // (CONS c (DCONS b a))
255 #define SER_L_AA_S         0xcb     // (DCONS c (DCONS b a))
256 // For cases that create 4 CONS cells I will only allow for the very first
257 // one to be shared. I expect this case to apply if I have a really
258 // long list because I rather expect that only the head of the list stands
259 // much change of having multiple references to it. This case will
260 // also cope nicely with re-loading the source form of function calls
261 // (f c b a) with 3 arguments. If there is sharing on other than the
262 // first item I will need to use one of the opcodes that create 3 CONS cells
263 // to build the front of the list.
264 #define SER_L_aaaa         0xcc     // (CONS d (CONS c (CONS b (CONS a nil))
265 #define SER_L_Aaaa         0xcd     // (DCONS d (CONS c (CONS b (CONS a nil))
266 #define SER_L_aaaa_S       0xce     // (CONS e (CONS d (CONS c (CONS b a))
267 #define SER_L_Aaaa_S       0xcf     // (DCONS e (CONS d (CONS c (CONS b a))
268 // Lots of cases with 3 CONS cells, as in (list c b a) and (SER_L* d c b a)
269 // again with full control over whether the cells are shared or not.
270 #define SER_L_aaa          0xd0     // (CONS c (CONS b (CONS a nil))
271 #define SER_L_Aaa          0xd1     // (DCONS c (CONS b (CONS a nil))
272 #define SER_L_aAa          0xd2     // (CONS c (DCONS b (CONS a nil))
273 #define SER_L_AAa          0xd3     // (DCONS c (DCONS b (CONS a nil))
274 #define SER_L_aaA          0xd4     // (CONS c (CONS b (DCONS a nil))
275 #define SER_L_AaA          0xd5     // (DCONS c (CONS b (DCONS a nil))
276 #define SER_L_aAA          0xd6     // (CONS c (DCONS b (DCONS a nil))
277 #define SER_L_AAA          0xd7     // (DCONS c (DCONS b (DCONS a nil))
278 #define SER_L_aaa_S        0xd8     // (CONS d (CONS c (CONS b a))
279 #define SER_L_Aaa_S        0xd9     // (DCONS c (CONS b (CONS a nil))
280 #define SER_L_aAa_S        0xda     // (CONS d (DCONS c (CONS b a))
281 #define SER_L_AAa_S        0xdb     // (DCONS c (DCONS b (CONS a nil))
282 #define SER_L_aaA_S        0xdc     // (CONS d (CONS c (DCONS b a))
283 #define SER_L_AaA_S        0xdd     // (DCONS c (CONS b (DCONS a nil))
284 #define SER_L_aAA_S        0xde     // (CONS d (DCONS c (DCONS b a))
285 #define SER_L_AAA_S        0xdf     // (DCONS c (DCONS b (DCONS a nil))
286 
287 // SER_VARIOUS is used for a collection of individual bytes-codes
288 // that cover symbols, floating point values, characters and bit-vectors.
289 // There are also codes to support large values where the other dedicated
290 // opcodes provide optimised support for small ones, and control bytes
291 // such as DUP and END. At present I have 12 values unallocated so those can
292 // de deployed if I find further features that I need or that would represent
293 // really useful optimisations.
294 
295 #define SER_VARIOUS        0xe0
296 
297 // In heap image mode I use the two "RAWSYMBOL" codes, while in FASL file
298 // mode I use SYMBOl & GENSYM. I could common-up the opcodes and so give
299 // myself two more spare codes!
300 #define   SER_RAWSYMBOL    0xe0    // a symbol
301 #define   SER_DUPRAWSYMBOL 0xe1    // a symbol, but will be referenced again
302 #define   SER_SYMBOL       0xe2    // a symbol, but intern it as you read
303 #define   SER_DUPSYMBOL    0xe3    // as above, but will be referenced again
304 #define   SER_GENSYM       0xe4    // a gensym
305 #define   SER_DUPGENSYM    0xe5    // a gensym that will be referenced again
306 #define   SER_BIGBACKREF   0xe6    // reference more than 64 items ago
307 #define   SER_POSFIXNUM    0xe7    // positive (or unsigned) 64-bit integer
308 #define   SER_NEGFIXNUM    0xe8    // negative integer up to 63 bits
309 #define   SER_FLOAT28      0xe9    // short float
310 #define   SER_FLOAT32      0xea    // single float
311 #define   SER_FLOAT64      0xeb    // double float
312 #define   SER_FLOAT128     0xec    // long float
313 #define   SER_CHARSPID     0xed    // char object, "special identifier" etc
314 #define   SER_DUP          0xee    // used with items that have multiple references
315 #define   SER_BITVEC       0xef    // bit-vector
316 #define   SER_NIL          0xf0    // the very special case of NIL
317 #define   SER_END          0xf1    // a (redundant) marker for end of heap dump
318 #define   SER_OPNEXT       0xf2    // for debugging
319 #define   SER_NIL2         0xf3    // NIL NIL
320 #define   SER_NIL3         0xf4    // NIL NIL NIL
321 #define   SER_REPEAT       0xf5    // provides simple run-length coding
322 #define   SER_spare_f6     0xf6
323 #define   SER_spare_f7     0xf7
324 #define   SER_spare_f8     0xf8
325 #define   SER_spare_f9     0xf9
326 #define   SER_spare_fa     0xfa
327 #define   SER_spare_fb     0xab
328 #define   SER_spare_fc     0xfc
329 #define   SER_spare_fd     0xfd
330 #define   SER_spare_fe     0xfe
331 
332 // I make the byte 0xff illegal so that if I get EOF (which is traditionally
333 // the value -1) and mask it to 8 bits I will see a complaint.
334 #define   SER_ILLEGAL      0xff
335 
336 
337 #ifdef DEBUG_SERIALIZE
338 
339 static const char *ser_opnames[] =
340 {   "BACKREF0",
341     "BACKREF1",
342     "STRING",
343     "FIXNUM",
344     "LVECTOR",
345     "BVECTOR",
346     "list",
347     "various"
348 };
349 
350 static const char *ser_list_names[] =
351 {   "L_a       (CONS a nil)",
352     "L_A       (DCONS a nil)",
353     "L_a_S     (CONS b a)",
354     "L_A_S     (DCONS b a)",
355     "L_aa      (CONS b (CONS a nil))",
356     "L_Aa      (DCONS b (CONS a nil))",
357     "L_aA      (CONS b (DCONS a nil))",
358     "L_AA      (DCONS b (DCONS a nil))",
359     "L_aa_S    (CONS c (CONS b a))",
360     "L_Aa_S    (DCONS c (CONS b a))",
361     "L_aA_S    (CONS c (DCONS b a))",
362     "L_AA_S    (DCONS c (DCONS b a))",
363     "L_aaaa    (CONS d (CONS c (CONS b (CONS a nil))",
364     "L_Aaaa    (DCONS d (CONS c (CONS b (CONS a nil))",
365     "L_aaaa_S  (CONS e (CONS d (CONS c (CONS b a))",
366     "L_Aaaa_S  (DCONS e (CONS d (CONS c (CONS b a))",
367     "L_aaa     (CONS c (CONS b (CONS a nil))",
368     "L_Aaa     (DCONS c (CONS b (CONS a nil))",
369     "L_aAa     (CONS c (DCONS b (CONS a nil))",
370     "L_AAa     (DCONS c (DCONS b (CONS a nil))",
371     "L_aaA     (CONS c (CONS b (DCONS a nil))",
372     "L_AaA     (DCONS c (CONS b (DCONS a nil))",
373     "L_aAA     (CONS c (DCONS b (DCONS a nil))",
374     "L_AAA     (DCONS c (DCONS b (DCONS a nil))",
375     "L_aaa_S   (CONS d (CONS c (CONS b a))",
376     "L_Aaa_S   (DCONS c (CONS b (CONS a nil))",
377     "L_aAa_S   (CONS d (DCONS c (CONS b a))",
378     "L_AAa_S   (DCONS c (DCONS b (CONS a nil))",
379     "L_aaA_S   (CONS d (CONS c (DCONS b a))",
380     "L_AaA_S   (DCONS c (CONS b (DCONS a nil))",
381     "L_aAA_S   (CONS d (DCONS c (DCONS b a))",
382     "L_AAA_S   (DCONS c (DCONS b (DCONS a nil))"
383 };
384 
385 static const char *ser_various_names[] =
386 {   "RAWSYMBOL",    // only used in image files
387     "DUPRAWSYMBOL", // only used in image files
388     "SYMBOL",       // only in FASL files
389     "DUPSYMBOL",    // only in FASL files
390     "GENSYM",       // only in FASL files
391     "DUPGENSYM",    // only in FASL files
392     "BIGBACKREF",
393     "POSFIXNUM",
394     "NEGFIXNUM",
395     "FLOAT28",
396     "FLOAT32",
397     "FLOAT64",
398     "FLOAT128",
399     "CHARSPID",
400     "DUP",
401     "BITVEC",
402     "NIL",
403     "END",
404     "OPNEXT",  // only used while debugging
405     "NIL2",
406     "NIL3",
407     "REPEAT",
408     "op16",    // spare
409     "op17",    // spare
410     "op18",    // spare
411     "op19",    // spare
412     "op1a",    // spare
413     "op1b",    // spare
414     "op1c",    // spare
415     "op1d",    // spare
416     "op1e",    // spare
417     "ILLEGAL"  // so that EOF is illegal
418 };
419 
ser_print_opname(int n)420 static void ser_print_opname(int n)
421 {   int top = (n >> 5) & 0x7;
422     if (top == (SER_VARIOUS>>5))
423         std::fprintf(stderr, "%s", ser_various_names[n & 0x1f]);
424     else if (top == (SER_LIST>>5))
425         std::fprintf(stderr, "%s", ser_list_names[n & 0x1f]);
426     else std::fprintf(stderr, "%s %d", ser_opnames[top], n & 0x1f);
427 }
428 
429 #endif // DEBUG_SERIALIZE
430 
431 // For a full Reduce image there are around 7000 items that have multiple
432 // references to them, but my code makes the tables that I use expand as
433 // necessary.
434 //
435 // I ought to think about garbage collection safety here. Well I will set
436 // a rule that garbage collection is not allowed to happen while a heap
437 // is being written, and only the writing thread is allowed to be active.
438 // That allows me to (temporarily) scramble data using pointer-reversal
439 // as I traverse the structures that I am dumping, and that in turn means I
440 // only use bounded stack space. I will have restored all structures by
441 // the end of writing an image.
442 //
443 // During reading I will allow garbage collection to happen.
444 // I rather do not expect it to when re-loading a fresh heap-image,
445 // but the code here can also be used in the middle of running
446 // perfectly ordinary code to serialize data so it is stored compactly
447 // on disc. Indeed this may end up replacing the previous "fasl" format
448 // that I had.
449 
450 // I will need a hash table that records information about items in the
451 // heap that are visited several times. I use the one from inthash.cpp.
452 
453 // I have a scheme where if some opcode is used many times in a row
454 // I can put a SER_REPEAT prefix. I detect and handle such cases using a
455 // peep-hole optimiser here. I put declarations here so I can refer to
456 // the variables even though the code that does interesting stuff with
457 // them is lower down.
458 
459 static int delayed_byte = -1;
460 static uint64_t delayed_count = 0, delayed_arg = 0;
461 static bool delayed_has_arg = false;
462 static char delayed_message[80];
463 
464 static inthash repeat_hash;
465 LispObject *repeat_heap = nullptr;
466 size_t repeat_heap_size = 0, repeat_count = 0;
467 
468 class TidyRepeatHeap
469 {
470 public:
~TidyRepeatHeap()471     ~TidyRepeatHeap()
472     {   if (repeat_heap != nullptr) delete [] repeat_heap;
473         repeat_heap = nullptr;
474     }
475 };
476 static TidyRepeatHeap tidyRepeatHeap;
477 
478 
reader_setup_repeats(size_t n)479 void reader_setup_repeats(size_t n)
480 {   if (n == 0) n = 1;
481     if (repeat_heap == nullptr || repeat_heap_size < n)
482     {   repeat_heap_size = n;
483         if (repeat_heap != nullptr) delete [] repeat_heap;
484         repeat_heap = new (std::nothrow) LispObject[n+1];
485     }
486     if (repeat_heap == nullptr)
487     {   std::fprintf(stderr, "\n+++ unable to allocate repeat heap\n");
488         my_abort("repeat heap");
489     }
490     repeat_count = 0;
491 // I fill the vector with fixnum_of_int(0) so it is GC safe.
492     for (size_t i=0; i<repeat_heap_size; i++)
493         repeat_heap[i] = fixnum_of_int(0);
494 }
495 
writer_setup_repeats()496 void writer_setup_repeats()
497 {   if (repeat_heap == nullptr || repeat_heap_size < repeat_hash.count)
498     {   repeat_heap_size = repeat_hash.count;
499         if (repeat_heap != nullptr) delete [] repeat_heap;
500         repeat_heap = new (std::nothrow) LispObject[repeat_heap_size+1];
501     }
502     if (repeat_heap == nullptr)
503     {   std::fprintf(stderr, "\n+++ unable to allocate repeat heap\n");
504         my_abort("run out of memory");
505     }
506     repeat_count = 0;
507     for (size_t i=0; i<=repeat_heap_size; i++)
508         repeat_heap[i] = fixnum_of_int(0);
509 // I will call this before generating any output bytes, and so here is a
510 // convenient place to prime the peephole optimiser.
511     delayed_byte = -1;
512     delayed_count = 0;
513 }
514 
515 // Given an index 1, 2, ... find the item that was referred to recently
516 // that is identified by that index, and apply a move to the front
517 // process that should lead to a significant proportion of these accesses
518 // being in the range 1..64.
519 // NOTE that index value zero is not used.
520 //
521 // The implementation here uses a binary heap as a priority queue
522 // so that in there are N items in it bringing something to the top
523 // costs log(n) steps. This is intended to be a compromise between
524 // a scheme that brings each referenced item to the front demoting
525 // all other items one place (and having linear cost per access) and
526 // not using move-to-front at all.
527 
reader_repeat_old(size_t n)528 LispObject reader_repeat_old(size_t n)
529 {   if (n == 1) return repeat_heap[1];
530     LispObject w;
531     for (;;)
532     {   size_t n2 = n/2;           // parent in binary heap
533         w = repeat_heap[n];
534         repeat_heap[n] = repeat_heap[n2];
535         repeat_heap[n2] = w;
536         if (n2 == 1) break;  // item has been moved to front
537         n = n2;
538     }
539     return w;
540 }
541 
542 // Given an item that is now seen for the first time but that it is known
543 // will be referenced again later, add it to the repeat table and if
544 // it falls beyond the first 64 bring it to the first position.
545 
reader_repeat_new(LispObject x)546 LispObject reader_repeat_new(LispObject x)
547 {   repeat_heap[++repeat_count] = x;
548     return reader_repeat_old(repeat_count);
549 }
550 
551 // There are two phases involved when writing out data. One merely
552 // inserts items into the hash table of repeats and makes it possible
553 // to check for this. This is needed because until all data has
554 // been scanned one can not tell if there will be a second reference to
555 // an object that is processed early, but when it actually comes
556 // ro writing out data that early mention must arrange to remember
557 // the item for its later re-use.
558 
559 // Given the location in the repeats hash table of an item that I want to
560 // dump, work out where it will live. Note that repeat_count will be zero
561 // at the start of dumping an image, and that the updating of repeat_heap
562 // here must match that done in the reading code. I return the location that
563 // the item was before the move-to-front operation. I can use this when a
564 // new item is to be entered in the repeat heap.
565 
find_index_in_repeats(size_t h)566 size_t find_index_in_repeats(size_t h)
567 {   size_t n = hash_get_value(&repeat_hash, h);
568 // if n == 0 then this is the first time we have seen this item. So it
569 // needs to be inserted into repeat_hash.
570     if (n == 0)
571     {   n = ++repeat_count;
572         repeat_heap[n] = h;
573         hash_set_value(&repeat_hash, h, n);
574     }
575     if (n == 1) return 1;
576 // I now need to perform the same move-to-top operation that will be performed
577 // during reading. But as I do so I will need to update values in the
578 // repeat_hash so that I can still find the moved items.
579     h = n;
580     for (;;)
581     {   size_t n2 = n/2;           // parent in binary heap
582         LispObject w = repeat_heap[n];
583         repeat_heap[n] = repeat_heap[n2];
584         hash_set_value(&repeat_hash, repeat_heap[n2],  n);
585         repeat_heap[n2] = w;
586         hash_set_value(&repeat_hash, w, n2);
587         if (n2 == 1) break;  // item has been moved to front
588         n = n2;
589     }
590     return h;
591 }
592 
read_opcode_byte()593 int read_opcode_byte()
594 {   int r;
595 #if defined DEBUG_SERIALIZE && defined DEBUG_OPNEXT
596 // In this case each serialization opcode is preceeded by SER_OPNEXT. This
597 // should mean that if anything gets out of step because serialzation
598 // data is malformed that this is noticed and reported promptly. Such failures
599 // either reflext internal inconsistency between the serialization read and
600 // write code or some corruption of data after writing but before reading.
601     r = Zgetc() & 0xff;
602     std::fprintf(stderr, "Read %d = %.2x ", r, r);
603     if (r != SER_OPNEXT)
604     {   std::fprintf(stderr, "\nExpected OPNEXT but did not find it\n");
605         my_abort("bad bytecodes");
606     }
607     else std::fprintf(stderr, "SER_OPNEXT\n");
608 #endif // DEBUG_SERIALIZE & DEBUG_OPNEXT
609     r = Zgetc() & 0xff;
610 #ifdef DEBUG_SERIALIZE
611     std::fprintf(stderr, "Read %d = %.2x ", r, r);
612     ser_print_opname(r);
613     std::fprintf(stderr, "\n");
614 #endif // DEBUG_SERIALIZE
615     return r;
616 }
617 
read_data_byte()618 int read_data_byte()
619 {   int r;
620     r = Zgetc() & 0xff;
621 #ifdef DEBUG_SERIALIZE
622     std::fprintf(stderr, "Read %d = %.2x\n", r, r);
623 #endif // DEBUG_SERIALIZE
624     return r;
625 }
626 
read_string_byte()627 int read_string_byte()
628 {   int r;
629     r = Zgetc() & 0xff;
630 #ifdef DEBUG_SERIALIZE
631     std::fprintf(stderr, "Read %d = %.2x ", r, r);
632     if (0x20 <= r && r <= 0x7f) std::fprintf(stderr, " = '%c'", r);
633     std::fprintf(stderr, "\n");
634 #endif // DEBUG_SERIALIZE
635     return r;
636 }
637 
638 
639 // If DEBUG_SERIALIZE is set I will arrange that the dumping code can print
640 // out a human-readable transcript of what there is.
641 // Arranging to set up and pass down the strings that form parts of this
642 // will have costs, so I will try to pass dummy data in the production
643 // version. That will either be a fixed string or a reference to a character
644 // array that has been left uninitialized.
645 
646 extern void write_u64(uint64_t n);
647 extern void write_opcode(int byte, const char *msg, ...);
648 
write_delayed(int byte,const char * msg,...)649 void write_delayed(int byte, const char *msg, ...)
650 {
651 // If the new byte matches a previously delayed one I increment the
652 // repeat count. If it does not then I must flush any pending repeated
653 // sequence before starting this new one.
654     if (byte == delayed_byte) delayed_count++;
655     else
656     {   if (delayed_byte != -1) write_opcode(-1, "flushing delayed");
657 #ifdef DEBUG_SERIALIZE
658         std::va_list a;
659         va_start(a, msg);
660         std::vsprintf(delayed_message, msg, a);
661         va_end(a);
662 #endif // DEBUG_SERIALIZE
663         delayed_byte = byte;
664         delayed_has_arg = false;
665         delayed_count = 1;
666     }
667 }
668 
write_delayed_with_arg(int byte,uint64_t arg,const char * msg,...)669 void write_delayed_with_arg(int byte, uint64_t arg, const char *msg, ...)
670 {
671     if (byte == delayed_byte &&
672         arg == delayed_arg) delayed_count++;
673     else
674     {   if (delayed_byte != -1) write_opcode(-1, "flushing delayed");
675 #ifdef DEBUG_SERIALIZE
676         std::va_list a;
677         va_start(a, msg);
678         std::vsprintf(delayed_message, msg, a);
679         va_end(a);
680 #endif // DEBUG_SERIALIZE
681         delayed_byte = byte;
682         delayed_arg = arg;
683         delayed_has_arg = true;
684         delayed_count = 1;
685     }
686 }
687 
write_opcode(int byte,const char * msg,...)688 void write_opcode(int byte, const char *msg, ...)
689 {
690 // If I have something pending I need to write it out. If there is a long
691 // enough run I will use the SER_REPEAT prefix, and that becomes worthwhile
692 // slightly sooner for cases that take an operand. If that is not
693 // useful I will just emit the pending material 1, 2 or 3 times in a
694 // simple manner.
695     if (delayed_count != 0)
696     {   uint64_t n = delayed_count;
697         int b = delayed_byte;
698         delayed_count = 0;
699 // If I called write_opcode from here before resetting delayed_count to
700 // zero that would generate an infinite recursion! Also the recursive calls
701 // to write_opcode here clobber delayed_byte, which is why I just captured
702 // it into the variable b.
703         if (delayed_has_arg)
704         {   if (n >= 3)
705             {   write_opcode(SER_REPEAT, "repeat %" PRIu64, n);
706                 write_u64(n-3);
707                 write_opcode(b, delayed_message);
708                 write_u64(delayed_arg);
709             }
710             else for (uint64_t i=0; i<n; i++)
711             {   write_opcode(b, delayed_message);
712                 write_u64(delayed_arg);
713             }
714             delayed_has_arg = false;
715             delayed_arg = 0;
716         }
717         else
718         {   if (n >= 4)
719             {   write_opcode(SER_REPEAT, "repeat %" PRIu64, n);
720                 write_u64(n-3);
721                 write_opcode(b, delayed_message);
722             }
723 // I view NIL as special enough that I provide single bytes for even short
724 // runs.
725             else if (n < 4 && b == SER_NIL)
726             {   if (n == 1) write_opcode(SER_NIL, "NIL");
727                 else if (n == 2) write_opcode(SER_NIL2, "NIL NIL");
728                 else if (n == 3) write_opcode(SER_NIL3, "NIL NIL NIL");
729                 else my_abort("serialization failure");
730             }
731             else for (uint64_t i=0; i<n; i++)
732                 write_opcode(b, delayed_message);
733         }
734     }
735 // Arrange that the peephole scheme is tidy.
736     delayed_byte = -1;
737     delayed_count = 0;
738 // Sometimes I just want to call this to flush the delay. I can do that
739 // by using the otherwise illegal value -1 as an opcode.
740     if (byte == -1) return;
741 #ifdef DEBUG_SERIALIZE
742 #ifdef DEBUG_OPNEXT
743 // In cases of extreme debugging I could be worried that the byte-stream
744 // gets out of sync. To detect such cases I can define DEBUG_OPNEXT and that
745 // puts an extra SER_OPNEXT byte just before every opcode byte (but not
746 // before data bytes). So if you are reading a stream and are expecting
747 // an opcode then you should see this particular prefix byte first. Inserting
748 // these extra bytes adds to the bulk of the stream (quite badly) so it is
749 // not done in a production system.
750     std::fprintf(stderr, "<opcode prefix> %.2x\n", SER_OPNEXT);
751     Zputc(SER_OPNEXT);
752 #endif // DEBUG_OPNEXT
753     std::fprintf(stderr, "%.2x: ", byte & 0xff);
754     std::va_list a;
755     va_start(a, msg);
756     std::vfprintf(stderr, msg, a);
757     std::fprintf(stderr, " : ");
758     ser_print_opname(byte);
759     std::fprintf(stderr, "\n");
760     va_end(a);
761 #endif // DEBUG_SERIALIZE
762     Zputc(byte);
763 }
764 
write_byte(int byte,const char * msg,...)765 void write_byte(int byte, const char *msg, ...)
766 {
767 #ifdef DEBUG_SERIALIZE
768     std::va_list a;
769     std::fprintf(stderr, "%.2x: ", byte & 0xff);
770     va_start(a, msg);
771     std::vfprintf(stderr, msg, a);
772     std::printf("\n");
773     va_end(a);
774 #endif // DEBUG_SERIALIZE
775     Zputc(byte);
776 }
777 
778 // This reads from 1 to 9 bytes in a variable length encoding to make up an
779 // unsigned 64-bit value. The bytes represent the number most significant bits
780 // first, and if there are 1-8 of them the last one has its top bit set, but
781 // all the leading ones have their top bits clear. If there are 8 bytes all
782 // of which have their top bit zero then the final byte is treated as a full
783 // 8 bits.
784 
read_u64()785 uint64_t read_u64()
786 {   uint64_t r = 0;
787     int b, i;
788     for (i=0; i<8; i++)
789     {   if (((b = read_data_byte()) & 0x80) != 0)
790             return (r << 7)  | (b & 0x7f);
791         r = (r << 7) | b;
792     }
793     return (r << 8) | read_data_byte();
794 }
795 
796 // Write a 64-bit unsigned value in a format compatible with read_u64()
797 
write_u64(uint64_t n)798 void write_u64(uint64_t n)
799 {   char msg[40];
800     if (n == (n & 0x7f))
801     {
802 #ifdef DEBUG_SERIALIZE
803         std::sprintf(msg, "small int %#.2x = %d", static_cast<int>(n), static_cast<int>(n));
804 #endif // DEBUG_SERIALIZE
805         write_byte(n | 0x80, msg);
806         return;
807     }
808     int final = 7;
809     bool any = false;
810 // There is a case here that has caught me out.
811 // A number with 57 bits (eg 0x01aabbbbccccdddd) needs 9 bytes to encode it,
812 // and these will be used for 7+7+7+7+7+7+7+8 bits apiece. Thus the leading
813 // byte will be all zero (because the final byte will use all 8 of its bits).
814 // So schemes that otherwise discard leading zeros must not do so in this
815 // particular situation!
816     if ((n & UINT64_C(0xff00000000000000)) != 0)
817     {   final = 8;
818         any = true;
819     }
820     for (int i=0; i<8; i++)
821     {   int b = (n >> (7*(7-i)+final)) & 0x7f;
822         if (any || (b != 0))
823         {   any = true;
824 #ifdef DEBUG_SERIALIZE
825             std::sprintf(msg, "%#" PRIx64, ((uint64_t)b) << (7*(7-i)+final));
826 #endif // DEBUG_SERIALIZE
827             write_byte(b, msg);
828         }
829     }
830     if (final == 7)
831     {
832 #ifdef DEBUG_SERIALIZE
833         std::sprintf(msg, "%#.2x = %" PRIu64, static_cast<int>(n) & 0x7f, n);
834 #endif // DEBUG_SERIALIZE
835         write_byte(0x80 | (n & 0x7f), msg);
836     }
837     else
838     {
839 #ifdef DEBUG_SERIALIZE
840         std::sprintf(msg, "%#.2x = %" PRIu64, static_cast<int>(n) & 0xff, n);
841 #endif // DEBUG_SERIALIZE
842         write_byte(n & 0xff, msg);
843     }
844 }
845 
846 // Note that the type-punning used here (even as against an array of char)
847 // seems to go beyond what C++ guarantees to support. I believe that at
848 // least at present gcc guarantees to treat it in a way where those of an
849 // old-fashioned recollection of "the spirit of C" will not be upset. So
850 // I hope that this will work.
851 
852 typedef union _float32u
853 {   char i[4];
854     float f;
855 } float32u;
856 
857 // softfloat.h will have defined LITTLEENDIAN in the case that applies
858 // for (eg) Intel, and not for the case of Sun/Sparc.
859 
read_f32()860 float read_f32()
861 {   float32u u;
862 #ifdef LITTLEENDIAN
863     u.i[0] = read_data_byte();
864     u.i[1] = read_data_byte();
865     u.i[2] = read_data_byte();
866     u.i[3] = read_data_byte();
867 #else
868     u.i[3] = read_data_byte();
869     u.i[2] = read_data_byte();
870     u.i[1] = read_data_byte();
871     u.i[0] = read_data_byte();
872 #endif
873     return u.f;
874 }
875 
write_f32(double f)876 void write_f32(double f)
877 {   float32u u;
878     u.f = f;
879 #ifdef LITTLEENDIAN
880     write_byte(u.i[0], "part of float");
881     write_byte(u.i[1], "part of float");
882     write_byte(u.i[2], "part of float");
883     write_byte(u.i[3], "part of float");
884 #else
885     write_byte(u.i[3], "part of float");
886     write_byte(u.i[2], "part of float");
887     write_byte(u.i[1], "part of float");
888     write_byte(u.i[0], "part of float");
889 #endif
890 }
891 
892 typedef union _float64u
893 {   char i[8];
894     double f;
895 } float64u;
896 
read_f64()897 double read_f64()
898 {   float64u u;
899 #ifdef LITTLEENDIAN
900     u.i[0] = read_data_byte();
901     u.i[1] = read_data_byte();
902     u.i[2] = read_data_byte();
903     u.i[3] = read_data_byte();
904     u.i[4] = read_data_byte();
905     u.i[5] = read_data_byte();
906     u.i[6] = read_data_byte();
907     u.i[7] = read_data_byte();
908 #else
909     u.i[7] = read_data_byte();
910     u.i[6] = read_data_byte();
911     u.i[5] = read_data_byte();
912     u.i[4] = read_data_byte();
913     u.i[3] = read_data_byte();
914     u.i[2] = read_data_byte();
915     u.i[1] = read_data_byte();
916     u.i[0] = read_data_byte();
917 #endif
918     return u.f;
919 }
920 
write_f64(double f)921 void write_f64(double f)
922 {   float64u u;
923     u.f = f;
924 #ifdef LITTLEENDIAN
925     write_byte(u.i[0], "part of double");
926     write_byte(u.i[1], "part of double");
927     write_byte(u.i[2], "part of double");
928     write_byte(u.i[3], "part of double");
929     write_byte(u.i[4], "part of double");
930     write_byte(u.i[5], "part of double");
931     write_byte(u.i[6], "part of double");
932     write_byte(u.i[7], "part of double");
933 #else
934     write_byte(u.i[7], "part of double");
935     write_byte(u.i[6], "part of double");
936     write_byte(u.i[5], "part of double");
937     write_byte(u.i[4], "part of double");
938     write_byte(u.i[3], "part of double");
939     write_byte(u.i[2], "part of double");
940     write_byte(u.i[1], "part of double");
941     write_byte(u.i[0], "part of double");
942 #endif
943 }
944 
945 
946 #ifdef HAVE_SOFTFLOAT
read_f128()947 float128_t read_f128()
948 {   float128_t r;
949 #ifdef LITTLEENDIAN
950     r.v[0] = read_u64();
951     r.v[1] = read_u64();
952 #else
953     r.v[1] = read_u64();
954     r.v[0] = read_u64();
955 #endif
956     return r;
957 }
958 
write_f128(float128_t f)959 void write_f128(float128_t f)
960 {
961 #ifdef LITTLEENDIAN
962     write_u64(f.v[0]);
963     write_u64(f.v[1]);
964 #else
965     write_u64(f.v[1]);
966     write_u64(f.v[0]);
967 #endif
968 }
969 #endif // HAVE_SOFTFLOAT
970 
971 // At times I need to read and write values that are the entrypoints of
972 // functions that are defined in the kernel. I do this by referring back to
973 // the tables that originally set them up. A bad feature of the scheme I use
974 // here is that it makes serialized files that refer to functions specific
975 // to the particular ordering etc in those tables. If changes have been
976 // bade since the data was written things could fail. I will deal with that
977 // by using a (simple) checksum on the tables and insisting it matches as
978 // between reader and writer. The checksum is a 64-bit CRC - here is some
979 // code that computes same, starting with its author and copyright
980 // information:
981 
982 /* Redis uses the CRC64 variant with "Jones" coefficients and init value of 0.
983  *
984  * Specification of this CRC64 variant follows:
985  * Name: crc-64-jones
986  * Width: 64 bites
987  * Poly: 0xad93d23594c935a9
988  * Reflected In: True
989  * Xor_In: 0xffffffffffffffff
990  * Reflected_Out: True
991  * Xor_Out: 0x0
992  * Check("123456789"): 0xe9c6d914c4b8d9ca
993  *
994  * Copyright (c) 2012, Salvatore Sanfilippo <antirez at gmail dot com>
995  * All rights reserved.
996  *
997  * Redistribution and use in source and binary forms, with or without
998  * modification, are permitted provided that the following conditions are met:
999  *
1000  *   * Redistributions of source code must retain the above copyright notice,
1001  *     this list of conditions and the following disclaimer.
1002  *   * Redistributions in binary form must reproduce the above copyright
1003  *     notice, this list of conditions and the following disclaimer in the
1004  *     documentation and/or other materials provided with the distribution.
1005  *   * Neither the name of Redis nor the names of its contributors may be used
1006  *     to endorse or promote products derived from this software without
1007  *     specific prior written permission.
1008  *
1009  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
1010  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1011  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1012  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
1013  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
1014  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
1015  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
1016  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
1017  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
1018  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1019  * POSSIBILITY OF SUCH DAMAGE. */
1020 
1021 // #define __STDC_CONSTANT_MACROS 1
1022 // #define __STDC_FORMAT_MACROS
1023 // #include <cstdint>
1024 // Code layout adjusted bc ACN (using astyle) plus adaptations to fit C++
1025 // rather than C.
1026 
1027 static const uint64_t crc64_tab[256] =
1028 {   UINT64_C(0x0000000000000000), UINT64_C(0x7ad870c830358979),
1029     UINT64_C(0xf5b0e190606b12f2), UINT64_C(0x8f689158505e9b8b),
1030     UINT64_C(0xc038e5739841b68f), UINT64_C(0xbae095bba8743ff6),
1031     UINT64_C(0x358804e3f82aa47d), UINT64_C(0x4f50742bc81f2d04),
1032     UINT64_C(0xab28ecb46814fe75), UINT64_C(0xd1f09c7c5821770c),
1033     UINT64_C(0x5e980d24087fec87), UINT64_C(0x24407dec384a65fe),
1034     UINT64_C(0x6b1009c7f05548fa), UINT64_C(0x11c8790fc060c183),
1035     UINT64_C(0x9ea0e857903e5a08), UINT64_C(0xe478989fa00bd371),
1036     UINT64_C(0x7d08ff3b88be6f81), UINT64_C(0x07d08ff3b88be6f8),
1037     UINT64_C(0x88b81eabe8d57d73), UINT64_C(0xf2606e63d8e0f40a),
1038     UINT64_C(0xbd301a4810ffd90e), UINT64_C(0xc7e86a8020ca5077),
1039     UINT64_C(0x4880fbd87094cbfc), UINT64_C(0x32588b1040a14285),
1040     UINT64_C(0xd620138fe0aa91f4), UINT64_C(0xacf86347d09f188d),
1041     UINT64_C(0x2390f21f80c18306), UINT64_C(0x594882d7b0f40a7f),
1042     UINT64_C(0x1618f6fc78eb277b), UINT64_C(0x6cc0863448deae02),
1043     UINT64_C(0xe3a8176c18803589), UINT64_C(0x997067a428b5bcf0),
1044     UINT64_C(0xfa11fe77117cdf02), UINT64_C(0x80c98ebf2149567b),
1045     UINT64_C(0x0fa11fe77117cdf0), UINT64_C(0x75796f2f41224489),
1046     UINT64_C(0x3a291b04893d698d), UINT64_C(0x40f16bccb908e0f4),
1047     UINT64_C(0xcf99fa94e9567b7f), UINT64_C(0xb5418a5cd963f206),
1048     UINT64_C(0x513912c379682177), UINT64_C(0x2be1620b495da80e),
1049     UINT64_C(0xa489f35319033385), UINT64_C(0xde51839b2936bafc),
1050     UINT64_C(0x9101f7b0e12997f8), UINT64_C(0xebd98778d11c1e81),
1051     UINT64_C(0x64b116208142850a), UINT64_C(0x1e6966e8b1770c73),
1052     UINT64_C(0x8719014c99c2b083), UINT64_C(0xfdc17184a9f739fa),
1053     UINT64_C(0x72a9e0dcf9a9a271), UINT64_C(0x08719014c99c2b08),
1054     UINT64_C(0x4721e43f0183060c), UINT64_C(0x3df994f731b68f75),
1055     UINT64_C(0xb29105af61e814fe), UINT64_C(0xc849756751dd9d87),
1056     UINT64_C(0x2c31edf8f1d64ef6), UINT64_C(0x56e99d30c1e3c78f),
1057     UINT64_C(0xd9810c6891bd5c04), UINT64_C(0xa3597ca0a188d57d),
1058     UINT64_C(0xec09088b6997f879), UINT64_C(0x96d1784359a27100),
1059     UINT64_C(0x19b9e91b09fcea8b), UINT64_C(0x636199d339c963f2),
1060     UINT64_C(0xdf7adabd7a6e2d6f), UINT64_C(0xa5a2aa754a5ba416),
1061     UINT64_C(0x2aca3b2d1a053f9d), UINT64_C(0x50124be52a30b6e4),
1062     UINT64_C(0x1f423fcee22f9be0), UINT64_C(0x659a4f06d21a1299),
1063     UINT64_C(0xeaf2de5e82448912), UINT64_C(0x902aae96b271006b),
1064     UINT64_C(0x74523609127ad31a), UINT64_C(0x0e8a46c1224f5a63),
1065     UINT64_C(0x81e2d7997211c1e8), UINT64_C(0xfb3aa75142244891),
1066     UINT64_C(0xb46ad37a8a3b6595), UINT64_C(0xceb2a3b2ba0eecec),
1067     UINT64_C(0x41da32eaea507767), UINT64_C(0x3b024222da65fe1e),
1068     UINT64_C(0xa2722586f2d042ee), UINT64_C(0xd8aa554ec2e5cb97),
1069     UINT64_C(0x57c2c41692bb501c), UINT64_C(0x2d1ab4dea28ed965),
1070     UINT64_C(0x624ac0f56a91f461), UINT64_C(0x1892b03d5aa47d18),
1071     UINT64_C(0x97fa21650afae693), UINT64_C(0xed2251ad3acf6fea),
1072     UINT64_C(0x095ac9329ac4bc9b), UINT64_C(0x7382b9faaaf135e2),
1073     UINT64_C(0xfcea28a2faafae69), UINT64_C(0x8632586aca9a2710),
1074     UINT64_C(0xc9622c4102850a14), UINT64_C(0xb3ba5c8932b0836d),
1075     UINT64_C(0x3cd2cdd162ee18e6), UINT64_C(0x460abd1952db919f),
1076     UINT64_C(0x256b24ca6b12f26d), UINT64_C(0x5fb354025b277b14),
1077     UINT64_C(0xd0dbc55a0b79e09f), UINT64_C(0xaa03b5923b4c69e6),
1078     UINT64_C(0xe553c1b9f35344e2), UINT64_C(0x9f8bb171c366cd9b),
1079     UINT64_C(0x10e3202993385610), UINT64_C(0x6a3b50e1a30ddf69),
1080     UINT64_C(0x8e43c87e03060c18), UINT64_C(0xf49bb8b633338561),
1081     UINT64_C(0x7bf329ee636d1eea), UINT64_C(0x012b592653589793),
1082     UINT64_C(0x4e7b2d0d9b47ba97), UINT64_C(0x34a35dc5ab7233ee),
1083     UINT64_C(0xbbcbcc9dfb2ca865), UINT64_C(0xc113bc55cb19211c),
1084     UINT64_C(0x5863dbf1e3ac9dec), UINT64_C(0x22bbab39d3991495),
1085     UINT64_C(0xadd33a6183c78f1e), UINT64_C(0xd70b4aa9b3f20667),
1086     UINT64_C(0x985b3e827bed2b63), UINT64_C(0xe2834e4a4bd8a21a),
1087     UINT64_C(0x6debdf121b863991), UINT64_C(0x1733afda2bb3b0e8),
1088     UINT64_C(0xf34b37458bb86399), UINT64_C(0x8993478dbb8deae0),
1089     UINT64_C(0x06fbd6d5ebd3716b), UINT64_C(0x7c23a61ddbe6f812),
1090     UINT64_C(0x3373d23613f9d516), UINT64_C(0x49aba2fe23cc5c6f),
1091     UINT64_C(0xc6c333a67392c7e4), UINT64_C(0xbc1b436e43a74e9d),
1092     UINT64_C(0x95ac9329ac4bc9b5), UINT64_C(0xef74e3e19c7e40cc),
1093     UINT64_C(0x601c72b9cc20db47), UINT64_C(0x1ac40271fc15523e),
1094     UINT64_C(0x5594765a340a7f3a), UINT64_C(0x2f4c0692043ff643),
1095     UINT64_C(0xa02497ca54616dc8), UINT64_C(0xdafce7026454e4b1),
1096     UINT64_C(0x3e847f9dc45f37c0), UINT64_C(0x445c0f55f46abeb9),
1097     UINT64_C(0xcb349e0da4342532), UINT64_C(0xb1eceec59401ac4b),
1098     UINT64_C(0xfebc9aee5c1e814f), UINT64_C(0x8464ea266c2b0836),
1099     UINT64_C(0x0b0c7b7e3c7593bd), UINT64_C(0x71d40bb60c401ac4),
1100     UINT64_C(0xe8a46c1224f5a634), UINT64_C(0x927c1cda14c02f4d),
1101     UINT64_C(0x1d148d82449eb4c6), UINT64_C(0x67ccfd4a74ab3dbf),
1102     UINT64_C(0x289c8961bcb410bb), UINT64_C(0x5244f9a98c8199c2),
1103     UINT64_C(0xdd2c68f1dcdf0249), UINT64_C(0xa7f41839ecea8b30),
1104     UINT64_C(0x438c80a64ce15841), UINT64_C(0x3954f06e7cd4d138),
1105     UINT64_C(0xb63c61362c8a4ab3), UINT64_C(0xcce411fe1cbfc3ca),
1106     UINT64_C(0x83b465d5d4a0eece), UINT64_C(0xf96c151de49567b7),
1107     UINT64_C(0x76048445b4cbfc3c), UINT64_C(0x0cdcf48d84fe7545),
1108     UINT64_C(0x6fbd6d5ebd3716b7), UINT64_C(0x15651d968d029fce),
1109     UINT64_C(0x9a0d8ccedd5c0445), UINT64_C(0xe0d5fc06ed698d3c),
1110     UINT64_C(0xaf85882d2576a038), UINT64_C(0xd55df8e515432941),
1111     UINT64_C(0x5a3569bd451db2ca), UINT64_C(0x20ed197575283bb3),
1112     UINT64_C(0xc49581ead523e8c2), UINT64_C(0xbe4df122e51661bb),
1113     UINT64_C(0x3125607ab548fa30), UINT64_C(0x4bfd10b2857d7349),
1114     UINT64_C(0x04ad64994d625e4d), UINT64_C(0x7e7514517d57d734),
1115     UINT64_C(0xf11d85092d094cbf), UINT64_C(0x8bc5f5c11d3cc5c6),
1116     UINT64_C(0x12b5926535897936), UINT64_C(0x686de2ad05bcf04f),
1117     UINT64_C(0xe70573f555e26bc4), UINT64_C(0x9ddd033d65d7e2bd),
1118     UINT64_C(0xd28d7716adc8cfb9), UINT64_C(0xa85507de9dfd46c0),
1119     UINT64_C(0x273d9686cda3dd4b), UINT64_C(0x5de5e64efd965432),
1120     UINT64_C(0xb99d7ed15d9d8743), UINT64_C(0xc3450e196da80e3a),
1121     UINT64_C(0x4c2d9f413df695b1), UINT64_C(0x36f5ef890dc31cc8),
1122     UINT64_C(0x79a59ba2c5dc31cc), UINT64_C(0x037deb6af5e9b8b5),
1123     UINT64_C(0x8c157a32a5b7233e), UINT64_C(0xf6cd0afa9582aa47),
1124     UINT64_C(0x4ad64994d625e4da), UINT64_C(0x300e395ce6106da3),
1125     UINT64_C(0xbf66a804b64ef628), UINT64_C(0xc5bed8cc867b7f51),
1126     UINT64_C(0x8aeeace74e645255), UINT64_C(0xf036dc2f7e51db2c),
1127     UINT64_C(0x7f5e4d772e0f40a7), UINT64_C(0x05863dbf1e3ac9de),
1128     UINT64_C(0xe1fea520be311aaf), UINT64_C(0x9b26d5e88e0493d6),
1129     UINT64_C(0x144e44b0de5a085d), UINT64_C(0x6e963478ee6f8124),
1130     UINT64_C(0x21c640532670ac20), UINT64_C(0x5b1e309b16452559),
1131     UINT64_C(0xd476a1c3461bbed2), UINT64_C(0xaeaed10b762e37ab),
1132     UINT64_C(0x37deb6af5e9b8b5b), UINT64_C(0x4d06c6676eae0222),
1133     UINT64_C(0xc26e573f3ef099a9), UINT64_C(0xb8b627f70ec510d0),
1134     UINT64_C(0xf7e653dcc6da3dd4), UINT64_C(0x8d3e2314f6efb4ad),
1135     UINT64_C(0x0256b24ca6b12f26), UINT64_C(0x788ec2849684a65f),
1136     UINT64_C(0x9cf65a1b368f752e), UINT64_C(0xe62e2ad306bafc57),
1137     UINT64_C(0x6946bb8b56e467dc), UINT64_C(0x139ecb4366d1eea5),
1138     UINT64_C(0x5ccebf68aecec3a1), UINT64_C(0x2616cfa09efb4ad8),
1139     UINT64_C(0xa97e5ef8cea5d153), UINT64_C(0xd3a62e30fe90582a),
1140     UINT64_C(0xb0c7b7e3c7593bd8), UINT64_C(0xca1fc72bf76cb2a1),
1141     UINT64_C(0x45775673a732292a), UINT64_C(0x3faf26bb9707a053),
1142     UINT64_C(0x70ff52905f188d57), UINT64_C(0x0a2722586f2d042e),
1143     UINT64_C(0x854fb3003f739fa5), UINT64_C(0xff97c3c80f4616dc),
1144     UINT64_C(0x1bef5b57af4dc5ad), UINT64_C(0x61372b9f9f784cd4),
1145     UINT64_C(0xee5fbac7cf26d75f), UINT64_C(0x9487ca0fff135e26),
1146     UINT64_C(0xdbd7be24370c7322), UINT64_C(0xa10fceec0739fa5b),
1147     UINT64_C(0x2e675fb4576761d0), UINT64_C(0x54bf2f7c6752e8a9),
1148     UINT64_C(0xcdcf48d84fe75459), UINT64_C(0xb71738107fd2dd20),
1149     UINT64_C(0x387fa9482f8c46ab), UINT64_C(0x42a7d9801fb9cfd2),
1150     UINT64_C(0x0df7adabd7a6e2d6), UINT64_C(0x772fdd63e7936baf),
1151     UINT64_C(0xf8474c3bb7cdf024), UINT64_C(0x829f3cf387f8795d),
1152     UINT64_C(0x66e7a46c27f3aa2c), UINT64_C(0x1c3fd4a417c62355),
1153     UINT64_C(0x935745fc4798b8de), UINT64_C(0xe98f353477ad31a7),
1154     UINT64_C(0xa6df411fbfb21ca3), UINT64_C(0xdc0731d78f8795da),
1155     UINT64_C(0x536fa08fdfd90e51), UINT64_C(0x29b7d047efec8728),
1156 };
1157 
crc64(uint64_t crc,const void * buf,size_t size)1158 uint64_t crc64(uint64_t crc, const void *buf, size_t size)
1159 {   const std::uint8_t *p = reinterpret_cast<const std::uint8_t *>(buf);
1160     while (size-- != 0)
1161         crc = crc64_tab[static_cast<std::uint8_t>(crc) ^ *p++] ^ (crc >> 8);
1162     return crc;
1163 }
1164 
1165 /* Test main */
1166 #ifdef TEST_MAIN
1167 
1168 #include <cstdio>
main()1169 int main()
1170 {   std::printf("e9c6d914c4b8d9ca == %016" PRIx64 "\n",
1171                 crc64(0,(const std::uint8_t *)"123456789",9));
1172     return 0;
1173 }
1174 
1175 #endif
1176 
1177 // [End of crc64 source code]
1178 
1179 std::vector<no_args *> codepointers0;
1180 std::vector<one_arg *> codepointers1;
1181 std::vector<two_args *> codepointers2;
1182 std::vector<three_args *> codepointers3;
1183 std::vector<fourup_args *> codepointers4up;
1184 std::unordered_map<no_args *,size_t> codehash0;
1185 std::unordered_map<one_arg *,size_t> codehash1;
1186 std::unordered_map<two_args *,size_t> codehash2;
1187 std::unordered_map<three_args *,size_t> codehash3;
1188 std::unordered_map<fourup_args *,size_t> codehash4up;
1189 
insert_codepointer0(no_args * x)1190 bool insert_codepointer0(no_args *x)
1191 {   if (codehash0.count(x) == 0)
1192     {   codehash0[x] = codepointers0.size();
1193         codepointers0.push_back(x);
1194         return true;
1195     }
1196     else return false;
1197 }
1198 
insert_codepointer1(one_arg * x)1199 bool insert_codepointer1(one_arg *x)
1200 {   if (codehash1.count(x) == 0)
1201     {   codehash1[x] = codepointers1.size();
1202         codepointers1.push_back(x);
1203         return true;
1204     }
1205     else return false;
1206 }
1207 
insert_codepointer2(two_args * x)1208 bool insert_codepointer2(two_args *x)
1209 {   if (codehash2.count(x) == 0)
1210     {   codehash2[x] = codepointers2.size();
1211         codepointers2.push_back(x);
1212         return true;
1213     }
1214     else return false;
1215 }
1216 
insert_codepointer3(three_args * x)1217 bool insert_codepointer3(three_args *x)
1218 {   if (codehash3.count(x) == 0)
1219     {   codehash3[x] = codepointers3.size();
1220         codepointers3.push_back(x);
1221         return true;
1222     }
1223     else return false;
1224 }
1225 
insert_codepointer4up(fourup_args * x)1226 bool insert_codepointer4up(fourup_args *x)
1227 {   if (codehash4up.count(x) == 0)
1228     {   codehash4up[x] = codepointers4up.size();
1229         codepointers4up.push_back(x);
1230         return true;
1231     }
1232     else return false;
1233 }
1234 
use_setup(uint64_t crc,const setup_type * p)1235 uint64_t use_setup(uint64_t crc, const setup_type *p)
1236 {   while (p->name != nullptr)
1237     {   unsigned char n = 0;
1238         if (insert_codepointer0(p->zero)) n += 1;
1239         if (insert_codepointer1(p->one)) n += 2;
1240         if (insert_codepointer2(p->two)) n += 4;
1241         if (insert_codepointer3(p->three)) n += 8;
1242         if (insert_codepointer4up(p->fourup)) n += 16;
1243         crc = crc64(crc, &n, 1);
1244         crc = crc64(crc, p->name, std::strlen(p->name));
1245         p++;
1246     }
1247     return crc;
1248 }
1249 
1250 uint64_t function_crc = 0;
1251 
set_up_function_tables()1252 void set_up_function_tables()
1253 {   uint64_t crc = 0;
1254     codehash0.clear();
1255     codehash1.clear();
1256     codehash2.clear();
1257     codehash3.clear();
1258     codehash4up.clear();
1259     codepointers0.clear();
1260     codepointers1.clear();
1261     codepointers2.clear();
1262     codepointers3.clear();
1263     codepointers4up.clear();
1264 // The code here must find all the function addresses that are built
1265 // into CSL that might legitimately end up within a heap image. The
1266 // code sets up a 64-bit CRC code this is intended to be a signature
1267 // of just what is used, and so can help ensure that a heap image dumped
1268 // buy one system does not get re-loaded by an incompatible one.
1269 // Each entrypoint is allocated a sequence number and everything is
1270 // collected both in a hash table (codehash) that can map code-pointers
1271 // to index values, and a table (codepointers) that is an indexable array
1272 // of the entrypoints. For Reduce there are somewhat under
1273 // 4000 pointers to handle here, so costs are not too severe.
1274     for (entry_point0 *p = &entries_table0[1]; p->p!=nullptr; p++)
1275     {   insert_codepointer0(p->p);
1276         crc = crc64(crc, p->s, std::strlen(p->s));
1277     }
1278     for (entry_point1 *p = &entries_table1[1]; p->p!=nullptr; p++)
1279     {   insert_codepointer1(p->p);
1280         crc = crc64(crc, p->s, std::strlen(p->s));
1281     }
1282     for (entry_point2 *p = &entries_table2[1]; p->p!=nullptr; p++)
1283     {   insert_codepointer2(p->p);
1284         crc = crc64(crc, p->s, std::strlen(p->s));
1285     }
1286     for (entry_point3 *p = &entries_table3[1]; p->p!=nullptr; p++)
1287     {   insert_codepointer3(p->p);
1288         crc = crc64(crc, p->s, std::strlen(p->s));
1289     }
1290     for (entry_point4up *p = &entries_table4up[1]; p->p!=nullptr; p++)
1291     {   insert_codepointer4up(p->p);
1292         crc = crc64(crc, p->s, std::strlen(p->s));
1293     }
1294     for (entry_point1 *p = &entries_tableio[1]; p->p!=nullptr; p++)
1295     {   insert_codepointer1(p->p);
1296         crc = crc64(crc, p->s, std::strlen(p->s));
1297     }
1298     const setup_type **p = setup_tables;
1299     while (*p != nullptr) crc = use_setup(crc, *p++);
1300     p++;  // setup_tables is in two parts, separated by a nullptr.
1301     while (*p != nullptr) crc = use_setup(crc, *p++);
1302 
1303     function_crc = crc;
1304 }
1305 
read_function0()1306 no_args *read_function0()
1307 {   uint64_t handle = read_u64();
1308     if (handle >= codepointers0.size())
1309     {   std::fprintf(stderr, "Invalid code handle read (%" PRIu64 " / %" PRIx64 ")\n",
1310                 handle, handle);
1311         my_abort("bad code handle");
1312     }
1313     return codepointers0[handle];
1314 }
1315 
read_function1()1316 one_arg *read_function1()
1317 {   uint64_t handle = read_u64();
1318     if (handle >= codepointers1.size())
1319     {   std::fprintf(stderr, "Invalid code handle read (%" PRIu64 " / %" PRIx64 ")\n",
1320                 handle, handle);
1321         my_abort("bad code handle");
1322     }
1323     return codepointers1[handle];
1324 }
1325 
read_function2()1326 two_args *read_function2()
1327 {   uint64_t handle = read_u64();
1328     if (handle >= codepointers2.size())
1329     {   std::fprintf(stderr, "Invalid code handle read (%" PRIu64 " / %" PRIx64 ")\n",
1330                 handle, handle);
1331         my_abort("bad code handle");
1332     }
1333     return codepointers2[handle];
1334 }
1335 
read_function3()1336 three_args *read_function3()
1337 {   uint64_t handle = read_u64();
1338     if (handle >= codepointers3.size())
1339     {   std::fprintf(stderr, "Invalid code handle read (%" PRIu64 " / %" PRIx64 ")\n",
1340                 handle, handle);
1341         my_abort("bad code handle");
1342     }
1343     return codepointers3[handle];
1344 }
1345 
read_function4up()1346 fourup_args *read_function4up()
1347 {   uint64_t handle = read_u64();
1348     if (handle >= codepointers4up.size())
1349     {   std::fprintf(stderr, "Invalid code handle read (%" PRIu64 " / %" PRIx64 ")\n",
1350                 handle, handle);
1351         my_abort("bad code handle");
1352     }
1353     return codepointers4up[handle];
1354 }
1355 
write_function0(no_args * p)1356 void write_function0(no_args *p)
1357 {   size_t handle = codehash0[p];
1358     write_u64(handle);
1359 }
1360 
write_function1(one_arg * p)1361 void write_function1(one_arg *p)
1362 {   size_t handle = codehash1[p];
1363     write_u64(handle);
1364 }
1365 
write_function2(two_args * p)1366 void write_function2(two_args *p)
1367 {   size_t handle = codehash2[p];
1368     write_u64(handle);
1369 }
1370 
write_function3(three_args * p)1371 void write_function3(three_args *p)
1372 {   size_t handle = codehash3[p];
1373     write_u64(handle);
1374 }
1375 
write_function4up(fourup_args * p)1376 void write_function4up(fourup_args *p)
1377 {   size_t handle = codehash4up[p];
1378     write_u64(handle);
1379 }
1380 
1381 // In places here I need to find the start of a tagged vector-like
1382 // object which may be tagged with either TAG_VECTOR or TAG_NUMBERS
1383 // or possibly even TAG_FLOAT. This messy macro masks off the tag bits
1384 // and adjusts to allow for the header word at the start of the object.
1385 
1386 #define start_contents(p) (((uintptr_t)(p) & ~(uintptr_t)7) + CELL)
1387 
1388 // In places where the contents of an array will be 64-bit items and I
1389 // am running on a 32-bit machine I need to pad by 32-bits so that the
1390 // object starts | header32 | padding | 64-bit data ... |
1391 
1392 #define start_contents64(p) (((uintptr_t)(p) & ~(uintptr_t)7) + 8)
1393 
1394 // Within serial_read there are a number of places where I have to
1395 // take special care to be garbage-collection safe. This macro encapsulates
1396 // the rather ugly scheme that I use. Note that p is a raw pointer to
1397 // somewhere within the object that pbase identifies (or as a special
1398 // case p=&r and pbase is fixnum_of_int(0)). The protected statement must
1399 // not assign to any of the variables that are stacked here! It will
1400 // usually be a good idea to go "GC_PROTECT(prev = ...);".
1401 //
1402 // When I have a conservative garbage collector this complication will
1403 // become unnecessary!
1404 
1405 #define GC_PROTECT(stmt)                                \
1406     do                                                  \
1407     {   Save save(r, s, pbase, b);                      \
1408         ip = reinterpret_cast<LispObject>(p) - pbase;   \
1409         stmt;                                           \
1410         save.restore(r, s, pbase, b);                   \
1411         p = reinterpret_cast<LispObject *>(pbase + ip); \
1412     } while (0)
1413 
1414 
1415 // This code will need to be able to report failure if either
1416 // the underlying byte-stream is corrupted or if storage allocation
1417 // here fails. It also needs to be safe against garbage collection.
1418 
serial_read()1419 LispObject serial_read()
1420 {   LispObject *p;    // a pointer to where to put the next item
1421     LispObject r,     // result of the entire reading process
1422                pbase, // needed to make the code GC safe
1423                ip,    // ditto
1424                prev,  // recent object read, for use with SER_DUP
1425                w,     // working variable
1426                s,     // a (linked) stack used with vectors (and symbols
1427                       // if the SER_RAWSYMBOL opcode is encountered).
1428                b;     // a back-pointer chain
1429     int c;
1430     prev = pbase = r = s = b = fixnum_of_int(0);
1431     p = &r;
1432     uint64_t opcode_repeats = 0, repeat_arg = 0;
1433     bool repeat_arg_ready = false;
1434 down:
1435 // read_byte() needs to read from the stream representation of things
1436 // and return a code... In this initial sketch I will only need to look
1437 // at three cases. One is for CONS which is sort of obvious. The next is
1438 // VECTOR. This covers all the cases where there are pointers within the
1439 // object, including symbols and rational and complex numbers. The final
1440 // case is LEAF which will include immediate data such as FIXNUMS, but
1441 // alse big-numbers, strings and back-references to previously-read
1442 // structures.
1443 // If the next opcode is to be executed just once I need to read it.
1444     if (opcode_repeats == 0)
1445     {   c = read_opcode_byte();
1446         repeat_arg_ready = false;
1447     }
1448     else opcode_repeats--;
1449     switch (c & 0xe0)
1450     {   case SER_VARIOUS:
1451         case SER_LIST:
1452             switch (c)
1453             {   case SER_ILLEGAL:
1454 // If read_opcode_byte() fails it will return EOF and I am assuming that
1455 // has the value -1. I am further assuming that my machine is a twos
1456 // complement one and hence (-1) & 0xff will be 0xff, which is SER_ILLEGAL.
1457 // If I see that I will abandon reading and return in an error state.
1458                     return aerror("Failure in serialization");
1459                 case SER_REPEAT:
1460                     assert(opcode_repeats == 0);
1461 // If you prefix something with "SER_REPEAT nn" then the opcode you next
1462 // use will be used nn+3 times. If the opcode uses operands then they
1463 // will be read for each instance, so perhaps this will make most sense
1464 // for 1-byte codes. Consider when this will give an improvement: if a
1465 // single byte opcode is used only 3 times it will be good enough to
1466 // write it out repeatedly. Only when it will be used 4 or more times
1467 // will use of REPEAT save space.
1468 //   op op op op
1469 //   REPEAT <4> op
1470 // If I could support opcodes that took an integer follower replicated its
1471 // value as well as the opcode the first saving case (assuming that the
1472 // integers all fit in 1 byte) would be
1473 //   op arg op arg op arg
1474 //   REPEAT <3> op arg
1475 // and it is because of that that I arrange that I offset the argument to
1476 // REPEAT by 3.
1477 // The "3+" is because the data in the byte-stream is offset by that
1478 // amount because shorter runs do not benefit from use of SER_REPEAT.
1479                     opcode_repeats = 3 + read_u64();
1480 // Normally I would read the opcode byte at the label "down:", but here I
1481 // have just set the variable that disables that! Observe that the code
1482 // here treats "SER_REPEAT n" as a prefix put immediately before the opcode
1483 // that is to be repeated. The processing of that opcode must then
1484 // decrement opcode_repeats, and if it did not then you would just
1485 // get an infinite loop. So it is important to be just a bit careful that
1486 // a REPEAT is only seen where it is wanted! I am putting in assert statements
1487 // to police that!
1488                     c = read_opcode_byte();
1489                     goto down;
1490 
1491 // The various sub-cases op SER_LIST all build fragments of list with
1492 // various lengths, termination and extents of sharing. It seems hard to
1493 // common up the code as much as would be desirable, but I have at least
1494 // arranged thinhs such that for an opcode that create n CONS cells
1495 // the n bottom bits indicate which of those cells will have multiple
1496 // references and so need to go in the repeat table.
1497                 case SER_L_a:
1498                 case SER_L_A:
1499                     GC_PROTECT(prev = cons(fixnum_of_int(0), nil));
1500                     if (c & 1) reader_repeat_new(prev);
1501                     *(atomic<LispObject>*)p = prev;
1502                     pbase = prev;
1503                     p = reinterpret_cast<LispObject *>(vcaraddr(pbase));
1504                     goto down;
1505 
1506                 case SER_L_a_S:
1507                 case SER_L_A_S:
1508                     GC_PROTECT(prev = cons(b, fixnum_of_int(0)));
1509                     if (c & 1) reader_repeat_new(prev);
1510                     *(atomic<LispObject>*)p = b = prev;
1511                     pbase = b;
1512                     p = reinterpret_cast<LispObject *>(vcdraddr(b));
1513                     goto down;
1514 
1515                 case SER_L_aa:
1516                 case SER_L_aA:
1517                 case SER_L_Aa:
1518                 case SER_L_AA:
1519                     GC_PROTECT(prev = list2(nil, nil));
1520 // Here I need to set things up just as if I had CONS CONS NIL
1521 // where note that the old sequence CONS a b would create (CONS b a), ie
1522 // the CDR field is transmitted before the CAR one.
1523                     if (c & 1) reader_repeat_new(prev);
1524                     if (c & 2) reader_repeat_new(cdr(prev));
1525                     setcar(prev, b);
1526                     b = *(atomic<LispObject>*)p = prev;
1527                     pbase = cdr(b);
1528                     p = reinterpret_cast<LispObject *>(vcaraddr(pbase));
1529                     goto down;
1530 
1531                 case SER_L_aa_S:
1532                 case SER_L_aA_S:
1533                 case SER_L_Aa_S:
1534                 case SER_L_AA_S:
1535                     GC_PROTECT(prev = list2(nil, nil));
1536 // Here I need to set things up just as if I had CONS CONS in the
1537 // old model (ie L_a_S L_a_S in the new one!)
1538                     if (c & 1) reader_repeat_new(prev);
1539                     if (c & 2) reader_repeat_new(cdr(prev));
1540                     setcar(prev, b);
1541                     b = *(atomic<LispObject>*)p = prev;
1542                     pbase = cdr(b);
1543                     setcar(pbase, b);
1544                     b = pbase;
1545                     p = reinterpret_cast<LispObject *>(vcdraddr(pbase));
1546                     goto down;
1547 
1548                 case SER_L_aaa:
1549                 case SER_L_aaA:
1550                 case SER_L_aAa:
1551                 case SER_L_aAA:
1552                 case SER_L_Aaa:
1553                 case SER_L_AaA:
1554                 case SER_L_AAa:
1555                 case SER_L_AAA:
1556                     GC_PROTECT(prev = list3(nil, nil, nil));
1557                     if (c & 1) reader_repeat_new(prev);
1558                     if (c & 2) reader_repeat_new(cdr(prev));
1559                     if (c & 4) reader_repeat_new(cdr(cdr(prev)));
1560                     setcar(prev, b);
1561                     b = *(atomic<LispObject>*)p = prev;
1562                     pbase = cdr(b);
1563                     setcar(pbase, b);
1564                     b = pbase;
1565                     pbase = cdr(b);
1566                     p = reinterpret_cast<LispObject *>(vcaraddr(pbase));
1567                     goto down;
1568 
1569                 case SER_L_aaa_S:
1570                 case SER_L_aaA_S:
1571                 case SER_L_aAa_S:
1572                 case SER_L_aAA_S:
1573                 case SER_L_Aaa_S:
1574                 case SER_L_AaA_S:
1575                 case SER_L_AAa_S:
1576                 case SER_L_AAA_S:
1577                     GC_PROTECT(prev = list3(nil, nil, nil));
1578                     if (c & 1) reader_repeat_new(prev);
1579                     if (c & 2) reader_repeat_new(cdr(prev));
1580                     if (c & 4) reader_repeat_new(cdr(cdr(prev)));
1581                     setcar(prev, b);
1582                     b = *(atomic<LispObject>*)p = prev;
1583                     pbase = cdr(b);
1584                     setcar(pbase, b);
1585                     b = pbase;
1586                     pbase = cdr(b);
1587                     setcar(pbase, b);
1588                     b = pbase;
1589                     p = reinterpret_cast<LispObject *>(vcdraddr(pbase));
1590                     goto down;
1591 
1592                 case SER_L_aaaa:
1593                 case SER_L_Aaaa:
1594                     GC_PROTECT(prev = list4(nil, nil, nil, nil));
1595 // Note that for the longest sequence here only the start CONS cell
1596 // can be shared.
1597                     if (c & 1) reader_repeat_new(prev);
1598                     setcar(prev, b);
1599                     b = *(atomic<LispObject>*)p = prev;
1600                     pbase = cdr(b);
1601                     setcar(pbase, b);
1602                     b = pbase;
1603                     pbase = cdr(b);
1604                     setcar(pbase, b);
1605                     b = pbase;
1606                     pbase = cdr(pbase);
1607                     p = reinterpret_cast<LispObject *>(vcaraddr(pbase));
1608                     goto down;
1609 
1610                 case SER_L_aaaa_S:
1611                 case SER_L_Aaaa_S:
1612                     GC_PROTECT(prev = list4(nil, nil, nil, nil));
1613                     if (c & 1) reader_repeat_new(prev);
1614                     setcar(prev, b);
1615                     b = *(atomic<LispObject>*)p = prev;
1616                     pbase = cdr(b);
1617                     setcar(pbase, b);
1618                     b = pbase;
1619                     pbase = cdr(b);
1620                     setcar(pbase, b);
1621                     b = pbase;
1622                     pbase = cdr(pbase);
1623                     setcar(pbase, b);
1624                     b = pbase;
1625                     p = reinterpret_cast<LispObject *>(vcdraddr(pbase));
1626                     goto down;
1627 
1628                 case SER_BIGBACKREF:
1629 // I expect there to be a great many uses of back-references and that
1630 // could include runs if reference to the same item. But note that when
1631 // you may a back-reference that applies a move to front strategy and
1632 // so multiple successive references to the same item will (after the
1633 // first) be "BACKREF 1" and that is not the "big" case as present here.
1634                     assert(opcode_repeats == 0);
1635 // Back references with an offset from 1..64 are dealt with using special
1636 // compact opcodes. Here I have something that reaches further back. The main
1637 // opcode is followed by a sequence of bytes and if this represents the value
1638 // n then the offset denotes is 65+n.
1639 // The effect is that references 65 - 192 are represented as
1640 //     BIGBACKREF 128+(n-65)
1641 // then 193-16448 will be
1642 //     BIGBACKREF (n-65)/128 (128+(n-65)%128)
1643 // and so on using 7 bits per byte... up until I have used 8 bytes.
1644 // If one is needed beyond that it can be a final 8-bit value.
1645 // This allows for up to 2^64 back-references.
1646                     *(atomic<LispObject>*)p = reader_repeat_old(1 + 64 + read_u64());
1647                     goto up;
1648 
1649                 case SER_DUP:
1650 // Beware with SER_REPEAT that SER_DUP is a postfix to an opcode. I think
1651 // that REPEAT should not be used if DUP will.
1652                     assert(opcode_repeats == 0);
1653 // This is issued just after a SER_VECTOR (etc) code that will
1654 // have left pbase referring to the object just allocated.
1655                     reader_repeat_new(prev);
1656                     goto down;
1657 
1658                 case SER_POSFIXNUM:
1659 // This case reads a 64-bit value and construct either a fixnum or a boxnum
1660 // as relevant. If it creates a boxnum then that could possibly be a shared
1661 // object, and against the possibility of that I set pbase so that a
1662 // SER_DUP opcode can behave meaningfully. Note that this can make
1663 // a full 64-bit positive integer because it reads and processes its input
1664 // as an unsigned value.
1665 //
1666 // This is the first instance in this code of a case that will handle
1667 // SER_REPEAT, so I will put in a commentary about how it works.
1668 // First I will note that if I have repeat_arg_ready that will suppress
1669 // reading the operand. In all cases the operand will end up in
1670 // repeat_arg.
1671                     if (!repeat_arg_ready)
1672                     {   repeat_arg = read_u64();
1673                         repeat_arg_ready = true;
1674                     }
1675 // If I repeat a fixnum then all the repeated copied will end up EQ
1676 // while if I made them one at a time they could end up separate if
1677 // they needed to be bignums. Since they are immutable objects I do not
1678 // believe that should cause any trouble.
1679                     GC_PROTECT(prev = make_lisp_unsigned64(repeat_arg));
1680                     *(atomic<LispObject>*)p = prev;
1681                     goto up;
1682 
1683                 case SER_NEGFIXNUM:
1684 // Negative (small to medium-sized) integers are given a separate code here
1685 // beause packing then using sign-and-magnitude seems easier. The extra "-1"
1686 // here is both to avoid having the duplicated case of +0 and -0 and to
1687 // arrange that the set of values that pack into a given number of bytes
1688 // matches 2s complement. Eg with just 1 following byte the range goes from
1689 // -128 to +127 (rather than -127 to +127).
1690 // Note that the code that writes stuff out should only generate this
1691 // when the value concerned will fit within a 64-bit signed value.
1692                     if (!repeat_arg_ready)
1693                     {   repeat_arg = -1-read_u64();
1694                         repeat_arg_ready = true;
1695                     }
1696                     GC_PROTECT(prev = make_lisp_integer64(repeat_arg));
1697                     *(atomic<LispObject>*)p = prev;
1698                     goto up;
1699 
1700                 case SER_DUPRAWSYMBOL:
1701                 case SER_RAWSYMBOL:
1702 // SER_RAWSYMBOL is used while dumping complete heap-images. The opcode here
1703 // is followed by information to go into the header word of the symbol (various
1704 // flag bits such as whether the symbol is global or fluid), then a dump of
1705 // information about what goes in the function call and count components. After
1706 // that all the list-like components will be transmitted (much as if they were
1707 // elements in a vector). The key parts of this work using the same scheme as
1708 // for SER_LVECTOR.
1709                     assert(opcode_repeats == 0);
1710                     GC_PROTECT(prev =
1711                         get_basic_vector(TAG_SYMBOL, TYPE_SYMBOL, symhdr_length));
1712                     *(atomic<LispObject>*)p = w = prev;
1713                     if (c == SER_DUPRAWSYMBOL) reader_repeat_new(prev);
1714 // Note that the vector as created will have its LENGTH encoded in the
1715 // header, but for symbols that is incorrect so I need to re-write the
1716 // header wholesale here. Note that a symbol header has the normal tag for
1717 // headers in its low bits then two zero bits to indicate that it is
1718 // a symbol.
1719                     setheader(w, static_cast<Header>((read_u64()<<(Tw+4)) + TAG_HDR_IMMED));
1720 // I will first fill in the fields that hold binary data or pointers to
1721 // executable code.
1722                     qfn0(w) = read_function0();
1723                     qfn1(w) = read_function1();
1724                     qfn2(w) = read_function2();
1725                     qfn3(w) = read_function3();
1726                     qfn4up(w) = read_function4up();
1727                     {   uint64_t nn = read_u64();
1728                         qcountLow(w) = static_cast<uint32_t>(nn);
1729                         qcountHigh(w) = static_cast<uint32_t>(nn>>32);
1730                     }
1731 // Now to allow me to feel safe I will put NIL in all the other fields
1732 // on a provisional basis. They get their proper values later.
1733                     setvalue(w, nil);
1734                     setenv(w, nil);
1735                     setpname(w, nil);
1736                     setplist(w, nil);
1737                     setfastgets(w, nil);
1738                     setpackage(w, nil);
1739                     setvalue(w, b); // the back-pointer.
1740                     b = w;
1741                     {   const int PNAME_INDEX = pnameaddr(w) - valueaddr(w);
1742                         GC_PROTECT(prev = cons(fixnum_of_int(PNAME_INDEX), s));
1743                     }
1744                     s = prev;
1745                     prev = pbase = b;
1746                     p = reinterpret_cast<LispObject *>(pnameaddr(b));
1747                     goto down;
1748 
1749 
1750                 case SER_SYMBOL:
1751                 case SER_DUPSYMBOL:
1752                 case SER_GENSYM:
1753                 case SER_DUPGENSYM:
1754 // All of these cases are followed by a length marker and the the octets
1755 // making up the UTF-8 name of the symbol. If the "DUP" options is present
1756 // then the symbol must be entered in the table of items that are referenced
1757 // repeatedly. in the "GENSYM" case the name is the base-name of the gensym,
1758 // pergaps very often just "G", and the name read in will be set up as
1759 // if not yet printed, so a sequence number will be added leter.
1760                     assert(opcode_repeats == 0);
1761                     {   size_t len = read_u64();
1762                         boffop = 0;
1763 // HAHAHA - if BOFFO does not exist properly at this stage then I am in
1764 // deep trouble. But these opcodes will only be used at times I am
1765 // serializing for re-loading into a working Lisp. Note that the whole
1766 // issue of the interaction between serialization and a package system is
1767 // not thought through at present - things will be read in in the
1768 // current package (to the extent that such a concept exists or makes sense).
1769 // Well what I say above is not quite true after all. Serialization is used
1770 // for FASL files, so a FASL file that used a symbol with an absurdly
1771 // long name could lead to boffo overflow here, triggering garbage collection.
1772                         for (size_t i=0; i<len; i++)
1773                         {   if (boffop >=
1774                                 length_of_byteheader(vechdr(boffo))-CELL-8)
1775                             GC_PROTECT(packbyte(read_string_byte()));
1776                             else packbyte(read_string_byte());
1777                         }
1778                         if (c == SER_GENSYM || c == SER_DUPGENSYM)
1779                         {   GC_PROTECT(prev = copy_string(boffo, boffop));
1780                             GC_PROTECT(prev = Lgensym1(nil, prev));
1781                         }
1782                         else GC_PROTECT(prev = iintern(boffo, (int32_t)boffop, CP, 0));
1783                         *(atomic<LispObject>*)p = prev;
1784                         if (c == SER_DUPSYMBOL || c == SER_DUPGENSYM)
1785                             reader_repeat_new(prev);
1786                         goto up;
1787                     }
1788 
1789                 case SER_FLOAT28:
1790 // A 28-bit short float
1791                     assert(opcode_repeats == 0);
1792                     std::fprintf(stderr, "SER_FLOAT28 not coded yet\n");
1793                     my_abort("FLOAT28");
1794 
1795                 case SER_FLOAT32:
1796 // a 32-bit single float
1797                     assert(opcode_repeats == 0);
1798                     GC_PROTECT(prev = make_boxfloat(read_f32(), TYPE_SINGLE_FLOAT));
1799                     *(atomic<LispObject>*)p = prev;
1800                     goto up;
1801 
1802                 case SER_FLOAT64:
1803 // a 64-bit (long) float
1804                     assert(opcode_repeats == 0);
1805 // I can image the case of dumping a vector all of whose elements were the
1806 // value 0.0, and in the case supporting repeats here could be helpful.
1807 // But at present I think that will be an uncommon case with Reduce and so
1808 // I will give priority to other cases.
1809                     GC_PROTECT(prev = make_boxfloat(read_f64(), TYPE_DOUBLE_FLOAT));
1810                     *(atomic<LispObject>*)p = prev;
1811                     goto up;
1812 
1813 #ifdef HAVE_SOFTFLOAT
1814                 case SER_FLOAT128:
1815 // a 128-bit (double-length) float.
1816                     assert(opcode_repeats == 0);
1817                     GC_PROTECT(prev = make_boxfloat(0.0, TYPE_LONG_FLOAT));
1818                     long_float_val(prev) = read_f128();
1819                     *(atomic<LispObject>*)p = prev;
1820                     goto up;
1821 #endif // HAVE_SOFTFLOAT
1822 
1823                 case SER_CHARSPID:
1824 // A packed characters literal. Characters that are Basic Latin can be coded
1825 // here with just 2 bytes, so for instance 'A' is SER_CHAR/0x41. Codes up to
1826 // U+3fff come in 3 bytes and so on. Note that the encoding is NOT utf8 - it is
1827 // the variable length encoding.
1828 // SPIDs are alse encoded here. In each case they are with the low bits
1829 // shown here and I just send the rest of the data as a raw number.
1830 // It turns out that this case arises with a repeat rather often because the
1831 // "fastget" scheme means that rather a lot of symbols end up with vector of
1832 // length 64 attached to them and most of the vector contents will be
1833 // SPID_NOPROP. Well somewhat to my astonishment when I look at a freshly-
1834 // loaded bootstrapreduce I find that almost 3000 symbols have a fastget
1835 // table associated with them (so around 1.5Mbytes is taken up with those
1836 // vectors on a 64-bit machine, and that before zlib compression around 300K
1837 // may be used for them in the saved image file before I use SER_REPEAT.
1838 // If I use full Reduce the basic number of fastget-vectors is about the same.
1839 // As I load packages the number used can increase noticably.
1840                     if (!repeat_arg_ready)
1841                     {   repeat_arg = read_u64();
1842                         repeat_arg_ready = true;
1843                     }
1844                     prev = *(atomic<LispObject>*)p = (static_cast<LispObject>(repeat_arg)<<(Tw+2)) | TAG_HDR_IMMED;
1845                     goto up;
1846 
1847                 case SER_BITVEC:
1848                     assert(opcode_repeats == 0);
1849                     w = read_u64();
1850                     {   size_t len = CELL + (w + 7)/8; // length in bytes
1851                         GC_PROTECT(prev =
1852                             get_basic_vector(TAG_VECTOR, bitvechdr_(w), len));
1853                         *(atomic<LispObject>*)p = prev;
1854                         char *x = reinterpret_cast<char *>(&basic_celt(prev, 0));
1855                         for (size_t i=0; i<(size_t)w; i++)
1856                             *x++ = read_data_byte();
1857                         while (((intptr_t)x & 7) != 0) *x++ = 0;
1858                     }
1859                     goto up;
1860 
1861                 case SER_NIL3:
1862                     assert(opcode_repeats == 0);
1863                     opcode_repeats++;
1864                 case SER_NIL2:
1865                     assert(c == SER_NIL3 || opcode_repeats == 0);
1866                     opcode_repeats++;
1867                     c = SER_NIL;
1868                 case SER_NIL:
1869                     prev = *(atomic<LispObject>*)p = nil;
1870                     goto up;
1871 
1872                 case SER_END:
1873                     std::fprintf(stderr, "End of dump marker found - this is an error situation\n");
1874                     my_abort("truncated image file");
1875 
1876                 case SER_OPNEXT:
1877                     std::fprintf(stderr, "OPNEXT opcode out of place\n");
1878                     my_abort("bad image file");
1879 
1880                 case SER_spare_f6:
1881                 case SER_spare_f7:
1882                 case SER_spare_f8:
1883                 case SER_spare_f9:
1884                 case SER_spare_fa:
1885                 case SER_spare_fb:
1886                 case SER_spare_fc:
1887                 case SER_spare_fd:
1888                 case SER_spare_fe:
1889                 default:
1890                     std::fprintf(stderr, "Unimplemented/unknown reader opcode (a) %.2x\n", c);
1891                     my_abort("unknown code in image file");
1892             }
1893             break;
1894         case SER_LVECTOR:
1895 // One thing to observe here. If I have a vector that is a hash table using
1896 // EQ as its key then reading it in here will leave its entries all the right
1897 // values but not in the right places. My response to that is to
1898 // arrange that a potentially messed up hash table has type code TYPE_HASHX
1899 // rather than TYPE_HASH. The hash code accessing functions will check for
1900 // that, and if they find it they re-hash before use, restoring the key to
1901 // just TYPE_HASH. The consequence is that the rehashing work is not done
1902 // until and unless it is actually needed.
1903             assert(opcode_repeats == 0);
1904             {   int type = ((c & 0x1f) << (Tw + 2)) | (0x01 << Tw),
1905                     tag = is_number_header_full_test(type) ? TAG_NUMBERS :
1906                                                              TAG_VECTOR;
1907 // If I have a HASH object that is a huge vector then it will have
1908 // a top level INDEXVEC and all the sub-vectors will start off as
1909 // HASH. The adjustment here can set ALL of the sub-vectors to be
1910 // HASHX but when I re-hash I will probably only reset the first one
1911 // back to HASH. The same may well arise in the garbage collector.
1912                 if (type == TYPE_HASH) type = TYPE_HASHX;
1913 // The size here will be the number of Lisp items held in the vector, so
1914 // what I need to pass to get_basic_vector makes that into a byte count and
1915 // allows for the header word as well.
1916                 size_t n = read_u64();
1917                 GC_PROTECT(prev = get_basic_vector(tag, type, CELL*(n+1)));
1918                 w = *(atomic<LispObject>*)p = prev;
1919 // Note that the "vector" just created may be tagged with TAG_NUMBERS
1920 // rather than TAG_VECTOR, so I use the access macro "vselt" rather than
1921 // "elt" - and that survives whichever case I am in.
1922 // Now if I am on a 32-bit system and the vector uses a header word and then
1923 // and even number of words of data it will use a padder word to bring its
1924 // total size up to a 64-bit boundary. Tidy up that final word. This OUGHT
1925 // not to matter, but is still tidy.
1926                 size_t n1 = n;
1927 // In case there is a GC before I have finished filling in proper values
1928 // in the vector I will out in values that are at least safe.
1929                 for (size_t i=0; i<n1; i++) vselt(w, i) = fixnum_of_int(0);
1930 // If the vector does not have any content at all then I am now done.
1931                 if (n == 0) goto up;
1932                 if (is_mixed_header(vechdr(w))) n = 2;  // Ie elements 0, 1 and 2
1933                 else n--; // final element is at n-1
1934 // Vectors chain through the first entry. If a vector was empty so it did not
1935 // have a first entry to use here it would have needed to be dumped as a LEAF.
1936 // But a special additional issue is that if the vector omly has one item in it
1937 // then I must NOT set up back-pointers and the "s-stack" in quite the usual
1938 // manner...
1939                 if (n == 0)
1940                 {   p = reinterpret_cast<LispObject *>(&vselt(w, 0));
1941                     goto down;
1942                 }
1943                 vselt(w, 0) = b;
1944                 b = w;
1945                 GC_PROTECT(prev = cons(fixnum_of_int(n), s));
1946                 s = prev;
1947                 prev = pbase = b;
1948                 p = reinterpret_cast<LispObject *>(&vselt(b, n));
1949             }
1950             goto down;
1951 
1952         case SER_BACKREF0:
1953 // Investigation of bootstrapreduce.img shows a reasonable number of
1954 // repeat-runs of "SER_BACKREF0 <1>" to reference the top item in the
1955 // repeat heap. The efford involved in supporting SER_REPEAT to compress
1956 // such sequences is minimal here, so I do so.
1957             *(atomic<LispObject>*)p = reader_repeat_old(1 + (c & 0x1f));
1958             goto up;
1959 
1960         case SER_BACKREF1:
1961 // I do not view repeated instances of BACKREF1 as significant, but it is
1962 // so cheap to support that I will.
1963             *(atomic<LispObject>*)p = reader_repeat_old(1 + 32 + (c & 0x1f));
1964             goto up;
1965 
1966         case SER_FIXNUM:
1967             repeat_arg = c & 0x1f;
1968             if ((c & 0x10) != 0) repeat_arg |= (uint64_t)~0xf; // sign extend
1969             *(atomic<LispObject>*)p = fixnum_of_int((int64_t)repeat_arg);
1970             goto up;
1971 
1972         case SER_STRING:
1973 // String will be very much like BVECTOR except that because I expect it to be
1974 // an especially important case I pack a length code into the 5-bit field of
1975 // the opcode byte and the type information is implicit. This code only copes
1976 // with strings with length 1-32. The associated data is JUST the bytes
1977 // making up the string, with padding at the end.
1978             assert(opcode_repeats == 0);
1979             w = (c & 0x1f) + 1;
1980             GC_PROTECT(prev = get_basic_vector(TAG_VECTOR, TYPE_STRING_4, CELL+w));
1981             *(atomic<LispObject>*)p = prev;
1982             {   char *x = reinterpret_cast<char *>(&basic_celt(prev, 0));
1983                 for (size_t i=0; i<(size_t)w; i++) *x++ = read_string_byte();
1984 // Fill in end of the memory block with zero bytes so it is properly tidy.
1985 // This is needed so that comaprisons between strings and hash value
1986 // calculations are easier.
1987                 while (((intptr_t)x & 7) != 0) *x++ = 0;
1988             }
1989             goto up;
1990 
1991 // I had considered having a special opcode to deal with strings of length 0
1992 // or longer than 33, but in fact the general SER_BVECTOR code does just that
1993 // slighly more efficiently then I would otherwise manage. Observe that the
1994 // a SER_BVECTOR followed by 1 byte of length code copes with any vector
1995 // needing up to 127 words (ie 508 bytes) with just 2 bytes of control
1996 // information.
1997 
1998         case SER_BVECTOR:
1999             assert(opcode_repeats == 0);
2000 // The general case for vectors containing binary information is followed
2001 // by a length code that shows how many items there are in the vector.
2002 // This counts in the natural size for the vector.
2003 // At present vectors containing binary can never be "large".
2004             w = read_u64();
2005 // Here I have assembled 7 bits of type information in c. CCCCC comes from the
2006 // opcode. The header I want for my vector will be
2007 //     wwwwwwww....wwww CCC CC 10 g100
2008             {   Header type = ((c & 0x1f)<<(Tw+2)) | (0x3<<Tw),
2009                     tag = is_bignum_header(type) ? TAG_NUMBERS :
2010                                                    TAG_VECTOR;
2011                 if (vector_i8(type))
2012                 {   GC_PROTECT(prev = get_basic_vector(tag, type, CELL+w));
2013                     *(atomic<LispObject>*)p = prev;
2014                     unsigned char *x = reinterpret_cast<unsigned char *>(start_contents(prev));
2015                     if (is_string_header(type))
2016                         for (size_t i=0; i<(size_t)w; i++)
2017                             *x++ = read_string_byte();
2018                     else for (size_t i=0; i<(size_t)w; i++)
2019                         *x++ = read_data_byte();
2020                     while (((intptr_t)x & 7) != 0) *x++ = 0;
2021                 }
2022                 else if (vector_i32(type))
2023                 {   GC_PROTECT(prev = get_basic_vector(tag, type, CELL+4*w));
2024                     *(atomic<LispObject>*)p = prev;
2025                     uint32_t *x = (uint32_t *)start_contents(prev);
2026 // 32-bit integers are transmitted most significant byte first.
2027                     for (size_t i=0; i<(size_t)w; i++)
2028                     {   uint32_t q = read_data_byte() & 0xff;
2029                         q = (q << 8) | (read_data_byte() & 0xff);
2030                         q = (q << 8) | (read_data_byte() & 0xff);
2031                         *x++ = (q << 8) | (read_data_byte() & 0xff);
2032                     }
2033                     while (((intptr_t)x & 7) != 0) *x++ = 0;
2034                 }
2035                 else if (vector_f64(type))
2036                 {   GC_PROTECT(prev = get_basic_vector(tag, type, CELL+8*w));
2037                     *(atomic<LispObject>*)p = prev;
2038                     double *x = reinterpret_cast<double *>(start_contents64(prev));
2039 // There has to be a padder word in these objects on a 32-bit machine so
2040 // that the data is 64-bit aligned. Clean it up.
2041                     if (!SIXTY_FOUR_BIT) *(int32_t *)start_contents(prev) = 0;
2042                     for (size_t i=0; i<(size_t)w; i++) *x++ = read_f64();
2043                 }
2044                 else if (vector_i16(type))
2045                 {   GC_PROTECT(prev = get_basic_vector(tag, type, CELL+2*w));
2046                     *(atomic<LispObject>*)p = prev;
2047                     std::uint16_t *x = reinterpret_cast<std::uint16_t *>(start_contents(prev));
2048                     for (size_t i=0; i<(size_t)w; i++)
2049                     {   uint32_t q = read_data_byte() & 0xff;
2050                         *x++ = (q << 8) | (read_data_byte() & 0xff);
2051                     }
2052                     while (((intptr_t)x & 7) != 0) *x++ = 0;
2053                 }
2054                 else if (vector_i64(type))
2055                 {   GC_PROTECT(prev = get_basic_vector(tag, type, CELL+8*w));
2056                     *(atomic<LispObject>*)p = prev;
2057                     uint64_t *x = (uint64_t *)start_contents64(prev);
2058                     if (!SIXTY_FOUR_BIT) *(int32_t *)start_contents(prev) = 0;
2059                     for (size_t i=0; i<(size_t)w; i++)
2060                     {   uint64_t q = read_data_byte() & 0xff;
2061                         q = (q << 8) | (read_data_byte() & 0xff);
2062                         q = (q << 8) | (read_data_byte() & 0xff);
2063                         q = (q << 8) | (read_data_byte() & 0xff);
2064                         q = (q << 8) | (read_data_byte() & 0xff);
2065                         q = (q << 8) | (read_data_byte() & 0xff);
2066                         q = (q << 8) | (read_data_byte() & 0xff);
2067                         *x++ = (q << 8) | (read_data_byte() & 0xff);
2068                     }
2069                 }
2070                 else if (vector_f32(type))
2071                 {   GC_PROTECT(prev = get_basic_vector(tag, type, CELL+4*w));
2072                     *(atomic<LispObject>*)p = prev;
2073                     float *x = reinterpret_cast<float *>(start_contents(prev));
2074                     for (size_t i=0; i<(size_t)w; i++) *x++ = read_f32();
2075                     while (((intptr_t)x & 7) != 0) *x++ = 0;
2076                 }
2077 #ifdef HAVE_SOFTFLOAT
2078                 else if (vector_f128(type))
2079                 {   GC_PROTECT(prev = get_basic_vector(tag, type, CELL+16*w));
2080                     *(atomic<LispObject>*)p = prev;
2081                     std::fprintf(stderr, "128-bit floats not supported (yet?)\n");
2082                     my_abort("128-bit float");
2083                 }
2084 #endif // HAVE_SOFTFLOAT
2085                 else if (vector_i128(type))
2086                 {   GC_PROTECT(prev = get_basic_vector(tag, type, CELL+16*w));
2087                     *(atomic<LispObject>*)p = prev;
2088                     std::fprintf(stderr, "128-bit integer arrays not supported (yet?)\n");
2089                     my_abort("128-bit integer arrays");
2090                 }
2091                 else
2092                 {   std::fprintf(stderr, "Vector code is impossible\n");
2093                     my_abort("bad format in image file");
2094                 }
2095             }
2096             goto up;
2097 
2098         default:
2099             std::fprintf(stderr, "Unimplemented reader opcode (b) %.2x\n", c);
2100             my_abort("bad byte in image file");
2101     }
2102 
2103 // The above deals with arriving at the description of an object. What follows
2104 // copes with return to an object after having filled in one of its component
2105 // fields.
2106 
2107 up:
2108 // If the back-pointer chain is empty then I am done and can return.
2109     if (b == fixnum_of_int(0))
2110     {   if (r == 0)
2111         {   std::fprintf(stderr, "serial reader about to return zero\n");
2112             my_abort("bad image file");
2113         }
2114         return r;
2115     }
2116 // When I have done everything that fills in the CDR of a CONS cell I
2117 // just need to go and deal with the CAR.
2118     if (consp(b))
2119     {   pbase = b;
2120         p = reinterpret_cast<LispObject *>(vcaraddr(b));
2121         b = car(b);
2122         goto down;
2123     }
2124 // The remaining cases are when b points to a vector or symbol. I use the
2125 // stack s to track how far along it I am, and need to do special things when
2126 // I am almost complete
2127     if (!is_cons(s))
2128     {   std::fprintf(stderr, "s bad at line %d in serialize.cpp\n", __LINE__);
2129         simple_print(s);
2130         my_abort("serialization");
2131     }
2132     if (!is_fixnum(car(s)))
2133     {   std::fprintf(stderr, "car s bad at line %d in serialize.cpp\n", __LINE__);
2134         simple_print(car(s));
2135         my_abort("serialization");
2136     }
2137     intptr_t n = int_of_fixnum(car(s)) - 1;
2138     if (n < 0)
2139     {   std::fprintf(stderr, "car s negative at line %d in serialize.cpp\n", __LINE__);
2140         std::fprintf(stderr, "car(s) = %" PRIx64 " in raw hex\n", (int64_t)car(s));
2141         std::fprintf(stderr, "value of car(s) as list: ");
2142         simple_print(car(s));
2143         my_abort("serialization");
2144     }
2145     if (n == 0)
2146     {   w = b;
2147         pbase = w;
2148 // vselt(v,n) accesses the nth item of the object v, whether v is a vector
2149 // (including hash tables, structures, records, objects...) or a symbol.
2150 // In the case of a symbol the index n selects as between qvalue, pname and
2151 // the other fields making up a symbol.
2152         p = reinterpret_cast<LispObject *>(&vselt(w, 0));
2153         b = vselt(w, 0);
2154 // I could and possibly should push the released cell from s onto a local
2155 // freelist and use that where I do a CONS if possible...
2156 //      car(s) = fr; fr = s; s = cdr(s); cdr(fr) = fixnum_of_int(0);
2157 // might do the job, chaining the freelist through its CAR field.
2158         s = cdr(s);
2159         goto down;
2160     }
2161     setcar(s, fixnum_of_int(n)); // write back decreased index
2162     pbase = b;
2163     p = reinterpret_cast<LispObject *>(&vselt(b, n));
2164     goto down;
2165 }
2166 
2167 //===============================================================================
2168 
2169 // Now code to write out expressions in serialized form.
2170 
2171 
2172 // The first thing I will need to do will be to traverse all the data that
2173 // is to be dumped. When I first visit a location I want to mark it in a
2174 // "used" table. If I then find I visit it again I need to create an entry
2175 // for it in a "shared" table.
2176 // The used table can be a sparse bitmap with (at the lowest level) one
2177 // bit for every 8-byte address. For a heap with M bytes in use this
2178 // consumes M/64 bytes. I will re-use this technology for the mapstore
2179 // function and for the basis of the oblist function.
2180 //
2181 // I will use a multi-layer table based on 4096-byte chunks of memory. The
2182 // lowest level such block will hold 32768 bits each referring to an 8-byte
2183 // address. This covers the bottom 18 bits of the address space. On a 64-bit
2184 // machine each higher level block will hold 512 entries - each is either nullptr
2185 // if no marks are present in an area or is a pointer to a lowest level
2186 // block. This adds coverage of another 9 address bits. So the sequence goes
2187 // 18, 27, 36, 45, 54. The very top block can be 8192 bytes and that gives
2188 // coverage of a full 64-bit space. Any reasonable setup will only allocate
2189 // a single block at each of the highest three levels!
2190 
2191 // The sequence of indirections here looks painful, but this code
2192 // is ONLY used while I am dumping an image file, and I am happt to take
2193 // a view that thit is something where ultimate speed is not vital - and
2194 // where costs such as the ones here are liable to be swamped by I/O
2195 // overheads anyway. I will use this extreme multi-level map even on
2196 // 32-bit machines where the first 3 levels of table are not needed at
2197 // all. It seems tidiest to just retain the general code. Perhaps I will
2198 // change that later, but then a speed penalty to 32-bit systems might be
2199 // acceptable.
2200 
2201 // Experiments (using Jlisp) show that when building the bootstrap version
2202 // of Reduce (which includes a lot more than the final one) there are
2203 // around 250000 objects, of which only 7000 have more than one reference
2204 // to them. That is in the main (bootstrapreduce) initial image - when one
2205 // loads more packages that can add extra data, but for the purposes of
2206 // creaing the main Reduce checkpoint file the above numbers are what I
2207 // will base my design on. Of this the key issue is that the suggestion that
2208 // maybe 3% of items in a lisp heap might (in the case I have measured) be
2209 // shared, so the table needed to record them does not need to be huge.
2210 
2211 static std::uint8_t *****used_map[1024] = {nullptr};
2212 
2213 // Test if an address is marked as in use.
2214 
address_used(uint64_t addr)2215 static int address_used(uint64_t addr)
2216 {   unsigned int i = static_cast<unsigned int>(addr >> 54);
2217     std::uint8_t *****m1 = used_map[i];
2218     if (m1 == nullptr) return 0;
2219     addr -= ((uint64_t)i) << 54;   // offset in page
2220     i = static_cast<unsigned int>(addr >> 45);
2221     std::uint8_t ****m2 = m1[i];
2222     if (m2 == nullptr) return 0;
2223     addr -= ((uint64_t)i) << 45;
2224     i = static_cast<unsigned int>(addr >> 36);
2225     std::uint8_t ***m3 = m2[i];
2226     if (m3 == nullptr) return 0;
2227     addr -= ((uint64_t)i) << 36;
2228     i = static_cast<unsigned int>(addr >> 27);
2229     std::uint8_t **m4 = m3[i];
2230     if (m4 == nullptr) return 0;
2231     addr -= ((uint64_t)i) << 27;
2232     i = static_cast<unsigned int>(addr >> 18);
2233     std::uint8_t *m5 = m4[i];
2234     if (m5 == nullptr) return 0;
2235     addr -= ((uint64_t)i) << 18;
2236 // Now addr is just an 18-bit number. Discard the low 3 bits
2237     addr >>= 3;
2238 //  fprintf(stderr, "address-used %" PRIxPTR " = %d\n", (uintptr_t)addr,
2239 //         m5[addr >> 3] & (1 << (addr & 7)));
2240     return (m5[addr >> 3] & (1 << (addr & 7))) != 0;
2241 }
2242 
2243 // Allocate memory for part of the map. if new can not allocate space
2244 // it should be treated as a fatal error - the processing here is naive
2245 // at present, since for instance with a GUI there is no guarantee that
2246 // stderr exists.
2247 //
2248 // Furthermore rather than using new here (and delete later) I should try
2249 // to use some space that is allocated to me but not currently in use. If I
2250 // have a 2-space copying collector I have half my whole memory available!
2251 // Even if I am using a non-copying collector I expect there to be LOTS of
2252 // space available. So maybe what I need rather than new is a call that
2253 // allocates a vector-like region in the heap, but falls back to new (eg
2254 // to expand the heap) rather than garbage collecting?
2255 
new_map_block()2256 static void *new_map_block()
2257 {   char *p = new (std::nothrow) char[512*sizeof(void *)];
2258     if (p == nullptr)
2259     {   std::fprintf(stderr, "\nFatal error - no memory\n");
2260         my_exit();
2261     }
2262     std::memset(p, 0, 512*sizeof(void *));
2263     return reinterpret_cast<void *>(p);
2264 }
2265 
new_final_map_block()2266 static std::uint8_t *new_final_map_block()
2267 {   std::uint8_t *p = new (std::nothrow) std::uint8_t[4096];
2268     if (p == nullptr)
2269     {   std::fprintf(stderr, "\nFatal error - no memory\n");
2270         my_exit();
2271     }
2272     std::memset(p, 0, 4096);
2273     return p;
2274 }
2275 
2276 // Set a bit in the mark table, allocating extra block as needed. This
2277 // is expected to end up using around 1.5% of the amount of memory that
2278 // you have allocated. So for instance on a 32-bit computer the most space
2279 // it should ever consume should be under 50 Mbytes and on a small machine
2280 // such as a 512 Mbyte Raspberry pi it ought to use under 10 Mbytes.
2281 
mark_address_as_used(uint64_t addr)2282 static void mark_address_as_used(uint64_t addr)
2283 {
2284     unsigned int i = static_cast<unsigned int>(addr >> 54);
2285     std::uint8_t *****m1 = used_map[i];
2286     if (m1 == nullptr) used_map[i] = m1 = (std::uint8_t *****)new_map_block();
2287     addr -= ((uint64_t)i) << 54;   // offset in page
2288     i = static_cast<unsigned int>(addr >> 45);
2289     std::uint8_t ****m2 = m1[i];
2290     if (m2 == nullptr) m1[i] = m2 = (std::uint8_t ****)new_map_block();
2291     addr -= ((uint64_t)i) << 45;
2292     i = static_cast<unsigned int>(addr >> 36);
2293     std::uint8_t ***m3 = m2[i];
2294     if (m3 == nullptr) m2[i] = m3 = (std::uint8_t ***)new_map_block();
2295     addr -= ((uint64_t)i) << 36;
2296     i = static_cast<unsigned int>(addr >> 27);
2297     std::uint8_t **m4 = m3[i];
2298     if (m4 == nullptr) m3[i] = m4 = (std::uint8_t **)new_map_block();
2299     addr -= ((uint64_t)i) << 27;
2300     i = static_cast<unsigned int>(addr >> 18);
2301     std::uint8_t *m5 = m4[i];
2302     if (m5 == nullptr) m4[i] = m5 = new_final_map_block();
2303     addr -= ((uint64_t)i) << 18;
2304 // Now addr is just an 18-bit number. Discard the low 3 bits
2305     addr >>= 3;
2306     m5[addr >> 3] |= (1 << (addr & 7));
2307 }
2308 
2309 // Release all memory used by the bitmap.
2310 
release_map_5(std::uint8_t * m)2311 static void release_map_5(std::uint8_t *m)
2312 {   if (m != nullptr) delete [] m;
2313 }
2314 
release_map_4(std::uint8_t ** m)2315 static void release_map_4(std::uint8_t **m)
2316 {   if (m != nullptr)
2317     {   for (int i=0; i<512; i++) release_map_5(m[i]);
2318         delete [] m;
2319     }
2320 }
2321 
release_map_3(std::uint8_t *** m)2322 static void release_map_3(std::uint8_t ***m)
2323 {   if (m != nullptr)
2324     {   for (int i=0; i<512; i++) release_map_4(m[i]);
2325         delete [] m;
2326     }
2327 }
2328 
release_map_2(std::uint8_t **** m)2329 static void release_map_2(std::uint8_t ****m)
2330 {   if (m != nullptr)
2331     {   for (int i=0; i<512; i++) release_map_3(m[i]);
2332         delete [] m;
2333     }
2334 }
2335 
release_map_1(std::uint8_t ***** m)2336 static void release_map_1(std::uint8_t *****m)
2337 {   if (m != nullptr)
2338     {   for (int i=0; i<512; i++) release_map_2(m[i]);
2339         delete [] m;
2340     }
2341 }
2342 
release_map()2343 void release_map()
2344 {   for (int i=0; i<1024; i++)
2345     {   release_map_1(used_map[i]);
2346         used_map[i] = nullptr;
2347     }
2348 }
2349 
2350 class map_releaser
2351 {
2352 public:
~map_releaser()2353     ~map_releaser()
2354     {   release_map();
2355     }
2356 };
2357 
2358 // I need to comment on the backpointer tagging here. In a tidy world I would
2359 // use TAG_CONS, TAG_SYMBOL and TAG_VECTOR to identify what I was descending
2360 // through. However for symbols and vectors I want back-pointers to be able to
2361 // identify any one of the cells with the object. On a 32-bit system those
2362 // are arranges at 4-byte granularity, so I can only afford to use the low
2363 // TWO bits to track my progress. Done this way I have to use the same
2364 // backpointer tags for lisp vector and also rational and complex numbers,
2365 // and in general it is a bit of a mess!
2366 // I believe I really only need one code for return to a CONS cell, but when I
2367 // first wrote this I used two, so I will leave that alone at least for now.
2368 
2369 #define BACKPOINTER_MASK   3
2370 #define BACKPOINTER_CDR    0
2371 #define BACKPOINTER_CAR    1
2372 #define BACKPOINTER_SYMBOL 2
2373 #define BACKPOINTER_VECTOR 3
2374 
2375 // I will have two copies of the code that traverses everything. The first
2376 // is just there to record which objects have multiple references to them.
2377 // The second will dump the structure to a stream. The skeleton of the code
2378 // is maybe 150 lines long and having it replicated is perhaps bad, but
2379 // at present that seems the easiest way to cope with the different behaviour
2380 // needed during each of the two passes.
2381 
2382 char trigger[40] = "unknown";
2383 
scan_data(LispObject p)2384 void scan_data(LispObject p)
2385 {   LispObject b = 0 + BACKPOINTER_CAR, w;
2386     uintptr_t len;
2387     Header h;
2388 down:
2389     if (p == 0)
2390     {   std::fprintf(stderr, "Zero pointer found from %s\n", trigger);
2391         // An error - but I feel safest if I detect it and do not crash.
2392         goto up;
2393     }
2394     else if (p == nil) goto up;
2395     switch (p & TAG_BITS)
2396     {   default:
2397         case TAG_CONS:
2398             if (address_used(p - TAG_CONS))
2399             {   hash_insert(&repeat_hash, p);
2400                 goto up;
2401             }
2402             mark_address_as_used(p - TAG_CONS);
2403             w = p;
2404             p = cdr(p);
2405             setcdr(w, b);
2406             b = w - TAG_CONS + BACKPOINTER_CDR;
2407             goto down;
2408 
2409         case TAG_SYMBOL:
2410             if (address_used(p - TAG_SYMBOL))
2411             {   hash_insert(&repeat_hash, p);
2412                 goto up;
2413             }
2414             mark_address_as_used(p - TAG_SYMBOL);
2415 // I have two modes if serialization. One views symbols as "just other bits
2416 // of data" and descends through them so that the contents of their value
2417 // cells, property lists and so on get inspected. That version is used
2418 // when creating a full heap image. The other stops when it finds a symbol,
2419 // and when writing data out just writes out its name (or somewhat more
2420 // subtle information if it is a gensym). On reading things back in in this
2421 // letter case the name as dumped is just looked up in the object list. This
2422 // way of doing things may be useful for preserving data on disc or sending
2423 // it to a separate process. It captures the information that "print" would
2424 // except that it also understands about structure sharing within the object
2425 // that is being written. If at some stage I introduce a package system (a la
2426 // Common Lisp) I will need to re-visit what I do in that case so that
2427 // symbols with the same name but that live in different packages get
2428 // processed nicely. I rather suspect that realiable serialization in that
2429 // context is an issue that was originally only half thought through...
2430             if (!descend_symbols) goto up;
2431             w = p;
2432             p = qpname(p);
2433             setpname(w, b);
2434             b = reinterpret_cast<LispObject>(pnameaddr(w)) + BACKPOINTER_SYMBOL;
2435             goto down;
2436 
2437         case TAG_VECTOR:
2438             if (address_used(p - TAG_VECTOR))
2439             {   hash_insert(&repeat_hash, p);
2440                 goto up;
2441             }
2442             mark_address_as_used(p - TAG_VECTOR);
2443 // Some vectors hold binary, some lists and a few have a small number of
2444 // lists in their first few cells and binary data beyond that. It is
2445 // necessary to decode the header to see which case applies. The same
2446 // issue will arise for (boxed) numbers.
2447             h = vechdr(p);
2448             if (vector_holds_binary(h)) goto up;
2449             if (is_mixed_header(h)) len = 4*CELL;
2450             else len = length_of_header(h);
2451             if (len == CELL) goto up;
2452 // len in the length in bytes including the size of the header. For "mixed"
2453 // vectors (most notably stream objects) it represents one cell of header and
2454 // three of lisp data, which are thought of as having indexes 0, 1 and 2.
2455             w = p + len - CELL - TAG_VECTOR;
2456             p = *reinterpret_cast<LispObject *>(w);
2457             *reinterpret_cast<LispObject *>(w) = b;
2458             b = w + BACKPOINTER_VECTOR;
2459             goto down;
2460 
2461         case TAG_NUMBERS:
2462             if (address_used(p - TAG_NUMBERS))
2463             {   hash_insert(&repeat_hash, p);
2464                 goto up;
2465             }
2466             mark_address_as_used(p - TAG_NUMBERS);
2467             h = numhdr(p);
2468             if (vector_holds_binary(h)) goto up;
2469             len = length_of_header(h);
2470             if (len == CELL) goto up;  // should never happen
2471             w = p + len - CELL - TAG_NUMBERS;
2472             p = *reinterpret_cast<LispObject *>(w);
2473             *reinterpret_cast<LispObject *>(w) = b;
2474             b = w + BACKPOINTER_VECTOR;
2475             goto down;
2476 
2477         case TAG_BOXFLOAT:
2478             if (address_used(p - TAG_BOXFLOAT))
2479             {   hash_insert(&repeat_hash, p);
2480                 goto up;
2481             }
2482             mark_address_as_used(p - TAG_BOXFLOAT);
2483 // A boxed float never contains further pointers, so there is no more
2484 // to do here.
2485             goto up;
2486 
2487         case TAG_FIXNUM:
2488         case TAG_HDR_IMMED:
2489 // Immediate data (eg small integers, characters) ought not to need any more.
2490             goto up;
2491 
2492         case TAG_FORWARD:
2493 // Forwarding addresses should only be present while the garbage collector
2494 // is active, and so ought not to be found. I will print a message and
2495 // basically ignore them.
2496             std::fprintf(stderr,
2497                 "\n+++ Forwarding address detected in heap scan from %s\n",
2498                 trigger);
2499             goto up;
2500     }
2501 
2502 up:
2503     switch (b & BACKPOINTER_MASK)
2504     {   default:
2505         case BACKPOINTER_CDR:
2506 // This is where I had just finished scanning the CAR of a cell and now
2507 // need to deal with the CDR.
2508             w = cdr(b - BACKPOINTER_CDR + TAG_CONS);
2509             setcdr(b - BACKPOINTER_CDR + TAG_CONS, p);
2510             p = car(b - BACKPOINTER_CDR + TAG_CONS);
2511             setcar(b - BACKPOINTER_CDR + TAG_CONS, w);
2512             b = b + BACKPOINTER_CAR - BACKPOINTER_CDR;
2513             goto down;
2514 
2515         case BACKPOINTER_CAR:
2516 // The termination of the back-pointer chain is to address zero as if one
2517 // had come down the CDR side of it.
2518             if (b == 0 + BACKPOINTER_CAR) return; // finished!
2519 // I have just finished the CDR, so now I can repair the structure and go
2520 // up another level.
2521             w = b - BACKPOINTER_CAR + TAG_CONS;
2522             b = car(w);
2523             setcar(w, p);
2524             p = w;
2525             goto up;
2526 
2527         case BACKPOINTER_SYMBOL:
2528 // Here I am returning to a symbol. I keep this case separate from
2529 // the more general VECTOR case both because symbols are common and
2530 // because their headers are formatted differently to other vectors, so
2531 // this simplifies the task of sorting out how to re-tag things.
2532             w = *reinterpret_cast<LispObject *>(b - BACKPOINTER_SYMBOL);
2533             *reinterpret_cast<LispObject *>(b - BACKPOINTER_SYMBOL) = p;
2534             b = b - CELL;
2535             p = *reinterpret_cast<LispObject *>(b - BACKPOINTER_SYMBOL);
2536             if (is_symbol_header_full_test(p))
2537             {   p = b - BACKPOINTER_SYMBOL + TAG_SYMBOL;
2538                 b = w;
2539                 goto up;
2540             }
2541             *reinterpret_cast<LispObject *>(b - BACKPOINTER_SYMBOL) = w;
2542             goto down;
2543 
2544         case BACKPOINTER_VECTOR:
2545 // I have processed an item that was in a vector-like object and now need
2546 // to scan back to the previous one. When one reaches the start and ascends
2547 // the nature of the header that is found will determine whether the object
2548 // had originally been tagged as SYMBOL, VECTOR or NUMBERS. Note that the
2549 // fact that I am RETURNING to a vector means it must have been a vector
2550 // that contained pointers...
2551             w = *reinterpret_cast<LispObject *>(b - BACKPOINTER_VECTOR);
2552             *reinterpret_cast<LispObject *>(b - BACKPOINTER_VECTOR) = p;
2553             b = b - CELL;
2554             p = *reinterpret_cast<LispObject *>(b - BACKPOINTER_VECTOR);
2555 // The item I am scanning back over is either a number (in fact a ratio
2556 // or a complex number) in which case it will need to be re-tagged with
2557 // TAG_NUMBERS, and the test here detects its header...
2558             if (is_number_header_full_test(p))
2559             {   p = b - BACKPOINTER_VECTOR + TAG_NUMBERS;
2560                 b = w;
2561                 goto up;
2562             }
2563 // .. or some other sort of vector, with the header here identified as
2564 // not representing immediate user date nor being the header of a symbol.
2565 // The numeric cases have already been filtered out.
2566             if (is_vector_header_full_test(p))
2567             {   p = b - BACKPOINTER_VECTOR + TAG_VECTOR;
2568                 b = w;
2569                 goto up;
2570             }
2571             *reinterpret_cast<LispObject *>(b - BACKPOINTER_VECTOR) = w;
2572             goto down;
2573     }
2574 }
2575 
2576 // The code here requires that repeat_hash has been created by a previous
2577 // use of scan_data. Every item that has multiple references to it will
2578 // be in there. The first time an object is visited then hash table entry
2579 // will exist but has zero in its data field.
2580 
2581 // The code here has two HUGE delicacies. The first is that it uses the
2582 // bottom two bits of pointers in its back-pointer chain, and so while
2583 // it is working these are NOT valid in the normal CSL sense, and garbage
2584 // collection would FAIL. And probably fail messily. This should in fact be
2585 // OK because writing data here ought never to try to allocate store, and
2586 // hance should never trigger garbage collection. But I ought to try to
2587 // remember that I must never permit any asynchronous event to interrupt
2588 // what happens here. The second issue is that once this code has started
2589 // reversing pointers it must be allowed to run to completion so it restores
2590 // them. Well what could disturb it? In the short term there are two
2591 // possibilities. One is "^C" or equivalent and the other is a failure
2592 // report from the file-writing layer. In a potential multi-threaded
2593 // future work done in a different thread could be an issue, so this whole
2594 // body of code will need to go within a critical section...
2595 
2596 // I now note that when used via the FASL mechanism this code will be give
2597 // a list of all the material that has to go in the module. And then for
2598 // the savedef stuff again it will be given a list whose length is equal to
2599 // the number of functions begin defined. At present that will generate a
2600 // faiurly long stream of "list4*" opcodes, and it seems clear that having
2601 // a general case for long unshared lists would at least count as tidy.
2602 // I may think about implementing that later, but the absolute space overhead
2603 // here is only 2 bits per function being defined in each list, so it may
2604 // not be a disaster even if it looks a little ugly.
2605 
write_data(LispObject p)2606 void write_data(LispObject p)
2607 {   LispObject b = 0 + BACKPOINTER_CAR, w;
2608     uintptr_t len;
2609     int64_t w64;
2610     Header h;
2611     LispObject tail1, tail2, tail3, tail4;
2612     size_t i;
2613     size_t i2=-1, i3=-1, i4=-1;
2614 down:
2615     if (p == 0) p = SPID_NIL; // reload as a SPID.
2616     else if (p == nil)
2617     {   write_delayed(SER_NIL, "nil");
2618         goto up;
2619     }
2620     if ((i = hash_lookup(&repeat_hash, p)) != (size_t)(-1))
2621     {   if (hash_get_value(&repeat_hash, i) != 0)
2622         {   size_t n = find_index_in_repeats(i);
2623             char msg[40];
2624 #ifdef DEBUG_SERIALIZE
2625             std::sprintf(msg, "back %" PRIuPTR, (uintptr_t)n);
2626 #endif // DEBUG_SERIALIZE
2627             if (n <= 32) write_delayed(SER_BACKREF0 + n - 1, msg);
2628             else if (n <= 64) write_delayed(SER_BACKREF1 + n - 33, msg);
2629             else
2630             {   write_opcode(SER_BIGBACKREF, msg);
2631                 write_u64(n - 65);
2632             }
2633             goto up;
2634         }
2635 // If this is the first visit to an item that will be repeated then I
2636 // need to record where it will live in the table of repeats.
2637         find_index_in_repeats(i);
2638     }
2639 // Here we will have i==-1 if the object is not going to be referenced again.
2640     switch (p & TAG_BITS)
2641     {   default:
2642         case TAG_CONS:
2643 // Here I have (CONS b a) but I need to look ahead to see if the CDR
2644 // field contains a further cons, and if so whether it is shared. I
2645 // should look ahead far enough that I can detect the following cases,
2646 // which I list in order of preference. For each CONS listed I need to
2647 // know that it is not one that has already been dumped, but also if
2648 // there will be a back-reference to it somewhere later in the file.
2649 // I fear that this code is going to be somewhat ugly.
2650 //         (CONS d (CONS c (CONS b (CONS a nil))))
2651 //         (CONS e (CONS d (CONS c (CONS b a))))
2652 //         (CONS c (CONS b (CONS a nil)))
2653 //         (CONS d (CONS c (CONS b a)))
2654 //         (CONS b (CONS a nil))
2655 //         (CONS c (CONS b a))
2656 //         (CONS a nil)
2657 //         (CONS b a)
2658 // niltail will do what is sort of obvious from its name
2659             i2 = i3 = i4 = (size_t)(-1);
2660             tail1 = cdr(p);
2661 // Let me talk through the logic of the next lines. I will consider the tail
2662 // of the list something I can consolidate into one of my more elaborate
2663 // composite list-building opcoded subject to
2664 //  (a) its must be another CONS cell (and being nil instead is special!)
2665 //  (b) AND either (b.1) it must have no repeated references to it at all
2666 //              OR (b.2) this is the first time I have seen it.
2667 // In other cases I must stop here. There is a further wrinkle. The
2668 // check for "have I seen this before" is based on me having called
2669 // find_index_in_repeats and while I have done that for the first cell of
2670 // the chain I have not done so for the subsequent cells, so I will need
2671 // to check against those explicitly.
2672 //
2673 // I write all those opcode using write_delayed and that means that
2674 // sequences of them end up run-length encoded. SER_REPEAT plus LIST4*
2675 // combine to allow a compact representation of very long lists. Because
2676 // this works reasonably well I am not going to have a "LIST_n" opcode
2677 // to cope with that case.
2678 //
2679 // First check if the case that I have is (CONS a nil)...
2680             if (tail1 == nil)
2681             {   if (i != (size_t)-1)
2682                     write_delayed(SER_L_A, "ncons that will be re-used");
2683                 else write_delayed(SER_L_a, "ncons");
2684                 w = p;
2685                 p = car(p);
2686                 setcar(w, b);
2687                 b = w - TAG_CONS + BACKPOINTER_CAR;
2688                 goto down;
2689             }
2690 // Now I have (CONS b a). If a is not a CONS or if it is one that has
2691 // multiple references and this is not the first time it has been visited
2692 // then I do not have scope for further optimisaion so use just the SER_CONS
2693 // code.
2694             if (!is_cons(tail1) ||
2695                 ((i2 = hash_lookup(&repeat_hash, tail1)) != (size_t)(-1) &&
2696                  hash_get_value(&repeat_hash, i2) != 0))
2697             {   // Write out just (CONS b a)
2698                 if (i != (size_t)-1)
2699                     write_delayed(SER_L_A_S, "cons that will be re-used");
2700                 else write_delayed(SER_L_a_S, "cons");
2701                 w = p;
2702                 p = cdr(p);
2703                 setcdr(w, b);
2704  // Reverse pointers with the back-pointer being tagged with 0
2705                 b = w - TAG_CONS + BACKPOINTER_CDR;
2706                 goto down;
2707             }
2708             tail2 = cdr(tail1);
2709             if (tail2 == nil)
2710             {
2711 // Here the case I have is (LIST b a) and either or both of the CONS cells
2712 // concerned may need to be entered into the table of shared items.
2713                 write_delayed(
2714                    i==(size_t)(-1) ?
2715                      (i2==(size_t)(-1) ? SER_L_aa : SER_L_aA) :
2716                      (i2==(size_t)(-1) ? SER_L_Aa : SER_L_AA), "list2");
2717                 if (i2 != (size_t)(-1)) find_index_in_repeats(i2);
2718                 setcdr(p, b);
2719                 b = p - TAG_CONS + BACKPOINTER_CDR;
2720                 p = car(tail1);
2721                 setcar(tail1, b);
2722                 b = tail1 - TAG_CONS + BACKPOINTER_CAR;
2723                 goto down;
2724             }
2725 // Here is (LIST!* c b a). If a is not a cons or is a back-reference
2726 // (and it is not a reference to the first CONS in this block) I can do no
2727 // more.
2728             if (!is_cons(tail2) ||
2729                 tail2 == tail1 ||
2730                 ((i3 = hash_lookup(&repeat_hash, tail2)) != (size_t)(-1) &&
2731                   hash_get_value(&repeat_hash, i3) != 0))
2732             {   write_delayed(
2733                    i==(size_t)(-1) ?
2734                      (i2==(size_t)(-1) ? SER_L_aa_S : SER_L_aA_S) :
2735                      (i2==(size_t)(-1) ? SER_L_Aa_S : SER_L_AA_S), "list2*");
2736                 if (i2 != (size_t)(-1)) find_index_in_repeats(i2);
2737                 setcdr(p, b);
2738                 b = p - TAG_CONS + BACKPOINTER_CDR;
2739                 p = tail2;
2740                 setcdr(tail1, b);
2741                 b = tail1 - TAG_CONS + BACKPOINTER_CDR;
2742                 goto down;
2743             }
2744             tail3 = cdr(tail2);
2745             if (tail3 == nil)
2746             {   write_delayed(
2747                    i==(size_t)(-1) ?
2748                      (i2==(size_t)(-1) ?
2749                        (i3==(size_t)(-1) ? SER_L_aaa : SER_L_aaA) :
2750                        (i3==(size_t)(-1) ? SER_L_aAa : SER_L_aAA)) :
2751                      (i2==(size_t)(-1) ?
2752                        (i3==(size_t)(-1) ? SER_L_Aaa : SER_L_AaA) :
2753                        (i3==(size_t)(-1) ? SER_L_AAa : SER_L_AAA)), "list3");
2754                 if (i2 != (size_t)(-1)) find_index_in_repeats(i2);
2755                 if (i3 != (size_t)(-1)) find_index_in_repeats(i3);
2756                 setcdr(p, b);
2757                 b = p - TAG_CONS + BACKPOINTER_CDR;
2758                 setcdr(tail1, b);
2759                 b = tail1 - TAG_CONS + BACKPOINTER_CDR;
2760                 p = car(tail2);
2761                 setcar(tail2, b);
2762                 b = tail2 - TAG_CONS + BACKPOINTER_CAR;
2763                 goto down;
2764             }
2765 // I must now use this LIST!* case if any of the CONS cells other than the
2766 // very first one have multiple references. That is because the "list4" and
2767 // "list4*" opcodes only allow tagging the first cons as shared.
2768             if (!is_cons(tail3) ||
2769                 tail3 == tail2 ||
2770                 tail3 == tail1 ||
2771                 i2 != (size_t)(-1) ||
2772                 i3 != (size_t)(-1) ||
2773                 ((i4 = hash_lookup(&repeat_hash, tail3)) != (size_t)(-1) &&
2774                  hash_get_value(&repeat_hash, i4) != 0))
2775             {   write_delayed(
2776                    i==(size_t)(-1) ?
2777                      (i2==(size_t)(-1) ?
2778                        (i3==(size_t)(-1) ? SER_L_aaa_S : SER_L_aaA_S) :
2779                        (i3==(size_t)(-1) ? SER_L_aAa_S : SER_L_aAA_S)) :
2780                      (i2==(size_t)(-1) ?
2781                        (i3==(size_t)(-1) ? SER_L_Aaa_S : SER_L_AaA_S) :
2782                        (i3==(size_t)(-1) ? SER_L_AAa_S : SER_L_AAA_S)),
2783                     "list3*");
2784                 if (i2 != (size_t)(-1)) find_index_in_repeats(i2);
2785                 if (i3 != (size_t)(-1)) find_index_in_repeats(i3);
2786                 setcdr(p, b);
2787                 b = p - TAG_CONS + BACKPOINTER_CDR;
2788                 setcdr(tail1, b);
2789                 b = tail1 - TAG_CONS + BACKPOINTER_CDR;
2790                 p = cdr(tail2);
2791                 setcdr(tail2, b);
2792                 b = tail2 - TAG_CONS + BACKPOINTER_CDR;
2793                 goto down;
2794             }
2795             tail4 = cdr(tail3);
2796             if (tail4 == nil)
2797             {   write_delayed(
2798                    i==(size_t)(-1) ? SER_L_aaaa : SER_L_Aaaa, "list4");
2799                 setcdr(p, b);
2800                 b = p - TAG_CONS + BACKPOINTER_CDR;
2801                 setcdr(tail1, b);
2802                 b = tail1 - TAG_CONS + BACKPOINTER_CDR;
2803                 setcdr(tail2, b);
2804                 b = tail2 - TAG_CONS + BACKPOINTER_CDR;
2805                 p = car(tail3);
2806                 setcar(tail3, b);
2807                 b = tail3 - TAG_CONS + BACKPOINTER_CAR;
2808                 goto down;
2809             }
2810             write_delayed(
2811                    i==(size_t)(-1) ? SER_L_aaaa_S : SER_L_Aaaa_S, "list4*");
2812             setcdr(p, b);
2813             b = p - TAG_CONS + BACKPOINTER_CDR;
2814             setcdr(tail1, b);
2815             b = tail1 - TAG_CONS + BACKPOINTER_CDR;
2816             setcdr(tail2, b);
2817             b = tail2 - TAG_CONS + BACKPOINTER_CDR;
2818             p = cdr(tail3);
2819             setcdr(tail3, b);
2820             b = tail3 - TAG_CONS + BACKPOINTER_CDR;
2821             goto down;
2822 
2823         case TAG_SYMBOL:
2824             if (!descend_symbols)
2825             {   w = qpname(p);
2826                 char msg[40];
2827                 bool isgensym = false;
2828                 size_t n = length_of_byteheader(vechdr(w)) - CELL;
2829 // If I have a gensym that has been printed then its pname field
2830 // holds a string of the form
2831 //      GGGnnnn_nnn_nnn_nnn
2832 // with some number of groups of 3 digits on the end and then a group
2833 // of 4. This code loses that part of the name.
2834                 if ((isgensym = (qheader(p) & SYM_ANY_GENSYM) != 0) &&
2835                     (qheader(p) & SYM_UNPRINTED_GENSYM) == 0)
2836                 {   while (basic_celt(w, n-4) == '_') n -= 4;
2837                     n -= 4;
2838                 }
2839                 if (isgensym)
2840                 {   if (i != (size_t)-1)
2841                     {
2842 #ifdef DEBUG_SERIALIZE
2843                         std::sprintf(msg, "dup-gensym, length=%" PRIuPTR, (uintptr_t)n);
2844 #endif // DEBUG_SERIALIZE
2845                         write_opcode(SER_DUPGENSYM, msg);
2846                     }
2847                     else
2848                     {
2849 #ifdef DEBUG_SERIALIZE
2850                         std::sprintf(msg, "gensym, length=%" PRIuPTR, (uintptr_t)n);
2851 #endif // DEBUG_SERIALIZE
2852                         write_opcode(SER_GENSYM, msg);
2853                     }
2854                 }
2855                 else
2856                 {   if (i != (size_t)-1)
2857                     {
2858 #ifdef DEBUG_SERIALIZE
2859                         std::sprintf(msg, "dup-symbol, length=%" PRIuPTR, (uintptr_t)n);
2860 #endif // DEBUG_SERIALIZE
2861                         write_opcode(SER_DUPSYMBOL, msg);
2862                     }
2863                     else
2864                     {
2865 #ifdef DEBUG_SERIALIZE
2866                         std::sprintf(msg, "symbol, length=%" PRIuPTR, (uintptr_t)n);
2867 #endif // DEBUG_SERIALIZE
2868                         write_opcode(SER_SYMBOL, msg);
2869                     }
2870                 }
2871                 write_u64(n);  // number of bytes in the name
2872                 for (size_t i=0; i<n; i++)
2873                 {   int c = basic_celt(w, i) & 0xff;
2874                     char msg[40];
2875 #ifdef DEBUG_SERIALIZE
2876                     if (0x20 < c && c <= 0x7e) std::sprintf(msg, "'%c'", c);
2877                     else std::sprintf(msg, "%#.2x", c);
2878 #endif // DEBUG_SERIALIZE
2879                     write_byte(c, msg);
2880                 }
2881                 goto up;
2882             }
2883             if (i != (size_t)-1)
2884                 write_opcode(SER_DUPRAWSYMBOL, "raw symbol header");
2885             else write_opcode(SER_RAWSYMBOL, "raw symbol header");
2886 // Here I need to cope with the tagging bits and function cells, and
2887 // the count field... Each of these uses a variable length coding scheme that
2888 // will be 1 byte long in easy cases but can cope with 2^64 possibilities in
2889 // all if necessary.
2890             write_u64(((uint64_t)qheader(p))>>(Tw+4));
2891             write_function0(qfn0(p));
2892             write_function1(qfn1(p));
2893             write_function2(qfn2(p));
2894             write_function3(qfn3(p));
2895             write_function4up(qfn4up(p));
2896             write_u64(qcount(p));
2897             w = p;
2898             p = qpname(p);
2899             setpname(w, b);
2900             b = reinterpret_cast<LispObject>(pnameaddr(w)) + BACKPOINTER_SYMBOL;
2901             goto down;
2902 
2903         case TAG_VECTOR:
2904 // Some vectors hold binary, some lists and a few have a small number of
2905 // lists in their first few cells and binary data beyond that. It is
2906 // necessary to decode the header to see which case applies. The same
2907 // issue will arise for (boxed) numbers.
2908             h = vechdr(p);
2909         writevector:
2910             if (vector_holds_binary(h)) goto write_binary_vector;
2911             write_opcode(SER_LVECTOR | ((h>>(Tw+2)) & 0x1f), "lisp vector");
2912 // Length of a list-containing vector is given in CELLS.
2913             write_u64(length_of_header(h)/CELL - 1);
2914 // Observe that for vectors containing List data the DUP comes after the
2915 // SER_LVECTOR opcode but before the sequences that fill in vector contents.
2916             if (i != (size_t)-1)
2917                 write_opcode(SER_DUP, "repeatedly referenced vector");
2918 // For now the data beyond the 3 list-holding items in a MIXED structure
2919 // will not be dumped. I may need to review that later on.
2920             if (is_mixed_header(h)) len = 3;
2921             else len = length_of_header(h)/CELL - 1;
2922 // len in the length in bytes including the size of the header. For "mixed"
2923 // vectors (most notably stream objects) it represents one cell of header and
2924 // three of lisp data. The "-1" on the next line is because elements run from
2925 // 0 to len-1 rather than from 1 to len.
2926             if (len == 0) goto up; // NB special case
2927             w = reinterpret_cast<LispObject>(&basic_elt(p, len-1));
2928             p = *reinterpret_cast<LispObject *>(w);
2929             *reinterpret_cast<LispObject *>(w) = b;
2930             b = w + BACKPOINTER_VECTOR;
2931             goto down;
2932 
2933         write_binary_vector:
2934 // I need to separate off bitvectors and short strings here since they
2935 // get special treatment.
2936             if (is_string_header(h) &&
2937                 (len = length_of_byteheader(h) - CELL) <= 32 &&
2938                 len != 0)
2939             {   char msg[40];
2940 #ifdef DEBUG_SERIALIZE
2941                 std::sprintf(msg, "string, length=%" PRIuPTR, (intptr_t)len);
2942 #endif // DEBUG_SERIALIZE
2943                 write_opcode(SER_STRING+len-1, msg);
2944                 for (size_t j=0; j<len; j++)
2945                 {   int c = basic_ucelt(p, j);
2946 #ifdef DEBUG_SERIALIZE
2947                     if (0x20 < c && c <= 0x7e) std::sprintf(msg, "'%c'", c);
2948                     else std::sprintf(msg, "%#.2x", c);
2949 #endif // DEBUG_SERIALIZE
2950                     write_byte(c, msg);
2951                 }
2952                 if (i != (size_t)-1) write_opcode(SER_DUP, "dup string");
2953                 goto up;
2954             }
2955             else if (is_bitvec_header(h))
2956             {   char msg[40];
2957                 len = length_of_bitheader(h);
2958 #ifdef DEBUG_SERIALIZE
2959                 std::sprintf(msg, "bitvec, length=%" PRIuPTR, (intptr_t)len);
2960 #endif // DEBUG_SERIALIZE
2961                 write_opcode(SER_BITVEC, msg);
2962                 write_u64(len);
2963                 len = (len + 7)/8;
2964                 for (size_t j=0; j<len; j++)
2965                 {   int c = basic_ucelt(p, j);
2966 #ifdef DEBUG_SERIALIZE
2967                     for (int k=0; k<8; k++)
2968                         msg[k] = (c & (1<<k)) != 0 ? '1' : '0';
2969                     msg[8] = 0;
2970 #endif // DEBUG_SERIALIZE
2971                     write_byte(c, msg);
2972                 }
2973                 if (i != (size_t)-1) write_opcode(SER_DUP, "dup bitvector");
2974                 goto up;
2975             }
2976 // If I have a big-integer that uses at most two (32-bit) words then
2977 // I can transmit it as a big fixnum. I know that a 3-word bignum can
2978 // sometimes fit within 64-bits, but I do not detect and handle that
2979 // case here. The main concern I have here is that if I move to making
2980 // fixnums 60-bits wide in the future that the range that they cover is
2981 // handled nicely, and here it is!
2982 // Observe that the type returned by bignum_digits(p)[n] is uint32_t and
2983 // that although most digits in a bignum are unsigned the most significant
2984 // one must be treated as signed. So I cast to int32_t before casting to
2985 // int64_t to ensure that the sign gets propagated the way I need it to.
2986             else if (is_bignum_header(h))
2987             {   if (length_of_header(h) == CELL+4)
2988                 {   int64_t n = (int32_t)bignum_digits(p)[0];
2989                     char msg[40];
2990 #ifdef DEBUG_SERIALIZE
2991                     std::sprintf(msg, "int value=%" PRId64, n);
2992 #endif // DEBUG_SERIALIZE
2993                     if (n < 0)
2994                         write_delayed_with_arg(SER_NEGFIXNUM, -n-1, msg);
2995                     else write_delayed_with_arg(SER_POSFIXNUM, n, msg);
2996                     if (i != (size_t)-1) write_opcode(SER_DUP, "dup bignum");
2997                     goto up;
2998                 }
2999                 else if (length_of_header(h) == CELL+8)
3000                 {   int64_t n = (int32_t)bignum_digits(p)[0] |
3001                                 ((int64_t)(int32_t)bignum_digits(p)[1] << 31);
3002                     char msg[40];
3003 #ifdef DEBUG_SERIALIZE
3004                     std::sprintf(msg, "int value=%" PRId64, n);
3005 #endif // DEBUG_SERIALIZE
3006 // The value I have here fitted within two bignum digits and so is really at
3007 // most 62 bits. A consequence of that is that negating it can not lead to
3008 // arithmetic overflow within a signed 64-bit word. Whew!
3009                     if (n < 0)
3010                         write_delayed_with_arg(SER_NEGFIXNUM, -n-1, msg);
3011                     else write_delayed_with_arg(SER_POSFIXNUM, n, msg);
3012                     if (i != (size_t)-1) write_opcode(SER_DUP, "dup bignum");
3013                     goto up;
3014                 }
3015 // I will treat bignums with 3 or more words using a general scheme
3016 // and this will include cases like the values + and - 0x8000000000000000LL
3017 // which I might otherwise need to think about carefully to ensure that
3018 // processing with 64-bit integers did not cause trouble with overflow.
3019             }
3020 //
3021 // The general code here writes out a vector where its contents are
3022 // binary data. This needs to use separate code for each sort of data
3023 // so that the serialized version is transmitted using a standard order
3024 // of bytes. Also for vectors that hold bytes or halfwords the number
3025 // of units to transmit has to be computed in the light of the full
3026 // header word.
3027 // Hahaha there is a trap here. For vectors that hold bytes or halfword values
3028 // part of the length ends up in the bits that otherwise specify the type
3029 // of stuff in use. But the code that allocates a new vector thinks that
3030 // it will insert this information too. Unless steps are taken to arrange
3031 // that information is sent just once I can get into trouble. The way I will
3032 // work around this is here in the writing-code. The "type" field will be
3033 // forced to be (eg) TYPE_STRING_4 which should then make all behave OK.
3034             {   Header h1 = h;
3035                 if (vector_i8(h1)) h1 |= (0x60 << Tw);
3036                 else if (vector_i16(h1)) h1 |= (0x40 << Tw);
3037                 write_opcode(SER_BVECTOR | ((h1>>(Tw+2)) & 0x1f), "binary vector");
3038             }
3039 // The code that follows must match up with the code that reads vectors
3040 // back in. It has to convert data to a portable form that is agnostic
3041 // to little vs. big-endian architectures.
3042 // Also note that the "vector" may be tagged as TAG_VECTOR or TAG_NUMBERS and
3043 // so I need code that uses a mask operation to address its start.
3044             if (vector_i8(h))
3045             {   unsigned char *x = reinterpret_cast<unsigned char *>(start_contents(p));
3046                 write_u64(len = length_of_byteheader(h) - CELL);
3047 // I *could* detect strings etc here to display the comments more tidily,
3048 // but since they are just for debugging that seems like too much work
3049 // for today. I also transmit as unsigned bytes regardless of whether the
3050 // final use will be signed or unsigned.
3051                 for (size_t i=0; i<len; i++)
3052                     write_byte(*x++, "part of vec8/string");
3053             }
3054             else if (vector_i32(h))
3055             {   uint32_t *x = (uint32_t *)start_contents(p);
3056 // The packed length is the length in words.
3057                 write_u64(len = (length_of_header(h) - CELL)/4);
3058 // 32-bit integers are transmitted most significant byte first.
3059                 for (size_t i=0; i<len; i++)
3060                 {   uint32_t q = *x++;
3061                     write_byte((q>>24) & 0xff, "high byte");
3062                     write_byte((q>>16) & 0xff, "3");
3063                     write_byte((q>>8) & 0xff, "2");
3064                     write_byte(q & 0xff, "low byte");
3065                 }
3066             }
3067             else if (vector_f64(h))
3068             {   double *x = reinterpret_cast<double *>(start_contents64(p));
3069                 write_u64(len = (length_of_header(h) - CELL)/8);
3070                 for (size_t i=0; i<len; i++) write_f64(*x++);
3071             }
3072             else if (vector_i16(h))
3073             {   std::uint16_t *x = reinterpret_cast<std::uint16_t *>(start_contents(p));
3074                 write_u64(len = length_of_hwordheader(h) - CELL/2);
3075                 for (size_t i=0; i<len; i++)
3076                 {   uint32_t q = *x++;
3077                     write_byte((q>>8) & 0xff, "high byte");
3078                     write_byte(q & 0xff, "low byte");
3079                 }
3080             }
3081             else if (vector_i64(h))
3082             {   uint64_t *x = (uint64_t *)start_contents64(p);
3083                 write_u64(len = (length_of_header(h) - CELL)/8);
3084 // 64-bit integers are transmitted most significant byte first.
3085                 for (size_t i=0; i<len/8; i++)
3086                 {   uint64_t q = *x++;
3087                     write_byte((q>>56) & 0xff, "high byte");
3088                     write_byte((q>>48) & 0xff, "7");
3089                     write_byte((q>>40) & 0xff, "6");
3090                     write_byte((q>>32) & 0xff, "5");
3091                     write_byte((q>>24) & 0xff, "4");
3092                     write_byte((q>>16) & 0xff, "3");
3093                     write_byte((q>>8) & 0xff, "2");
3094                     write_byte(q & 0xff, "low byte");
3095                 }
3096             }
3097             else if (vector_f32(h))
3098             {   float *x = reinterpret_cast<float *>(start_contents(p));
3099                 write_u64(len = (length_of_header(h) - CELL)/4);
3100                 for (size_t i=0; i<len/4; i++) write_f32(*x++);
3101             }
3102 #ifdef HAVE_SOFTFLOAT
3103             else if (vector_f128(h))
3104             {   std::fprintf(stderr, "128-bit float arrays not supported (yet?)\n");
3105                 my_abort("128-bit float arrays");
3106             }
3107 #endif // HAVE_SOFTFLOAT
3108             else if (vector_i128(h))
3109             {   std::fprintf(stderr, "128-bit integer arrays not supported (yet?)\n");
3110                 my_abort("128-bit integer arrays");
3111             }
3112             else
3113             {   std::fprintf(stderr, "Vector code is impossible\n");
3114                 my_abort("bad vector type");
3115             }
3116             if (i != (size_t)-1)
3117                 write_opcode(SER_DUP, "repeatedly ref. vector");
3118             goto up;
3119 
3120         case TAG_NUMBERS:
3121             h = numhdr(p);
3122             goto writevector;
3123 
3124         case TAG_BOXFLOAT:
3125             switch (type_of_header(flthdr(p)))
3126             {   case TYPE_SINGLE_FLOAT:
3127                 {   char msg[40];
3128 #ifdef DEBUG_SERIALIZE
3129                     std::sprintf(msg, "float %.7g", static_cast<double>(single_float_val(p)));
3130 #endif // DEBUG_SERIALIZE
3131                     write_opcode(SER_FLOAT32, msg);
3132                     write_f32(single_float_val(p));
3133                 }
3134                 break;
3135                 case TYPE_DOUBLE_FLOAT:
3136                 {   char msg[40];
3137 #ifdef DEBUG_SERIALIZE
3138                     std::sprintf(msg, "double %.16g", double_float_val(p));
3139 #endif // DEBUG_SERIALIZE
3140                     write_opcode(SER_FLOAT64, msg);
3141                     write_f64(double_float_val(p));
3142                 }
3143                 break;
3144 #ifdef HAVE_SOFTFLOAT
3145                 case TYPE_LONG_FLOAT:
3146                 {   char msg[40];
3147 // At present I do not have a good scheme to display the 128-bit float value.
3148 #ifdef DEBUG_SERIALIZE
3149                     std::sprintf(msg, "long double");
3150 #endif // DEBUG_SERIALIZE
3151                     write_opcode(SER_FLOAT128, msg);
3152                     write_f128(long_float_val(p));
3153                 }
3154                 break;
3155 #endif // HAVE_SOFTFLOAT
3156                 default:
3157                     std::fprintf(stderr, "floating point representation not recognized\n");
3158                     my_abort("unknown floating  point format");
3159             }
3160             if (i != (size_t)-1)
3161                 write_opcode(SER_DUP, "repeatedly referenced vector");
3162 // A boxed float never contains further pointers, so there is no more
3163 // to do here.
3164             goto up;
3165 
3166         case TAG_FIXNUM:
3167             w64 = int_of_fixnum(p);
3168             if (-16 <= w64 && w64 < 15)
3169             {   char msg[40];
3170 #ifdef DEBUG_SERIALIZE
3171                 std::sprintf(msg, "int, value=%d", static_cast<int>(w64));
3172 #endif // DEBUG_SERIALIZE
3173                 write_delayed(SER_FIXNUM | (static_cast<int>(w64) & 0x1f), msg);
3174             }
3175             else
3176             {   char msg[40];
3177 #ifdef DEBUG_SERIALIZE
3178                 std::sprintf(msg, "int value=%" PRId64, w64);
3179 #endif // DEBUG_SERIALIZE
3180                 if (w64 < 0)
3181                     write_delayed_with_arg(SER_NEGFIXNUM, -w64-1, msg);
3182                 else
3183                     write_delayed_with_arg(SER_POSFIXNUM, w64, msg);
3184             }
3185             goto up;
3186 
3187         case TAG_HDR_IMMED:
3188 // Immediate data (eg characters and SPIDs).
3189         {   char msg[40];
3190             uint64_t nn = ((uint64_t)p) >> (Tw+2);
3191 #ifdef DEBUG_SERIALIZE
3192             std::sprintf(msg, "char/spid, value=%#" PRIx64, (uint64_t)p);
3193 #endif // DEBUG_SERIALIZE
3194             write_delayed_with_arg(SER_CHARSPID, nn, msg);
3195         }
3196         goto up;
3197 
3198         case TAG_FORWARD:
3199 // Forwarding addresses should only be present while the garbage collector
3200 // is active, and so ought not to be found. I will print a message and
3201 // basically ignore them.
3202             std::fprintf(stderr,
3203                 "\n+++ Forwarding address detected in heap scan from %s\n",
3204                 trigger);
3205             goto up;
3206     }
3207 
3208 up:
3209     switch (b & BACKPOINTER_MASK)
3210 {       default:
3211         case BACKPOINTER_CDR:
3212 // This is where I had just finished scanning the CDR of a cell and now
3213 // need to deal with the CAR.
3214             w = cdr(b - BACKPOINTER_CDR + TAG_CONS);
3215             setcdr(b - BACKPOINTER_CDR + TAG_CONS, p);
3216             p = car(b - BACKPOINTER_CDR + TAG_CONS);
3217             setcar(b - BACKPOINTER_CDR + TAG_CONS, w);
3218             b = b + BACKPOINTER_CAR - BACKPOINTER_CDR;
3219             goto down;
3220 
3221         case BACKPOINTER_CAR:
3222 // The termination of the back-pointer chain is to address zero as if one
3223 // had come down the CAR side of it.
3224             if (b == 0 + BACKPOINTER_CAR)
3225             {   write_opcode(-1, "Finished");  // Needed to flush any pending
3226                                                // repeated opcode.
3227                 return; // finished!
3228             }
3229 // I have just finished the CAR, so now I can repair the structure and go
3230 // up another level.
3231             w = b - BACKPOINTER_CAR + TAG_CONS;
3232             b = car(w);
3233             setcar(w, p);
3234             p = w;
3235             goto up;
3236 
3237         case BACKPOINTER_SYMBOL:
3238 // Here I am returning to a symbol. I keep this case separate from
3239 // the more general VECTOR case both because symbols are common and
3240 // because their headers are formatted differently to other vectors, so
3241 // this simplifies the task of sorting out how to re-tag things.
3242             w = *reinterpret_cast<LispObject *>(b - BACKPOINTER_SYMBOL);
3243             *reinterpret_cast<LispObject *>(b - BACKPOINTER_SYMBOL) = p;
3244             b = b - CELL;
3245             p = *reinterpret_cast<LispObject *>(b - BACKPOINTER_SYMBOL);
3246             if (is_symbol_header_full_test(p))
3247             {   p = b - BACKPOINTER_SYMBOL + TAG_SYMBOL;
3248                 b = w;
3249                 goto up;
3250             }
3251             *reinterpret_cast<LispObject *>(b - BACKPOINTER_SYMBOL) = w;
3252             goto down;
3253 
3254         case BACKPOINTER_VECTOR:
3255 // I have processed an item that was in a vector-like object and now need
3256 // to scan back to the previous one. When one reaches the start and ascends
3257 // the nature of the header that is found will determine whether the object
3258 // had originally been tagged as SYMBOL, VECTOR or NUMBERS. Note that the
3259 // fact that I am RETURNING to a vector means it must have been a vector
3260 // that contained pointers...
3261             w = *reinterpret_cast<LispObject *>(b - BACKPOINTER_VECTOR);
3262             *reinterpret_cast<LispObject *>(b - BACKPOINTER_VECTOR) = p;
3263             b = b - CELL;
3264             p = *reinterpret_cast<LispObject *>(b - BACKPOINTER_VECTOR);
3265 // The item I am scanning back over is either a number (in fact a ratio
3266 // or a complex number) in which case it will need to be re-tagged with
3267 // TAG_NUMBERS, and the test here detects its header...
3268             if (is_number_header_full_test(p))
3269             {   p = b - BACKPOINTER_VECTOR + TAG_NUMBERS;
3270                 b = w;
3271                 goto up;
3272             }
3273 // .. or some other sort of vector, with the header here identified as
3274 // not representing immediate user date nor being the header of a symbol.
3275 // The numeric cases have already been filtered out.
3276             if (is_vector_header_full_test(p))
3277             {   p = b - BACKPOINTER_VECTOR + TAG_VECTOR;
3278                 b = w;
3279                 goto up;
3280             }
3281             *reinterpret_cast<LispObject *>(b - BACKPOINTER_VECTOR) = w;
3282             goto down;
3283     }
3284 }
3285 
3286 class hash_releaser
3287 {
3288 public:
~hash_releaser()3289     ~hash_releaser()
3290     {   hash_finalize(&repeat_hash);
3291         if (repeat_heap != nullptr) delete [] repeat_heap;
3292         repeat_heap = nullptr;
3293         repeat_heap_size = 0;
3294     }
3295 };
3296 
3297 bool setup_codepointers = false;
3298 
Lwrite_module(LispObject env,LispObject a,LispObject b)3299 LispObject Lwrite_module(LispObject env, LispObject a, LispObject b)
3300 {
3301 #ifdef DEBUG_FASL
3302     Save save (a, b);
3303     trace_printf("FASLOUT: ");
3304     errexit();
3305     loop_print_trace(a);
3306     errexit();
3307     trace_printf("\n");
3308     errexit();
3309     loop_print_trace(b);
3310     errexit();
3311     trace_printf("\n");
3312     errexit();
3313     Save restore(a, b);
3314 #endif // DEBUG_FASL
3315     if (!setup_codepointers)
3316     {   set_up_function_tables();
3317         setup_codepointers = true;
3318     }
3319     descend_symbols = false;
3320     hash_init(&repeat_hash, 13); // allow 8K entries to start with.
3321     {   map_releaser RAII;
3322         std::strcpy(trigger, "write-module scan A");
3323         scan_data(a);
3324         std::strcpy(trigger, "write-module scan B");
3325         scan_data(b);
3326     }
3327     writer_setup_repeats();
3328     hash_releaser RAII;
3329     write_u64(repeat_heap_size);
3330 // Note that the serialization code ought not to allocate store or
3331 // do anything that could provoke garbage collection, and so I do not
3332 // need to stack stuff here. Any asynchronous interrupt during serialization
3333 // would be liable to be fatal since the implementation traverses data using
3334 // pointer reversal, so in general much of the heap can be in a corrupted
3335 // state while that is going on.
3336     std::strcpy(trigger, "write-module write a");
3337     write_data(a);
3338     std::strcpy(trigger, "write-module write b");
3339     write_data(b);
3340     return onevalue(nil);
3341 }
3342 
Lserialize(LispObject env,LispObject a)3343 LispObject Lserialize(LispObject env, LispObject a)
3344 {   if (!setup_codepointers)
3345     {   set_up_function_tables();
3346         setup_codepointers = true;
3347     }
3348     descend_symbols = false;
3349     hash_init(&repeat_hash, 13); // allow 8K entries to start with.
3350     {   map_releaser RAII;
3351         std::strcpy(trigger, "serialize scan");
3352         scan_data(a);
3353     }
3354     writer_setup_repeats();
3355     hash_releaser RAII;
3356     write_u64(repeat_heap_size);
3357     std::strcpy(trigger, "serialize write");
3358     write_data(a);
3359     return onevalue(nil);
3360 }
3361 
Lserialize1(LispObject env,LispObject a)3362 LispObject Lserialize1(LispObject env, LispObject a)
3363 {   if (!setup_codepointers)
3364     {   set_up_function_tables();
3365         setup_codepointers = true;
3366     }
3367     descend_symbols = true;
3368     hash_init(&repeat_hash, 13); // allow 8K entries to start with.
3369     {   map_releaser RAII;
3370         std::strcpy(trigger, "serialize1 scan");
3371         scan_data(a);
3372     };
3373     writer_setup_repeats();
3374     hash_releaser RAII;
3375     write_u64(repeat_heap_size);
3376     std::strcpy(trigger, "serialize1 write");
3377     write_data(a);
3378     return onevalue(nil);
3379 }
3380 
3381 // This is a single function that will implement load-module,
3382 // load-source and select-source.
3383 
3384 #define F_LOAD_MODULE     0
3385 #define F_LOAD_SOURCE     1
3386 #define F_SELECTED_SOURCE 2
3387 
load_module(LispObject env,LispObject file,int option)3388 static LispObject load_module(LispObject env, LispObject file, int option)
3389 // load_module() rebinds *package* in COMMON mode, but also note that
3390 // it also rebinds *echo to nil in case we are reading from a stream.
3391 {   save_current_function saver(env);
3392     char filename[LONGEST_LEGAL_FILENAME];
3393     Header h;
3394     size_t len;
3395     bool from_stream = false;
3396     std::memset(filename, 0, sizeof(filename));
3397     if (is_stream(file)) h=0, from_stream = true;
3398     else if (symbolp(file))
3399     {   file = get_pname(file);
3400         h = vechdr(file);
3401     }
3402     else if (!is_vector(file) || !is_string_header(h = vechdr(file)))
3403     {   switch (option)
3404         {
3405         default:
3406             return aerror("load-module");
3407         case F_LOAD_SOURCE:
3408             return aerror("load-source");
3409         case F_SELECTED_SOURCE:
3410             return aerror("load-selected-source");
3411         }
3412     }
3413     current_module = file;
3414     if (from_stream)
3415     {   if (Iopen_from_stdin())
3416         {   err_printf("Failed to load module from stream\n");
3417             return error(1, err_no_fasl, file);
3418         }
3419     }
3420     else
3421     {   len = length_of_byteheader(h) - CELL;
3422         char *modname = reinterpret_cast<char *>(file) + CELL - TAG_VECTOR;
3423         modname = trim_module_name(modname, &len);
3424         if (Iopen(modname, len, IOPEN_IN, filename))
3425         {   err_printf("Failed to find \"%s\"\n", filename);
3426             return error(1, err_no_fasl, file);
3427         }
3428     }
3429 // I will account time spent fast-loading things as "storage management"
3430 // overhead to be counted as "garbage collector time" rather than
3431 // regular "cpu time" unless --ignore-load-time is specified, and then
3432 // I will just ignore it totally.
3433     uint64_t t0 = read_clock();
3434     if (verbos_flag & 2)
3435     {   freshline_trace();
3436         if (option != F_LOAD_MODULE)
3437         {   if (from_stream) trace_printf("Loading source from a stream\n");
3438             else trace_printf("Loading source for \"%s\"\n", filename);
3439         }
3440         else
3441         {   if (from_stream) trace_printf("Fast-loading from a stream\n");
3442             else trace_printf("Fast-loading \"%s\"\n", filename);
3443         }
3444     }
3445     inf_init(); // Ready for reading from compressed stream
3446     LispObject r = nil;
3447     class serializer_tidy
3448     {   LispObject *saveStack;
3449         bool from_stream;
3450         uint64_t t0b;
3451     public:
3452         serializer_tidy(bool fg, uint64_t t0a, LispObject file)
3453         {
3454             if (fg)
3455             {   *++stack = qvalue(standard_input);
3456                 setvalue(standard_input, file);
3457                 *++stack = qvalue(echo_symbol);
3458                 setvalue(echo_symbol, nil);
3459             }
3460             *++stack = qvalue(current_package);
3461             saveStack = stack;
3462             from_stream = fg;
3463             t0b = t0a;
3464         }
3465         ~serializer_tidy()
3466         {   stack = saveStack;
3467 // This is some tidy-up activity that I must always do at the end of
3468 // reading (or trying to read) something.
3469             if (repeat_heap != nullptr) delete [] repeat_heap;
3470             repeat_heap = nullptr;
3471             repeat_heap_size = 0;
3472             setvalue(current_package, *stack--);
3473             inf_finish();
3474             IcloseInput();
3475             if (from_stream)
3476             {   setvalue(echo_symbol, *stack--);
3477                 setvalue(standard_input, *stack--);
3478             }
3479             uint64_t delta = read_clock() - t0b;
3480             if (!ignoreLoadTime) gc_time += delta;
3481             base_time += delta;
3482         }
3483     };
3484     {   serializer_tidy tidy(from_stream, t0, file);
3485         reader_setup_repeats(read_u64());
3486         r = serial_read();
3487 #ifdef DEBUG_SERIALIZE
3488         std::fprintf(stderr, "Re-input: ");
3489         simple_print(r);
3490         std::fprintf(stderr, "\n");
3491 #endif // DEBUG_SERIALIZE
3492         if (r != eof_symbol &&
3493             option != F_LOAD_MODULE) r = serial_read();
3494         if (repeat_heap != nullptr) delete [] repeat_heap;
3495         repeat_heap = nullptr;
3496         repeat_heap_size = 0;
3497     }
3498 // I will process the stuff I just read AFTER I have closed the stream
3499 // etc. That will mean I never try using nested reading of fasl streams.
3500     if (option == F_LOAD_MODULE)
3501     {   static_cast<void>(eval(r, nil));
3502     }
3503     else
3504     {
3505 // Now r should be a list of the form ( (name def) (name def) )
3506 #ifdef DEBUG_FASL
3507         trace_printf("SAVEDEF info: ");
3508         loop_print_trace(r);
3509         trace_printf("\n");
3510 #endif // DEBUG_FASL
3511         file = nil;
3512         while (is_cons(r))
3513         {   LispObject p = car(r);
3514             r = cdr(r);
3515             LispObject name, def;
3516             if (is_cons(p) && is_cons(cdr(p)))
3517             {   name = car(p);
3518                 def = car(cdr(p));
3519             }
3520             else continue;
3521 // if I am in load_selected_source mode I need to check before I set up
3522 // !*savedef information.
3523             bool getsavedef = true;
3524             if (option == F_SELECTED_SOURCE && name != nil)
3525             {   LispObject w;
3526                 w = get(name, load_selected_source_symbol, nil);
3527                 if (w == nil) getsavedef = false;
3528                 else if (integerp(w) != nil && consp(def))
3529                 {   Save save1(name, file, r, def);
3530 // The md60 function is called on something like (fname (args...) body...)
3531                     LispObject def1 = cons(name, cdr(def));
3532                     errexit();
3533                     LispObject w1 = Lmd60(nil, def1);
3534                     errexit();
3535                     if (!numeq2(w, w1)) getsavedef = false;
3536                     save1.restore(name, file, r, def);
3537                 }
3538             }
3539             if (getsavedef)
3540             {   {   Save save1(name, file, r);
3541                     if (name == nil)
3542                     {   LispObject p1 = cdr(p);
3543                         LispObject n1 = car(p1);
3544                         LispObject t1 = car(p1 = cdr(p1));
3545                         LispObject v1 = car(p1 = cdr(p1));
3546                         putprop(n1, t1, v1);
3547                     }
3548                     else putprop(name, savedef, def);
3549                     errexit();
3550                     save1.restore(name, file, r);
3551                 }
3552 // Build up a list of the names of all functions whose !*savedef information
3553 // has been established.
3554                 Save save2(name, r);
3555                 file = cons(name, file);
3556                 errexit();
3557                 save2.restore(name, r);
3558             }
3559 // Now set up the load_source property on the function name to indicate the
3560 // module it was found in.
3561             LispObject w;
3562             w = get(name, load_source_symbol, nil);
3563             {   Save save(name, file, r);
3564                 w = cons(current_module, w);
3565                 errexit();
3566                 save.restore(name, file, r);
3567             }
3568             Save save(name, file, r);
3569             putprop(name, load_source_symbol, w);
3570             errexit();
3571             save.restore(name, file, r);
3572         }
3573     }
3574     if (option == F_LOAD_MODULE) return onevalue(nil);
3575     else return onevalue(file);
3576 }
3577 
Lload_module(LispObject env,LispObject file)3578 LispObject Lload_module(LispObject env, LispObject file)
3579 {   return load_module(env, file, F_LOAD_MODULE);
3580 }
3581 
Lload_source(LispObject env,LispObject file)3582 LispObject Lload_source(LispObject env, LispObject file)
3583 {   return load_module(env, file, F_LOAD_SOURCE);
3584 }
3585 
load_source0(int option)3586 LispObject load_source0(int option)
3587 {
3588 // First I will scan all the input libraries collecting a list of the
3589 // names of modules present in them. I will discard any duplicates
3590 // names.
3591     LispObject mods = nil;
3592     for (LispObject l = qvalue(input_libraries); is_cons(l); l = cdr(l))
3593     {   LispObject m;
3594         {   Save save(mods, l);
3595             m = Llibrary_members(nil, car(l));
3596             errexit();
3597             save.restore(mods, l);
3598         }
3599         while (is_cons(m))
3600         {   LispObject m1 = car(m);
3601             m = cdr(m);
3602             if (Lmemq(nil, m1, mods) != nil) continue;
3603             Save save(l, m);
3604             mods = cons(m1, mods);
3605             errexit();
3606             save.restore(l, m);
3607         }
3608     }
3609 // Now I will do load-source or load-selected-source on each module, and
3610 // form the union of the results, which should give me a consolidated
3611 // list of the names of functions seen.
3612     LispObject r = nil;
3613     while (is_cons(mods))
3614     {   LispObject m = car(mods), w;
3615         mods = cdr(mods);
3616         {   Save save(r, mods);
3617             w = load_module(nil, m, option);
3618             errexit();
3619             save.restore(r, mods);
3620         }
3621         Save save(mods);
3622 // The special version of UNION here always works in linear time, and that
3623 // is MUCH better than the more general version. Well with bootstrapreduce
3624 // the final result list from load!-source() ends up of length about
3625 // 20,000 and so the quadratic version of Lunion does work sort of
3626 // proportional to 400 million - which does complete but which noticably
3627 // slows things down.
3628         r = Lunion_symlist(nil, r, w);
3629         errexit();
3630         save.restore(mods);
3631     }
3632     return onevalue(r);
3633 }
3634 
Lload_selected_source(LispObject env,LispObject file)3635 LispObject Lload_selected_source(LispObject env, LispObject file)
3636 {   return load_module(env, file, F_SELECTED_SOURCE);
3637 }
3638 
Lload_source0(LispObject env)3639 LispObject Lload_source0(LispObject env)
3640 {   return load_source0(F_LOAD_SOURCE);
3641 }
3642 
Lload_selected_source0(LispObject env)3643 LispObject Lload_selected_source0(LispObject env)
3644 {   return load_source0(F_SELECTED_SOURCE);
3645 }
3646 
Lunserialize(LispObject env)3647 LispObject Lunserialize(LispObject env)
3648 {   LispObject r;
3649     reader_setup_repeats(read_u64());
3650     r = serial_read();
3651     if (repeat_heap != nullptr) delete [] repeat_heap;
3652     repeat_heap = nullptr;
3653     repeat_heap_size = 0;
3654     return onevalue(r);
3655 }
3656 
3657 // Here I will comments on how the previous version of warm_setup (and
3658 // hence "preserve") worked, and how the new one does. The intent is that this
3659 // will first help me know what I am doing as I code the new version, and
3660 // document some of the design decisions. Both the old ones and their
3661 // consequences and the new ones and why I wanted to change things.
3662 //
3663 // First an overview of the old code. I dumped a heap image by taking
3664 // each page of the heap and writing it out almost unaltered but with its
3665 // address in memory attached. Well that was the original ideal but because
3666 // pages were not going to be loaded back at the same addresses and I could
3667 // only guarantee consistency of relative addresses within a block I needed
3668 // to "unadjust" each pointer in the heap so it became encoded as a
3669 // block-number together with an offset. So I had to have code that
3670 // scanned the heap parsing it to identify every object, and I needed to
3671 // detect where within objects there were pointers. I also run a simplified
3672 // zip-like compression process to keep image files compact.
3673 //
3674 // To reload I could originally just read back blocks and "adjust" to put
3675 // pointers back to their native state. However a few years later I needed
3676 // to support both 32 and 64-bit architectures and I wanted to allow images
3677 // made on one to load on the other. That involved parsing blocks of memory
3678 // and halving or doubling the width of pointers.
3679 //
3680 // To cope with native code entrypoints I re-ran the normal heap
3681 // initialization code on an image once it had been reloaded. That puts the
3682 // entrypoint of CAR back as it should, and similarly for all other
3683 // built in functions. Doing things that way makes it way harder to cope with
3684 // cases where functions have been undefined, redefined or had their
3685 // definitions copied.
3686 //
3687 // Overall although the old scheme worked it was complicated and somewhat ugly.
3688 // I wanted to to introduce an option whereby 32-bit machines could have
3689 // 28-bit fixnums but 64-bit machines bigger ones - adapting the conversion
3690 // of images across dump/restart for that was going to be messy. Supporting
3691 // dynamically-compiled native code seemed awkward. The possibility of a trail
3692 // of residual bugs was a worry.
3693 //
3694 // So now I will describe the newer scheme. It dumps a heap starting from
3695 // every unambiguous lisp-base. In effect it is a print procedure that
3696 // does a pre-scan of all the data so as to detect where structures are
3697 // shared or looped, but then writes out the data in a sequential form.
3698 // I already have a "printl" that prints potentially cyclic lists in
3699 // normal character form - but the version used here uses a more compressed
3700 // bytecode notation, and tried to represent common things particularly
3701 // efficiently. Lists are in general transmitted with one byte for each CONS
3702 // cell, and strings often have a single prefix byte ahead of the bytes that
3703 // make up the string content. Creating the linear representation of data
3704 // is done using a pointer-reversal scheme that temporarily overwrites things
3705 // while writing them out, but by its nature restores them when it is
3706 // complete. This results in the code using only bounded stack space. The
3707 // reading code also uses pointer reversal for almost all its recursion.
3708 //
3709 // I believe that the new code ends up shorter and neater, and my hope is
3710 // that it will lead to a more compact representation of heap images and that
3711 // it will be fast. The previous one expected to be fast since the writing
3712 // and reading of images was basically simple block IO operations, but the
3713 // scanning of the head to alter the representationof pointers was messy and
3714 // poitentially costly (but was a cache-friendly linear scan of memory). Here
3715 // there is a risk that writing images may be more expensive, but I am
3716 // expecting that loading will be at least as fast as it used to be.
3717 //
3718 // My very first experiment has been to start a cold-start CSL and go
3719 // "preserve". That seems to create a serialized form of everything that is
3720 // around 24K bytes long, while the image using the previous version was
3721 // of size 43K. Until I can reload images and verify that everything works
3722 // this is very provisional and uncertain, but is feels encouraging to
3723 // me.
3724 
3725 // There is some need to be careful here. While serializing everything the
3726 // code will traverse through all the data structures that represent open
3727 // streams. Stream objects have three initial items in them - a type,
3728 // "write_data" and "read_data". The type information may be used in
3729 // printing diagnostics if something fails. The read and write_data are
3730 // used by synomym and broadcast streams as well as by streams that read
3731 // and write from lists of character objects. A consequence of this is
3732 // that although simple direct file I/O can be performed even in the middle
3733 // of preserving an image (well it has to be!) synomym streams may not be
3734 // used. In normal circumstances this means that the standard default
3735 // input and output streams have to be avoided. This issue bit me when I tried
3736 // using the Lisp "print" function from within the checkpoint code to help
3737 // be generate debug/trace information: all went well until the standard
3738 // output stream had some pointers in it reversed, and then everything
3739 // collapsed miserably. Hence this comment.
3740 
write_everything()3741 void write_everything()
3742 {   set_up_function_tables();
3743 // These may have been messed with during the run. Reset them here to
3744 // be tidy.
3745     big_divisor = make_four_word_bignum(0, 0, 0, 0);
3746     big_dividend = make_four_word_bignum(0, 0, 0, 0);
3747     if (!setup_codepointers)
3748     {   set_up_function_tables();
3749         setup_codepointers = true;
3750     }
3751     descend_symbols = true;
3752     hash_init(&repeat_hash, 13); // allow 8K entries to start with.
3753 // First scan the components of NIL. I have to do this because even with
3754 // descend_symbols set to true the scanning code views NIL as such a special
3755 // case that it does not descend through it or view multiple references to
3756 // it as worth noting.
3757     {   map_releaser RAII;
3758         std::strcpy(trigger, "value nil scan");
3759         scan_data(qvalue(nil));
3760         std::strcpy(trigger, "env nil scan");
3761         scan_data(qenv(nil));
3762         std::strcpy(trigger, "pname nil scan");
3763         scan_data(qpname(nil));
3764         std::strcpy(trigger, "plist nil scan");
3765         scan_data(qplist(nil));
3766         std::strcpy(trigger, "fastgets nil scan");
3767         scan_data(qfastgets(nil));
3768         std::strcpy(trigger, "package nil scan");
3769         scan_data(qpackage(nil));
3770 // Next the major list-bases.
3771         for (LispObject **p = list_bases; *p!=nullptr; p++)
3772         {   std::sprintf(trigger, "list base %" PRIx64 " scan",
3773                 static_cast<uint64_t>(**p));
3774             scan_data(**p);
3775         }
3776     }
3777 // Now I should have identified all cyclic and shared data - including
3778 // eveything in the object list/package structures.
3779     writer_setup_repeats();
3780     hash_releaser RAII;
3781 // Before I get to messy things I will write out some integer values.
3782 
3783     write_u64(miscflags);
3784     write_u64(gensym_ser);
3785     write_u64(print_precision);
3786     write_u64(current_modulus);
3787     write_u64(fastget_size);
3788     write_u64(package_bits);
3789     write_u64(modulus_is_large);
3790     write_u64(trap_floating_overflow);
3791 
3792 // At the start of a heap image I have a CRC for the tables of function
3793 // entrypoints, then the number of repeated objects.
3794     write_u64(function_crc);
3795     write_u64(repeat_heap_size);
3796 // Now inspect all structures again, this time writing a serialized form
3797 // for everything.
3798 
3799     std::strcpy(trigger, "value of nil write");
3800     write_data(qvalue(nil));
3801     std::strcpy(trigger, "env of nil write");
3802     write_data(qenv(nil));
3803     std::strcpy(trigger, "pname of nil write");
3804     write_data(qpname(nil));
3805     std::strcpy(trigger, "plist of nil write");
3806     write_data(qplist(nil));
3807     std::strcpy(trigger, "fastgets of nil write");
3808     write_data(qfastgets(nil));
3809     std::strcpy(trigger, "package of nil write");
3810     write_data(qpackage(nil));
3811     for (LispObject **p = list_bases; *p!=nullptr; p++)
3812     {   std::sprintf(trigger, "list base %p write", reinterpret_cast<void *>(**p));
3813         write_data(**p);
3814     }
3815 // Tidy up at the end. I do not logically need an explicit end of data marker
3816 // in the serialized form, but putting one there seems like a way to make
3817 // me feel more secure against corrupred image files.
3818     write_opcode(SER_END, "end of data");
3819 }
3820 
warm_setup()3821 void warm_setup()
3822 {   size_t i;
3823     set_up_function_tables();
3824     setheader(nil, TAG_HDR_IMMED+TYPE_SYMBOL+SYM_GLOBAL_VAR);
3825     for (LispObject **p = list_bases; *p!=nullptr; p++) **p = nil;
3826     *stack = nil;
3827     qcountLow(nil) = 0;
3828     qcountHigh(nil) = 0;
3829 // Make things GC safe first...
3830     setvalue(nil, nil);
3831     setenv(nil, nil);
3832     setpname(nil, nil);
3833     setplist(nil, nil);
3834     setfastgets(nil, nil);
3835     setpackage(nil, nil);
3836     qfn0(nil) = undefined_0;
3837     qfn1(nil) = undefined_1;
3838     qfn2(nil) = undefined_2;
3839     qfn3(nil) = undefined_3;
3840     qfn4up(nil) = undefined_4up;
3841     setheader(nil, TAG_HDR_IMMED+TYPE_SYMBOL+SYM_GLOBAL_VAR);
3842 
3843 #define boffo_size 256
3844     boffo = get_basic_vector(TAG_VECTOR, TYPE_STRING_4, CELL+boffo_size);
3845     std::memset(reinterpret_cast<void *>(reinterpret_cast<char *>(boffo) + (CELL - TAG_VECTOR)), '@', boffo_size);
3846 
3847     exit_tag = exit_value = nil;
3848     exit_reason = UNWIND_NULL;
3849 
3850     inf_init();
3851 
3852     miscflags = read_u64();
3853     gensym_ser = read_u64();
3854     print_precision = read_u64();
3855     current_modulus = read_u64();
3856     fastget_size = read_u64();
3857     package_bits = read_u64();
3858     modulus_is_large = read_u64();
3859     trap_floating_overflow = read_u64();
3860 
3861     uint64_t entrypt_checksum = read_u64();
3862     if (entrypt_checksum != function_crc)
3863     {
3864 // This was of reporting the problem is not neat, but may hold the fort
3865 // for at least a while.
3866         std::fprintf(stderr, "Checksums %" PRIx64 "  vs %" PRIx64 "\n",
3867                         entrypt_checksum, function_crc);
3868         std::fprintf(stderr, "do not match. Image made by incompatible version\n");
3869         my_exit();
3870     }
3871     size_t repeatsize = read_u64();
3872     reader_setup_repeats(repeatsize);
3873 
3874 // Now I can use serial_read...
3875 
3876     setvalue(nil, serial_read());
3877     setenv(nil, serial_read());
3878     setpname(nil, serial_read());
3879     setplist(nil, serial_read());
3880     setfastgets(nil, serial_read());
3881 
3882 // This next one is a BIGGY because the package structure is liable to
3883 // include all other symbols, and through them basically everything!
3884     setpackage(nil, serial_read());
3885 
3886     for (LispObject **p = list_bases; *p!=nullptr; p++) **p = serial_read();
3887 
3888     if ((i = read_opcode_byte()) != SER_END)
3889     {   std::fprintf(stderr, "Did not find SER_END opcode where expected\n");
3890         std::fprintf(stderr, "Byte that was read was %.2x\n", static_cast<int>(i));
3891         my_abort("END marker missing");
3892     }
3893     {   char endmsg[32];
3894         Zread(endmsg, 24);  // the termination record
3895 // Although I check here I will not make the system crash if I see an
3896 // error - at least until I have tested things and found this test
3897 // properly reliable.
3898 #ifdef COMMON
3899         if (std::strncmp(endmsg, "\n\nEnd of CCL dump file\n\n", 24) != 0)
3900 #else
3901         if (std::strncmp(endmsg, "\n\nEnd of CSL dump file\n\n", 24) != 0)
3902 #endif
3903         {   std::fprintf(stderr, "\n+++ Bad end record |%s|\n", endmsg);
3904         }
3905     }
3906 
3907     inf_finish();
3908 
3909     {   LispObject w = error_output;
3910         error_output = 0;
3911         IcloseInput();
3912         error_output = w;
3913     }
3914     if (repeat_heap != nullptr) delete [] repeat_heap;
3915     repeat_heap = nullptr;
3916     repeat_heap_size = 0;
3917 
3918 // There are various things such as lispsystem* and the various standard
3919 // output streams that may depend on the particular system I am loading on
3920 // and so have to be set up as if from cold...
3921     set_up_variables(true);
3922 }
3923 
3924 // It is now convenient to put "mapstore" here, because it uses yet another
3925 // variant on the code that traverses my entire heap.
3926 
3927 // This code uses the same bitmap technology (with the temporary bitmap
3928 // allocated using new) that serialization (and hence both fasl output
3929 // and preserve) does. I provide a generic function that visits every
3930 // symbol in the system. This function must not trigger garbage collection!
3931 // So I give it a predicate that filters the symbols found, and it pushes
3932 // onto the Lisp stack every one that the predicate tells it to. If there
3933 // is not enough stack space it return true.
3934 
3935 // In a full Reduce as of 1Q 2018 with every Reduce package loaded there
3936 // are about 40,000 symbols present. Around half of those name functions.
3937 // So mapstore has at worst 20,000 symbols of note to process at this stage.
3938 // Of course that number will expand as Reduce does. However also of course
3939 // one could only have ALL the packages that make up Reduce loaded in
3940 // rather artificial circumstances...
3941 //
3942 // Well the Lisp stack is a segment of size CSL_PAGE_SIZE which is (at the
3943 // time of writing) by default 4 Mbytes. If I do not have a lot on the stack
3944 // already I can therefore push up to 500,000 items onto it without overflow,
3945 // and those will be protected against garbage collection. So what I will do
3946 // is arrange that I can traverse the entire heap and push every symbol that
3947 // I find onto the stack... My function returns true if it fails because
3948 // of stack overflow, and in that case the stack will be (almost) full but
3949 // not all symbols will be on it.
3950 //
3951 // BUT.... this stuff is only used for an explicit list-all-symbols function
3952 // that the user could call or for mapstore(), and so I am really not very
3953 // concerned about its limitation!
3954 
3955 typedef bool symbol_processor_predicate(LispObject);
3956 
push_symbols(symbol_processor_predicate * pp,LispObject p)3957 bool push_symbols(symbol_processor_predicate *pp, LispObject p)
3958 {   LispObject b = 0 + BACKPOINTER_CAR, w;
3959     uintptr_t len;
3960     Header h;
3961     bool fail = false;
3962     debug_record("push_symbols start");
3963 down:
3964     debug_record("push_symbols down");
3965     if (p == 0)
3966     {   std::fprintf(stderr, "Zero pointer found from %s\n", trigger);
3967         // An error - but I feel safest if I detect it and do not crash.
3968         goto up;
3969     }
3970     else if (p == nil) goto up;
3971     switch (p & TAG_BITS)
3972     {   default:
3973         case TAG_CONS:
3974             debug_record("push_symbols CONS");
3975             if (address_used(p - TAG_CONS)) goto up;
3976             mark_address_as_used(p - TAG_CONS);
3977             w = p;
3978             p = cdr(p);
3979             setcdr(w, b);
3980             b = w - TAG_CONS + BACKPOINTER_CDR;
3981             goto down;
3982 
3983         case TAG_SYMBOL:
3984             debug_record("push_symbols SYMBOL");
3985             if (address_used(p - TAG_SYMBOL)) goto up;
3986 //          {   LispObject pn = qpname(p);
3987 //              if (is_string(pn))
3988 //                  trace_printf(": %.*s\n",
3989 //                      (int)length_of_byteheader(vechdr(pn))-CELL, &celt(pn, 0));
3990 //          }
3991 // I will stop 256 bytes before letting the stack overflow.
3992             if ((uintptr_t)stack+256 < (uintptr_t)stackLimit)
3993             {   if ((*pp)(p)) *++stack = p;
3994             }
3995             else fail = true; // I must keep traversing to restore things.
3996             mark_address_as_used(p - TAG_SYMBOL);
3997             w = p;
3998             p = qpname(p);
3999             setpname(w, b);
4000             b = reinterpret_cast<LispObject>(pnameaddr(w)) + BACKPOINTER_SYMBOL;
4001             goto down;
4002 
4003         case TAG_VECTOR:
4004             debug_record("push_symbols VECTOR");
4005             if (address_used(p - TAG_VECTOR)) goto up;
4006             mark_address_as_used(p - TAG_VECTOR);
4007             h = vechdr(p);
4008             if (vector_holds_binary(h)) goto up;
4009             if (is_mixed_header(h)) len = 4*CELL;
4010             else len = length_of_header(h);
4011             if (len == CELL) goto up;
4012             w = p + len - CELL - TAG_VECTOR;
4013             p = *reinterpret_cast<LispObject *>(w);
4014             *reinterpret_cast<LispObject *>(w) = b;
4015             b = w + BACKPOINTER_VECTOR;
4016             goto down;
4017 
4018         case TAG_NUMBERS:
4019             debug_record("push_symbols NUMBERS");
4020             if (address_used(p - TAG_NUMBERS)) goto up;
4021             mark_address_as_used(p - TAG_NUMBERS);
4022             h = numhdr(p);
4023             if (vector_holds_binary(h)) goto up;
4024             len = length_of_header(h);
4025             if (len == CELL) goto up;
4026             w = p + len - CELL - TAG_NUMBERS;
4027             p = *reinterpret_cast<LispObject *>(w);
4028             *reinterpret_cast<LispObject *>(w) = b;
4029             b = w + BACKPOINTER_VECTOR;
4030             goto down;
4031 
4032         case TAG_BOXFLOAT:
4033             debug_record("push_symbols BOXFLOAT");
4034             if (address_used(p - TAG_BOXFLOAT)) goto up;
4035             mark_address_as_used(p - TAG_BOXFLOAT);
4036             goto up;
4037 
4038         case TAG_FIXNUM:
4039         case TAG_HDR_IMMED:
4040             debug_record("push_symbols FIXNUM etc");
4041 // Immediate data (eg small integers, characters) ought not to need any more.
4042             goto up;
4043 
4044         case TAG_FORWARD:
4045             debug_record("push_symbols FORWARD");
4046 // Forwarding addresses should only be present while the garbage collector
4047 // is active, and so ought not to be found. I will print a message and
4048 // basically ignore them.
4049             std::fprintf(stderr,
4050                 "\n+++ Forwarding address detected in heap scan from %s\n",
4051                 trigger);
4052             debug_show_trail("forwarding addr");
4053             goto up;
4054     }
4055 
4056 up:
4057     debug_record("push_symbols up");
4058     switch (b & BACKPOINTER_MASK)
4059     {   default:
4060         case BACKPOINTER_CDR:
4061             debug_record("push_symbols BACKPOINTER_CDR");
4062             w = cdr(b - BACKPOINTER_CDR + TAG_CONS);
4063             setcdr(b - BACKPOINTER_CDR + TAG_CONS, p);
4064             p = car(b - BACKPOINTER_CDR + TAG_CONS);
4065             setcar(b - BACKPOINTER_CDR + TAG_CONS, w);
4066             b = b + BACKPOINTER_CAR - BACKPOINTER_CDR;
4067             goto down;
4068 
4069         case BACKPOINTER_CAR:
4070             debug_record("push_symbols BACKPOINTER_CAR");
4071             if (b == 0 + BACKPOINTER_CAR) return fail; // finished!
4072             w = b - BACKPOINTER_CAR + TAG_CONS;
4073             b = car(w);
4074             setcar(w, p);
4075             p = w;
4076             goto up;
4077 
4078         case BACKPOINTER_SYMBOL:
4079             debug_record("push_symbols BACKPOINTER_SYMBOL");
4080             w = *reinterpret_cast<LispObject *>(b - BACKPOINTER_SYMBOL);
4081             *reinterpret_cast<LispObject *>(b - BACKPOINTER_SYMBOL) = p;
4082             b = b - CELL;
4083             p = *reinterpret_cast<LispObject *>(b - BACKPOINTER_SYMBOL);
4084             if (is_symbol_header_full_test(p))
4085             {   p = b - BACKPOINTER_SYMBOL + TAG_SYMBOL;
4086                 b = w;
4087                 goto up;
4088             }
4089             *reinterpret_cast<LispObject *>(b - BACKPOINTER_SYMBOL) = w;
4090             goto down;
4091 
4092         case BACKPOINTER_VECTOR:
4093             debug_record("push_symbols BACKPOINTER_VECTOR");
4094             w = *reinterpret_cast<LispObject *>(b - BACKPOINTER_VECTOR);
4095             *reinterpret_cast<LispObject *>(b - BACKPOINTER_VECTOR) = p;
4096             b = b - CELL;
4097             p = *reinterpret_cast<LispObject *>(b - BACKPOINTER_VECTOR);
4098             if (is_number_header_full_test(p))
4099             {   p = b - BACKPOINTER_VECTOR + TAG_NUMBERS;
4100                 b = w;
4101                 goto up;
4102             }
4103             if (is_vector_header_full_test(p))
4104             {   p = b - BACKPOINTER_VECTOR + TAG_VECTOR;
4105                 b = w;
4106                 goto up;
4107             }
4108             *reinterpret_cast<LispObject *>(b - BACKPOINTER_VECTOR) = w;
4109             goto down;
4110     }
4111 }
4112 
4113 // The following returns true if it fails.
4114 
push_all_symbols(symbol_processor_predicate * pp)4115 static bool push_all_symbols(symbol_processor_predicate *pp)
4116 {   map_releaser RAII;
4117     for (LispObject *s=stackBase+1; s<=stack; s++)
4118     {   std::sprintf(trigger, "Stack@%p", s);
4119         if (push_symbols(pp, *s)) return true;
4120     }
4121     std::strcpy(trigger, "value nil push");
4122     if (push_symbols(pp, qvalue(nil))) return true;
4123     std::strcpy(trigger, "env nil push");
4124     if (push_symbols(pp, qenv(nil))) return true;
4125     std::strcpy(trigger, "pname nil push");
4126     if (push_symbols(pp, qpname(nil))) return true;
4127     std::strcpy(trigger, "plist nil push");
4128     if (push_symbols(pp, qplist(nil))) return true;
4129     std::strcpy(trigger, "fastgets nil push");
4130     if (push_symbols(pp, qfastgets(nil))) return true;
4131     std::strcpy(trigger, "package nil push");
4132     if (push_symbols(pp, qpackage(nil))) return true;
4133     for (LispObject **p = list_bases; *p!=nullptr; p++)
4134     {   std::sprintf(trigger, "list base %p push", reinterpret_cast<void *>(**p));
4135         if (push_symbols(pp, **p)) return true;
4136     }
4137     return false;
4138 }
4139 
always(LispObject x)4140 bool always(LispObject x)
4141 {   return true;
4142 }
4143 
4144 // true if a symbol has a value, a property list or a definition as
4145 // a function. Note checking for fastgets.
4146 
interesting(LispObject x)4147 static bool interesting(LispObject x)
4148 {   LispObject ff;
4149     if ((ff = qfastgets(x)) != nil)
4150     {   for (int i=0; i<fastget_size; i++)
4151             if (static_cast<LispObject>(basic_elt(ff, i)) != SPID_NOPROP) return true;
4152     }
4153     return (qfn1(x) != undefined_1 ||
4154             qplist(x) != nil ||
4155             qvalue(x) != unset_var);
4156 }
4157 
not_gensym(LispObject x)4158 static bool not_gensym(LispObject x)
4159 {   return ((qheader(x) & (SYM_CODEPTR | SYM_ANY_GENSYM))== 0);
4160 }
4161 
4162 // Return an unsorted list of all symbols present in the current world.
4163 // (all!-symbols)     everything that has a value, definition or property
4164 // (all!-symbols nil) everything (even if dull) except gensyms. Note that
4165 //                    gensyms are excluded even if they have properties.
4166 // (all!-symbols t)   everything including gensyms.
4167 // There is a limit (at around 500,000) on the number of symbols that can
4168 // be returned, so if the user creates hundreds of thousands of gensyms (or
4169 // indeed hundreds of thousands of interned symbols) this could report
4170 // failure.
4171 
4172 
Lall_symbols(LispObject env,LispObject include_gensyms)4173 LispObject Lall_symbols(LispObject env, LispObject include_gensyms)
4174 {   LispObject *stacksave = stack;
4175     if (push_all_symbols(include_gensyms==nil ? not_gensym : always))
4176     {   stack = stacksave;
4177         return aerror("all_symbols");
4178     }
4179     LispObject r = nil;
4180     while (stack != stacksave)
4181     {   LispObject w = *stack--;
4182         r = cons(w, r);
4183     }
4184     return onevalue(r);
4185 }
4186 
Lall_symbols0(LispObject env)4187 LispObject Lall_symbols0(LispObject env)
4188 {   LispObject *stacksave = stack;
4189     if (push_all_symbols(interesting))
4190     {   stack = stacksave;
4191         return aerror("all_symbols");
4192     }
4193     LispObject r = nil;
4194     while (stack != stacksave)
4195     {   LispObject w = *stack--;
4196         r = cons(w, r);
4197     }
4198     return onevalue(r);
4199 }
4200 
4201 typedef struct mapstore_item
4202 {   double w;
4203     double n;
4204     uint64_t n1;
4205     char name[40]; // I will truncate names to 39 chars
4206 } mapstore_item;
4207 
4208 bool profile_count_mode = false;
4209 
profile_cf(const void * a,const void * b)4210 static int profile_cf(const void *a, const void *b)
4211 {   mapstore_item *aa = (mapstore_item *)a,
4212                   *bb = (mapstore_item *)b;
4213 // profile_count_mode selects whether I sort on the w field or the
4214 // n1 field here.
4215     if (profile_count_mode)
4216     {   if (aa->n1 == bb->n1) return 0;
4217         if (aa->n1 < bb->n1) return 1;
4218         else return -1;
4219     }
4220     if (aa->w == bb->w) return 0;
4221     else if (aa->w < bb->w) return 1;
4222     else return -1;
4223 }
4224 
4225 static double itotal_count = 0.0, total_count = 0.0;
4226 
count_totals(LispObject x)4227 static bool count_totals(LispObject x)
4228 {   uint64_t n = qcount(x);
4229     if (n == 0) return false; // Ignore items with zero count
4230     LispObject e = qenv(x);
4231     if (is_cons(e))
4232     {   e = car(e);
4233         if (is_bps(e))
4234         {   size_t clen = length_of_byteheader(vechdr(e)) - CELL;
4235             double w = static_cast<double>(n)/static_cast<double>(clen);
4236 // Here I want a measure that will give a good idea of how worthwhile it
4237 // would be to compile the given function into C - what I have chosen is
4238 // a count of bytecodes executed scaled by the length
4239 // of the bytestream code defining the function.  This will cause "good value"
4240 // cases to show up best.  I scale this relative to the total across all
4241 // functions recorded to make the numbers less sensitive to details of
4242 // how I generate test cases.  For interest I also display the proportion
4243 // of actual bytecodes interpreted.  In each case I record these out of
4244 // a total of 100.0 (percent) to give comfortable ranges of numbers to admire.
4245 // To get the scaling correct I need to count the total "costs" of all
4246 // functions in a first pass.
4247             itotal_count += static_cast<double>(n);
4248             total_count += w;
4249         }
4250     }
4251     return false;
4252 }
4253 
clear_counts(LispObject x)4254 static bool clear_counts(LispObject x)
4255 {   qcountLow(x) = 0;
4256     qcountHigh(x) = 0;
4257     return false;
4258 }
4259 
non_zero_count(LispObject x)4260 static bool non_zero_count(LispObject x)
4261 {   return qcount(x) != 0;
4262 }
4263 
Lmapstore(LispObject env,LispObject a)4264 LispObject Lmapstore(LispObject env, LispObject a)
4265 // Argument controls what happens:
4266 //    nil or 0      print statistics and reset to zero
4267 //           1      print, but do not reset
4268 //           2      return list of stats, reset to zero
4269 //           3      return list, do not reset
4270 //           4      reset to zero, do not print, return nil
4271 //           8      Toggle call count mode
4272 //
4273 // The cases I seem to use while building Reduce are
4274 //     2  return a list (and reset counts)
4275 //     4  reset counts to zero
4276 {   int what;
4277     mapstore_item *buff=nullptr;
4278     size_t buffp=0, buffn=0;
4279     if (a == nil) a = fixnum_of_int(0);
4280     if (is_fixnum(a)) what = int_of_fixnum(a);
4281     else what = 0;
4282     profile_count_mode = false;
4283     if (what & 8) profile_count_mode = true;
4284     what &= 7;
4285     if (what == 4)
4286     {   stack_restorer RAII;
4287         push_all_symbols(clear_counts);
4288         return onevalue(nil);
4289     }
4290     if (what == 0 || what == 1)   // needed if I am printing
4291     {   buff = new (std::nothrow) mapstore_item[100];
4292         if (buff == nullptr) return onevalue(nil); // fail
4293         buffp = 0;
4294         buffn = 100;
4295     }
4296     {   stack_restorer RAII;
4297         itotal_count = total_count = 0.0;
4298         push_all_symbols(count_totals);
4299     }
4300     LispObject r;
4301     {   stack_restorer RAII;
4302         LispObject *savestack = stack;
4303 // The code here is a bit odd. push_all_symbols() does what its name
4304 // says and places references to every symbol that passes its predicate
4305 // onto the (Lisp) stack. That is a place where those refernces are safe
4306 // across GCs. Then the symbols can be removed one at a time until the stack
4307 // is back at its original level. I do the transfers to and from the stack
4308 // with explicit visible code to stress exactly what is happening.
4309         push_all_symbols(non_zero_count);
4310         r = nil;
4311         while (stack != savestack)
4312         {   LispObject x = *stack;
4313             uint64_t n = qcount(x);
4314             if (n == 0) continue;
4315             LispObject e = qenv(x);
4316             if (is_cons(e))
4317             {   e = car(e);
4318                 if (is_bps(e))
4319                 {   size_t clen = length_of_byteheader(vechdr(e)) - CELL;
4320                     double w = static_cast<double>(n)/static_cast<double>(clen);
4321                     if (w/total_count > 0.00001 ||
4322                         static_cast<double>(n)/itotal_count > 0.0001)
4323                     {   if (what == 0 || what == 1)
4324                         {   if (buffp == buffn)
4325                             {   buffn += 100;
4326                                 mapstore_item *bigger  = new (std::nothrow)
4327                                     mapstore_item[buffn];
4328                                 if (bigger == nullptr) return onevalue(nil);
4329                                 std::memcpy(bigger, buff, (buffn-100)*sizeof(mapstore_item));
4330                                 delete [] buff;
4331                                 buff = bigger;
4332                             }
4333                             buff[buffp].w = 100.0*w/total_count;
4334                             buff[buffp].n = 100.0*static_cast<double>(n)/itotal_count;
4335                             buff[buffp].n1 = n;
4336                             LispObject pn = qpname(x);
4337                             size_t npn = length_of_byteheader(vechdr(pn)) - CELL;
4338                             if (npn >= 40) npn = 39;
4339                             std::strncpy(buff[buffp].name, reinterpret_cast<const char *>(&basic_celt(pn, 0)), npn);
4340                             buff[buffp].name[npn] = 0;
4341                             buffp++;
4342                         }
4343                         if (what == 2 || what == 3)
4344                         {   LispObject w1;
4345 // Result is a list of items ((name size bytes-executed) ...).
4346                             Save save(r);
4347                             w1 = make_lisp_integer64(n);
4348                             errexit();
4349                             w1 = list3(x, fixnum_of_int(clen), w1);
4350                             errexit();
4351                             save.restore(r);
4352                             r = cons(w1, r);
4353                             errexit();
4354                         }
4355                     }
4356                 }
4357             }
4358             x = *stack--;
4359             if ((what & 1) == 0)
4360             {   qcountLow(x) = 0;
4361                 qcountHigh(x) = 0;
4362             }
4363         }
4364     }
4365     if (what == 0 || what == 1)
4366     {   double running = 0.0;
4367         std::qsort(reinterpret_cast<void *>(buff), buffp, sizeof(buff[0]), profile_cf);
4368         trace_printf("\n  Value  %%bytes (So far) MBytecodes Function name\n");
4369         for (size_t j=0; j<buffp; j++)
4370         {   running += buff[j].n;
4371             trace_printf("%7.2f %7.2f (%6.2f) %9lu: ",
4372                          buff[j].w, buff[j].n, running,
4373                          (long unsigned)(buff[j].n1/10000u));
4374             trace_printf("%s\n", buff[j].name);
4375         }
4376         trace_printf("\n");
4377         delete [] buff;
4378     }
4379     return onevalue(r);
4380 }
4381 
Lmapstore0(LispObject env)4382 LispObject Lmapstore0(LispObject env)
4383 {   return Lmapstore(env, nil);
4384 }
4385 
4386 // end of serialize.cpp
4387