1 /*
2     This file is part of GNU APL, a free implementation of the
3     ISO/IEC Standard 13751, "Programming Language APL, Extended"
4 
5     Copyright (C) 2008-2016  Dr. Jürgen Sauermann
6 
7     This program is free software: you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation, either version 3 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include <fcntl.h>
22 #include <errno.h>
23 #include <iostream>
24 #include <stdio.h>
25 #include <sys/mman.h>
26 #include <sys/stat.h>
27 #include <sys/types.h>
28 
29 #include "Archive.hh"
30 #include "buildtag.hh"   // for ARCHIVE_SVN
31 #include "Common.hh"
32 #include "Command.hh"
33 #include "CharCell.hh"
34 #include "ComplexCell.hh"
35 #include "Executable.hh"
36 #include "FloatCell.hh"
37 #include "Function.hh"
38 #include "Heapsort.hh"
39 #include "IndexExpr.hh"
40 #include "IntCell.hh"
41 #include "LvalCell.hh"
42 #include "Macro.hh"
43 #include "NativeFunction.hh"
44 #include "Output.hh"
45 #include "PointerCell.hh"
46 #include "PrintOperator.hh"
47 #include "Symbol.hh"
48 #include "Token.hh"
49 #include "UCS_string.hh"
50 #include "UserFunction.hh"
51 #include "Value.hh"
52 #include "Workspace.hh"
53 
54 using namespace std;
55 
56 /// if there is less than x chars left on the current line then leave
57 /// char mode and start a new line indented.
58 #define NEED(x)   if (space < int(x)) \
59    { leave_char_mode();   out << "\n";   space = do_indent(); } out
60 
61 //-----------------------------------------------------------------------------
62 bool
xml_allowed(Unicode uni)63 XML_Saving_Archive::xml_allowed(Unicode uni)
64 {
65    if (uni < ' ')    return false;    // control chars and negative
66    if (uni == '<')   return false;   // < not allowed
67    if (uni == '&')   return false;   // < not allowed
68    if (uni == '"')   return false;   // not allowed in "..."
69    if (uni > '~')
70       {
71         // non-ASCII character. This is, in principle, allowed in XML but
72         // may not print properly if the font used does not provide the
73         // character. We therefore allow characters in ⎕AV and their alternate
74         // characters except our own type markers (⁰¹²...)
75         //
76         if (is_iPAD_char(uni))   return false;
77         if (Avec::find_char(uni) != Invalid_CHT)              return true;
78         if (Avec::map_alternative_char(uni) != Invalid_CHT)   return true;
79         return false;    // allowed, but may
80       }
81    return true;
82 }
83 //-----------------------------------------------------------------------------
84 const char *
decr(int & counter,const char * str)85 XML_Saving_Archive::decr(int & counter, const char * str)
86 {
87    counter -= strlen(str);
88    return str;
89 }
90 //-----------------------------------------------------------------------------
91 int
do_indent()92 XML_Saving_Archive::do_indent()
93 {
94 const int spaces = indent * INDENT_LEN;
95    loop(s, spaces)   out << " ";
96 
97    return 72 - spaces;
98 }
99 //-----------------------------------------------------------------------------
100 XML_Saving_Archive::Vid
find_vid(const Value * val)101 XML_Saving_Archive::find_vid(const Value * val)
102 {
103 const void * item = bsearch(val, values, value_count, sizeof(_val_par),
104                       _val_par::compare_val_par1);
105    if (item == 0)   return INVALID_VID;
106    return Vid(reinterpret_cast<const _val_par *>(item) - values);
107 }
108 //-----------------------------------------------------------------------------
109 void
emit_unicode(Unicode uni,int & space)110 XML_Saving_Archive::emit_unicode(Unicode uni, int & space)
111 {
112    if (uni == UNI_ASCII_LF)
113       {
114         leave_char_mode();
115         out << UNI_PAD_U1 << "A" << "\n";
116         space = do_indent();
117       }
118    else if (!xml_allowed(uni))
119       {
120         space -= leave_char_mode();
121         char cc[40];
122         snprintf(cc, sizeof(cc), "%X", uni);
123         NEED(1 + strlen(cc)) << UNI_PAD_U1 << decr(space, cc);
124         space--;   // PAD_U1
125       }
126    else
127       {
128         NEED(1) << "";
129         space -= enter_char_mode();
130         out << uni;
131         space--;   // uni
132       }
133 }
134 //-----------------------------------------------------------------------------
135 void
save_UCS(const UCS_string & ucs)136 XML_Saving_Archive::save_UCS(const UCS_string & ucs)
137 {
138 int space = do_indent();
139    out << decr(space, "<UCS uni=\"");
140    Assert(char_mode == false);
141    ++indent;
142    loop(u, ucs.size())   emit_unicode(ucs[u], space);
143    leave_char_mode();
144    out << "\"/>" << endl;
145    space -= 2;
146    --indent;
147 }
148 //-----------------------------------------------------------------------------
149 XML_Saving_Archive &
save_shape(Vid vid)150 XML_Saving_Archive::save_shape(Vid vid)
151 {
152 const Value & v = *values[vid]._val;
153 
154    do_indent();
155    out << "<Value flg=\"" << HEX(v.get_flags()) << "\" vid=\"" << vid << "\"";
156 
157 const Vid parent_vid = values[vid]._par;
158    out << " parent=\"" << parent_vid << "\" rk=\"" << v.get_rank()<< "\"";
159 
160    loop (r, v.get_rank())
161       {
162         out << " sh-" << r << "=\"" << v.get_shape_item(r) << "\"";
163       }
164 
165    out << "/>" << endl;
166    return *this;
167 }
168 //-----------------------------------------------------------------------------
169 XML_Saving_Archive &
save_Ravel(Vid vid)170 XML_Saving_Archive::save_Ravel(Vid vid)
171 {
172 const Value & v = *values[vid]._val;
173 
174 int space = do_indent();
175 
176 char cc[80];
177    snprintf(cc, sizeof(cc), "<Ravel vid=\"%d\" cells=\"", vid);
178    out << decr(space, cc);
179 
180    ++indent;
181 const ShapeItem len = v.nz_element_count();
182 const Cell * C = &v.get_ravel(0);
183    loop(l, len)   emit_cell(*C++, space);
184 
185    space -= leave_char_mode();
186    out<< "\"/>" << endl;
187    space -= 2;
188    --indent;
189 
190    return *this;
191 }
192 //-----------------------------------------------------------------------------
193 void
emit_cell(const Cell & cell,int & space)194 XML_Saving_Archive::emit_cell(const Cell & cell, int & space)
195 {
196 char cc[80];
197    switch(cell.get_cell_type())
198       {
199             case CT_CHAR:   // uses UNI_PAD_U0, UNI_PAD_U1, and UNI_PAD_U2
200                  emit_unicode(cell.get_char_value(), space);
201                  break;
202 
203             case CT_INT:   // uses UNI_PAD_U3
204                  space -= leave_char_mode();
205                  snprintf(cc, sizeof(cc), "%lld",
206                           long_long(cell.get_int_value()));
207                  NEED(1 + strlen(cc)) << UNI_PAD_U3 << decr(--space, cc);
208                  break;
209 
210             case CT_FLOAT:   // uses UNI_PAD_U4 or UNI_PAD_U8
211                  space -= leave_char_mode();
212 #ifdef RATIONAL_NUMBERS_WANTED
213                  {
214                  const FloatCell & flt = cell.cFloatCell();
215                  if (const APL_Integer denom = flt.get_denominator())
216                     {
217                       const APL_Integer numer = flt.get_numerator();
218                       snprintf(cc, sizeof(cc), "%lld÷%lld", long_long(numer),
219                                long_long(denom));
220                       NEED(1 + strlen(cc)) << UNI_PAD_U8 << decr(--space, cc);
221                       break;
222                     }
223                  }
224 #endif
225                  snprintf(cc, sizeof(cc), "%.17g",
226                           double(cell.get_real_value()));
227                  NEED(1 + strlen(cc)) << UNI_PAD_U4 << decr(--space, cc);
228                  break;
229 
230             case CT_COMPLEX:   // uses UNI_PAD_U5
231                  space -= leave_char_mode();
232                  snprintf(cc, sizeof(cc), "%17gJ%17g",
233                           double(cell.get_real_value()),
234                           double(cell.get_imag_value()));
235                  NEED(1 + strlen(cc)) << UNI_PAD_U5 << decr(--space, cc);
236                  break;
237 
238             case CT_POINTER:   // uses UNI_PAD_U6
239                  space -= leave_char_mode();
240                  {
241                    const Vid vid = find_vid(cell.get_pointer_value().get());
242                    snprintf(cc, sizeof(cc), "%d", vid);
243                    NEED(1 + strlen(cc)) << UNI_PAD_U6 << decr(--space, cc);
244                  }
245                  break;
246 
247             case CT_CELLREF:   // uses UNI_PAD_U7
248                  space -= leave_char_mode();
249                  {
250                    const Cell * cp = cell.get_lval_value();
251                    if (cp)   // valid Cell *
252                       {
253                         const Value * owner = cell.cLvalCell().get_cell_owner();
254                         const long long offset = owner->get_offset(cp);
255                         const Vid vid = find_vid(owner);
256                         snprintf(cc, sizeof(cc), "%d[%lld]", vid, offset);
257                         NEED(1 + strlen(cc)) << UNI_PAD_U7 << decr(--space, cc);
258                       }
259                    else     // 0-cell-pointer
260                       {
261                         snprintf(cc, sizeof(cc), "0");
262                         NEED(2) << UNI_PAD_U7 << "0" << decr(--space, cc);
263                       }
264                  }
265                  break;
266 
267             default: Assert(0);
268       }
269 }
270 //-----------------------------------------------------------------------------
271 void
save_Function(const Function & fun)272 XML_Saving_Archive::save_Function(const Function & fun)
273 {
274 const int * eprops = fun.get_exec_properties();
275 const APL_time_us creation_time = fun.get_creation_time();
276    do_indent();
277    out << "<Function creation-time=\"" << creation_time
278        << "\" exec-properties=\""
279        << eprops[0] << "," << eprops[1] << ","
280        << eprops[2] << "," << eprops[3] << "\"";
281 
282    if (fun.is_native())   out << " native=\"1\"";
283 
284    out << ">" << endl;
285    ++indent;
286 
287    save_UCS(fun.canonical(false));
288 
289    --indent;
290    do_indent();
291    out << "</Function>" << endl;
292 }
293 //-----------------------------------------------------------------------------
294 int
save_Function_name(const char * ufun_prefix,const char * level_prefix,const char * id_prefix,const Function & fun)295 XML_Saving_Archive::save_Function_name(const char * ufun_prefix,
296                                        const char * level_prefix,
297                                        const char * id_prefix,
298                                        const Function & fun)
299 {
300    if (fun.is_derived())
301       {
302         CERR << endl <<
303 "WARNING: The )SI stack contains a derived function. )SAVEing a workspace in\n"
304 "         such a state is currently not supported and WILL cause problems\n"
305 "         when )LOADing the workspace. Please perform )SIC (or →) and then\n"
306 "         )SAVE this workspace again." << endl;
307       }
308 
309 const UserFunction * ufun = fun.get_ufun1();
310    if (ufun)   // user defined function
311       {
312         const UCS_string & fname = ufun->get_name();
313         Symbol * sym = Workspace::lookup_symbol(fname);
314         Assert(sym);
315         const int sym_depth = sym->get_ufun_depth(ufun);
316         out << " " << ufun_prefix << "-name=\""  << fname     << "\""
317             << " " << level_prefix << "-level=\"" << sym_depth << "\"";
318         return 2;   // two attributes
319       }
320    else        // primitive or quad function
321       {
322         out << " " << id_prefix << "-id=\"" << HEX(fun.get_Id());
323         return 1;   // one attribute
324       }
325 }
326 //-----------------------------------------------------------------------------
327 void
save_Parser(const Prefix & prefix)328 XML_Saving_Archive::save_Parser(const Prefix & prefix)
329 {
330    do_indent();
331     out << "<Parser size=\""      << prefix.size()
332         << "\" assign-pending=\"" << prefix.get_assign_state()
333         << "\" action=\""         << prefix.action
334         << "\" lookahead-high=\"" << prefix.get_lookahead_high()
335         << "\">" << endl;
336 
337    // write the lookahead token, starting at the fifo's get position
338    //
339    ++indent;
340    loop(s, prefix.size())
341       {
342         const Token_loc & tloc = prefix.at(prefix.size() - s - 1);
343         save_token_loc(tloc);
344       }
345    save_token_loc(prefix.saved_lookahead);
346    --indent;
347 
348 
349    do_indent();
350    out << "</Parser>" << endl;
351 }
352 //-----------------------------------------------------------------------------
353 void
save_symtab(const SymbolTable & symtab)354 XML_Saving_Archive::save_symtab(const SymbolTable & symtab)
355 {
356 std::vector<const Symbol *> symbols = symtab.get_all_symbols();
357 
358    // remove erased symbols
359    //
360    for (size_t s = 0; s < symbols.size();)
361       {
362         const Symbol * sym = symbols[s];
363         if (sym->is_erased())
364             {
365               symbols[s] = symbols.back();
366               symbols.pop_back();
367               continue;
368             }
369 
370         ++s;
371       }
372 
373    do_indent();
374    out << "<SymbolTable size=\"" << symbols.size() << "\">" << endl;
375 
376    ++indent;
377 
378    while (symbols.size() > 0)
379       {
380         // set idx to the alphabetically smallest name
381         //
382         int idx = 0;
383         for (size_t i = 1; i < symbols.size(); ++i)
384             {
385               if (symbols[idx]->compare(*symbols[i]) > 0)   idx = i;
386             }
387 
388         const Symbol * sym = symbols[idx];
389         save_Symbol(*sym);
390 
391         symbols[idx] = symbols.back();
392         symbols.pop_back();
393       }
394 
395    --indent;
396 
397    do_indent();
398    out << "</SymbolTable>" << endl << endl;
399 }
400 //-----------------------------------------------------------------------------
401 void
save_SI_entry(const StateIndicator & si)402 XML_Saving_Archive::save_SI_entry(const StateIndicator & si)
403 {
404 const Executable & exec = *si.get_executable();
405 
406    do_indent();
407    out << "<SI-entry level=\"" << si.get_level()
408        << "\" pc=\"" << si.get_PC()
409        << "\" line=\"" << exec.get_line(si.get_PC()) << "\""
410        <<">" << endl << flush;
411 
412    ++indent;
413    do_indent();
414    switch(exec.get_parse_mode())
415       {
416         case PM_FUNCTION:
417              {
418                Symbol * sym = Workspace::lookup_symbol(exec.get_name());
419                Assert(sym);
420                const UserFunction * ufun = exec.get_ufun();
421                Assert(ufun);
422                const int sym_depth = sym->get_ufun_depth(ufun);
423 
424                if (ufun->is_macro())
425                   out << "<UserFunction macro-num=\"" << ufun->get_macnum()
426                       << "\"/>" << endl;
427                else if (ufun->is_lambda())
428                   {
429                     out << "<UserFunction lambda-name=\"" << ufun->get_name()
430                         << "\">" << endl;
431                     ++indent;
432                     save_UCS(ufun->canonical(false));
433                     --indent;
434                     do_indent();
435                     out << "</UserFunction>" << endl;
436                   }
437                else
438                   out << "<UserFunction ufun-name=\"" << sym->get_name()
439                       << "\" symbol-level=\"" << sym_depth << "\"/>" << endl;
440              }
441              break;
442 
443         case PM_STATEMENT_LIST:
444              out << "<Statements>" << endl;
445              ++indent;
446              save_UCS(exec.get_text(0));
447              --indent;
448              do_indent();
449              out << "</Statements>" << endl;
450                break;
451 
452         case PM_EXECUTE:
453              out << "<Execute>" << endl;
454              ++indent;
455              save_UCS(exec.get_text(0));
456              --indent;
457              do_indent();
458              out << "</Execute>" << endl;
459              break;
460 
461           default: FIXME;
462       }
463 
464    // print the parser states...
465    //
466    save_Parser(si.current_stack);
467 
468    --indent;
469 
470    do_indent();
471    out << "</SI-entry>" << endl << endl;
472 }
473 //-----------------------------------------------------------------------------
474 void
save_Symbol(const Symbol & sym)475 XML_Saving_Archive::save_Symbol(const Symbol & sym)
476 {
477    do_indent();
478    out << "<Symbol name=\"" << sym.get_name() << "\" stack-size=\""
479        << sym.value_stack_size() << "\">" << endl;
480 
481    ++indent;
482    loop(v, sym.value_stack_size())  save_vstack_item(sym[v]);
483    --indent;
484 
485    do_indent();
486    out << "</Symbol>" << endl << endl;
487 }
488 //-----------------------------------------------------------------------------
489 void
save_user_commands(const std::vector<Command::user_command> & cmds)490 XML_Saving_Archive::save_user_commands(
491                const std::vector<Command::user_command> & cmds)
492 {
493    if (cmds.size() == 0)   return;
494 
495    do_indent();
496    out << "<Commands size=\"" << cmds.size() << "\">" << endl;
497 
498    ++indent;
499    loop(u, cmds.size())
500       {
501         const Command::user_command & ucmd = cmds[u];
502         do_indent();
503         out << "<Command name=\"" << ucmd.prefix
504             << "\" mode=\"" << ucmd.mode
505             << "\" fun=\"" <<  ucmd.apl_function << "\"/>" << endl;
506       }
507 
508    --indent;
509    do_indent();
510    out << "</Commands>" << endl << endl;
511 }
512 //-----------------------------------------------------------------------------
513 void
save_token_loc(const Token_loc & tloc)514 XML_Saving_Archive::save_token_loc(const Token_loc & tloc)
515 {
516    do_indent();
517    out << "<Token pc=\"" << tloc.pc
518        << "\" tag=\"" << HEX(tloc.tok.get_tag()) << "\"";
519    emit_token_val(tloc.tok);
520 
521    out << "/>" << endl;
522 }
523 //-----------------------------------------------------------------------------
524 void
emit_token_val(const Token & tok)525 XML_Saving_Archive::emit_token_val(const Token & tok)
526 {
527    switch(tok.get_ValueType())
528       {
529         case TV_NONE:  break;
530 
531         case TV_CHAR:  Log(LOG_archive)   CERR << "Saving TV_SYM Token" << endl;
532                        out << " char=\"" << int(tok.get_char_val()) << "\"";
533                        break;
534 
535         case TV_INT:   Log(LOG_archive)   CERR << "Saving TV_INT Token" << endl;
536                        out << " int=\"" << tok.get_int_val() << "\"";
537                        break;
538 
539         case TV_FLT:   Log(LOG_archive)   CERR << "Saving TV_FLT Token" << endl;
540                        out << " float=\"" << tok.get_flt_val() << "\"";
541                        break;
542 
543         case TV_CPX:   Log(LOG_archive)   CERR << "Saving TV_CPX Token" << endl;
544                        out << " real=\"" << tok.get_cpx_real()
545                            << "\" imag=\"" << tok.get_cpx_imag() << "\"";
546                        break;
547 
548         case TV_SYM:   Log(LOG_archive)   CERR << "Saving TV_SYM Token" << endl;
549                        {
550                          Symbol * sym = tok.get_sym_ptr();
551                          const UCS_string name = sym->get_name();
552                          out << " sym=\"" << name << "\"";
553                        }
554                        break;
555 
556         case TV_LIN:   Log(LOG_archive)   CERR << "Saving TV_LIN Token" << endl;
557                        out << " line=\"" << tok.get_fun_line() << "\"";
558                        break;
559 
560         case TV_VAL:   {
561                          Log(LOG_archive)
562                             CERR << "Saving TV_VAL Token" << endl;
563 
564                          const Vid vid = find_vid(tok.get_apl_val().get());
565                          out << " vid=\"" << vid << "\"";
566                        }
567                        break;
568 
569         case TV_INDEX: {
570                          Log(LOG_archive)
571                             CERR << "Saving TV_INDEX Token" << endl;
572                          const IndexExpr & idx = tok.get_index_val();
573                          const int rank = idx.value_count();
574                          out << " index=\"";
575                          loop(i, rank)
576                              {
577                                if (i)   out << ",";
578                                const Value * val = idx.values[i].get();
579                                if (val)   out << "vid_" << find_vid(val);
580                                else       out << "-";
581                                 out << "\"";
582                              }
583                        }
584                        break;
585 
586         case TV_FUN:   {
587                          Log(LOG_archive)
588                             CERR << "Saving TV_FUN Token" << endl;
589 
590                          Function * fun = tok.get_function();
591                          Assert1(fun);
592                          save_Function_name("ufun", "symbol", "fun", *fun);
593                        }
594                        out << "\"";
595                        break;
596 
597         default:       FIXME;
598 
599       }
600 }
601 //-----------------------------------------------------------------------------
602 void
save_vstack_item(const ValueStackItem & vsi)603 XML_Saving_Archive::save_vstack_item(const ValueStackItem & vsi)
604 {
605    switch(vsi.name_class)
606       {
607         case NC_UNUSED_USER_NAME:
608              do_indent();
609              out << "<unused-name/>" << endl;
610              break;
611 
612         case NC_LABEL:
613              do_indent();
614              out << "<Label value=\"" << vsi.sym_val.label << "\"/>" << endl;
615              break;
616 
617         case NC_VARIABLE:
618              do_indent();
619              out << "<Variable vid=\"" << find_vid(vsi.apl_val.get())
620                  << "\"/>" << endl;
621              break;
622 
623         case NC_FUNCTION:
624         case NC_OPERATOR:
625              save_Function(*vsi.sym_val.function);
626              break;
627 
628         case NC_SHARED_VAR:
629              do_indent();
630              out << "<Shared-Variable key=\"" << vsi.sym_val.sv_key
631                  << "\"/>" << endl;
632              break;
633 
634         default: Assert(0);
635       }
636 }
637 //-----------------------------------------------------------------------------
638 bool
compare_val_par(const _val_par & A,const _val_par & B,const void *)639 XML_Saving_Archive::_val_par::compare_val_par(const _val_par & A,
640                                               const _val_par & B, const void *)
641 {
642    return A._val > B._val;
643 }
644 //-----------------------------------------------------------------------------
645 int
compare_val_par1(const void * key,const void * B)646 XML_Saving_Archive::_val_par::compare_val_par1(const void * key, const void * B)
647 {
648 const void * Bv = (reinterpret_cast<const _val_par *>(B))->_val;
649    return charP(key) - charP(Bv);
650 }
651 //-----------------------------------------------------------------------------
652 XML_Saving_Archive &
save()653 XML_Saving_Archive::save()
654 {
655 tm * t;
656    {
657      timeval now;
658      gettimeofday(&now, 0);
659      t = gmtime(&now.tv_sec);
660    }
661 
662 const int offset = Workspace::get_v_Quad_TZ().get_offset();   // timezone offset
663 
664    // check with: xmllint --valid workspace.xml >/dev/null
665    //
666    out <<
667 "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n"
668 "\n"
669 "<!DOCTYPE Workspace\n"
670 "[\n"
671 "    <!ELEMENT Workspace (Value*,Ravel*,SymbolTable,Symbol*,Commands,StateIndicator)>\n"
672 "    <!ATTLIST Workspace  wsid       CDATA #REQUIRED>\n"
673 "    <!ATTLIST Workspace  year       CDATA #REQUIRED>\n"
674 "    <!ATTLIST Workspace  month      CDATA #REQUIRED>\n"
675 "    <!ATTLIST Workspace  day        CDATA #REQUIRED>\n"
676 "    <!ATTLIST Workspace  hour       CDATA #REQUIRED>\n"
677 "    <!ATTLIST Workspace  minute     CDATA #REQUIRED>\n"
678 "    <!ATTLIST Workspace  second     CDATA #REQUIRED>\n"
679 "    <!ATTLIST Workspace  timezone   CDATA #REQUIRED>\n"
680 "    <!ATTLIST Workspace  saving_SVN CDATA #REQUIRED>\n"
681 "\n"
682 "        <!ELEMENT Value (#PCDATA)>\n"
683 "        <!ATTLIST Value flg    CDATA #REQUIRED>\n"
684 "        <!ATTLIST Value vid    CDATA #REQUIRED>\n"
685 "        <!ATTLIST Value parent CDATA #IMPLIED>\n"
686 "        <!ATTLIST Value rk     CDATA #REQUIRED>\n"
687 "        <!ATTLIST Value sh-0   CDATA #IMPLIED>\n"
688 "        <!ATTLIST Value sh-1   CDATA #IMPLIED>\n"
689 "        <!ATTLIST Value sh-2   CDATA #IMPLIED>\n"
690 "        <!ATTLIST Value sh-3   CDATA #IMPLIED>\n"
691 "        <!ATTLIST Value sh-4   CDATA #IMPLIED>\n"
692 "        <!ATTLIST Value sh-5   CDATA #IMPLIED>\n"
693 "        <!ATTLIST Value sh-6   CDATA #IMPLIED>\n"
694 "        <!ATTLIST Value sh-7   CDATA #IMPLIED>\n"
695 "\n"
696 "        <!ELEMENT Ravel (#PCDATA)>\n"
697 "        <!ATTLIST Ravel vid    CDATA #REQUIRED>\n"
698 "        <!ATTLIST Ravel cells  CDATA #REQUIRED>\n"
699 "\n"
700 "        <!ELEMENT SymbolTable (Symbol*)>\n"
701 "        <!ATTLIST SymbolTable size CDATA #REQUIRED>\n"
702 "\n"
703 "            <!ELEMENT Symbol (unused-name|Variable|Function|Label|Shared-Variable)*>\n"
704 "            <!ATTLIST Symbol name       CDATA #REQUIRED>\n"
705 "            <!ATTLIST Symbol stack-size CDATA #REQUIRED>\n"
706 "\n"
707 "                <!ELEMENT unused-name EMPTY>\n"
708 "\n"
709 "                <!ELEMENT Variable (#PCDATA)>\n"
710 "                <!ATTLIST Variable vid CDATA #REQUIRED>\n"
711 "\n"
712 "                <!ELEMENT Function (UCS)>\n"
713 "                <!ATTLIST Function creation-time   CDATA #IMPLIED>\n"
714 "                <!ATTLIST Function exec-properties CDATA #IMPLIED>\n"
715 "\n"
716 "                <!ELEMENT Label (#PCDATA)>\n"
717 "                <!ATTLIST Label value CDATA #REQUIRED>\n"
718 "\n"
719 "                <!ELEMENT Shared-Variable (#PCDATA)>\n"
720 "                <!ATTLIST Shared-Variable key CDATA #REQUIRED>\n"
721 "\n"
722 "        <!ELEMENT UCS (#PCDATA)>\n"
723 "        <!ATTLIST UCS uni CDATA #REQUIRED>\n"
724 "\n"
725 "        <!ELEMENT Commands (Command*)>\n"
726 "        <!ATTLIST Commands size CDATA #REQUIRED>\n"
727 "\n"
728 "            <!ELEMENT Command (#PCDATA)>\n"
729 "            <!ATTLIST Command name       CDATA #REQUIRED>\n"
730 "            <!ATTLIST Command mode       CDATA #REQUIRED>\n"
731 "            <!ATTLIST Command fun       CDATA #REQUIRED>\n"
732 "\n"
733 "        <!ELEMENT StateIndicator (SI-entry*)>\n"
734 "        <!ATTLIST StateIndicator levels CDATA #REQUIRED>\n"
735 "\n"
736 "            <!ELEMENT SI-entry ((Execute|Statements|UserFunction),Parser+)>\n"
737 "            <!ATTLIST SI-entry level     CDATA #REQUIRED>\n"
738 "            <!ATTLIST SI-entry pc        CDATA #REQUIRED>\n"
739 "            <!ATTLIST SI-entry line      CDATA #REQUIRED>\n"
740 "\n"
741 "                <!ELEMENT Statements (UCS)>\n"
742 "\n"
743 "                <!ELEMENT Execute (UCS)>\n"
744 "\n"
745 "                <!ELEMENT UserFunction (#PCDATA)>\n"
746 "                <!ATTLIST UserFunction ufun-name       CDATA #IMPLIED>\n"
747 "                <!ATTLIST UserFunction macro-num       CDATA #IMPLIED>\n"
748 "                <!ATTLIST UserFunction lambda-name     CDATA #IMPLIED>\n"
749 "                <!ATTLIST UserFunction symbol-level    CDATA #IMPLIED>\n"
750 "\n"
751 "                <!ELEMENT Parser (Token*)>\n"
752 "                <!ATTLIST Parser size           CDATA #REQUIRED>\n"
753 "                <!ATTLIST Parser assign-pending CDATA #REQUIRED>\n"
754 "                <!ATTLIST Parser lookahead-high CDATA #REQUIRED>\n"
755 "                <!ATTLIST Parser action         CDATA #REQUIRED>\n"
756 
757 "                    <!ELEMENT Token (#PCDATA)>\n"
758 "                    <!ATTLIST Token pc           CDATA #REQUIRED>\n"
759 "                    <!ATTLIST Token tag          CDATA #REQUIRED>\n"
760 "                    <!ATTLIST Token char         CDATA #IMPLIED>\n"
761 "                    <!ATTLIST Token int          CDATA #IMPLIED>\n"
762 "                    <!ATTLIST Token float        CDATA #IMPLIED>\n"
763 "                    <!ATTLIST Token real         CDATA #IMPLIED>\n"
764 "                    <!ATTLIST Token imag         CDATA #IMPLIED>\n"
765 "                    <!ATTLIST Token sym          CDATA #IMPLIED>\n"
766 "                    <!ATTLIST Token line         CDATA #IMPLIED>\n"
767 "                    <!ATTLIST Token vid          CDATA #IMPLIED>\n"
768 "                    <!ATTLIST Token index        CDATA #IMPLIED>\n"
769 "                    <!ATTLIST Token fun-id       CDATA #IMPLIED>\n"
770 "                    <!ATTLIST Token ufun-name    CDATA #IMPLIED>\n"
771 "                    <!ATTLIST Token symbol-level CDATA #IMPLIED>\n"
772 "                    <!ATTLIST Token comment      CDATA #IMPLIED>\n"
773 "\n"
774 "]>\n"
775 "\n"
776 "\n"
777 "    <!-- hour/minute/second is )SAVE time in UTC (aka. GMT).\n"
778 "         timezone is offset to UTC in seconds.\n"
779 "         local time is UTC + offset -->\n"
780 "<Workspace wsid=\""     << Workspace::get_WS_name()
781      << "\" year=\""     << (t->tm_year + 1900)
782      << "\" month=\""    << (t->tm_mon  + 1)
783      << "\" day=\""      <<  t->tm_mday << "\"" << endl <<
784 "           hour=\""     <<  t->tm_hour
785      << "\" minute=\""   <<  t->tm_min
786      << "\" second=\""   <<  t->tm_sec
787      << "\" timezone=\"" << offset << "\"" << endl <<
788 "           saving_SVN=\"" << ARCHIVE_SVN
789      << "\">\n" << endl;
790 
791    ++indent;
792 
793    // collect all values to be saved. We mark the values to avoid
794    // saving of stale values and unmark the used values
795    //
796    Value::mark_all_dynamic_values();
797    Workspace::unmark_all_values();
798 
799    for (const DynamicObject * dob = DynamicObject::get_all_values()->get_next();
800         dob != DynamicObject::get_all_values(); dob = dob->get_next())
801        {
802          // WARNING: do not use pValue() here !
803          const Value * val = static_cast<const Value *>(dob);
804 
805          if (val->is_marked())    continue;   // stale
806 
807          ++value_count;
808        }
809 
810    values = new _val_par[value_count];
811 ShapeItem idx = 0;
812 
813    for (const DynamicObject * dob = DynamicObject::get_all_values()->get_next();
814         dob != DynamicObject::get_all_values(); dob = dob->get_next())
815        {
816          // WARNING: do not use pValue() here !
817          const Value * val = static_cast<const Value *>(dob);
818 
819          if (val->is_marked())    continue;   // stale
820 
821          val->unmark();
822          new (values + idx++) _val_par(val, INVALID_VID);
823        }
824 
825    Assert(idx == value_count);
826 
827    // some people use an excessive number of values. We therefore sort them
828    // by the address of the value as to speed up finding them later on
829    //
830    Heapsort<_val_par>::sort(values, value_count, 0, &_val_par::compare_val_par);
831    loop(v, (value_count - 1))   Assert(&values[v]._val < &values[v + 1]._val);
832 
833    // set up parents of values
834    //
835    loop(p, value_count)   // for every (parent-) value
836       {
837         const Value & parent = *values[p]._val;
838         const ShapeItem ec = parent.nz_element_count();
839         loop(e, ec)   // for every ravel cell of the (parent-) value
840             {
841               const Cell & cP = parent.get_ravel(e);
842               if (cP.is_pointer_cell())
843                  {
844                    const Value * sub = cP.get_pointer_value().get();
845                    Assert1(sub);
846                    const Vid sub_idx = find_vid(sub);
847                    Assert(sub_idx < value_count);
848                    if (values[sub_idx]._par != INVALID_VID)
849                       {
850                         // sub already has a parent, which supposedly cannot
851                         // happen. print out some more information about this
852                         // case.
853                         //
854                         CERR << "*** Sub-Value "
855                              << voidP(sub) << " has two parents." << endl
856                              << "Child: vid=" << sub_idx << ", _val="
857                              << values[sub_idx]._val << "_par="
858                              << values[sub_idx]._par << endl
859                              << "Parent 2: vid=" << p <<  ", _val="
860                              << values[p]._val << "_par="
861                              << values[p]._par << endl
862                              << "Call stack:" << endl;
863                              BACKTRACE
864                         CERR << endl << " Running )CHECK..." << endl;
865                         Command::cmd_CHECK(CERR);
866                         CERR << endl;
867 
868 
869 #if VALUE_HISTORY_WANTED
870 print_history(CERR, sub, 0);
871 print_history(CERR, values[sub_idx]._par, 0);
872 print_history(CERR, values[p]._val, 0);
873 #endif
874 
875    CERR << endl <<
876 "The workspace will be )SAVEd, but using it for anything other than for\n"
877 " recovering its content (i.e. defined functions or variables) means asking\n"
878 " for BIG trouble!" << endl;
879                       }
880 
881                    values[sub_idx] = _val_par(values[sub_idx]._val, Vid(p));
882                  }
883               else if (cP.is_lval_cell())
884                  {
885                    Log(LOG_archive)
886                       CERR << "LVAL CELL in " << p << " at " LOC << endl;
887                  }
888             }
889       }
890 
891 
892    // save all values (without their ravel)
893    //
894    loop(vid, value_count)   save_shape(Vid(vid));
895 
896    // save ravels of all values
897    //
898    loop(vid, value_count)   save_Ravel(Vid(vid));
899 
900    // save user defined symbols
901    //
902    save_symtab(Workspace::get_symbol_table());
903 
904    // save certain system variables
905    //
906 #define rw_sv_def(x, _str, _txt) save_Symbol(Workspace::get_v_ ## x());
907 #define ro_sv_def(x, _str, _txt) save_Symbol(Workspace::get_v_ ## x());
908 #include "SystemVariable.def"
909 
910    // save user-defined commands (if any)
911    //
912    save_user_commands(Workspace::get_user_commands());
913 
914    // save state indicator
915    //
916    {
917      const int levels = Workspace::SI_entry_count();
918      do_indent();
919      out << "<StateIndicator levels=\"" << levels << "\">" << endl;
920 
921      ++indent;
922 
923      loop(l, levels)
924         {
925           for (const StateIndicator * si = Workspace::SI_top();
926                si; si = si->get_parent())
927               {
928                 if (si->get_level() == l)
929                    {
930                      save_SI_entry(*si);
931                      break;
932                    }
933               }
934         }
935 
936      --indent;
937 
938      do_indent();
939      out << "</StateIndicator>" << endl << endl;
940    }
941 
942    --indent;
943 
944    do_indent();
945 
946    // write closing tag and a few 0's so that string functions
947    // can be used on the mmaped file.
948    //
949    out << "</Workspace>" << endl
950        << char(0) << char(0) <<char(0) <<char(0) << endl;
951 
952    return *this;
953 }
954 //=============================================================================
XML_Loading_Archive(const char * _filename,int & dump_fd)955 XML_Loading_Archive::XML_Loading_Archive(const char * _filename, int & dump_fd)
956    : fd(-1),
957      map_start(0),
958      map_length(0),
959      file_start(0),
960      line_start(0),
961      line_no(1),
962      current_char(UNI_ASCII_SPACE),
963      data(0),
964      file_end(0),
965      copying(false),
966      protection(false),
967      reading_vids(false),
968      have_allowed_objects(false),
969      filename(_filename),
970      file_is_complete(false)
971 {
972    Log(LOG_archive)   CERR << "Loading file " << filename << endl;
973 
974    fd = open(filename, O_RDONLY);
975    if (fd == -1)   return;
976 
977 struct stat st;
978    if (fstat(fd, &st))
979       {
980         CERR << "fstat() failed: " << strerror(errno) << endl;
981         close(fd);
982         fd = -1;
983         return;
984       }
985 
986    map_length = st.st_size;
987    map_start = mmap(0, map_length, PROT_READ, MAP_SHARED, fd, 0);
988    if (map_start == reinterpret_cast<const void *>(-1))
989       {
990         CERR << "mmap() failed: " << strerror(errno) << endl;
991         close(fd);
992         fd = -1;
993         return;
994       }
995 
996    // success
997    //
998    file_start = charP(map_start);
999    file_end = utf8P(file_start + map_length);
1000 
1001    reset();
1002 
1003    if (!strncmp(file_start, "#!", 2) ||   // )DUMP file
1004        !strncmp(file_start, "<!", 2) ||   // )DUMP-HTML file
1005        !strncmp(file_start, "⍝!", 4))     // a library
1006       {
1007         // the file was either written with )DUMP or is a library.
1008         // Return the open file descriptor (the destructor will unmap())
1009         //
1010         dump_fd = fd;
1011         fd = -1;   // file will be closed via dump_fd
1012         return;
1013       }
1014 
1015    if (strncmp(file_start, "<?xml", 5))   // not an xml file
1016       {
1017         CERR << "file " << filename << " does not " << endl
1018              << "have the format of a GNU APL .xml or .apl file" << endl;
1019         close(fd);
1020         fd = -1;
1021         return;
1022       }
1023 }
1024 //-----------------------------------------------------------------------------
~XML_Loading_Archive()1025 XML_Loading_Archive::~XML_Loading_Archive()
1026 {
1027    if (map_start)   munmap(map_start, map_length);
1028    if (fd != -1)    close(fd);
1029 }
1030 //-----------------------------------------------------------------------------
1031 void
reset()1032 XML_Loading_Archive::reset()
1033 {
1034    line_start = data = utf8P(file_start);
1035    line_no = 1;
1036    next_tag(LOC);
1037 }
1038 //-----------------------------------------------------------------------------
1039 bool
skip_to_tag(const char * tag)1040 XML_Loading_Archive::skip_to_tag(const char * tag)
1041 {
1042    for (;;)
1043       {
1044          if (next_tag(LOC))   return true;
1045          if (is_tag(tag))     break;
1046       }
1047 
1048    return false;
1049 }
1050 //-----------------------------------------------------------------------------
1051 void
read_vids()1052 XML_Loading_Archive::read_vids()
1053 {
1054    reset();   // skips to <Workspace>
1055    reading_vids = true;
1056    read_Workspace(true);
1057    reading_vids = false;
1058 
1059    reset();
1060 }
1061 //-----------------------------------------------------------------------------
1062 void
where(ostream & out)1063 XML_Loading_Archive::where(ostream & out)
1064 {
1065    out << "line=" << line_no << "+" << (data - line_start) << " '";
1066 
1067    loop(j, 40)   { if (data[j] == 0x0A)   break;   out << data[j]; }
1068    out << "'" << endl;
1069 }
1070 //-----------------------------------------------------------------------------
1071 void
where_att(ostream & out)1072 XML_Loading_Archive::where_att(ostream & out)
1073 {
1074    out << "line=" << line_no << "+" << (attributes - line_start) << " '";
1075 
1076    loop(j, 40)
1077       {
1078         if (attributes[j] == 0x0A)        break;
1079         if (attributes + j >= end_attr)   break;
1080          out << attributes[j];
1081       }
1082 
1083    out << "'" << endl;
1084 }
1085 //-----------------------------------------------------------------------------
1086 bool
get_uni()1087 XML_Loading_Archive::get_uni()
1088 {
1089    if (data > file_end)   return true;   // EOF
1090 
1091 int len = 0;
1092    current_char = UTF8_string::toUni(data, len, true);
1093    data += len;
1094    if (current_char == 0x0A)   { ++line_no;   line_start = data; }
1095    return false;
1096 }
1097 //-----------------------------------------------------------------------------
1098 bool
is_tag(const char * prefix) const1099 XML_Loading_Archive::is_tag(const char * prefix) const
1100 {
1101    return !strncmp(charP(tag_name), prefix, strlen(prefix));
1102 }
1103 //-----------------------------------------------------------------------------
1104 void
expect_tag(const char * prefix,const char * loc) const1105 XML_Loading_Archive::expect_tag(const char * prefix, const char * loc) const
1106 {
1107    if (!is_tag(prefix))
1108       {
1109         CERR << "   Got tag ";
1110         print_tag(CERR);
1111         CERR << " when expecting tag " << prefix
1112              << " at " << loc << "  line " << line_no << endl;
1113         DOMAIN_ERROR;
1114       }
1115 }
1116 //-----------------------------------------------------------------------------
1117 void
print_tag(ostream & out) const1118 XML_Loading_Archive::print_tag(ostream & out) const
1119 {
1120    loop(t, attributes - tag_name)   out << tag_name[t];
1121 }
1122 //-----------------------------------------------------------------------------
1123 const UTF8 *
find_attr(const char * att_name,bool optional)1124 XML_Loading_Archive::find_attr(const char * att_name, bool optional)
1125 {
1126 const int att_len = strlen(att_name);
1127 
1128    for (const UTF8 * d = attributes; d < end_attr; ++d)
1129        {
1130          if (strncmp(att_name, charP(d), att_len))   continue;
1131          const UTF8 * dd = d + att_len;
1132          while (*dd <= ' ')   ++dd;   // skip whitespaces
1133          if (*dd++ != '=')   continue;
1134 
1135          // attribute= found. find value.
1136          while (*dd <= ' ')   ++dd;   // skip whitespaces
1137          Assert(*dd == '"');
1138          return dd + 1;
1139        }
1140 
1141    // not found
1142    //
1143    if (!optional)
1144       {
1145          CERR << "Attribute name '" << att_name << "' not found in:" << endl;
1146          where_att(CERR);
1147          DOMAIN_ERROR;
1148       }
1149    return 0;   // not found
1150 }
1151 //-----------------------------------------------------------------------------
1152 int64_t
find_int_attr(const char * attrib,bool optional,int base)1153 XML_Loading_Archive::find_int_attr(const char * attrib, bool optional, int base)
1154 {
1155 const UTF8 * value = find_attr(attrib, optional);
1156    if (value == 0)   return -1;   // not found
1157 
1158 const int64_t val = strtoll(charP(value), 0, base);
1159    return val;
1160 }
1161 //-----------------------------------------------------------------------------
1162 APL_Float
find_float_attr(const char * attrib)1163 XML_Loading_Archive::find_float_attr(const char * attrib)
1164 {
1165 const UTF8 * value = find_attr(attrib, false);
1166 const APL_Float val = strtod(charP(value), 0);
1167    return val;
1168 }
1169 //-----------------------------------------------------------------------------
1170 bool
next_tag(const char * loc)1171 XML_Loading_Archive::next_tag(const char * loc)
1172 {
1173 again:
1174 
1175    // read chars up to (including) '<'
1176    //
1177    while (current_char != '<')
1178       {
1179          if (get_uni())   return true;
1180       }
1181 
1182    tag_name = data;
1183 
1184    // read char after <
1185    //
1186    if (get_uni())   return true;
1187 
1188    if (current_char == '?')   goto again;   // processing instruction
1189    if (current_char == '!')   goto again;   // comment
1190    if (current_char == '/')   get_uni();    // / at start of name
1191 
1192    // read chars before attributes (if any)
1193    //
1194    for (;;)
1195        {
1196          if (current_char == ' ')   break;
1197          if (current_char == '/')   break;
1198          if (current_char == '>')   break;
1199          if (get_uni())   return true;
1200        }
1201 
1202    attributes = data;
1203 
1204    // read chars before end of tag
1205    //
1206    while (current_char != '>')
1207       {
1208          if (get_uni())   return true;
1209       }
1210 
1211    end_attr = data;
1212 
1213 /*
1214    CERR << "See tag ";
1215    for (const UTF8 * t = tag_name; t < attributes; ++t)   CERR << (char)*t;
1216    CERR << " at " << loc << " line " << line_no << endl;
1217 */
1218 
1219    return false;
1220 }
1221 //-----------------------------------------------------------------------------
1222 void
read_Workspace(bool silent)1223 XML_Loading_Archive::read_Workspace(bool silent)
1224 {
1225    expect_tag("Workspace", LOC);
1226 
1227 const int offset   = find_int_attr("timezone", false, 10);
1228 const UTF8 * wsid  = find_attr("wsid",         false);
1229 
1230 int year  = find_int_attr("year",     false, 10);
1231 int mon   = find_int_attr("month",    false, 10);
1232 int day   = find_int_attr("day",      false, 10);
1233 int hour  = find_int_attr("hour",     false, 10);
1234 int min   = find_int_attr("minute",   false, 10);
1235 int sec   = find_int_attr("second",   false, 10);
1236 
1237 UCS_string saving_SVN;
1238 UCS_string current_SVN(ARCHIVE_SVN);
1239    {
1240      const UTF8 * saving = find_attr("saving_SVN", true);
1241      while (saving && *saving != '"')   saving_SVN.append(Unicode(*saving++));
1242    }
1243 bool mismatch = false;
1244 
1245    if (saving_SVN.size() == 0)   // saved with very old version
1246       {
1247         mismatch = true;
1248         CERR << "WARNING: this workspace was )SAVEd with a VERY "
1249              << "old SVN version of GNU APL." << endl;
1250       }
1251    else if (saving_SVN != current_SVN)   // saved with different version
1252       {
1253         mismatch = true;
1254         CERR << "WARNING: this workspace was )SAVEd with SVN version "
1255              << saving_SVN << endl <<
1256         "          but is now being )LOADed with a SVN version "
1257              << current_SVN << " or greater" << endl;
1258       }
1259 
1260    if (mismatch)
1261       {
1262         CERR << "Expect problems, in particular when the )SI was not clear.\n";
1263         if (!copying)
1264            CERR << "In case of problems, please try )COPY instead of )LOAD."
1265                 << endl;
1266       }
1267 
1268    sec  += offset          % 60;
1269    min  += (offset /   60) % 60;
1270    hour += (offset / 3600) % 60;
1271    if      (sec  >= 60)   { sec  -= 60;   ++min;  }
1272    else if (sec  <  0)    { sec  += 60;   --min;  }
1273 
1274    if      (min  >= 60)   { min  -= 60;   ++hour; }
1275    else if (min  <   0)   { min  += 60;   --hour; }
1276 
1277    if      (hour >= 24)   { hour -= 24;   ++day;  }
1278    else if (hour <   0)   { hour += 24;   --day;  }
1279 
1280 bool next_month = false;
1281 bool prev_month = false;
1282    switch(day)
1283       {
1284         case 32: next_month = true;
1285                  break;
1286 
1287         case 31: if (mon == 4 || mon == 6 || mon == 9 || mon == 11)
1288                     next_month = true;
1289                  break;
1290 
1291         case 30: if (mon == 2)   next_month = true;
1292                  break;
1293 
1294         case 29: if (mon != 2)         break;               // not february
1295                  if (year & 3)         next_month = true;   // not leap year
1296                  // the above fails if someone loads a workspace that
1297                  // was saved around midnight on 2/28/2100. Dont do that!
1298                  break;
1299 
1300         case 0:  prev_month = true;
1301                  break;
1302 
1303         default: break;
1304       }
1305 
1306    if      (next_month)   { day = 1; ++mon;  }
1307    else if (prev_month)
1308            {
1309              day = 31;   --mon;
1310              if (mon == 4 || mon == 6 || mon == 9 || mon == 11)   day = 30;
1311              else if (mon == 2)                 day = (year & 3) ? 28 : 29;
1312            }
1313 
1314    if      (mon > 12)   { mon =  1; ++year; }
1315    else if (mon <  1)   { mon = 12; --year; }
1316 
1317    Log(LOG_archive)   CERR << "read_Workspace() " << endl;
1318 
1319    // quick check that the file is complete
1320    //
1321    for (const UTF8 * c = file_end - 12; (c > data) && (c > file_end - 200); --c)
1322        {
1323          if (!strncmp(charP(c), "</Workspace>", 12))
1324             {
1325               file_is_complete = true;
1326               break;
1327             }
1328        }
1329 
1330    if (!file_is_complete && !copying)
1331       {
1332         CERR <<
1333 "*** workspace file " << filename << endl <<
1334 "    seems to be incomplete (possibly caused by a crash on )SAVE?)\n"
1335 "    You may still be able to )COPY from it.\n"
1336 "\nNOT COPIED" << endl;
1337         return;
1338       }
1339 
1340    // the order in which tags are written to the xml file
1341    //
1342 const char * tag_order[] =
1343 {
1344   "Value",
1345   "Ravel",
1346   "SymbolTable",
1347   "Symbol",
1348   "Commands",
1349   "StateIndicator",
1350   "/Workspace",
1351   0
1352 };
1353 const char ** tag_pos = tag_order;
1354 
1355    for (;;)
1356        {
1357          next_tag(LOC);
1358 
1359          // make sure that we do not move backwards in tag_order
1360          //
1361          if (!is_tag(*tag_pos))   // new tag
1362             {
1363               tag_pos++;
1364               for (;;)
1365                   {
1366                      if (*tag_pos == 0)      DOMAIN_ERROR;   // end of list
1367                      if (is_tag(*tag_pos))   break;          // found
1368                      ++tag_pos;
1369                   }
1370             }
1371 
1372          if      (is_tag("Value"))            read_Value();
1373          else if (is_tag("Ravel"))            read_Ravel();
1374          else if (is_tag("SymbolTable"))      read_SymbolTable();
1375          else if (is_tag("Symbol"))           read_Symbol();
1376          else if (is_tag("Commands"))         read_Commands();
1377          else if (copying)                    break;
1378          else if (is_tag("StateIndicator"))   read_StateIndicator();
1379          else if (is_tag("/Workspace"))       break;
1380          else    /* complain */               expect_tag("UNEXPECTED", LOC);
1381        }
1382 
1383    // loaded workspace can contain stale variables, e.g. shared vars.
1384    // remove them.
1385    //
1386    Value::erase_stale(LOC);
1387 
1388    if (reading_vids)   return;
1389 
1390 const char * tz_sign = (offset < 0) ? "" : "+";
1391    if (!silent)   COUT
1392         << "SAVED "
1393         << setfill('0') << year        << "-"
1394         << setw(2)      << mon         << "-"
1395         << setw(2)      << day         << " "
1396         << setw(2)      << hour        << ":"
1397         << setw(2)      << min         << ":"
1398         << setw(2)      << sec         << " (GMT"
1399         << tz_sign      << offset/3600 << ")"
1400         << setfill(' ') <<  endl;
1401 
1402    if (!copying)
1403       {
1404         const UTF8 * end = wsid;
1405         while (*end != '"')   ++end;
1406 
1407         Workspace::set_WS_name(UCS_string(UTF8_string(wsid, end - wsid)));
1408       }
1409 
1410    if (have_allowed_objects && allowed_objects.size())
1411       {
1412         CERR << "NOT COPIED:";
1413         loop(a, allowed_objects.size())   CERR << " " << allowed_objects[a];
1414         CERR << endl;
1415       }
1416 }
1417 //-----------------------------------------------------------------------------
1418 void
read_Value()1419 XML_Loading_Archive::read_Value()
1420 {
1421    expect_tag("Value", LOC);
1422 
1423 const int  vid = find_int_attr("vid", false, 10);
1424 const int  parent = find_int_attr("parent", true, 10);
1425 const int  rk  = find_int_attr("rk",  false, 10);
1426 
1427    Log(LOG_archive)   CERR << "  read_Value() vid=" << vid << endl;
1428 
1429    if (reading_vids)
1430       {
1431          parents.push_back(parent);
1432          return;
1433       }
1434 
1435 Shape sh_value;
1436    loop(r, rk)
1437       {
1438         char sh[20];
1439         snprintf(sh, sizeof(sh), "sh-%d", int(r));
1440         const UTF8 * sh_r = find_attr(sh, false);
1441         sh_value.add_shape_item(atoll(charP(sh_r)));
1442       }
1443 
1444    // if we do )COPY or )PCOPY and vid is not in vids_COPY list, then we
1445    // push 0 (so that indexing with vid still works) and ignore such
1446    // values in read_Ravel.
1447    //
1448 bool no_copy = false;   // assume the value is needed
1449    if (copying)
1450       {
1451         // if vid is a sub-value then find its topmost owner
1452         //
1453         int parent = vid;
1454         for (;;)
1455             {
1456               Assert(parent < int(parents.size()));
1457               if (parents[parent] == -1)   break;   // topmost owner found
1458               parent = parents[parent];
1459             }
1460 
1461         no_copy = true;   // assume the value is not needed
1462         loop(v, vids_COPY.size())
1463            {
1464               if (parent == vids_COPY[v])   // vid is in the list: copy
1465                  {
1466                    no_copy = false;
1467                    break;
1468                  }
1469            }
1470       }
1471 
1472    if (no_copy)
1473       {
1474         values.push_back(Value_P());
1475       }
1476    else
1477       {
1478         Assert(vid == int(values.size()));
1479 
1480         Value_P val(sh_value, LOC);
1481         values.push_back(val);
1482       }
1483 }
1484 //-----------------------------------------------------------------------------
1485 void
read_Cells(Cell * & C,Value & C_owner,const UTF8 * & first)1486 XML_Loading_Archive::read_Cells(Cell * & C, Value & C_owner,
1487                                 const UTF8 * & first)
1488 {
1489    while (*first <= ' ')   ++first;
1490 
1491 int len;
1492 const Unicode type = UTF8_string::toUni(first, len, true);
1493 
1494    switch (type)
1495       {
1496         case UNI_PAD_U0: // end of UNI_PAD_U2
1497         case '\n':       // end of UNI_PAD_U2 (fix old bug)
1498              first += len;
1499              break;
1500 
1501         case UNI_PAD_U1: // hex Unicode
1502         case UNI_PAD_U2: // printable ASCII
1503              {
1504                UCS_string ucs;
1505                read_chars(ucs, first);
1506                loop(u, ucs.size())   new (C++) CharCell(ucs[u]);
1507              }
1508              break;
1509 
1510         case UNI_PAD_U3: // integer
1511              first += len;
1512              {
1513                char * end = 0;
1514                const APL_Integer val = strtoll(charP(first), &end, 10);
1515                new (C++) IntCell(val);
1516                first = utf8P(end);
1517              }
1518              break;
1519 
1520         case UNI_PAD_U4: // real
1521              first += len;
1522              {
1523                char * end = 0;
1524                const APL_Float val = strtod(charP(first), &end);
1525                new (C++) FloatCell(val);
1526                first = utf8P(end);
1527              }
1528              break;
1529 
1530         case UNI_PAD_U5: // complex
1531              first += len;
1532              {
1533                char * end = 0;
1534                const APL_Float real = strtod(charP(first), &end);
1535                first = utf8P(end);
1536                Assert(*end == 'J');
1537                ++end;
1538                const APL_Float imag = strtod(end, &end);
1539                new (C++) ComplexCell(real, imag);
1540                first = utf8P(end);
1541              }
1542              break;
1543 
1544         case UNI_PAD_U6: // pointer
1545              first += len;
1546              {
1547                char * end = 0;
1548                const int vid = strtoll(charP(first), &end, 10);
1549                Assert(vid >= 0);
1550                Assert(vid < int(values.size()));
1551                C++->init_from_value(values[vid].get(), C_owner, LOC);
1552                first = utf8P(end);
1553              }
1554              break;
1555 
1556         case UNI_PAD_U7: // cellref
1557              first += len;
1558              if (first[0] == '0')    // 0-cell-pointer
1559                 {
1560                   new (C++) LvalCell(0, 0);
1561                   first++;
1562                 }
1563              else
1564                 {
1565                   char * end = 0;
1566                   const int vid = strtoll(charP(first), &end, 16);
1567                   Assert(vid >= 0);
1568                   Assert(vid < int(values.size()));
1569                   Assert(*end == '[');   ++end;
1570                   const ShapeItem offset = strtoll(end, &end, 16);
1571                   Assert(*end == ']');   ++end;
1572                   new (C++) LvalCell(&values[vid]->get_ravel(offset),
1573                                      values[vid].get());
1574                   first = utf8P(end);
1575                 }
1576              break;
1577 
1578         case UNI_PAD_U8: // rational quotient
1579              //
1580              // we should understand rational quotients even if we are
1581              // not ./configured for them..
1582              //
1583              first += len;
1584              {
1585                char * end = 0;
1586                const uint64_t numer = strtoll(charP(first), &end, 10);
1587                first = utf8P(end);
1588 
1589                // skip ÷ (which is is C3 B7 in UTF8)
1590                Assert((*end++ & 0xFF) == 0xC3);
1591                Assert((*end++ & 0xFF) == 0xB7);
1592                const uint64_t denom = strtoll(end, &end, 10);
1593                Assert(denom > 0);
1594 #ifdef RATIONAL_NUMBERS_WANTED
1595                new (C++) FloatCell(numer, denom);
1596 #else
1597                new (C++) FloatCell((1.0*(numer))/denom);
1598 #endif
1599                first = utf8P(end);
1600              }
1601              break;
1602 
1603 
1604         default: Q1(type) Q1(line_no) DOMAIN_ERROR;
1605       }
1606 }
1607 //-----------------------------------------------------------------------------
1608 void
read_chars(UCS_string & ucs,const UTF8 * & utf)1609 XML_Loading_Archive::read_chars(UCS_string & ucs, const UTF8 * & utf)
1610 {
1611    while (*utf <= ' ')   ++utf;   // skip leading whitespace
1612 
1613    for (bool char_mode = false;;)
1614        {
1615          if (*utf == '"')   break;   // end of attribute value
1616 
1617          int len;
1618          const Unicode uni = UTF8_string::toUni(utf, len, true);
1619 
1620           if (char_mode && *utf != '\n' && uni != UNI_PAD_U0)
1621              {
1622                ucs.append(uni);
1623                utf += len;
1624                continue;
1625              }
1626 
1627          if (uni == UNI_PAD_U2)   // start of char_mode
1628             {
1629               utf += len;   // skip UNI_PAD_U2
1630               char_mode = true;
1631               continue;
1632             }
1633 
1634          if (uni == UNI_PAD_U1)   // start of hex mode
1635             {
1636               utf += len;   // skip UNI_PAD_U1
1637               char_mode = false;
1638               char * end = 0;
1639               const int hex = strtoll(charP(utf), &end, 16);
1640               ucs.append(Unicode(hex));
1641               utf = utf8P(end);
1642               continue;
1643             }
1644 
1645          if (uni == UNI_PAD_U0)   // end of char_mode
1646             {
1647               utf += len;   // skip UNI_PAD_U0
1648               char_mode = false;
1649               continue;
1650             }
1651 
1652          if (uni == '\n')   // end of char_mode (fix old bug)
1653             {
1654               // due to an old bug, the trailing UNI_PAD_U0 may be missing
1655               // at the end of the line. We therefore reset char_mode at the
1656               // end of the line so that workspaces save with that fault can
1657               // be read.
1658               //
1659               utf += len;   // skip UNI_PAD_U0
1660               char_mode = false;
1661               continue;
1662             }
1663 
1664          break;
1665        }
1666 
1667    while (*utf <= ' ')   ++utf;   // skip trailing whitespace
1668 }
1669 //-----------------------------------------------------------------------------
1670 void
read_Ravel()1671 XML_Loading_Archive::read_Ravel()
1672 {
1673    if (reading_vids)   return;
1674 
1675 const int vid = find_int_attr("vid", false, 10);
1676 const UTF8 * cells = find_attr("cells", false);
1677 
1678    Log(LOG_archive)   CERR << "    read_Ravel() vid=" << vid << endl;
1679 
1680    Assert(vid < int(values.size()));
1681 Value_P val = values[vid];
1682 
1683    if (!val)   // )COPY with vids_COPY or static value
1684       {
1685         return;
1686       }
1687 
1688 const ShapeItem count = val->nz_element_count();
1689 Cell * C = &val->get_ravel(0);
1690 Cell * end = C + count;
1691 
1692    while (C < end)
1693       {
1694         read_Cells(C, val.getref(), cells);
1695         while (*cells <= ' ')   ++cells;   // skip trailing whitespace
1696       }
1697 
1698    if (C != end)   // unless all cells read
1699       {
1700         CERR << "vid=" << vid << endl;
1701         FIXME;
1702       }
1703 
1704    {
1705      int len = 0;
1706      const Unicode next = UTF8_string::toUni(cells, len, true);
1707      Assert(next == '"');
1708    }
1709 
1710    val->check_value(LOC);
1711 }
1712 //-----------------------------------------------------------------------------
1713 void
read_unused_name(int d,Symbol & symbol)1714 XML_Loading_Archive::read_unused_name(int d, Symbol & symbol)
1715 {
1716    Log(LOG_archive)   CERR << "      [" << d << "] unused name" << endl;
1717 
1718    if (d == 0)   return;   // Symbol::Symbol has already created the top level
1719 
1720    symbol.push();
1721 }
1722 //-----------------------------------------------------------------------------
1723 void
read_Variable(int d,Symbol & symbol)1724 XML_Loading_Archive::read_Variable(int d, Symbol & symbol)
1725 {
1726 const int vid = find_int_attr("vid", false, 10);
1727    Assert(vid < int(values.size()));
1728 
1729    Log(LOG_archive)   CERR << "      [" << d << "] read_Variable() vid=" << vid
1730                            << " name=" << symbol.get_name() << endl;
1731 
1732    // some system variables are saved for troubleshooting purposes, but
1733    // should not be loaded...
1734    //
1735    if (symbol.is_readonly())                     return;
1736    if (symbol.get_name().starts_iwith("⎕NLT"))   return;   // extinct
1737    if (symbol.get_Id() == ID_Quad_SVE)           return;
1738    if (symbol.get_Id() == ID_Quad_SYL)           return;
1739    if (symbol.get_Id() == ID_Quad_PS)            return;
1740 
1741    if (vid == -1)   // stale variable
1742       {
1743         Log(LOG_archive)   CERR << "      " << symbol.get_name()
1744                                 << " looks like a stale variable" << endl;
1745         return;
1746       }
1747 
1748    if (!values[vid])   return;   // value filtered out
1749 
1750    while (symbol.value_stack_size() <= d)   symbol.push();
1751 // if (d != 0)   symbol.push();
1752 
1753    try
1754       {
1755         symbol.assign(values[vid], true, LOC);
1756       }
1757    catch (...)
1758       {
1759         CERR << "*** Could not assign value " << *values[vid]
1760              << "    to variable " << symbol.get_name() << " ***" << endl;
1761       }
1762 }
1763 //-----------------------------------------------------------------------------
1764 void
read_Label(int d,Symbol & symbol)1765 XML_Loading_Archive::read_Label(int d, Symbol & symbol)
1766 {
1767 const int value = find_int_attr("value", false, 10);
1768    if (d == 0)   symbol.pop();
1769    symbol.push_label(Function_Line(value));
1770 }
1771 //-----------------------------------------------------------------------------
1772 void
read_Function(int d,Symbol & symbol)1773 XML_Loading_Archive::read_Function(int d, Symbol & symbol)
1774 {
1775 const int native = find_int_attr("native", true, 10);
1776 const APL_time_us creation_time = find_int_attr("creation-time", true, 10);
1777 int eprops[4] = { 0, 0, 0, 0 };
1778 const UTF8 * ep = find_attr("exec-properties", true);
1779    if (ep)
1780       {
1781         sscanf(charP(ep), "%d,%d,%d,%d",
1782                eprops, eprops + 1, eprops + 2, eprops+ 3);
1783       }
1784 
1785    Log(LOG_archive)
1786       CERR << "      [" << d << "] read_Function(" << symbol.get_name()
1787            << ") native=" << native << endl;
1788 
1789    next_tag(LOC);
1790    expect_tag("UCS", LOC);
1791 const UTF8 * uni = find_attr("uni", false);
1792    next_tag(LOC);
1793    expect_tag("/Function", LOC);
1794 
1795 UCS_string text;
1796    while (*uni != '"')   read_chars(text, uni);
1797 
1798    if (native == 1)
1799       {
1800         NativeFunction * nfun = NativeFunction::fix(text, symbol.get_name());
1801         if (nfun)   // fix succeeded
1802            {
1803              nfun->set_creation_time(creation_time);
1804              if (d == 0)   symbol.pop();
1805              symbol.push_function(nfun);
1806            }
1807         else        // fix failed
1808            {
1809              CERR << "   *** loading of native function " << text
1810                   << " failed" << endl << endl;
1811              if (d == 0)   symbol.pop();
1812              symbol.push();
1813            }
1814       }
1815    else
1816       {
1817         int err = 0;
1818         UCS_string creator_UCS(filename);
1819         creator_UCS.append(UNI_ASCII_COLON);
1820         creator_UCS.append_number(line_no);
1821         UTF8_string creator(creator_UCS);
1822 
1823         UserFunction * ufun = 0;
1824         if (text[0] == UNI_LAMBDA)
1825            {
1826              ufun = UserFunction::fix_lambda(symbol, text);
1827              ufun->increment_refcount(LOC);   // since we push it below
1828            }
1829         else
1830            {
1831              ufun = UserFunction::fix(text, err, false, LOC, creator, false);
1832            }
1833 
1834         if (d == 0)   symbol.pop();
1835         if (ufun)
1836            {
1837              ufun->set_creation_time(creation_time);
1838              symbol.push_function(ufun);
1839              ufun->set_exec_properties(eprops);
1840            }
1841         else
1842            {
1843              CERR << "    ⎕FX " << symbol.get_name() << " failed: "
1844                   << Workspace::more_error() << endl;
1845              symbol.push();
1846            }
1847       }
1848 }
1849 //-----------------------------------------------------------------------------
1850 void
read_Shared_Variable(int d,Symbol & symbol)1851 XML_Loading_Archive::read_Shared_Variable(int d, Symbol & symbol)
1852 {
1853 // const SV_key key = find_int_attr("key", false, 10);
1854    if (d != 0)   symbol.push();
1855 
1856    CERR << "WARNING: workspace was )SAVEd with a shared variable "
1857         << symbol.get_name() << endl
1858         << " (shared variables are not restored by )LOAD or )COPY)" << endl;
1859 
1860    // symbol.share_var(key);
1861 }
1862 //-----------------------------------------------------------------------------
1863 void
read_SymbolTable()1864 XML_Loading_Archive::read_SymbolTable()
1865 {
1866 const int size = find_int_attr("size", false, 10);
1867 
1868    Log(LOG_archive)   CERR << "  read_SymbolTable()" << endl;
1869 
1870    loop(s, size)
1871       {
1872         next_tag(LOC);
1873         read_Symbol();
1874       }
1875 
1876    next_tag(LOC);
1877    expect_tag("/SymbolTable", LOC);
1878 }
1879 //-----------------------------------------------------------------------------
1880 void
read_Symbol()1881 XML_Loading_Archive::read_Symbol()
1882 {
1883    expect_tag("Symbol", LOC);
1884 
1885 const UTF8 * name = find_attr("name",  false);
1886 const UTF8 * name_end = name;
1887    while (*name_end != '"')   ++name_end;
1888 
1889 UTF8_string name_UTF(name, name_end - name);
1890 UCS_string  name_UCS(name_UTF);
1891    if (name_UCS.size() == 0)
1892       {
1893         CERR << "*** Warning: empty Symbol name in XML archive " << filename
1894              << " around line " << line_no << endl;
1895         skip_to_tag("/Symbol");
1896         return;
1897       }
1898 
1899    Log(LOG_archive)   CERR << "    read_Symbol() name=" << name_UCS << endl;
1900 
1901    // ⎕NLT and ⎕PT were removed, but could lurk around in old workspaces.
1902    // ⎕PW and ⎕TZ are session variables that must not )LOADed (but might be
1903    // )COPYd)
1904    //
1905    if (name_UTF == UTF8_string("⎕NLT") || name_UTF == UTF8_string("⎕PT"))
1906       {
1907         Log(LOG_archive)   CERR << "        skipped at " << LOC << endl;
1908         skip_to_tag("/Symbol");
1909         return;
1910       }
1911 
1912 const int depth = find_int_attr("stack-size", false, 10);
1913 
1914    // lookup symbol, trying ⎕xx first
1915    //
1916 Symbol * symbol;
1917    if (name_UCS == ID::get_name_UCS(ID_LAMBDA))
1918       symbol = &Workspace::get_v_LAMBDA();
1919    else if (name_UCS == ID::get_name_UCS(ID_ALPHA))
1920       symbol = &Workspace::get_v_ALPHA();
1921    else if (name_UCS == ID::get_name_UCS(ID_ALPHA_U))
1922       symbol = &Workspace::get_v_ALPHA_U();
1923    else if (name_UCS == ID::get_name_UCS(ID_CHI))
1924       symbol = &Workspace::get_v_CHI();
1925    else if (name_UCS == ID::get_name_UCS(ID_OMEGA))
1926       symbol = &Workspace::get_v_OMEGA();
1927    else if (name_UCS == ID::get_name_UCS(ID_OMEGA_U))
1928       symbol = &Workspace::get_v_OMEGA_U();
1929    else
1930       symbol = Workspace::lookup_existing_symbol(name_UCS);
1931 
1932    // we do NOT copy if:
1933    //
1934    // 1. )PCOPY and the symbol exists, or
1935    // 2.  there is an object list and this symbol is not contained in the list
1936    //
1937 const bool is_protected = symbol && protection;
1938 const bool is_selected = allowed_objects.contains(name_UCS);
1939 bool no_copy = is_protected || (have_allowed_objects && !is_selected);
1940 
1941    if (reading_vids)
1942       {
1943         // we prepare vids for )COPY or )PCOPY, so we do not create a symbol
1944         // and care only for the top level
1945         //
1946         if (no_copy || (depth == 0))
1947            {
1948              Log(LOG_archive)   CERR << "        skipped at " << LOC << endl;
1949              skip_to_tag("/Symbol");
1950              return;
1951            }
1952 
1953         // we have entries and copying is allowed
1954         //
1955         next_tag(LOC);
1956         if (is_tag("Variable"))
1957            {
1958              const int vid = find_int_attr("vid", false, 10);
1959              vids_COPY.push_back(vid);
1960            }
1961         skip_to_tag("/Symbol");
1962         return;
1963       }
1964 
1965    // in a )COPY without dedicated objects only
1966    // ⎕CT, ⎕FC, ⎕IO, ⎕LX, ⎕PP, ⎕PR, and ⎕RL shall be copied
1967    //
1968    if (!have_allowed_objects       &&   // no dedicated object list
1969         copying                    &&   // )COPY
1970         (name_UCS == ID::get_name_UCS(ID_Quad_CT) ||
1971          name_UCS == ID::get_name_UCS(ID_Quad_FC) ||
1972          name_UCS == ID::get_name_UCS(ID_Quad_IO) ||
1973          name_UCS == ID::get_name_UCS(ID_Quad_LX) ||
1974          name_UCS == ID::get_name_UCS(ID_Quad_PP) ||
1975          name_UCS == ID::get_name_UCS(ID_Quad_PR) ||
1976          name_UCS == ID::get_name_UCS(ID_Quad_RL)
1977         ))
1978       {
1979         Log(LOG_archive)   CERR << name_UCS << " not copied at " << LOC << endl;
1980         no_copy = true;
1981       }
1982 
1983    // in a )LOAD silently ignore session variables (⎕PW and ⎕TZ)
1984    //
1985    if (!copying &&   // )LOAD
1986         (name_UCS == ID::get_name_UCS(ID_Quad_PW) ||
1987          name_UCS == ID::get_name_UCS(ID_Quad_TZ)))
1988       {
1989         skip_to_tag("/Symbol");
1990         return;
1991       }
1992 
1993    if (copying)
1994       {
1995         if (no_copy || (depth == 0))
1996            {
1997              skip_to_tag("/Symbol");
1998              return;
1999            }
2000       }
2001 
2002    // remove this symbol from allowed_objects so that we can print NOT COPIED
2003    // at the end for all objects that are still in the list.
2004    //
2005    loop(a, allowed_objects.size())
2006       {
2007         if (allowed_objects[a] == name_UCS)
2008              {
2009                allowed_objects[a] = allowed_objects.back();
2010                allowed_objects.pop_back();
2011                break;
2012            }
2013       }
2014 
2015    if (symbol == 0)
2016       {
2017         symbol = Workspace::lookup_symbol(name_UCS);
2018       }
2019    Assert(symbol);
2020 
2021    loop(d, depth)
2022       {
2023         // for )COPY skip d > 0
2024         //
2025         if (copying && (d > 0))
2026            {
2027              skip_to_tag("/Symbol");
2028              return;
2029            }
2030 
2031         next_tag(LOC);
2032         if      (is_tag("unused-name"))       read_unused_name(d, *symbol);
2033         else if (is_tag("Variable"))          read_Variable(d, *symbol);
2034         else if (is_tag("Function"))          read_Function(d, *symbol);
2035         else if (is_tag("Label"))             read_Label(d, *symbol);
2036         else if (is_tag("Shared-Variable"))   read_Shared_Variable(d, *symbol);
2037       }
2038 
2039    Assert(symbol->value_stack_size() == depth);
2040    next_tag(LOC);
2041    expect_tag("/Symbol", LOC);
2042 }
2043 //-----------------------------------------------------------------------------
2044 void
read_Commands()2045 XML_Loading_Archive::read_Commands()
2046 {
2047 const int size = find_int_attr("size", false, 10);
2048 
2049    Log(LOG_archive)   CERR << "  read_Commands()" << endl;
2050 
2051    loop(s, size)
2052       {
2053         next_tag(LOC);
2054         read_Command();
2055       }
2056 
2057    next_tag(LOC);
2058    expect_tag("/Commands", LOC);
2059 }
2060 //-----------------------------------------------------------------------------
2061 void
read_Command()2062 XML_Loading_Archive::read_Command()
2063 {
2064    expect_tag("Command", LOC);
2065 
2066 const UTF8 * name = find_attr("name",  false);
2067 const UTF8 * name_end = name;
2068    while (*name_end != '"')   ++name_end;
2069 UTF8_string name_UTF(name, name_end - name);
2070 UCS_string  name_UCS(name_UTF);
2071 
2072 const UTF8 * fun = find_attr("fun",  false);
2073 const UTF8 * fun_end = fun;
2074    while (*fun_end != '"')   ++fun_end;
2075 UTF8_string fun_UTF(fun, fun_end - fun);
2076 UCS_string  fun_UCS(fun_UTF);
2077 
2078 const int mode = find_int_attr("mode", false, 10);
2079 
2080 Command::user_command ucmd = { name_UCS, fun_UCS, mode };
2081    Workspace::get_user_commands().push_back(ucmd);
2082 }
2083 //-----------------------------------------------------------------------------
2084 void
read_StateIndicator()2085 XML_Loading_Archive::read_StateIndicator()
2086 {
2087    if (copying)
2088       {
2089         skip_to_tag("/StateIndicator");
2090         return;
2091       }
2092 
2093    Log(LOG_archive)   CERR << "read_StateIndicator()" << endl;
2094 
2095 const int levels = find_int_attr("levels", false, 10);
2096 
2097    loop(l, levels)
2098       {
2099         next_tag(LOC);
2100         expect_tag("SI-entry", LOC);
2101 
2102         try
2103            {
2104              read_SI_entry(l);
2105            }
2106         catch (...)
2107            {
2108              CERR <<
2109 "\n"
2110 "*** SORRY! An error occured while reading the )SI stack of the )SAVEd\n"
2111 "    workspace. The )SI stack was reconstructed to the extent possible.\n"
2112 "    We stronly recommend to perform )SIC and then )DUMP the workspace under\n"
2113 "    a different name.\n" << endl;
2114 
2115              // skip rest of <StateIndicator>
2116              //
2117              skip_to_tag("/StateIndicator");
2118              return;
2119            }
2120 
2121         // the parsers loop eats the terminating /SI-entry
2122       }
2123 
2124    next_tag(LOC);
2125    expect_tag("/StateIndicator", LOC);
2126 }
2127 //-----------------------------------------------------------------------------
2128 void
read_SI_entry(int lev)2129 XML_Loading_Archive::read_SI_entry(int lev)
2130 {
2131 const int level = find_int_attr("level", false, 10);
2132 const int pc = find_int_attr("pc", false, 10);
2133 
2134    Log(LOG_archive)   CERR << "    read_SI_entry() level=" << level << endl;
2135 
2136 Executable * exec = 0;
2137    next_tag(LOC);
2138    if      (is_tag("Execute"))        exec = read_Execute();
2139    else if (is_tag("Statements"))     exec = read_Statement();
2140    else if (is_tag("UserFunction"))   exec = read_UserFunction();
2141    else    Assert(0 && "Bad tag at " LOC);
2142 
2143    Assert(lev == level);
2144    Assert(exec);
2145 
2146    Workspace::push_SI(exec, LOC);
2147 StateIndicator * si = Workspace::SI_top();
2148    Assert(si);
2149    si->set_PC(Function_PC(pc));
2150    read_Parser(*si, lev);
2151 
2152    for (;;)
2153        {
2154          // skip old EOC tags
2155          //
2156          next_tag(LOC);
2157          if (is_tag("/SI-entry"))   break;
2158        }
2159 }
2160 //-----------------------------------------------------------------------------
2161 Executable *
read_Execute()2162 XML_Loading_Archive::read_Execute()
2163 {
2164    next_tag(LOC);
2165    expect_tag("UCS", LOC);
2166 
2167 const UTF8 * uni = find_attr("uni", false);
2168 UCS_string text;
2169    while (*uni != '"')   read_chars(text, uni);
2170    next_tag(LOC);
2171    expect_tag("/Execute", LOC);
2172 
2173 ExecuteList * exec = ExecuteList::fix(text, LOC);
2174    Assert(exec);
2175    return exec;
2176 }
2177 //-----------------------------------------------------------------------------
2178 Executable *
read_Statement()2179 XML_Loading_Archive::read_Statement()
2180 {
2181    next_tag(LOC);
2182    expect_tag("UCS", LOC);
2183 const UTF8 * uni = find_attr("uni", false);
2184 UCS_string text;
2185    while (*uni != '"')   read_chars(text, uni);
2186 
2187    next_tag(LOC);
2188    expect_tag("/Statements", LOC);
2189 
2190 StatementList * exec = StatementList::fix(text, LOC);
2191    Assert(exec);
2192    return exec;
2193 }
2194 //-----------------------------------------------------------------------------
2195 Executable *
read_UserFunction()2196 XML_Loading_Archive::read_UserFunction()
2197 {
2198 const int macro_num = find_int_attr("macro-num", true, 10);
2199    if (macro_num != -1)
2200       return Macro::get_macro(Macro::Macro_num(macro_num));
2201 
2202 const UTF8 * lambda_name = find_attr("lambda-name", true);
2203    if (lambda_name)   return read_lambda(lambda_name);
2204 
2205 const int level     = find_int_attr("symbol-level", false, 10);
2206 const UTF8 * name   = find_attr("ufun-name", false);
2207 const UTF8 * n  = name;
2208    while (*n != '"')   ++n;
2209 UTF8_string name_UTF(name, n - name);
2210 UCS_string name_UCS(name_UTF);
2211 
2212 Symbol * symbol = Workspace::lookup_symbol(name_UCS);
2213    Assert(symbol);
2214    Assert(level >= 0);
2215    Assert(level < symbol->value_stack_size());
2216 ValueStackItem & vsi = (*symbol)[level];
2217    Assert(vsi.name_class == NC_FUNCTION);
2218 Function * fun = vsi.sym_val.function;
2219    Assert(fun);
2220 UserFunction * ufun = fun->get_ufun1();
2221    Assert(fun == ufun);
2222 
2223    return ufun;
2224 }
2225 //-----------------------------------------------------------------------------
2226 Executable *
read_lambda(const UTF8 * lambda_name)2227 XML_Loading_Archive::read_lambda(const UTF8 * lambda_name)
2228 {
2229 UCS_string lambda = read_UCS();
2230 
2231 Symbol dummy(ID_No_ID);
2232 UserFunction * ufun = UserFunction::fix_lambda(dummy, lambda);
2233    Assert(ufun);
2234    ufun->increment_refcount(LOC);   // since we push it below
2235 
2236    next_tag(LOC);
2237    expect_tag("/UserFunction", LOC);
2238    return ufun;
2239 }
2240 //-----------------------------------------------------------------------------
2241 UCS_string
read_UCS()2242 XML_Loading_Archive::read_UCS()
2243 {
2244    skip_to_tag("UCS");
2245 const UTF8 * uni = find_attr("uni", false);
2246 UCS_string text;
2247    while (*uni != '"')   read_chars(text, uni);
2248    return text;
2249 }
2250 //-----------------------------------------------------------------------------
2251 void
read_Parser(StateIndicator & si,int lev)2252 XML_Loading_Archive::read_Parser(StateIndicator & si, int lev)
2253 {
2254    next_tag(LOC);
2255    expect_tag("Parser", LOC);
2256 
2257    Log(LOG_archive)   CERR << "        read_Parser() level=" << lev << endl;
2258 
2259 const int stack_size = find_int_attr("size",           false, 10);
2260 const int ass_state  = find_int_attr("assign-pending", false, 10);
2261 const int lah_high   = find_int_attr("lookahead-high", false, 10);
2262 const int action     = find_int_attr("action",         false, 10);
2263 
2264 Prefix & parser = si.current_stack;
2265 
2266    parser.set_assign_state(Assign_state(ass_state));
2267    parser.action = R_action(action);
2268    parser.lookahead_high = Function_PC(lah_high);
2269 
2270    for (;;)
2271        {
2272          Token_loc tl;
2273          const bool success = read_Token(tl);
2274          if (!success)   break;
2275 
2276          if (parser.size() < stack_size)   parser.push(tl);
2277          else                              parser.saved_lookahead.copy(tl, LOC);
2278        }
2279 
2280    Log(LOG_archive)
2281       {
2282          CERR << "        ";
2283          parser.print_stack(CERR, LOC);
2284       }
2285 
2286    expect_tag("/Parser", LOC);
2287 }
2288 //-----------------------------------------------------------------------------
2289 bool
read_Token(Token_loc & tloc)2290 XML_Loading_Archive::read_Token(Token_loc & tloc)
2291 {
2292    next_tag(LOC);
2293    if (is_tag("/Parser"))   return false;
2294    expect_tag("Token", LOC);
2295 
2296    tloc.pc  = Function_PC(find_int_attr("pc", false, 10));
2297 
2298 const TokenTag tag = TokenTag(find_int_attr("tag", false, 16));
2299 
2300    switch(tag & TV_MASK)   // cannot call get_ValueType() yet
2301       {
2302         case TV_NONE:
2303                new (&tloc.tok) Token(tag);
2304              break;
2305 
2306         case TV_CHAR:
2307              {
2308                const Unicode uni = Unicode(find_int_attr("char", false, 10));
2309                new (&tloc.tok) Token(tag, uni);
2310              }
2311              break;
2312 
2313         case TV_INT:
2314              {
2315                const int64_t ival = find_int_attr("int", false, 10);
2316                new (&tloc.tok) Token(tag, ival);
2317              }
2318              break;
2319 
2320         case TV_FLT:
2321              {
2322                const APL_Float val = find_float_attr("float");
2323                new (&tloc.tok) Token(tag, val);
2324              }
2325              break;
2326 
2327         case TV_CPX:
2328              {
2329                const APL_Float real = find_float_attr("real");
2330                const APL_Float imag = find_float_attr("imag");
2331                new (&tloc.tok) Token(tag, real, imag);
2332              }
2333              break;
2334 
2335         case TV_SYM:
2336              {
2337                const UTF8 * sym_name = find_attr("sym", false);
2338                const UTF8 * end = sym_name;
2339                while (*end != '"')   ++end;
2340                UTF8_string name_UTF(sym_name, end - sym_name);
2341                UCS_string name_UCS(name_UTF);
2342 
2343                Symbol * symbol = Avec::is_quad(name_UCS[0])
2344                                ? Workspace::lookup_existing_symbol(name_UCS)
2345                                : Workspace::lookup_symbol(name_UCS);
2346                new (&tloc.tok) Token(tag, symbol);
2347              }
2348              break;
2349 
2350         case TV_LIN:
2351              {
2352                const int ival = find_int_attr("line", false, 10);
2353                new (&tloc.tok) Token(tag, Function_Line(ival));
2354              }
2355              break;
2356 
2357         case TV_VAL:
2358              {
2359                const int vid = find_int_attr("vid", false, 10);
2360                Assert(vid < int(values.size()));
2361                new (&tloc.tok) Token(tag, values[vid]);
2362              }
2363              break;
2364 
2365         case TV_INDEX:
2366              {
2367                const UTF8 * vids = find_attr("index", false);
2368                IndexExpr & idx = *new IndexExpr(ASS_none, LOC);
2369                while (*vids != '"')
2370                   {
2371                     if (*vids == ',')   ++vids;
2372                     if (*vids == '-')   // elided index
2373                        {
2374                          idx.add(Value_P());
2375                        }
2376                     else                // value
2377                        {
2378                          char * end = 0;
2379                          Assert1(*vids == 'v');   ++vids;
2380                          Assert1(*vids == 'i');   ++vids;
2381                          Assert1(*vids == 'd');   ++vids;
2382                          Assert1(*vids == '_');   ++vids;
2383                          const int vid = strtoll(charP(vids), &end, 10);
2384                          Assert(vid < int(values.size()));
2385                          idx.add(values[vid]);
2386                          vids = utf8P(end);
2387                        }
2388                   }
2389                new (&tloc.tok) Token(tag, idx);
2390              }
2391              break;
2392 
2393         case TV_FUN:
2394              {
2395                Function * fun = read_Function_name("ufun-name",
2396                                                    "symbol-level",
2397                                                    "fun-id");
2398                Assert(fun);
2399                new (&tloc.tok) Token(tag, fun);
2400              }
2401              break;
2402 
2403         default: FIXME;
2404       }
2405 
2406    return true;
2407 }
2408 //-----------------------------------------------------------------------------
2409 Function *
read_Function_name(const char * name_attr,const char * level_attr,const char * id_attr)2410 XML_Loading_Archive::read_Function_name(const char * name_attr,
2411                                         const char * level_attr,
2412                                         const char * id_attr)
2413 {
2414 const UTF8 * fun_name = find_attr(name_attr, true);
2415 
2416    if (fun_name)   // user defined function
2417       {
2418         const int level = find_int_attr(level_attr, false, 10);
2419         const UTF8 * end = fun_name;
2420         while (*end != '"')   ++end;
2421         UTF8_string name_UTF(fun_name, end - fun_name);
2422         UCS_string name_UCS(name_UTF);
2423         if (name_UCS == ID::get_name_UCS(ID_LAMBDA))
2424            {
2425              Assert(level == -1);
2426              return find_lambda(name_UCS);
2427            }
2428 
2429         const Symbol & symbol = *Workspace::lookup_symbol(name_UCS);
2430 
2431         Assert(level >= 0);
2432         Assert(level < symbol.value_stack_size());
2433         const ValueStackItem & vsi = symbol[level];
2434         Assert(vsi.name_class == NC_FUNCTION);
2435         Function * fun = vsi.sym_val.function;
2436         Assert(fun);
2437         return fun;
2438       }
2439 
2440 const int fun_id = find_int_attr(id_attr, true, 16);
2441    if (fun_id != -1)
2442       {
2443         Function * sysfun = ID::get_system_function(Id(fun_id));
2444         Assert(sysfun);
2445         return sysfun;
2446       }
2447 
2448    // not found. This can happen when the function is optional.
2449    //
2450    return 0;
2451 }
2452 //-----------------------------------------------------------------------------
2453 Function *
find_lambda(const UCS_string & lambda)2454 XML_Loading_Archive::find_lambda(const UCS_string & lambda)
2455 {
2456 const StateIndicator & si = *Workspace::SI_top();
2457 const Executable & exec = *si.get_executable();
2458 const Token_string & body = exec.get_body();
2459 
2460    loop(b, body.size())
2461       {
2462         const Token & tok = body[b];
2463         if (tok.get_ValueType() == TV_SYM)
2464            {
2465              const Symbol * sym = tok.get_sym_ptr();
2466              Assert(sym);
2467              loop(v, sym->value_stack_size())
2468                 {
2469                   const ValueStackItem & vs = (*sym)[v];
2470                   if (vs.name_class == NC_FUNCTION ||
2471                       vs.name_class == NC_OPERATOR)
2472                      {
2473                        if (vs.sym_val.function->get_name() == lambda)
2474                           {
2475                             return vs.sym_val.function;
2476                           }
2477                      }
2478                 }
2479              continue;   // not found
2480            }
2481         else if (tok.get_ValueType() != TV_FUN)   continue;
2482 
2483         Function * fun = tok.get_function();
2484         Assert1(fun);
2485         const UserFunction * ufun = fun->get_ufun1();
2486         if (!ufun)   continue;   // not a user defined function
2487 
2488         const UCS_string & fname = ufun->get_name();
2489         if (fname == lambda)   return fun;
2490       }
2491 
2492    CERR << "find_lambda() failed for " << lambda
2493         << " at )SI level=" << si.get_level() << endl;
2494    return 0;
2495 }
2496 //=============================================================================
2497 
2498