1 /*
2 This file is part of GNU APL, a free implementation of the
3 ISO/IEC Standard 13751, "Programming Language APL, Extended"
4
5 Copyright (C) 2008-2016 Dr. Jürgen Sauermann
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include <fcntl.h>
22 #include <errno.h>
23 #include <iostream>
24 #include <stdio.h>
25 #include <sys/mman.h>
26 #include <sys/stat.h>
27 #include <sys/types.h>
28
29 #include "Archive.hh"
30 #include "buildtag.hh" // for ARCHIVE_SVN
31 #include "Common.hh"
32 #include "Command.hh"
33 #include "CharCell.hh"
34 #include "ComplexCell.hh"
35 #include "Executable.hh"
36 #include "FloatCell.hh"
37 #include "Function.hh"
38 #include "Heapsort.hh"
39 #include "IndexExpr.hh"
40 #include "IntCell.hh"
41 #include "LvalCell.hh"
42 #include "Macro.hh"
43 #include "NativeFunction.hh"
44 #include "Output.hh"
45 #include "PointerCell.hh"
46 #include "PrintOperator.hh"
47 #include "Symbol.hh"
48 #include "Token.hh"
49 #include "UCS_string.hh"
50 #include "UserFunction.hh"
51 #include "Value.hh"
52 #include "Workspace.hh"
53
54 using namespace std;
55
56 /// if there is less than x chars left on the current line then leave
57 /// char mode and start a new line indented.
58 #define NEED(x) if (space < int(x)) \
59 { leave_char_mode(); out << "\n"; space = do_indent(); } out
60
61 //-----------------------------------------------------------------------------
62 bool
xml_allowed(Unicode uni)63 XML_Saving_Archive::xml_allowed(Unicode uni)
64 {
65 if (uni < ' ') return false; // control chars and negative
66 if (uni == '<') return false; // < not allowed
67 if (uni == '&') return false; // < not allowed
68 if (uni == '"') return false; // not allowed in "..."
69 if (uni > '~')
70 {
71 // non-ASCII character. This is, in principle, allowed in XML but
72 // may not print properly if the font used does not provide the
73 // character. We therefore allow characters in ⎕AV and their alternate
74 // characters except our own type markers (⁰¹²...)
75 //
76 if (is_iPAD_char(uni)) return false;
77 if (Avec::find_char(uni) != Invalid_CHT) return true;
78 if (Avec::map_alternative_char(uni) != Invalid_CHT) return true;
79 return false; // allowed, but may
80 }
81 return true;
82 }
83 //-----------------------------------------------------------------------------
84 const char *
decr(int & counter,const char * str)85 XML_Saving_Archive::decr(int & counter, const char * str)
86 {
87 counter -= strlen(str);
88 return str;
89 }
90 //-----------------------------------------------------------------------------
91 int
do_indent()92 XML_Saving_Archive::do_indent()
93 {
94 const int spaces = indent * INDENT_LEN;
95 loop(s, spaces) out << " ";
96
97 return 72 - spaces;
98 }
99 //-----------------------------------------------------------------------------
100 XML_Saving_Archive::Vid
find_vid(const Value * val)101 XML_Saving_Archive::find_vid(const Value * val)
102 {
103 const void * item = bsearch(val, values, value_count, sizeof(_val_par),
104 _val_par::compare_val_par1);
105 if (item == 0) return INVALID_VID;
106 return Vid(reinterpret_cast<const _val_par *>(item) - values);
107 }
108 //-----------------------------------------------------------------------------
109 void
emit_unicode(Unicode uni,int & space)110 XML_Saving_Archive::emit_unicode(Unicode uni, int & space)
111 {
112 if (uni == UNI_ASCII_LF)
113 {
114 leave_char_mode();
115 out << UNI_PAD_U1 << "A" << "\n";
116 space = do_indent();
117 }
118 else if (!xml_allowed(uni))
119 {
120 space -= leave_char_mode();
121 char cc[40];
122 snprintf(cc, sizeof(cc), "%X", uni);
123 NEED(1 + strlen(cc)) << UNI_PAD_U1 << decr(space, cc);
124 space--; // PAD_U1
125 }
126 else
127 {
128 NEED(1) << "";
129 space -= enter_char_mode();
130 out << uni;
131 space--; // uni
132 }
133 }
134 //-----------------------------------------------------------------------------
135 void
save_UCS(const UCS_string & ucs)136 XML_Saving_Archive::save_UCS(const UCS_string & ucs)
137 {
138 int space = do_indent();
139 out << decr(space, "<UCS uni=\"");
140 Assert(char_mode == false);
141 ++indent;
142 loop(u, ucs.size()) emit_unicode(ucs[u], space);
143 leave_char_mode();
144 out << "\"/>" << endl;
145 space -= 2;
146 --indent;
147 }
148 //-----------------------------------------------------------------------------
149 XML_Saving_Archive &
save_shape(Vid vid)150 XML_Saving_Archive::save_shape(Vid vid)
151 {
152 const Value & v = *values[vid]._val;
153
154 do_indent();
155 out << "<Value flg=\"" << HEX(v.get_flags()) << "\" vid=\"" << vid << "\"";
156
157 const Vid parent_vid = values[vid]._par;
158 out << " parent=\"" << parent_vid << "\" rk=\"" << v.get_rank()<< "\"";
159
160 loop (r, v.get_rank())
161 {
162 out << " sh-" << r << "=\"" << v.get_shape_item(r) << "\"";
163 }
164
165 out << "/>" << endl;
166 return *this;
167 }
168 //-----------------------------------------------------------------------------
169 XML_Saving_Archive &
save_Ravel(Vid vid)170 XML_Saving_Archive::save_Ravel(Vid vid)
171 {
172 const Value & v = *values[vid]._val;
173
174 int space = do_indent();
175
176 char cc[80];
177 snprintf(cc, sizeof(cc), "<Ravel vid=\"%d\" cells=\"", vid);
178 out << decr(space, cc);
179
180 ++indent;
181 const ShapeItem len = v.nz_element_count();
182 const Cell * C = &v.get_ravel(0);
183 loop(l, len) emit_cell(*C++, space);
184
185 space -= leave_char_mode();
186 out<< "\"/>" << endl;
187 space -= 2;
188 --indent;
189
190 return *this;
191 }
192 //-----------------------------------------------------------------------------
193 void
emit_cell(const Cell & cell,int & space)194 XML_Saving_Archive::emit_cell(const Cell & cell, int & space)
195 {
196 char cc[80];
197 switch(cell.get_cell_type())
198 {
199 case CT_CHAR: // uses UNI_PAD_U0, UNI_PAD_U1, and UNI_PAD_U2
200 emit_unicode(cell.get_char_value(), space);
201 break;
202
203 case CT_INT: // uses UNI_PAD_U3
204 space -= leave_char_mode();
205 snprintf(cc, sizeof(cc), "%lld",
206 long_long(cell.get_int_value()));
207 NEED(1 + strlen(cc)) << UNI_PAD_U3 << decr(--space, cc);
208 break;
209
210 case CT_FLOAT: // uses UNI_PAD_U4 or UNI_PAD_U8
211 space -= leave_char_mode();
212 #ifdef RATIONAL_NUMBERS_WANTED
213 {
214 const FloatCell & flt = cell.cFloatCell();
215 if (const APL_Integer denom = flt.get_denominator())
216 {
217 const APL_Integer numer = flt.get_numerator();
218 snprintf(cc, sizeof(cc), "%lld÷%lld", long_long(numer),
219 long_long(denom));
220 NEED(1 + strlen(cc)) << UNI_PAD_U8 << decr(--space, cc);
221 break;
222 }
223 }
224 #endif
225 snprintf(cc, sizeof(cc), "%.17g",
226 double(cell.get_real_value()));
227 NEED(1 + strlen(cc)) << UNI_PAD_U4 << decr(--space, cc);
228 break;
229
230 case CT_COMPLEX: // uses UNI_PAD_U5
231 space -= leave_char_mode();
232 snprintf(cc, sizeof(cc), "%17gJ%17g",
233 double(cell.get_real_value()),
234 double(cell.get_imag_value()));
235 NEED(1 + strlen(cc)) << UNI_PAD_U5 << decr(--space, cc);
236 break;
237
238 case CT_POINTER: // uses UNI_PAD_U6
239 space -= leave_char_mode();
240 {
241 const Vid vid = find_vid(cell.get_pointer_value().get());
242 snprintf(cc, sizeof(cc), "%d", vid);
243 NEED(1 + strlen(cc)) << UNI_PAD_U6 << decr(--space, cc);
244 }
245 break;
246
247 case CT_CELLREF: // uses UNI_PAD_U7
248 space -= leave_char_mode();
249 {
250 const Cell * cp = cell.get_lval_value();
251 if (cp) // valid Cell *
252 {
253 const Value * owner = cell.cLvalCell().get_cell_owner();
254 const long long offset = owner->get_offset(cp);
255 const Vid vid = find_vid(owner);
256 snprintf(cc, sizeof(cc), "%d[%lld]", vid, offset);
257 NEED(1 + strlen(cc)) << UNI_PAD_U7 << decr(--space, cc);
258 }
259 else // 0-cell-pointer
260 {
261 snprintf(cc, sizeof(cc), "0");
262 NEED(2) << UNI_PAD_U7 << "0" << decr(--space, cc);
263 }
264 }
265 break;
266
267 default: Assert(0);
268 }
269 }
270 //-----------------------------------------------------------------------------
271 void
save_Function(const Function & fun)272 XML_Saving_Archive::save_Function(const Function & fun)
273 {
274 const int * eprops = fun.get_exec_properties();
275 const APL_time_us creation_time = fun.get_creation_time();
276 do_indent();
277 out << "<Function creation-time=\"" << creation_time
278 << "\" exec-properties=\""
279 << eprops[0] << "," << eprops[1] << ","
280 << eprops[2] << "," << eprops[3] << "\"";
281
282 if (fun.is_native()) out << " native=\"1\"";
283
284 out << ">" << endl;
285 ++indent;
286
287 save_UCS(fun.canonical(false));
288
289 --indent;
290 do_indent();
291 out << "</Function>" << endl;
292 }
293 //-----------------------------------------------------------------------------
294 int
save_Function_name(const char * ufun_prefix,const char * level_prefix,const char * id_prefix,const Function & fun)295 XML_Saving_Archive::save_Function_name(const char * ufun_prefix,
296 const char * level_prefix,
297 const char * id_prefix,
298 const Function & fun)
299 {
300 if (fun.is_derived())
301 {
302 CERR << endl <<
303 "WARNING: The )SI stack contains a derived function. )SAVEing a workspace in\n"
304 " such a state is currently not supported and WILL cause problems\n"
305 " when )LOADing the workspace. Please perform )SIC (or →) and then\n"
306 " )SAVE this workspace again." << endl;
307 }
308
309 const UserFunction * ufun = fun.get_ufun1();
310 if (ufun) // user defined function
311 {
312 const UCS_string & fname = ufun->get_name();
313 Symbol * sym = Workspace::lookup_symbol(fname);
314 Assert(sym);
315 const int sym_depth = sym->get_ufun_depth(ufun);
316 out << " " << ufun_prefix << "-name=\"" << fname << "\""
317 << " " << level_prefix << "-level=\"" << sym_depth << "\"";
318 return 2; // two attributes
319 }
320 else // primitive or quad function
321 {
322 out << " " << id_prefix << "-id=\"" << HEX(fun.get_Id());
323 return 1; // one attribute
324 }
325 }
326 //-----------------------------------------------------------------------------
327 void
save_Parser(const Prefix & prefix)328 XML_Saving_Archive::save_Parser(const Prefix & prefix)
329 {
330 do_indent();
331 out << "<Parser size=\"" << prefix.size()
332 << "\" assign-pending=\"" << prefix.get_assign_state()
333 << "\" action=\"" << prefix.action
334 << "\" lookahead-high=\"" << prefix.get_lookahead_high()
335 << "\">" << endl;
336
337 // write the lookahead token, starting at the fifo's get position
338 //
339 ++indent;
340 loop(s, prefix.size())
341 {
342 const Token_loc & tloc = prefix.at(prefix.size() - s - 1);
343 save_token_loc(tloc);
344 }
345 save_token_loc(prefix.saved_lookahead);
346 --indent;
347
348
349 do_indent();
350 out << "</Parser>" << endl;
351 }
352 //-----------------------------------------------------------------------------
353 void
save_symtab(const SymbolTable & symtab)354 XML_Saving_Archive::save_symtab(const SymbolTable & symtab)
355 {
356 std::vector<const Symbol *> symbols = symtab.get_all_symbols();
357
358 // remove erased symbols
359 //
360 for (size_t s = 0; s < symbols.size();)
361 {
362 const Symbol * sym = symbols[s];
363 if (sym->is_erased())
364 {
365 symbols[s] = symbols.back();
366 symbols.pop_back();
367 continue;
368 }
369
370 ++s;
371 }
372
373 do_indent();
374 out << "<SymbolTable size=\"" << symbols.size() << "\">" << endl;
375
376 ++indent;
377
378 while (symbols.size() > 0)
379 {
380 // set idx to the alphabetically smallest name
381 //
382 int idx = 0;
383 for (size_t i = 1; i < symbols.size(); ++i)
384 {
385 if (symbols[idx]->compare(*symbols[i]) > 0) idx = i;
386 }
387
388 const Symbol * sym = symbols[idx];
389 save_Symbol(*sym);
390
391 symbols[idx] = symbols.back();
392 symbols.pop_back();
393 }
394
395 --indent;
396
397 do_indent();
398 out << "</SymbolTable>" << endl << endl;
399 }
400 //-----------------------------------------------------------------------------
401 void
save_SI_entry(const StateIndicator & si)402 XML_Saving_Archive::save_SI_entry(const StateIndicator & si)
403 {
404 const Executable & exec = *si.get_executable();
405
406 do_indent();
407 out << "<SI-entry level=\"" << si.get_level()
408 << "\" pc=\"" << si.get_PC()
409 << "\" line=\"" << exec.get_line(si.get_PC()) << "\""
410 <<">" << endl << flush;
411
412 ++indent;
413 do_indent();
414 switch(exec.get_parse_mode())
415 {
416 case PM_FUNCTION:
417 {
418 Symbol * sym = Workspace::lookup_symbol(exec.get_name());
419 Assert(sym);
420 const UserFunction * ufun = exec.get_ufun();
421 Assert(ufun);
422 const int sym_depth = sym->get_ufun_depth(ufun);
423
424 if (ufun->is_macro())
425 out << "<UserFunction macro-num=\"" << ufun->get_macnum()
426 << "\"/>" << endl;
427 else if (ufun->is_lambda())
428 {
429 out << "<UserFunction lambda-name=\"" << ufun->get_name()
430 << "\">" << endl;
431 ++indent;
432 save_UCS(ufun->canonical(false));
433 --indent;
434 do_indent();
435 out << "</UserFunction>" << endl;
436 }
437 else
438 out << "<UserFunction ufun-name=\"" << sym->get_name()
439 << "\" symbol-level=\"" << sym_depth << "\"/>" << endl;
440 }
441 break;
442
443 case PM_STATEMENT_LIST:
444 out << "<Statements>" << endl;
445 ++indent;
446 save_UCS(exec.get_text(0));
447 --indent;
448 do_indent();
449 out << "</Statements>" << endl;
450 break;
451
452 case PM_EXECUTE:
453 out << "<Execute>" << endl;
454 ++indent;
455 save_UCS(exec.get_text(0));
456 --indent;
457 do_indent();
458 out << "</Execute>" << endl;
459 break;
460
461 default: FIXME;
462 }
463
464 // print the parser states...
465 //
466 save_Parser(si.current_stack);
467
468 --indent;
469
470 do_indent();
471 out << "</SI-entry>" << endl << endl;
472 }
473 //-----------------------------------------------------------------------------
474 void
save_Symbol(const Symbol & sym)475 XML_Saving_Archive::save_Symbol(const Symbol & sym)
476 {
477 do_indent();
478 out << "<Symbol name=\"" << sym.get_name() << "\" stack-size=\""
479 << sym.value_stack_size() << "\">" << endl;
480
481 ++indent;
482 loop(v, sym.value_stack_size()) save_vstack_item(sym[v]);
483 --indent;
484
485 do_indent();
486 out << "</Symbol>" << endl << endl;
487 }
488 //-----------------------------------------------------------------------------
489 void
save_user_commands(const std::vector<Command::user_command> & cmds)490 XML_Saving_Archive::save_user_commands(
491 const std::vector<Command::user_command> & cmds)
492 {
493 if (cmds.size() == 0) return;
494
495 do_indent();
496 out << "<Commands size=\"" << cmds.size() << "\">" << endl;
497
498 ++indent;
499 loop(u, cmds.size())
500 {
501 const Command::user_command & ucmd = cmds[u];
502 do_indent();
503 out << "<Command name=\"" << ucmd.prefix
504 << "\" mode=\"" << ucmd.mode
505 << "\" fun=\"" << ucmd.apl_function << "\"/>" << endl;
506 }
507
508 --indent;
509 do_indent();
510 out << "</Commands>" << endl << endl;
511 }
512 //-----------------------------------------------------------------------------
513 void
save_token_loc(const Token_loc & tloc)514 XML_Saving_Archive::save_token_loc(const Token_loc & tloc)
515 {
516 do_indent();
517 out << "<Token pc=\"" << tloc.pc
518 << "\" tag=\"" << HEX(tloc.tok.get_tag()) << "\"";
519 emit_token_val(tloc.tok);
520
521 out << "/>" << endl;
522 }
523 //-----------------------------------------------------------------------------
524 void
emit_token_val(const Token & tok)525 XML_Saving_Archive::emit_token_val(const Token & tok)
526 {
527 switch(tok.get_ValueType())
528 {
529 case TV_NONE: break;
530
531 case TV_CHAR: Log(LOG_archive) CERR << "Saving TV_SYM Token" << endl;
532 out << " char=\"" << int(tok.get_char_val()) << "\"";
533 break;
534
535 case TV_INT: Log(LOG_archive) CERR << "Saving TV_INT Token" << endl;
536 out << " int=\"" << tok.get_int_val() << "\"";
537 break;
538
539 case TV_FLT: Log(LOG_archive) CERR << "Saving TV_FLT Token" << endl;
540 out << " float=\"" << tok.get_flt_val() << "\"";
541 break;
542
543 case TV_CPX: Log(LOG_archive) CERR << "Saving TV_CPX Token" << endl;
544 out << " real=\"" << tok.get_cpx_real()
545 << "\" imag=\"" << tok.get_cpx_imag() << "\"";
546 break;
547
548 case TV_SYM: Log(LOG_archive) CERR << "Saving TV_SYM Token" << endl;
549 {
550 Symbol * sym = tok.get_sym_ptr();
551 const UCS_string name = sym->get_name();
552 out << " sym=\"" << name << "\"";
553 }
554 break;
555
556 case TV_LIN: Log(LOG_archive) CERR << "Saving TV_LIN Token" << endl;
557 out << " line=\"" << tok.get_fun_line() << "\"";
558 break;
559
560 case TV_VAL: {
561 Log(LOG_archive)
562 CERR << "Saving TV_VAL Token" << endl;
563
564 const Vid vid = find_vid(tok.get_apl_val().get());
565 out << " vid=\"" << vid << "\"";
566 }
567 break;
568
569 case TV_INDEX: {
570 Log(LOG_archive)
571 CERR << "Saving TV_INDEX Token" << endl;
572 const IndexExpr & idx = tok.get_index_val();
573 const int rank = idx.value_count();
574 out << " index=\"";
575 loop(i, rank)
576 {
577 if (i) out << ",";
578 const Value * val = idx.values[i].get();
579 if (val) out << "vid_" << find_vid(val);
580 else out << "-";
581 out << "\"";
582 }
583 }
584 break;
585
586 case TV_FUN: {
587 Log(LOG_archive)
588 CERR << "Saving TV_FUN Token" << endl;
589
590 Function * fun = tok.get_function();
591 Assert1(fun);
592 save_Function_name("ufun", "symbol", "fun", *fun);
593 }
594 out << "\"";
595 break;
596
597 default: FIXME;
598
599 }
600 }
601 //-----------------------------------------------------------------------------
602 void
save_vstack_item(const ValueStackItem & vsi)603 XML_Saving_Archive::save_vstack_item(const ValueStackItem & vsi)
604 {
605 switch(vsi.name_class)
606 {
607 case NC_UNUSED_USER_NAME:
608 do_indent();
609 out << "<unused-name/>" << endl;
610 break;
611
612 case NC_LABEL:
613 do_indent();
614 out << "<Label value=\"" << vsi.sym_val.label << "\"/>" << endl;
615 break;
616
617 case NC_VARIABLE:
618 do_indent();
619 out << "<Variable vid=\"" << find_vid(vsi.apl_val.get())
620 << "\"/>" << endl;
621 break;
622
623 case NC_FUNCTION:
624 case NC_OPERATOR:
625 save_Function(*vsi.sym_val.function);
626 break;
627
628 case NC_SHARED_VAR:
629 do_indent();
630 out << "<Shared-Variable key=\"" << vsi.sym_val.sv_key
631 << "\"/>" << endl;
632 break;
633
634 default: Assert(0);
635 }
636 }
637 //-----------------------------------------------------------------------------
638 bool
compare_val_par(const _val_par & A,const _val_par & B,const void *)639 XML_Saving_Archive::_val_par::compare_val_par(const _val_par & A,
640 const _val_par & B, const void *)
641 {
642 return A._val > B._val;
643 }
644 //-----------------------------------------------------------------------------
645 int
compare_val_par1(const void * key,const void * B)646 XML_Saving_Archive::_val_par::compare_val_par1(const void * key, const void * B)
647 {
648 const void * Bv = (reinterpret_cast<const _val_par *>(B))->_val;
649 return charP(key) - charP(Bv);
650 }
651 //-----------------------------------------------------------------------------
652 XML_Saving_Archive &
save()653 XML_Saving_Archive::save()
654 {
655 tm * t;
656 {
657 timeval now;
658 gettimeofday(&now, 0);
659 t = gmtime(&now.tv_sec);
660 }
661
662 const int offset = Workspace::get_v_Quad_TZ().get_offset(); // timezone offset
663
664 // check with: xmllint --valid workspace.xml >/dev/null
665 //
666 out <<
667 "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n"
668 "\n"
669 "<!DOCTYPE Workspace\n"
670 "[\n"
671 " <!ELEMENT Workspace (Value*,Ravel*,SymbolTable,Symbol*,Commands,StateIndicator)>\n"
672 " <!ATTLIST Workspace wsid CDATA #REQUIRED>\n"
673 " <!ATTLIST Workspace year CDATA #REQUIRED>\n"
674 " <!ATTLIST Workspace month CDATA #REQUIRED>\n"
675 " <!ATTLIST Workspace day CDATA #REQUIRED>\n"
676 " <!ATTLIST Workspace hour CDATA #REQUIRED>\n"
677 " <!ATTLIST Workspace minute CDATA #REQUIRED>\n"
678 " <!ATTLIST Workspace second CDATA #REQUIRED>\n"
679 " <!ATTLIST Workspace timezone CDATA #REQUIRED>\n"
680 " <!ATTLIST Workspace saving_SVN CDATA #REQUIRED>\n"
681 "\n"
682 " <!ELEMENT Value (#PCDATA)>\n"
683 " <!ATTLIST Value flg CDATA #REQUIRED>\n"
684 " <!ATTLIST Value vid CDATA #REQUIRED>\n"
685 " <!ATTLIST Value parent CDATA #IMPLIED>\n"
686 " <!ATTLIST Value rk CDATA #REQUIRED>\n"
687 " <!ATTLIST Value sh-0 CDATA #IMPLIED>\n"
688 " <!ATTLIST Value sh-1 CDATA #IMPLIED>\n"
689 " <!ATTLIST Value sh-2 CDATA #IMPLIED>\n"
690 " <!ATTLIST Value sh-3 CDATA #IMPLIED>\n"
691 " <!ATTLIST Value sh-4 CDATA #IMPLIED>\n"
692 " <!ATTLIST Value sh-5 CDATA #IMPLIED>\n"
693 " <!ATTLIST Value sh-6 CDATA #IMPLIED>\n"
694 " <!ATTLIST Value sh-7 CDATA #IMPLIED>\n"
695 "\n"
696 " <!ELEMENT Ravel (#PCDATA)>\n"
697 " <!ATTLIST Ravel vid CDATA #REQUIRED>\n"
698 " <!ATTLIST Ravel cells CDATA #REQUIRED>\n"
699 "\n"
700 " <!ELEMENT SymbolTable (Symbol*)>\n"
701 " <!ATTLIST SymbolTable size CDATA #REQUIRED>\n"
702 "\n"
703 " <!ELEMENT Symbol (unused-name|Variable|Function|Label|Shared-Variable)*>\n"
704 " <!ATTLIST Symbol name CDATA #REQUIRED>\n"
705 " <!ATTLIST Symbol stack-size CDATA #REQUIRED>\n"
706 "\n"
707 " <!ELEMENT unused-name EMPTY>\n"
708 "\n"
709 " <!ELEMENT Variable (#PCDATA)>\n"
710 " <!ATTLIST Variable vid CDATA #REQUIRED>\n"
711 "\n"
712 " <!ELEMENT Function (UCS)>\n"
713 " <!ATTLIST Function creation-time CDATA #IMPLIED>\n"
714 " <!ATTLIST Function exec-properties CDATA #IMPLIED>\n"
715 "\n"
716 " <!ELEMENT Label (#PCDATA)>\n"
717 " <!ATTLIST Label value CDATA #REQUIRED>\n"
718 "\n"
719 " <!ELEMENT Shared-Variable (#PCDATA)>\n"
720 " <!ATTLIST Shared-Variable key CDATA #REQUIRED>\n"
721 "\n"
722 " <!ELEMENT UCS (#PCDATA)>\n"
723 " <!ATTLIST UCS uni CDATA #REQUIRED>\n"
724 "\n"
725 " <!ELEMENT Commands (Command*)>\n"
726 " <!ATTLIST Commands size CDATA #REQUIRED>\n"
727 "\n"
728 " <!ELEMENT Command (#PCDATA)>\n"
729 " <!ATTLIST Command name CDATA #REQUIRED>\n"
730 " <!ATTLIST Command mode CDATA #REQUIRED>\n"
731 " <!ATTLIST Command fun CDATA #REQUIRED>\n"
732 "\n"
733 " <!ELEMENT StateIndicator (SI-entry*)>\n"
734 " <!ATTLIST StateIndicator levels CDATA #REQUIRED>\n"
735 "\n"
736 " <!ELEMENT SI-entry ((Execute|Statements|UserFunction),Parser+)>\n"
737 " <!ATTLIST SI-entry level CDATA #REQUIRED>\n"
738 " <!ATTLIST SI-entry pc CDATA #REQUIRED>\n"
739 " <!ATTLIST SI-entry line CDATA #REQUIRED>\n"
740 "\n"
741 " <!ELEMENT Statements (UCS)>\n"
742 "\n"
743 " <!ELEMENT Execute (UCS)>\n"
744 "\n"
745 " <!ELEMENT UserFunction (#PCDATA)>\n"
746 " <!ATTLIST UserFunction ufun-name CDATA #IMPLIED>\n"
747 " <!ATTLIST UserFunction macro-num CDATA #IMPLIED>\n"
748 " <!ATTLIST UserFunction lambda-name CDATA #IMPLIED>\n"
749 " <!ATTLIST UserFunction symbol-level CDATA #IMPLIED>\n"
750 "\n"
751 " <!ELEMENT Parser (Token*)>\n"
752 " <!ATTLIST Parser size CDATA #REQUIRED>\n"
753 " <!ATTLIST Parser assign-pending CDATA #REQUIRED>\n"
754 " <!ATTLIST Parser lookahead-high CDATA #REQUIRED>\n"
755 " <!ATTLIST Parser action CDATA #REQUIRED>\n"
756
757 " <!ELEMENT Token (#PCDATA)>\n"
758 " <!ATTLIST Token pc CDATA #REQUIRED>\n"
759 " <!ATTLIST Token tag CDATA #REQUIRED>\n"
760 " <!ATTLIST Token char CDATA #IMPLIED>\n"
761 " <!ATTLIST Token int CDATA #IMPLIED>\n"
762 " <!ATTLIST Token float CDATA #IMPLIED>\n"
763 " <!ATTLIST Token real CDATA #IMPLIED>\n"
764 " <!ATTLIST Token imag CDATA #IMPLIED>\n"
765 " <!ATTLIST Token sym CDATA #IMPLIED>\n"
766 " <!ATTLIST Token line CDATA #IMPLIED>\n"
767 " <!ATTLIST Token vid CDATA #IMPLIED>\n"
768 " <!ATTLIST Token index CDATA #IMPLIED>\n"
769 " <!ATTLIST Token fun-id CDATA #IMPLIED>\n"
770 " <!ATTLIST Token ufun-name CDATA #IMPLIED>\n"
771 " <!ATTLIST Token symbol-level CDATA #IMPLIED>\n"
772 " <!ATTLIST Token comment CDATA #IMPLIED>\n"
773 "\n"
774 "]>\n"
775 "\n"
776 "\n"
777 " <!-- hour/minute/second is )SAVE time in UTC (aka. GMT).\n"
778 " timezone is offset to UTC in seconds.\n"
779 " local time is UTC + offset -->\n"
780 "<Workspace wsid=\"" << Workspace::get_WS_name()
781 << "\" year=\"" << (t->tm_year + 1900)
782 << "\" month=\"" << (t->tm_mon + 1)
783 << "\" day=\"" << t->tm_mday << "\"" << endl <<
784 " hour=\"" << t->tm_hour
785 << "\" minute=\"" << t->tm_min
786 << "\" second=\"" << t->tm_sec
787 << "\" timezone=\"" << offset << "\"" << endl <<
788 " saving_SVN=\"" << ARCHIVE_SVN
789 << "\">\n" << endl;
790
791 ++indent;
792
793 // collect all values to be saved. We mark the values to avoid
794 // saving of stale values and unmark the used values
795 //
796 Value::mark_all_dynamic_values();
797 Workspace::unmark_all_values();
798
799 for (const DynamicObject * dob = DynamicObject::get_all_values()->get_next();
800 dob != DynamicObject::get_all_values(); dob = dob->get_next())
801 {
802 // WARNING: do not use pValue() here !
803 const Value * val = static_cast<const Value *>(dob);
804
805 if (val->is_marked()) continue; // stale
806
807 ++value_count;
808 }
809
810 values = new _val_par[value_count];
811 ShapeItem idx = 0;
812
813 for (const DynamicObject * dob = DynamicObject::get_all_values()->get_next();
814 dob != DynamicObject::get_all_values(); dob = dob->get_next())
815 {
816 // WARNING: do not use pValue() here !
817 const Value * val = static_cast<const Value *>(dob);
818
819 if (val->is_marked()) continue; // stale
820
821 val->unmark();
822 new (values + idx++) _val_par(val, INVALID_VID);
823 }
824
825 Assert(idx == value_count);
826
827 // some people use an excessive number of values. We therefore sort them
828 // by the address of the value as to speed up finding them later on
829 //
830 Heapsort<_val_par>::sort(values, value_count, 0, &_val_par::compare_val_par);
831 loop(v, (value_count - 1)) Assert(&values[v]._val < &values[v + 1]._val);
832
833 // set up parents of values
834 //
835 loop(p, value_count) // for every (parent-) value
836 {
837 const Value & parent = *values[p]._val;
838 const ShapeItem ec = parent.nz_element_count();
839 loop(e, ec) // for every ravel cell of the (parent-) value
840 {
841 const Cell & cP = parent.get_ravel(e);
842 if (cP.is_pointer_cell())
843 {
844 const Value * sub = cP.get_pointer_value().get();
845 Assert1(sub);
846 const Vid sub_idx = find_vid(sub);
847 Assert(sub_idx < value_count);
848 if (values[sub_idx]._par != INVALID_VID)
849 {
850 // sub already has a parent, which supposedly cannot
851 // happen. print out some more information about this
852 // case.
853 //
854 CERR << "*** Sub-Value "
855 << voidP(sub) << " has two parents." << endl
856 << "Child: vid=" << sub_idx << ", _val="
857 << values[sub_idx]._val << "_par="
858 << values[sub_idx]._par << endl
859 << "Parent 2: vid=" << p << ", _val="
860 << values[p]._val << "_par="
861 << values[p]._par << endl
862 << "Call stack:" << endl;
863 BACKTRACE
864 CERR << endl << " Running )CHECK..." << endl;
865 Command::cmd_CHECK(CERR);
866 CERR << endl;
867
868
869 #if VALUE_HISTORY_WANTED
870 print_history(CERR, sub, 0);
871 print_history(CERR, values[sub_idx]._par, 0);
872 print_history(CERR, values[p]._val, 0);
873 #endif
874
875 CERR << endl <<
876 "The workspace will be )SAVEd, but using it for anything other than for\n"
877 " recovering its content (i.e. defined functions or variables) means asking\n"
878 " for BIG trouble!" << endl;
879 }
880
881 values[sub_idx] = _val_par(values[sub_idx]._val, Vid(p));
882 }
883 else if (cP.is_lval_cell())
884 {
885 Log(LOG_archive)
886 CERR << "LVAL CELL in " << p << " at " LOC << endl;
887 }
888 }
889 }
890
891
892 // save all values (without their ravel)
893 //
894 loop(vid, value_count) save_shape(Vid(vid));
895
896 // save ravels of all values
897 //
898 loop(vid, value_count) save_Ravel(Vid(vid));
899
900 // save user defined symbols
901 //
902 save_symtab(Workspace::get_symbol_table());
903
904 // save certain system variables
905 //
906 #define rw_sv_def(x, _str, _txt) save_Symbol(Workspace::get_v_ ## x());
907 #define ro_sv_def(x, _str, _txt) save_Symbol(Workspace::get_v_ ## x());
908 #include "SystemVariable.def"
909
910 // save user-defined commands (if any)
911 //
912 save_user_commands(Workspace::get_user_commands());
913
914 // save state indicator
915 //
916 {
917 const int levels = Workspace::SI_entry_count();
918 do_indent();
919 out << "<StateIndicator levels=\"" << levels << "\">" << endl;
920
921 ++indent;
922
923 loop(l, levels)
924 {
925 for (const StateIndicator * si = Workspace::SI_top();
926 si; si = si->get_parent())
927 {
928 if (si->get_level() == l)
929 {
930 save_SI_entry(*si);
931 break;
932 }
933 }
934 }
935
936 --indent;
937
938 do_indent();
939 out << "</StateIndicator>" << endl << endl;
940 }
941
942 --indent;
943
944 do_indent();
945
946 // write closing tag and a few 0's so that string functions
947 // can be used on the mmaped file.
948 //
949 out << "</Workspace>" << endl
950 << char(0) << char(0) <<char(0) <<char(0) << endl;
951
952 return *this;
953 }
954 //=============================================================================
XML_Loading_Archive(const char * _filename,int & dump_fd)955 XML_Loading_Archive::XML_Loading_Archive(const char * _filename, int & dump_fd)
956 : fd(-1),
957 map_start(0),
958 map_length(0),
959 file_start(0),
960 line_start(0),
961 line_no(1),
962 current_char(UNI_ASCII_SPACE),
963 data(0),
964 file_end(0),
965 copying(false),
966 protection(false),
967 reading_vids(false),
968 have_allowed_objects(false),
969 filename(_filename),
970 file_is_complete(false)
971 {
972 Log(LOG_archive) CERR << "Loading file " << filename << endl;
973
974 fd = open(filename, O_RDONLY);
975 if (fd == -1) return;
976
977 struct stat st;
978 if (fstat(fd, &st))
979 {
980 CERR << "fstat() failed: " << strerror(errno) << endl;
981 close(fd);
982 fd = -1;
983 return;
984 }
985
986 map_length = st.st_size;
987 map_start = mmap(0, map_length, PROT_READ, MAP_SHARED, fd, 0);
988 if (map_start == reinterpret_cast<const void *>(-1))
989 {
990 CERR << "mmap() failed: " << strerror(errno) << endl;
991 close(fd);
992 fd = -1;
993 return;
994 }
995
996 // success
997 //
998 file_start = charP(map_start);
999 file_end = utf8P(file_start + map_length);
1000
1001 reset();
1002
1003 if (!strncmp(file_start, "#!", 2) || // )DUMP file
1004 !strncmp(file_start, "<!", 2) || // )DUMP-HTML file
1005 !strncmp(file_start, "⍝!", 4)) // a library
1006 {
1007 // the file was either written with )DUMP or is a library.
1008 // Return the open file descriptor (the destructor will unmap())
1009 //
1010 dump_fd = fd;
1011 fd = -1; // file will be closed via dump_fd
1012 return;
1013 }
1014
1015 if (strncmp(file_start, "<?xml", 5)) // not an xml file
1016 {
1017 CERR << "file " << filename << " does not " << endl
1018 << "have the format of a GNU APL .xml or .apl file" << endl;
1019 close(fd);
1020 fd = -1;
1021 return;
1022 }
1023 }
1024 //-----------------------------------------------------------------------------
~XML_Loading_Archive()1025 XML_Loading_Archive::~XML_Loading_Archive()
1026 {
1027 if (map_start) munmap(map_start, map_length);
1028 if (fd != -1) close(fd);
1029 }
1030 //-----------------------------------------------------------------------------
1031 void
reset()1032 XML_Loading_Archive::reset()
1033 {
1034 line_start = data = utf8P(file_start);
1035 line_no = 1;
1036 next_tag(LOC);
1037 }
1038 //-----------------------------------------------------------------------------
1039 bool
skip_to_tag(const char * tag)1040 XML_Loading_Archive::skip_to_tag(const char * tag)
1041 {
1042 for (;;)
1043 {
1044 if (next_tag(LOC)) return true;
1045 if (is_tag(tag)) break;
1046 }
1047
1048 return false;
1049 }
1050 //-----------------------------------------------------------------------------
1051 void
read_vids()1052 XML_Loading_Archive::read_vids()
1053 {
1054 reset(); // skips to <Workspace>
1055 reading_vids = true;
1056 read_Workspace(true);
1057 reading_vids = false;
1058
1059 reset();
1060 }
1061 //-----------------------------------------------------------------------------
1062 void
where(ostream & out)1063 XML_Loading_Archive::where(ostream & out)
1064 {
1065 out << "line=" << line_no << "+" << (data - line_start) << " '";
1066
1067 loop(j, 40) { if (data[j] == 0x0A) break; out << data[j]; }
1068 out << "'" << endl;
1069 }
1070 //-----------------------------------------------------------------------------
1071 void
where_att(ostream & out)1072 XML_Loading_Archive::where_att(ostream & out)
1073 {
1074 out << "line=" << line_no << "+" << (attributes - line_start) << " '";
1075
1076 loop(j, 40)
1077 {
1078 if (attributes[j] == 0x0A) break;
1079 if (attributes + j >= end_attr) break;
1080 out << attributes[j];
1081 }
1082
1083 out << "'" << endl;
1084 }
1085 //-----------------------------------------------------------------------------
1086 bool
get_uni()1087 XML_Loading_Archive::get_uni()
1088 {
1089 if (data > file_end) return true; // EOF
1090
1091 int len = 0;
1092 current_char = UTF8_string::toUni(data, len, true);
1093 data += len;
1094 if (current_char == 0x0A) { ++line_no; line_start = data; }
1095 return false;
1096 }
1097 //-----------------------------------------------------------------------------
1098 bool
is_tag(const char * prefix) const1099 XML_Loading_Archive::is_tag(const char * prefix) const
1100 {
1101 return !strncmp(charP(tag_name), prefix, strlen(prefix));
1102 }
1103 //-----------------------------------------------------------------------------
1104 void
expect_tag(const char * prefix,const char * loc) const1105 XML_Loading_Archive::expect_tag(const char * prefix, const char * loc) const
1106 {
1107 if (!is_tag(prefix))
1108 {
1109 CERR << " Got tag ";
1110 print_tag(CERR);
1111 CERR << " when expecting tag " << prefix
1112 << " at " << loc << " line " << line_no << endl;
1113 DOMAIN_ERROR;
1114 }
1115 }
1116 //-----------------------------------------------------------------------------
1117 void
print_tag(ostream & out) const1118 XML_Loading_Archive::print_tag(ostream & out) const
1119 {
1120 loop(t, attributes - tag_name) out << tag_name[t];
1121 }
1122 //-----------------------------------------------------------------------------
1123 const UTF8 *
find_attr(const char * att_name,bool optional)1124 XML_Loading_Archive::find_attr(const char * att_name, bool optional)
1125 {
1126 const int att_len = strlen(att_name);
1127
1128 for (const UTF8 * d = attributes; d < end_attr; ++d)
1129 {
1130 if (strncmp(att_name, charP(d), att_len)) continue;
1131 const UTF8 * dd = d + att_len;
1132 while (*dd <= ' ') ++dd; // skip whitespaces
1133 if (*dd++ != '=') continue;
1134
1135 // attribute= found. find value.
1136 while (*dd <= ' ') ++dd; // skip whitespaces
1137 Assert(*dd == '"');
1138 return dd + 1;
1139 }
1140
1141 // not found
1142 //
1143 if (!optional)
1144 {
1145 CERR << "Attribute name '" << att_name << "' not found in:" << endl;
1146 where_att(CERR);
1147 DOMAIN_ERROR;
1148 }
1149 return 0; // not found
1150 }
1151 //-----------------------------------------------------------------------------
1152 int64_t
find_int_attr(const char * attrib,bool optional,int base)1153 XML_Loading_Archive::find_int_attr(const char * attrib, bool optional, int base)
1154 {
1155 const UTF8 * value = find_attr(attrib, optional);
1156 if (value == 0) return -1; // not found
1157
1158 const int64_t val = strtoll(charP(value), 0, base);
1159 return val;
1160 }
1161 //-----------------------------------------------------------------------------
1162 APL_Float
find_float_attr(const char * attrib)1163 XML_Loading_Archive::find_float_attr(const char * attrib)
1164 {
1165 const UTF8 * value = find_attr(attrib, false);
1166 const APL_Float val = strtod(charP(value), 0);
1167 return val;
1168 }
1169 //-----------------------------------------------------------------------------
1170 bool
next_tag(const char * loc)1171 XML_Loading_Archive::next_tag(const char * loc)
1172 {
1173 again:
1174
1175 // read chars up to (including) '<'
1176 //
1177 while (current_char != '<')
1178 {
1179 if (get_uni()) return true;
1180 }
1181
1182 tag_name = data;
1183
1184 // read char after <
1185 //
1186 if (get_uni()) return true;
1187
1188 if (current_char == '?') goto again; // processing instruction
1189 if (current_char == '!') goto again; // comment
1190 if (current_char == '/') get_uni(); // / at start of name
1191
1192 // read chars before attributes (if any)
1193 //
1194 for (;;)
1195 {
1196 if (current_char == ' ') break;
1197 if (current_char == '/') break;
1198 if (current_char == '>') break;
1199 if (get_uni()) return true;
1200 }
1201
1202 attributes = data;
1203
1204 // read chars before end of tag
1205 //
1206 while (current_char != '>')
1207 {
1208 if (get_uni()) return true;
1209 }
1210
1211 end_attr = data;
1212
1213 /*
1214 CERR << "See tag ";
1215 for (const UTF8 * t = tag_name; t < attributes; ++t) CERR << (char)*t;
1216 CERR << " at " << loc << " line " << line_no << endl;
1217 */
1218
1219 return false;
1220 }
1221 //-----------------------------------------------------------------------------
1222 void
read_Workspace(bool silent)1223 XML_Loading_Archive::read_Workspace(bool silent)
1224 {
1225 expect_tag("Workspace", LOC);
1226
1227 const int offset = find_int_attr("timezone", false, 10);
1228 const UTF8 * wsid = find_attr("wsid", false);
1229
1230 int year = find_int_attr("year", false, 10);
1231 int mon = find_int_attr("month", false, 10);
1232 int day = find_int_attr("day", false, 10);
1233 int hour = find_int_attr("hour", false, 10);
1234 int min = find_int_attr("minute", false, 10);
1235 int sec = find_int_attr("second", false, 10);
1236
1237 UCS_string saving_SVN;
1238 UCS_string current_SVN(ARCHIVE_SVN);
1239 {
1240 const UTF8 * saving = find_attr("saving_SVN", true);
1241 while (saving && *saving != '"') saving_SVN.append(Unicode(*saving++));
1242 }
1243 bool mismatch = false;
1244
1245 if (saving_SVN.size() == 0) // saved with very old version
1246 {
1247 mismatch = true;
1248 CERR << "WARNING: this workspace was )SAVEd with a VERY "
1249 << "old SVN version of GNU APL." << endl;
1250 }
1251 else if (saving_SVN != current_SVN) // saved with different version
1252 {
1253 mismatch = true;
1254 CERR << "WARNING: this workspace was )SAVEd with SVN version "
1255 << saving_SVN << endl <<
1256 " but is now being )LOADed with a SVN version "
1257 << current_SVN << " or greater" << endl;
1258 }
1259
1260 if (mismatch)
1261 {
1262 CERR << "Expect problems, in particular when the )SI was not clear.\n";
1263 if (!copying)
1264 CERR << "In case of problems, please try )COPY instead of )LOAD."
1265 << endl;
1266 }
1267
1268 sec += offset % 60;
1269 min += (offset / 60) % 60;
1270 hour += (offset / 3600) % 60;
1271 if (sec >= 60) { sec -= 60; ++min; }
1272 else if (sec < 0) { sec += 60; --min; }
1273
1274 if (min >= 60) { min -= 60; ++hour; }
1275 else if (min < 0) { min += 60; --hour; }
1276
1277 if (hour >= 24) { hour -= 24; ++day; }
1278 else if (hour < 0) { hour += 24; --day; }
1279
1280 bool next_month = false;
1281 bool prev_month = false;
1282 switch(day)
1283 {
1284 case 32: next_month = true;
1285 break;
1286
1287 case 31: if (mon == 4 || mon == 6 || mon == 9 || mon == 11)
1288 next_month = true;
1289 break;
1290
1291 case 30: if (mon == 2) next_month = true;
1292 break;
1293
1294 case 29: if (mon != 2) break; // not february
1295 if (year & 3) next_month = true; // not leap year
1296 // the above fails if someone loads a workspace that
1297 // was saved around midnight on 2/28/2100. Dont do that!
1298 break;
1299
1300 case 0: prev_month = true;
1301 break;
1302
1303 default: break;
1304 }
1305
1306 if (next_month) { day = 1; ++mon; }
1307 else if (prev_month)
1308 {
1309 day = 31; --mon;
1310 if (mon == 4 || mon == 6 || mon == 9 || mon == 11) day = 30;
1311 else if (mon == 2) day = (year & 3) ? 28 : 29;
1312 }
1313
1314 if (mon > 12) { mon = 1; ++year; }
1315 else if (mon < 1) { mon = 12; --year; }
1316
1317 Log(LOG_archive) CERR << "read_Workspace() " << endl;
1318
1319 // quick check that the file is complete
1320 //
1321 for (const UTF8 * c = file_end - 12; (c > data) && (c > file_end - 200); --c)
1322 {
1323 if (!strncmp(charP(c), "</Workspace>", 12))
1324 {
1325 file_is_complete = true;
1326 break;
1327 }
1328 }
1329
1330 if (!file_is_complete && !copying)
1331 {
1332 CERR <<
1333 "*** workspace file " << filename << endl <<
1334 " seems to be incomplete (possibly caused by a crash on )SAVE?)\n"
1335 " You may still be able to )COPY from it.\n"
1336 "\nNOT COPIED" << endl;
1337 return;
1338 }
1339
1340 // the order in which tags are written to the xml file
1341 //
1342 const char * tag_order[] =
1343 {
1344 "Value",
1345 "Ravel",
1346 "SymbolTable",
1347 "Symbol",
1348 "Commands",
1349 "StateIndicator",
1350 "/Workspace",
1351 0
1352 };
1353 const char ** tag_pos = tag_order;
1354
1355 for (;;)
1356 {
1357 next_tag(LOC);
1358
1359 // make sure that we do not move backwards in tag_order
1360 //
1361 if (!is_tag(*tag_pos)) // new tag
1362 {
1363 tag_pos++;
1364 for (;;)
1365 {
1366 if (*tag_pos == 0) DOMAIN_ERROR; // end of list
1367 if (is_tag(*tag_pos)) break; // found
1368 ++tag_pos;
1369 }
1370 }
1371
1372 if (is_tag("Value")) read_Value();
1373 else if (is_tag("Ravel")) read_Ravel();
1374 else if (is_tag("SymbolTable")) read_SymbolTable();
1375 else if (is_tag("Symbol")) read_Symbol();
1376 else if (is_tag("Commands")) read_Commands();
1377 else if (copying) break;
1378 else if (is_tag("StateIndicator")) read_StateIndicator();
1379 else if (is_tag("/Workspace")) break;
1380 else /* complain */ expect_tag("UNEXPECTED", LOC);
1381 }
1382
1383 // loaded workspace can contain stale variables, e.g. shared vars.
1384 // remove them.
1385 //
1386 Value::erase_stale(LOC);
1387
1388 if (reading_vids) return;
1389
1390 const char * tz_sign = (offset < 0) ? "" : "+";
1391 if (!silent) COUT
1392 << "SAVED "
1393 << setfill('0') << year << "-"
1394 << setw(2) << mon << "-"
1395 << setw(2) << day << " "
1396 << setw(2) << hour << ":"
1397 << setw(2) << min << ":"
1398 << setw(2) << sec << " (GMT"
1399 << tz_sign << offset/3600 << ")"
1400 << setfill(' ') << endl;
1401
1402 if (!copying)
1403 {
1404 const UTF8 * end = wsid;
1405 while (*end != '"') ++end;
1406
1407 Workspace::set_WS_name(UCS_string(UTF8_string(wsid, end - wsid)));
1408 }
1409
1410 if (have_allowed_objects && allowed_objects.size())
1411 {
1412 CERR << "NOT COPIED:";
1413 loop(a, allowed_objects.size()) CERR << " " << allowed_objects[a];
1414 CERR << endl;
1415 }
1416 }
1417 //-----------------------------------------------------------------------------
1418 void
read_Value()1419 XML_Loading_Archive::read_Value()
1420 {
1421 expect_tag("Value", LOC);
1422
1423 const int vid = find_int_attr("vid", false, 10);
1424 const int parent = find_int_attr("parent", true, 10);
1425 const int rk = find_int_attr("rk", false, 10);
1426
1427 Log(LOG_archive) CERR << " read_Value() vid=" << vid << endl;
1428
1429 if (reading_vids)
1430 {
1431 parents.push_back(parent);
1432 return;
1433 }
1434
1435 Shape sh_value;
1436 loop(r, rk)
1437 {
1438 char sh[20];
1439 snprintf(sh, sizeof(sh), "sh-%d", int(r));
1440 const UTF8 * sh_r = find_attr(sh, false);
1441 sh_value.add_shape_item(atoll(charP(sh_r)));
1442 }
1443
1444 // if we do )COPY or )PCOPY and vid is not in vids_COPY list, then we
1445 // push 0 (so that indexing with vid still works) and ignore such
1446 // values in read_Ravel.
1447 //
1448 bool no_copy = false; // assume the value is needed
1449 if (copying)
1450 {
1451 // if vid is a sub-value then find its topmost owner
1452 //
1453 int parent = vid;
1454 for (;;)
1455 {
1456 Assert(parent < int(parents.size()));
1457 if (parents[parent] == -1) break; // topmost owner found
1458 parent = parents[parent];
1459 }
1460
1461 no_copy = true; // assume the value is not needed
1462 loop(v, vids_COPY.size())
1463 {
1464 if (parent == vids_COPY[v]) // vid is in the list: copy
1465 {
1466 no_copy = false;
1467 break;
1468 }
1469 }
1470 }
1471
1472 if (no_copy)
1473 {
1474 values.push_back(Value_P());
1475 }
1476 else
1477 {
1478 Assert(vid == int(values.size()));
1479
1480 Value_P val(sh_value, LOC);
1481 values.push_back(val);
1482 }
1483 }
1484 //-----------------------------------------------------------------------------
1485 void
read_Cells(Cell * & C,Value & C_owner,const UTF8 * & first)1486 XML_Loading_Archive::read_Cells(Cell * & C, Value & C_owner,
1487 const UTF8 * & first)
1488 {
1489 while (*first <= ' ') ++first;
1490
1491 int len;
1492 const Unicode type = UTF8_string::toUni(first, len, true);
1493
1494 switch (type)
1495 {
1496 case UNI_PAD_U0: // end of UNI_PAD_U2
1497 case '\n': // end of UNI_PAD_U2 (fix old bug)
1498 first += len;
1499 break;
1500
1501 case UNI_PAD_U1: // hex Unicode
1502 case UNI_PAD_U2: // printable ASCII
1503 {
1504 UCS_string ucs;
1505 read_chars(ucs, first);
1506 loop(u, ucs.size()) new (C++) CharCell(ucs[u]);
1507 }
1508 break;
1509
1510 case UNI_PAD_U3: // integer
1511 first += len;
1512 {
1513 char * end = 0;
1514 const APL_Integer val = strtoll(charP(first), &end, 10);
1515 new (C++) IntCell(val);
1516 first = utf8P(end);
1517 }
1518 break;
1519
1520 case UNI_PAD_U4: // real
1521 first += len;
1522 {
1523 char * end = 0;
1524 const APL_Float val = strtod(charP(first), &end);
1525 new (C++) FloatCell(val);
1526 first = utf8P(end);
1527 }
1528 break;
1529
1530 case UNI_PAD_U5: // complex
1531 first += len;
1532 {
1533 char * end = 0;
1534 const APL_Float real = strtod(charP(first), &end);
1535 first = utf8P(end);
1536 Assert(*end == 'J');
1537 ++end;
1538 const APL_Float imag = strtod(end, &end);
1539 new (C++) ComplexCell(real, imag);
1540 first = utf8P(end);
1541 }
1542 break;
1543
1544 case UNI_PAD_U6: // pointer
1545 first += len;
1546 {
1547 char * end = 0;
1548 const int vid = strtoll(charP(first), &end, 10);
1549 Assert(vid >= 0);
1550 Assert(vid < int(values.size()));
1551 C++->init_from_value(values[vid].get(), C_owner, LOC);
1552 first = utf8P(end);
1553 }
1554 break;
1555
1556 case UNI_PAD_U7: // cellref
1557 first += len;
1558 if (first[0] == '0') // 0-cell-pointer
1559 {
1560 new (C++) LvalCell(0, 0);
1561 first++;
1562 }
1563 else
1564 {
1565 char * end = 0;
1566 const int vid = strtoll(charP(first), &end, 16);
1567 Assert(vid >= 0);
1568 Assert(vid < int(values.size()));
1569 Assert(*end == '['); ++end;
1570 const ShapeItem offset = strtoll(end, &end, 16);
1571 Assert(*end == ']'); ++end;
1572 new (C++) LvalCell(&values[vid]->get_ravel(offset),
1573 values[vid].get());
1574 first = utf8P(end);
1575 }
1576 break;
1577
1578 case UNI_PAD_U8: // rational quotient
1579 //
1580 // we should understand rational quotients even if we are
1581 // not ./configured for them..
1582 //
1583 first += len;
1584 {
1585 char * end = 0;
1586 const uint64_t numer = strtoll(charP(first), &end, 10);
1587 first = utf8P(end);
1588
1589 // skip ÷ (which is is C3 B7 in UTF8)
1590 Assert((*end++ & 0xFF) == 0xC3);
1591 Assert((*end++ & 0xFF) == 0xB7);
1592 const uint64_t denom = strtoll(end, &end, 10);
1593 Assert(denom > 0);
1594 #ifdef RATIONAL_NUMBERS_WANTED
1595 new (C++) FloatCell(numer, denom);
1596 #else
1597 new (C++) FloatCell((1.0*(numer))/denom);
1598 #endif
1599 first = utf8P(end);
1600 }
1601 break;
1602
1603
1604 default: Q1(type) Q1(line_no) DOMAIN_ERROR;
1605 }
1606 }
1607 //-----------------------------------------------------------------------------
1608 void
read_chars(UCS_string & ucs,const UTF8 * & utf)1609 XML_Loading_Archive::read_chars(UCS_string & ucs, const UTF8 * & utf)
1610 {
1611 while (*utf <= ' ') ++utf; // skip leading whitespace
1612
1613 for (bool char_mode = false;;)
1614 {
1615 if (*utf == '"') break; // end of attribute value
1616
1617 int len;
1618 const Unicode uni = UTF8_string::toUni(utf, len, true);
1619
1620 if (char_mode && *utf != '\n' && uni != UNI_PAD_U0)
1621 {
1622 ucs.append(uni);
1623 utf += len;
1624 continue;
1625 }
1626
1627 if (uni == UNI_PAD_U2) // start of char_mode
1628 {
1629 utf += len; // skip UNI_PAD_U2
1630 char_mode = true;
1631 continue;
1632 }
1633
1634 if (uni == UNI_PAD_U1) // start of hex mode
1635 {
1636 utf += len; // skip UNI_PAD_U1
1637 char_mode = false;
1638 char * end = 0;
1639 const int hex = strtoll(charP(utf), &end, 16);
1640 ucs.append(Unicode(hex));
1641 utf = utf8P(end);
1642 continue;
1643 }
1644
1645 if (uni == UNI_PAD_U0) // end of char_mode
1646 {
1647 utf += len; // skip UNI_PAD_U0
1648 char_mode = false;
1649 continue;
1650 }
1651
1652 if (uni == '\n') // end of char_mode (fix old bug)
1653 {
1654 // due to an old bug, the trailing UNI_PAD_U0 may be missing
1655 // at the end of the line. We therefore reset char_mode at the
1656 // end of the line so that workspaces save with that fault can
1657 // be read.
1658 //
1659 utf += len; // skip UNI_PAD_U0
1660 char_mode = false;
1661 continue;
1662 }
1663
1664 break;
1665 }
1666
1667 while (*utf <= ' ') ++utf; // skip trailing whitespace
1668 }
1669 //-----------------------------------------------------------------------------
1670 void
read_Ravel()1671 XML_Loading_Archive::read_Ravel()
1672 {
1673 if (reading_vids) return;
1674
1675 const int vid = find_int_attr("vid", false, 10);
1676 const UTF8 * cells = find_attr("cells", false);
1677
1678 Log(LOG_archive) CERR << " read_Ravel() vid=" << vid << endl;
1679
1680 Assert(vid < int(values.size()));
1681 Value_P val = values[vid];
1682
1683 if (!val) // )COPY with vids_COPY or static value
1684 {
1685 return;
1686 }
1687
1688 const ShapeItem count = val->nz_element_count();
1689 Cell * C = &val->get_ravel(0);
1690 Cell * end = C + count;
1691
1692 while (C < end)
1693 {
1694 read_Cells(C, val.getref(), cells);
1695 while (*cells <= ' ') ++cells; // skip trailing whitespace
1696 }
1697
1698 if (C != end) // unless all cells read
1699 {
1700 CERR << "vid=" << vid << endl;
1701 FIXME;
1702 }
1703
1704 {
1705 int len = 0;
1706 const Unicode next = UTF8_string::toUni(cells, len, true);
1707 Assert(next == '"');
1708 }
1709
1710 val->check_value(LOC);
1711 }
1712 //-----------------------------------------------------------------------------
1713 void
read_unused_name(int d,Symbol & symbol)1714 XML_Loading_Archive::read_unused_name(int d, Symbol & symbol)
1715 {
1716 Log(LOG_archive) CERR << " [" << d << "] unused name" << endl;
1717
1718 if (d == 0) return; // Symbol::Symbol has already created the top level
1719
1720 symbol.push();
1721 }
1722 //-----------------------------------------------------------------------------
1723 void
read_Variable(int d,Symbol & symbol)1724 XML_Loading_Archive::read_Variable(int d, Symbol & symbol)
1725 {
1726 const int vid = find_int_attr("vid", false, 10);
1727 Assert(vid < int(values.size()));
1728
1729 Log(LOG_archive) CERR << " [" << d << "] read_Variable() vid=" << vid
1730 << " name=" << symbol.get_name() << endl;
1731
1732 // some system variables are saved for troubleshooting purposes, but
1733 // should not be loaded...
1734 //
1735 if (symbol.is_readonly()) return;
1736 if (symbol.get_name().starts_iwith("⎕NLT")) return; // extinct
1737 if (symbol.get_Id() == ID_Quad_SVE) return;
1738 if (symbol.get_Id() == ID_Quad_SYL) return;
1739 if (symbol.get_Id() == ID_Quad_PS) return;
1740
1741 if (vid == -1) // stale variable
1742 {
1743 Log(LOG_archive) CERR << " " << symbol.get_name()
1744 << " looks like a stale variable" << endl;
1745 return;
1746 }
1747
1748 if (!values[vid]) return; // value filtered out
1749
1750 while (symbol.value_stack_size() <= d) symbol.push();
1751 // if (d != 0) symbol.push();
1752
1753 try
1754 {
1755 symbol.assign(values[vid], true, LOC);
1756 }
1757 catch (...)
1758 {
1759 CERR << "*** Could not assign value " << *values[vid]
1760 << " to variable " << symbol.get_name() << " ***" << endl;
1761 }
1762 }
1763 //-----------------------------------------------------------------------------
1764 void
read_Label(int d,Symbol & symbol)1765 XML_Loading_Archive::read_Label(int d, Symbol & symbol)
1766 {
1767 const int value = find_int_attr("value", false, 10);
1768 if (d == 0) symbol.pop();
1769 symbol.push_label(Function_Line(value));
1770 }
1771 //-----------------------------------------------------------------------------
1772 void
read_Function(int d,Symbol & symbol)1773 XML_Loading_Archive::read_Function(int d, Symbol & symbol)
1774 {
1775 const int native = find_int_attr("native", true, 10);
1776 const APL_time_us creation_time = find_int_attr("creation-time", true, 10);
1777 int eprops[4] = { 0, 0, 0, 0 };
1778 const UTF8 * ep = find_attr("exec-properties", true);
1779 if (ep)
1780 {
1781 sscanf(charP(ep), "%d,%d,%d,%d",
1782 eprops, eprops + 1, eprops + 2, eprops+ 3);
1783 }
1784
1785 Log(LOG_archive)
1786 CERR << " [" << d << "] read_Function(" << symbol.get_name()
1787 << ") native=" << native << endl;
1788
1789 next_tag(LOC);
1790 expect_tag("UCS", LOC);
1791 const UTF8 * uni = find_attr("uni", false);
1792 next_tag(LOC);
1793 expect_tag("/Function", LOC);
1794
1795 UCS_string text;
1796 while (*uni != '"') read_chars(text, uni);
1797
1798 if (native == 1)
1799 {
1800 NativeFunction * nfun = NativeFunction::fix(text, symbol.get_name());
1801 if (nfun) // fix succeeded
1802 {
1803 nfun->set_creation_time(creation_time);
1804 if (d == 0) symbol.pop();
1805 symbol.push_function(nfun);
1806 }
1807 else // fix failed
1808 {
1809 CERR << " *** loading of native function " << text
1810 << " failed" << endl << endl;
1811 if (d == 0) symbol.pop();
1812 symbol.push();
1813 }
1814 }
1815 else
1816 {
1817 int err = 0;
1818 UCS_string creator_UCS(filename);
1819 creator_UCS.append(UNI_ASCII_COLON);
1820 creator_UCS.append_number(line_no);
1821 UTF8_string creator(creator_UCS);
1822
1823 UserFunction * ufun = 0;
1824 if (text[0] == UNI_LAMBDA)
1825 {
1826 ufun = UserFunction::fix_lambda(symbol, text);
1827 ufun->increment_refcount(LOC); // since we push it below
1828 }
1829 else
1830 {
1831 ufun = UserFunction::fix(text, err, false, LOC, creator, false);
1832 }
1833
1834 if (d == 0) symbol.pop();
1835 if (ufun)
1836 {
1837 ufun->set_creation_time(creation_time);
1838 symbol.push_function(ufun);
1839 ufun->set_exec_properties(eprops);
1840 }
1841 else
1842 {
1843 CERR << " ⎕FX " << symbol.get_name() << " failed: "
1844 << Workspace::more_error() << endl;
1845 symbol.push();
1846 }
1847 }
1848 }
1849 //-----------------------------------------------------------------------------
1850 void
read_Shared_Variable(int d,Symbol & symbol)1851 XML_Loading_Archive::read_Shared_Variable(int d, Symbol & symbol)
1852 {
1853 // const SV_key key = find_int_attr("key", false, 10);
1854 if (d != 0) symbol.push();
1855
1856 CERR << "WARNING: workspace was )SAVEd with a shared variable "
1857 << symbol.get_name() << endl
1858 << " (shared variables are not restored by )LOAD or )COPY)" << endl;
1859
1860 // symbol.share_var(key);
1861 }
1862 //-----------------------------------------------------------------------------
1863 void
read_SymbolTable()1864 XML_Loading_Archive::read_SymbolTable()
1865 {
1866 const int size = find_int_attr("size", false, 10);
1867
1868 Log(LOG_archive) CERR << " read_SymbolTable()" << endl;
1869
1870 loop(s, size)
1871 {
1872 next_tag(LOC);
1873 read_Symbol();
1874 }
1875
1876 next_tag(LOC);
1877 expect_tag("/SymbolTable", LOC);
1878 }
1879 //-----------------------------------------------------------------------------
1880 void
read_Symbol()1881 XML_Loading_Archive::read_Symbol()
1882 {
1883 expect_tag("Symbol", LOC);
1884
1885 const UTF8 * name = find_attr("name", false);
1886 const UTF8 * name_end = name;
1887 while (*name_end != '"') ++name_end;
1888
1889 UTF8_string name_UTF(name, name_end - name);
1890 UCS_string name_UCS(name_UTF);
1891 if (name_UCS.size() == 0)
1892 {
1893 CERR << "*** Warning: empty Symbol name in XML archive " << filename
1894 << " around line " << line_no << endl;
1895 skip_to_tag("/Symbol");
1896 return;
1897 }
1898
1899 Log(LOG_archive) CERR << " read_Symbol() name=" << name_UCS << endl;
1900
1901 // ⎕NLT and ⎕PT were removed, but could lurk around in old workspaces.
1902 // ⎕PW and ⎕TZ are session variables that must not )LOADed (but might be
1903 // )COPYd)
1904 //
1905 if (name_UTF == UTF8_string("⎕NLT") || name_UTF == UTF8_string("⎕PT"))
1906 {
1907 Log(LOG_archive) CERR << " skipped at " << LOC << endl;
1908 skip_to_tag("/Symbol");
1909 return;
1910 }
1911
1912 const int depth = find_int_attr("stack-size", false, 10);
1913
1914 // lookup symbol, trying ⎕xx first
1915 //
1916 Symbol * symbol;
1917 if (name_UCS == ID::get_name_UCS(ID_LAMBDA))
1918 symbol = &Workspace::get_v_LAMBDA();
1919 else if (name_UCS == ID::get_name_UCS(ID_ALPHA))
1920 symbol = &Workspace::get_v_ALPHA();
1921 else if (name_UCS == ID::get_name_UCS(ID_ALPHA_U))
1922 symbol = &Workspace::get_v_ALPHA_U();
1923 else if (name_UCS == ID::get_name_UCS(ID_CHI))
1924 symbol = &Workspace::get_v_CHI();
1925 else if (name_UCS == ID::get_name_UCS(ID_OMEGA))
1926 symbol = &Workspace::get_v_OMEGA();
1927 else if (name_UCS == ID::get_name_UCS(ID_OMEGA_U))
1928 symbol = &Workspace::get_v_OMEGA_U();
1929 else
1930 symbol = Workspace::lookup_existing_symbol(name_UCS);
1931
1932 // we do NOT copy if:
1933 //
1934 // 1. )PCOPY and the symbol exists, or
1935 // 2. there is an object list and this symbol is not contained in the list
1936 //
1937 const bool is_protected = symbol && protection;
1938 const bool is_selected = allowed_objects.contains(name_UCS);
1939 bool no_copy = is_protected || (have_allowed_objects && !is_selected);
1940
1941 if (reading_vids)
1942 {
1943 // we prepare vids for )COPY or )PCOPY, so we do not create a symbol
1944 // and care only for the top level
1945 //
1946 if (no_copy || (depth == 0))
1947 {
1948 Log(LOG_archive) CERR << " skipped at " << LOC << endl;
1949 skip_to_tag("/Symbol");
1950 return;
1951 }
1952
1953 // we have entries and copying is allowed
1954 //
1955 next_tag(LOC);
1956 if (is_tag("Variable"))
1957 {
1958 const int vid = find_int_attr("vid", false, 10);
1959 vids_COPY.push_back(vid);
1960 }
1961 skip_to_tag("/Symbol");
1962 return;
1963 }
1964
1965 // in a )COPY without dedicated objects only
1966 // ⎕CT, ⎕FC, ⎕IO, ⎕LX, ⎕PP, ⎕PR, and ⎕RL shall be copied
1967 //
1968 if (!have_allowed_objects && // no dedicated object list
1969 copying && // )COPY
1970 (name_UCS == ID::get_name_UCS(ID_Quad_CT) ||
1971 name_UCS == ID::get_name_UCS(ID_Quad_FC) ||
1972 name_UCS == ID::get_name_UCS(ID_Quad_IO) ||
1973 name_UCS == ID::get_name_UCS(ID_Quad_LX) ||
1974 name_UCS == ID::get_name_UCS(ID_Quad_PP) ||
1975 name_UCS == ID::get_name_UCS(ID_Quad_PR) ||
1976 name_UCS == ID::get_name_UCS(ID_Quad_RL)
1977 ))
1978 {
1979 Log(LOG_archive) CERR << name_UCS << " not copied at " << LOC << endl;
1980 no_copy = true;
1981 }
1982
1983 // in a )LOAD silently ignore session variables (⎕PW and ⎕TZ)
1984 //
1985 if (!copying && // )LOAD
1986 (name_UCS == ID::get_name_UCS(ID_Quad_PW) ||
1987 name_UCS == ID::get_name_UCS(ID_Quad_TZ)))
1988 {
1989 skip_to_tag("/Symbol");
1990 return;
1991 }
1992
1993 if (copying)
1994 {
1995 if (no_copy || (depth == 0))
1996 {
1997 skip_to_tag("/Symbol");
1998 return;
1999 }
2000 }
2001
2002 // remove this symbol from allowed_objects so that we can print NOT COPIED
2003 // at the end for all objects that are still in the list.
2004 //
2005 loop(a, allowed_objects.size())
2006 {
2007 if (allowed_objects[a] == name_UCS)
2008 {
2009 allowed_objects[a] = allowed_objects.back();
2010 allowed_objects.pop_back();
2011 break;
2012 }
2013 }
2014
2015 if (symbol == 0)
2016 {
2017 symbol = Workspace::lookup_symbol(name_UCS);
2018 }
2019 Assert(symbol);
2020
2021 loop(d, depth)
2022 {
2023 // for )COPY skip d > 0
2024 //
2025 if (copying && (d > 0))
2026 {
2027 skip_to_tag("/Symbol");
2028 return;
2029 }
2030
2031 next_tag(LOC);
2032 if (is_tag("unused-name")) read_unused_name(d, *symbol);
2033 else if (is_tag("Variable")) read_Variable(d, *symbol);
2034 else if (is_tag("Function")) read_Function(d, *symbol);
2035 else if (is_tag("Label")) read_Label(d, *symbol);
2036 else if (is_tag("Shared-Variable")) read_Shared_Variable(d, *symbol);
2037 }
2038
2039 Assert(symbol->value_stack_size() == depth);
2040 next_tag(LOC);
2041 expect_tag("/Symbol", LOC);
2042 }
2043 //-----------------------------------------------------------------------------
2044 void
read_Commands()2045 XML_Loading_Archive::read_Commands()
2046 {
2047 const int size = find_int_attr("size", false, 10);
2048
2049 Log(LOG_archive) CERR << " read_Commands()" << endl;
2050
2051 loop(s, size)
2052 {
2053 next_tag(LOC);
2054 read_Command();
2055 }
2056
2057 next_tag(LOC);
2058 expect_tag("/Commands", LOC);
2059 }
2060 //-----------------------------------------------------------------------------
2061 void
read_Command()2062 XML_Loading_Archive::read_Command()
2063 {
2064 expect_tag("Command", LOC);
2065
2066 const UTF8 * name = find_attr("name", false);
2067 const UTF8 * name_end = name;
2068 while (*name_end != '"') ++name_end;
2069 UTF8_string name_UTF(name, name_end - name);
2070 UCS_string name_UCS(name_UTF);
2071
2072 const UTF8 * fun = find_attr("fun", false);
2073 const UTF8 * fun_end = fun;
2074 while (*fun_end != '"') ++fun_end;
2075 UTF8_string fun_UTF(fun, fun_end - fun);
2076 UCS_string fun_UCS(fun_UTF);
2077
2078 const int mode = find_int_attr("mode", false, 10);
2079
2080 Command::user_command ucmd = { name_UCS, fun_UCS, mode };
2081 Workspace::get_user_commands().push_back(ucmd);
2082 }
2083 //-----------------------------------------------------------------------------
2084 void
read_StateIndicator()2085 XML_Loading_Archive::read_StateIndicator()
2086 {
2087 if (copying)
2088 {
2089 skip_to_tag("/StateIndicator");
2090 return;
2091 }
2092
2093 Log(LOG_archive) CERR << "read_StateIndicator()" << endl;
2094
2095 const int levels = find_int_attr("levels", false, 10);
2096
2097 loop(l, levels)
2098 {
2099 next_tag(LOC);
2100 expect_tag("SI-entry", LOC);
2101
2102 try
2103 {
2104 read_SI_entry(l);
2105 }
2106 catch (...)
2107 {
2108 CERR <<
2109 "\n"
2110 "*** SORRY! An error occured while reading the )SI stack of the )SAVEd\n"
2111 " workspace. The )SI stack was reconstructed to the extent possible.\n"
2112 " We stronly recommend to perform )SIC and then )DUMP the workspace under\n"
2113 " a different name.\n" << endl;
2114
2115 // skip rest of <StateIndicator>
2116 //
2117 skip_to_tag("/StateIndicator");
2118 return;
2119 }
2120
2121 // the parsers loop eats the terminating /SI-entry
2122 }
2123
2124 next_tag(LOC);
2125 expect_tag("/StateIndicator", LOC);
2126 }
2127 //-----------------------------------------------------------------------------
2128 void
read_SI_entry(int lev)2129 XML_Loading_Archive::read_SI_entry(int lev)
2130 {
2131 const int level = find_int_attr("level", false, 10);
2132 const int pc = find_int_attr("pc", false, 10);
2133
2134 Log(LOG_archive) CERR << " read_SI_entry() level=" << level << endl;
2135
2136 Executable * exec = 0;
2137 next_tag(LOC);
2138 if (is_tag("Execute")) exec = read_Execute();
2139 else if (is_tag("Statements")) exec = read_Statement();
2140 else if (is_tag("UserFunction")) exec = read_UserFunction();
2141 else Assert(0 && "Bad tag at " LOC);
2142
2143 Assert(lev == level);
2144 Assert(exec);
2145
2146 Workspace::push_SI(exec, LOC);
2147 StateIndicator * si = Workspace::SI_top();
2148 Assert(si);
2149 si->set_PC(Function_PC(pc));
2150 read_Parser(*si, lev);
2151
2152 for (;;)
2153 {
2154 // skip old EOC tags
2155 //
2156 next_tag(LOC);
2157 if (is_tag("/SI-entry")) break;
2158 }
2159 }
2160 //-----------------------------------------------------------------------------
2161 Executable *
read_Execute()2162 XML_Loading_Archive::read_Execute()
2163 {
2164 next_tag(LOC);
2165 expect_tag("UCS", LOC);
2166
2167 const UTF8 * uni = find_attr("uni", false);
2168 UCS_string text;
2169 while (*uni != '"') read_chars(text, uni);
2170 next_tag(LOC);
2171 expect_tag("/Execute", LOC);
2172
2173 ExecuteList * exec = ExecuteList::fix(text, LOC);
2174 Assert(exec);
2175 return exec;
2176 }
2177 //-----------------------------------------------------------------------------
2178 Executable *
read_Statement()2179 XML_Loading_Archive::read_Statement()
2180 {
2181 next_tag(LOC);
2182 expect_tag("UCS", LOC);
2183 const UTF8 * uni = find_attr("uni", false);
2184 UCS_string text;
2185 while (*uni != '"') read_chars(text, uni);
2186
2187 next_tag(LOC);
2188 expect_tag("/Statements", LOC);
2189
2190 StatementList * exec = StatementList::fix(text, LOC);
2191 Assert(exec);
2192 return exec;
2193 }
2194 //-----------------------------------------------------------------------------
2195 Executable *
read_UserFunction()2196 XML_Loading_Archive::read_UserFunction()
2197 {
2198 const int macro_num = find_int_attr("macro-num", true, 10);
2199 if (macro_num != -1)
2200 return Macro::get_macro(Macro::Macro_num(macro_num));
2201
2202 const UTF8 * lambda_name = find_attr("lambda-name", true);
2203 if (lambda_name) return read_lambda(lambda_name);
2204
2205 const int level = find_int_attr("symbol-level", false, 10);
2206 const UTF8 * name = find_attr("ufun-name", false);
2207 const UTF8 * n = name;
2208 while (*n != '"') ++n;
2209 UTF8_string name_UTF(name, n - name);
2210 UCS_string name_UCS(name_UTF);
2211
2212 Symbol * symbol = Workspace::lookup_symbol(name_UCS);
2213 Assert(symbol);
2214 Assert(level >= 0);
2215 Assert(level < symbol->value_stack_size());
2216 ValueStackItem & vsi = (*symbol)[level];
2217 Assert(vsi.name_class == NC_FUNCTION);
2218 Function * fun = vsi.sym_val.function;
2219 Assert(fun);
2220 UserFunction * ufun = fun->get_ufun1();
2221 Assert(fun == ufun);
2222
2223 return ufun;
2224 }
2225 //-----------------------------------------------------------------------------
2226 Executable *
read_lambda(const UTF8 * lambda_name)2227 XML_Loading_Archive::read_lambda(const UTF8 * lambda_name)
2228 {
2229 UCS_string lambda = read_UCS();
2230
2231 Symbol dummy(ID_No_ID);
2232 UserFunction * ufun = UserFunction::fix_lambda(dummy, lambda);
2233 Assert(ufun);
2234 ufun->increment_refcount(LOC); // since we push it below
2235
2236 next_tag(LOC);
2237 expect_tag("/UserFunction", LOC);
2238 return ufun;
2239 }
2240 //-----------------------------------------------------------------------------
2241 UCS_string
read_UCS()2242 XML_Loading_Archive::read_UCS()
2243 {
2244 skip_to_tag("UCS");
2245 const UTF8 * uni = find_attr("uni", false);
2246 UCS_string text;
2247 while (*uni != '"') read_chars(text, uni);
2248 return text;
2249 }
2250 //-----------------------------------------------------------------------------
2251 void
read_Parser(StateIndicator & si,int lev)2252 XML_Loading_Archive::read_Parser(StateIndicator & si, int lev)
2253 {
2254 next_tag(LOC);
2255 expect_tag("Parser", LOC);
2256
2257 Log(LOG_archive) CERR << " read_Parser() level=" << lev << endl;
2258
2259 const int stack_size = find_int_attr("size", false, 10);
2260 const int ass_state = find_int_attr("assign-pending", false, 10);
2261 const int lah_high = find_int_attr("lookahead-high", false, 10);
2262 const int action = find_int_attr("action", false, 10);
2263
2264 Prefix & parser = si.current_stack;
2265
2266 parser.set_assign_state(Assign_state(ass_state));
2267 parser.action = R_action(action);
2268 parser.lookahead_high = Function_PC(lah_high);
2269
2270 for (;;)
2271 {
2272 Token_loc tl;
2273 const bool success = read_Token(tl);
2274 if (!success) break;
2275
2276 if (parser.size() < stack_size) parser.push(tl);
2277 else parser.saved_lookahead.copy(tl, LOC);
2278 }
2279
2280 Log(LOG_archive)
2281 {
2282 CERR << " ";
2283 parser.print_stack(CERR, LOC);
2284 }
2285
2286 expect_tag("/Parser", LOC);
2287 }
2288 //-----------------------------------------------------------------------------
2289 bool
read_Token(Token_loc & tloc)2290 XML_Loading_Archive::read_Token(Token_loc & tloc)
2291 {
2292 next_tag(LOC);
2293 if (is_tag("/Parser")) return false;
2294 expect_tag("Token", LOC);
2295
2296 tloc.pc = Function_PC(find_int_attr("pc", false, 10));
2297
2298 const TokenTag tag = TokenTag(find_int_attr("tag", false, 16));
2299
2300 switch(tag & TV_MASK) // cannot call get_ValueType() yet
2301 {
2302 case TV_NONE:
2303 new (&tloc.tok) Token(tag);
2304 break;
2305
2306 case TV_CHAR:
2307 {
2308 const Unicode uni = Unicode(find_int_attr("char", false, 10));
2309 new (&tloc.tok) Token(tag, uni);
2310 }
2311 break;
2312
2313 case TV_INT:
2314 {
2315 const int64_t ival = find_int_attr("int", false, 10);
2316 new (&tloc.tok) Token(tag, ival);
2317 }
2318 break;
2319
2320 case TV_FLT:
2321 {
2322 const APL_Float val = find_float_attr("float");
2323 new (&tloc.tok) Token(tag, val);
2324 }
2325 break;
2326
2327 case TV_CPX:
2328 {
2329 const APL_Float real = find_float_attr("real");
2330 const APL_Float imag = find_float_attr("imag");
2331 new (&tloc.tok) Token(tag, real, imag);
2332 }
2333 break;
2334
2335 case TV_SYM:
2336 {
2337 const UTF8 * sym_name = find_attr("sym", false);
2338 const UTF8 * end = sym_name;
2339 while (*end != '"') ++end;
2340 UTF8_string name_UTF(sym_name, end - sym_name);
2341 UCS_string name_UCS(name_UTF);
2342
2343 Symbol * symbol = Avec::is_quad(name_UCS[0])
2344 ? Workspace::lookup_existing_symbol(name_UCS)
2345 : Workspace::lookup_symbol(name_UCS);
2346 new (&tloc.tok) Token(tag, symbol);
2347 }
2348 break;
2349
2350 case TV_LIN:
2351 {
2352 const int ival = find_int_attr("line", false, 10);
2353 new (&tloc.tok) Token(tag, Function_Line(ival));
2354 }
2355 break;
2356
2357 case TV_VAL:
2358 {
2359 const int vid = find_int_attr("vid", false, 10);
2360 Assert(vid < int(values.size()));
2361 new (&tloc.tok) Token(tag, values[vid]);
2362 }
2363 break;
2364
2365 case TV_INDEX:
2366 {
2367 const UTF8 * vids = find_attr("index", false);
2368 IndexExpr & idx = *new IndexExpr(ASS_none, LOC);
2369 while (*vids != '"')
2370 {
2371 if (*vids == ',') ++vids;
2372 if (*vids == '-') // elided index
2373 {
2374 idx.add(Value_P());
2375 }
2376 else // value
2377 {
2378 char * end = 0;
2379 Assert1(*vids == 'v'); ++vids;
2380 Assert1(*vids == 'i'); ++vids;
2381 Assert1(*vids == 'd'); ++vids;
2382 Assert1(*vids == '_'); ++vids;
2383 const int vid = strtoll(charP(vids), &end, 10);
2384 Assert(vid < int(values.size()));
2385 idx.add(values[vid]);
2386 vids = utf8P(end);
2387 }
2388 }
2389 new (&tloc.tok) Token(tag, idx);
2390 }
2391 break;
2392
2393 case TV_FUN:
2394 {
2395 Function * fun = read_Function_name("ufun-name",
2396 "symbol-level",
2397 "fun-id");
2398 Assert(fun);
2399 new (&tloc.tok) Token(tag, fun);
2400 }
2401 break;
2402
2403 default: FIXME;
2404 }
2405
2406 return true;
2407 }
2408 //-----------------------------------------------------------------------------
2409 Function *
read_Function_name(const char * name_attr,const char * level_attr,const char * id_attr)2410 XML_Loading_Archive::read_Function_name(const char * name_attr,
2411 const char * level_attr,
2412 const char * id_attr)
2413 {
2414 const UTF8 * fun_name = find_attr(name_attr, true);
2415
2416 if (fun_name) // user defined function
2417 {
2418 const int level = find_int_attr(level_attr, false, 10);
2419 const UTF8 * end = fun_name;
2420 while (*end != '"') ++end;
2421 UTF8_string name_UTF(fun_name, end - fun_name);
2422 UCS_string name_UCS(name_UTF);
2423 if (name_UCS == ID::get_name_UCS(ID_LAMBDA))
2424 {
2425 Assert(level == -1);
2426 return find_lambda(name_UCS);
2427 }
2428
2429 const Symbol & symbol = *Workspace::lookup_symbol(name_UCS);
2430
2431 Assert(level >= 0);
2432 Assert(level < symbol.value_stack_size());
2433 const ValueStackItem & vsi = symbol[level];
2434 Assert(vsi.name_class == NC_FUNCTION);
2435 Function * fun = vsi.sym_val.function;
2436 Assert(fun);
2437 return fun;
2438 }
2439
2440 const int fun_id = find_int_attr(id_attr, true, 16);
2441 if (fun_id != -1)
2442 {
2443 Function * sysfun = ID::get_system_function(Id(fun_id));
2444 Assert(sysfun);
2445 return sysfun;
2446 }
2447
2448 // not found. This can happen when the function is optional.
2449 //
2450 return 0;
2451 }
2452 //-----------------------------------------------------------------------------
2453 Function *
find_lambda(const UCS_string & lambda)2454 XML_Loading_Archive::find_lambda(const UCS_string & lambda)
2455 {
2456 const StateIndicator & si = *Workspace::SI_top();
2457 const Executable & exec = *si.get_executable();
2458 const Token_string & body = exec.get_body();
2459
2460 loop(b, body.size())
2461 {
2462 const Token & tok = body[b];
2463 if (tok.get_ValueType() == TV_SYM)
2464 {
2465 const Symbol * sym = tok.get_sym_ptr();
2466 Assert(sym);
2467 loop(v, sym->value_stack_size())
2468 {
2469 const ValueStackItem & vs = (*sym)[v];
2470 if (vs.name_class == NC_FUNCTION ||
2471 vs.name_class == NC_OPERATOR)
2472 {
2473 if (vs.sym_val.function->get_name() == lambda)
2474 {
2475 return vs.sym_val.function;
2476 }
2477 }
2478 }
2479 continue; // not found
2480 }
2481 else if (tok.get_ValueType() != TV_FUN) continue;
2482
2483 Function * fun = tok.get_function();
2484 Assert1(fun);
2485 const UserFunction * ufun = fun->get_ufun1();
2486 if (!ufun) continue; // not a user defined function
2487
2488 const UCS_string & fname = ufun->get_name();
2489 if (fname == lambda) return fun;
2490 }
2491
2492 CERR << "find_lambda() failed for " << lambda
2493 << " at )SI level=" << si.get_level() << endl;
2494 return 0;
2495 }
2496 //=============================================================================
2497
2498