1 /*
2     This file is part of GNU APL, a free implementation of the
3     ISO/IEC Standard 13751, "Programming Language APL, Extended"
4 
5     Copyright (C) 2008-2017  Dr. Jürgen Sauermann
6 
7     This program is free software: you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation, either version 3 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include <stdio.h>
22 #include <string.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <sys/stat.h>
26 #include <sys/mman.h>
27 #include <errno.h>
28 
29 #include <vector>
30 
31 #include "Bif_F12_TAKE_DROP.hh"
32 #include "Error.hh"
33 #include "Output.hh"
34 #include "Parser.hh"
35 #include "StateIndicator.hh"
36 #include "Symbol.hh"
37 #include "UserFunction.hh"
38 #include "UserPreferences.hh"
39 #include "Value.hh"
40 #include "Workspace.hh"
41 
42 //-----------------------------------------------------------------------------
UserFunction(const UCS_string txt,const char * loc,const UTF8_string & _creator,bool tolerant,bool macro)43 UserFunction::UserFunction(const UCS_string txt, const char * loc,
44                            const UTF8_string & _creator, bool tolerant,
45                            bool macro)
46   : Function(ID_USER_SYMBOL, TOK_FUN2),
47     Executable(txt, true, PM_FUNCTION, loc),
48     header(txt, macro),
49     creator(_creator),
50     error_line(0),   // assume header is wrong
51     error_info("Unspecified")
52 {
53    if (header.get_error())
54       {
55         error_info = header.get_error_info();
56         return;
57       }
58 
59    set_creation_time(now());
60 
61    exec_properties[0] = 0;
62    exec_properties[1] = 0;
63    exec_properties[2] = 0;
64    exec_properties[3] = 0;
65 
66    line_starts.push_back(Function_PC_0);   // will be set later.
67 
68    if (header.get_error() != E_NO_ERROR)   // bad header
69       {
70         error_info = header.get_error_info();
71         return;
72       }
73 
74    // set Function::tag
75    //
76    if      (header.RO())   tag = TOK_OPER2;
77    else if (header.LO())   tag = TOK_OPER1;
78    else if (header.A())    tag = TOK_FUN2;
79    else if (header.B())    tag = TOK_FUN1;
80    else                    tag = TOK_FUN0;
81 
82    parse_body(loc, tolerant, macro);
83    if (error_line > 0)
84       {
85         error_info = "Error in function body";
86         return;
87       }
88 
89    error_line = -1;   // no error
90    error_info = 0;
91 }
92 //-----------------------------------------------------------------------------
UserFunction(Fun_signature sig,int lambda_num,const UCS_string & text,Token_string & lambda_body)93 UserFunction::UserFunction(Fun_signature sig, int lambda_num,
94                            const UCS_string & text, Token_string & lambda_body)
95   : Function(ID_USER_SYMBOL, TOK_FUN0),
96     Executable(sig, lambda_num, text, LOC),
97     header(sig, lambda_num),
98     creator(UNI_LAMBDA),
99     error_line(0),
100     error_info("Unspecified")
101 {
102    set_creation_time(now());
103 
104    exec_properties[0] = 0;
105    exec_properties[1] = 0;
106    exec_properties[2] = 0;
107    exec_properties[3] = 0;
108 
109    if (header.get_error() != E_NO_ERROR)   // bad header
110       {
111         error_info = header.get_error_info();
112         return;
113       }
114 
115    if      (header.RO())   tag = TOK_OPER2;
116    else if (header.LO())   tag = TOK_OPER1;
117    else if (header.A())    tag = TOK_FUN2;
118    else if (header.B())    tag = TOK_FUN1;
119    else                    tag = TOK_FUN0;
120 
121    while (lambda_body.size() > 2 &&
122           lambda_body.back().get_tag() == TOK_SYMBOL &&
123           lambda_body[lambda_body.size() - 2].get_tag() == TOK_SEMICOL)
124       {
125         header.add_local_var(lambda_body.back().get_sym_ptr());
126         lambda_body.pop_back();   // varname
127         lambda_body.pop_back();   // semicolon
128       }
129 
130    // order of local vars is reversed. Fix that.
131    //
132    header.reverse_local_vars();
133 
134    parse_body_line(Function_Line_0, lambda_body, false, false, LOC);
135    setup_lambdas();
136    line_starts.push_back(Function_PC(lambda_body.size() - 1));
137    line_starts.push_back(Function_PC_0);
138    error_line = -1;   // no error
139    error_info = 0;
140 }
141 //-----------------------------------------------------------------------------
~UserFunction()142 UserFunction::~UserFunction()
143 {
144    Log(LOG_UserFunction__enter_leave)
145       CERR << "Function " << get_name() << " deleted." << endl;
146 }
147 //-----------------------------------------------------------------------------
148 Token
eval_()149 UserFunction::eval_()
150 {
151    Log(LOG_UserFunction__enter_leave)
152       CERR << "Function " << get_name() << " calls eval_()" << endl;
153 
154    if (header.B())   SYNTAX_ERROR;   // not defined niladic
155 
156    Workspace::push_SI(this, LOC);
157    if (header.Z())   header.Z()->push();
158 
159    header.eval_common();
160 
161    return Token(TOK_SI_PUSHED);
162 }
163 //-----------------------------------------------------------------------------
164 Token
eval_B(Value_P B)165 UserFunction::eval_B(Value_P B)
166 {
167    Log(LOG_UserFunction__enter_leave)
168       {
169         CERR << "Function " << get_name() << " calls eval_B("
170              << Token(TOK_APL_VALUE1, B) << ")" << endl;
171       }
172 
173    if (header.LO())    SYNTAX_ERROR;   // defined as operator
174 
175    Workspace::push_SI(this, LOC);
176 
177    if (header.Z())   header.Z()->push();
178    if (header.A())   header.A()->push();
179    if (header.X())   header.X()->push();
180    if (header.B())   header.B()->push_value(B);
181 
182    header.eval_common();
183 
184    return Token(TOK_SI_PUSHED);
185 }
186 //-----------------------------------------------------------------------------
187 Token
eval_XB(Value_P X,Value_P B)188 UserFunction::eval_XB(Value_P X, Value_P B)
189 {
190    Log(LOG_UserFunction__enter_leave)
191       {
192         CERR << "Function " << get_name() << " calls eval_B("
193              << Token(TOK_APL_VALUE1, B) << ")" << endl;
194       }
195 
196    if (!header.X())    AXIS_ERROR;
197    if (header.LO())    SYNTAX_ERROR;   // defined as operator
198 
199    Workspace::push_SI(this, LOC);
200 
201    if (header.Z())   header.Z()->push();
202    if (header.A())   header.A()->push();
203    if (header.X())   header.X()->push_value(X);
204    if (header.B())   header.B()->push_value(B);
205 
206    header.eval_common();
207 
208    return Token(TOK_SI_PUSHED);
209 }
210 //-----------------------------------------------------------------------------
211 Token
eval_AB(Value_P A,Value_P B)212 UserFunction::eval_AB(Value_P A, Value_P B)
213 {
214    Log(LOG_UserFunction__enter_leave)
215       {
216         CERR << "Function " << get_name() << " calls eval_AB("
217              << Token(TOK_APL_VALUE1, A) << ", "
218              << Token(TOK_APL_VALUE1, B) << ")" << endl;
219       }
220 
221    if (header.LO())    SYNTAX_ERROR;    // defined as operator
222    if (!header.A())    VALENCE_ERROR;   // monadic
223 
224    Workspace::push_SI(this, LOC);
225 
226    if (header.Z())   header.Z()->push();
227    if (header.A())   header.A()->push_value(A);
228    if (header.X())   header.X()->push();
229    if (header.B())   header.B()->push_value(B);
230 
231    header.eval_common();
232 
233    return Token(TOK_SI_PUSHED);
234 }
235 //-----------------------------------------------------------------------------
236 Token
eval_AXB(Value_P A,Value_P X,Value_P B)237 UserFunction::eval_AXB(Value_P A, Value_P X, Value_P B)
238 {
239    Log(LOG_UserFunction__enter_leave)
240       {
241         CERR << "Function " << get_name() << " calls eval_AB("
242              << Token(TOK_APL_VALUE1, A) << ", "
243              << Token(TOK_APL_VALUE1, B) << ")" << endl;
244       }
245 
246    if (header.LO())    SYNTAX_ERROR;    // defined as operator
247    if (!header.A())    VALENCE_ERROR;   // monadic
248    if (!header.X())    AXIS_ERROR;
249 
250    Workspace::push_SI(this, LOC);
251 
252    if (header.Z())   header.Z()->push();
253    if (header.A())   header.A()->push_value(A);
254    if (header.X())   header.X()->push_value(X);
255    if (header.B())   header.B()->push_value(B);
256 
257    header.eval_common();
258 
259    return Token(TOK_SI_PUSHED);
260 }
261 //-----------------------------------------------------------------------------
262 Token
eval_LB(Token & LO,Value_P B)263 UserFunction::eval_LB(Token & LO, Value_P B)
264 {
265    Log(LOG_UserFunction__enter_leave)
266       {
267         CERR << "Function " << get_name() << " calls " << __FUNCTION__ << "(";
268         print_val_or_fun(CERR, LO) << ", "
269              << Token(TOK_APL_VALUE1, B) << ")" << endl;
270       }
271 
272    if (header.RO())    SYNTAX_ERROR;   // dyadic operator called monadically
273 
274    Workspace::push_SI(this, LOC);
275 
276    if (header.Z())         header.Z() ->push();
277    if (header.A())         header.A() ->push();
278    if (LO.is_function())   header.LO()->push_function(LO.get_function());
279    else                    header.LO()->push_value(LO.get_apl_val());
280    if (header.X())         header.X() ->push();
281    header                        .B() ->push_value(B);
282 
283    header.eval_common();
284 
285    return Token(TOK_SI_PUSHED);
286 }
287 //-----------------------------------------------------------------------------
288 Token
eval_LXB(Token & LO,Value_P X,Value_P B)289 UserFunction::eval_LXB(Token & LO, Value_P X, Value_P B)
290 {
291    Log(LOG_UserFunction__enter_leave)
292       {
293         CERR << "Function " << get_name() << " calls " << __FUNCTION__ << "(";
294         print_val_or_fun(CERR, LO) << ", "
295              << Token(TOK_APL_VALUE1, X) << ", "
296              << Token(TOK_APL_VALUE1, B) << ")" << endl;
297       }
298 
299    if (header.RO())    SYNTAX_ERROR;   // dyadic operator called monadically
300    if (!header.X())    AXIS_ERROR;
301 
302    Workspace::push_SI(this, LOC);
303 
304    if (header.Z())         header.Z()->push();
305    if (header.A())         header.A()->push();
306    if (LO.is_function())   header.LO()->push_function(LO.get_function());
307    else                    header.LO()->push_value(LO.get_apl_val());
308    if (header.X())         header.X()->push_value(X);
309    header                        .B()->push_value(B);
310 
311    header.eval_common();
312 
313    return Token(TOK_SI_PUSHED);
314 }
315 //-----------------------------------------------------------------------------
316 Token
eval_ALB(Value_P A,Token & LO,Value_P B)317 UserFunction::eval_ALB(Value_P A, Token & LO, Value_P B)
318 {
319    Log(LOG_UserFunction__enter_leave)
320       {
321         CERR << "Function " << get_name() << " calls " << __FUNCTION__ << "("
322              << Token(TOK_APL_VALUE1, A) << ", " << endl;
323         print_val_or_fun(CERR, LO) << ", "
324              << Token(TOK_APL_VALUE1, B) << ")" << endl;
325       }
326 
327    if (header.RO())    SYNTAX_ERROR;    // defined as dyadic operator
328    if (!header.A())    VALENCE_ERROR;   // monadic
329 
330    Workspace::push_SI(this, LOC);
331 
332    if (header.X())         header.X()->push();
333 
334    if (header.Z())         header.Z()->push();
335    header                        .A()->push_value(A);
336    if (LO.is_function())   header.LO()->push_function(LO.get_function());
337    else                    header.LO()->push_value(LO.get_apl_val());
338    header                        .B()->push_value(B);
339 
340    header.eval_common();
341 
342    return Token(TOK_SI_PUSHED);
343 }
344 //-----------------------------------------------------------------------------
345 Token
eval_ALXB(Value_P A,Token & LO,Value_P X,Value_P B)346 UserFunction::eval_ALXB(Value_P A, Token & LO, Value_P X, Value_P B)
347 {
348    Log(LOG_UserFunction__enter_leave)
349       {
350         CERR << "Function " << get_name() << " calls " << __FUNCTION__ << "("
351              << Token(TOK_APL_VALUE1, A) << ", " << endl;
352         print_val_or_fun(CERR, LO) << ", "
353              << Token(TOK_APL_VALUE1, X) << ", "
354              << Token(TOK_APL_VALUE1, B) << ")" << endl;
355       }
356 
357    if (header.RO())    SYNTAX_ERROR;    // defined as dyadic operator
358    if (!header.A())    VALENCE_ERROR;   // monadic
359    if (!header.X())    AXIS_ERROR;
360 
361    Workspace::push_SI(this, LOC);
362 
363    if (header.Z())         header.Z()->push();
364    header                        .A()->push_value(A);
365    if (LO.is_function())   header.LO()->push_function(LO.get_function());
366    else                    header.LO()->push_value(LO.get_apl_val());
367    if (header.X())         header.X()->push_value(X);
368    header                        .B()->push_value(B);
369 
370    header.eval_common();
371 
372    return Token(TOK_SI_PUSHED);
373 }
374 //-----------------------------------------------------------------------------
375 Token
eval_LRB(Token & LO,Token & RO,Value_P B)376 UserFunction::eval_LRB(Token & LO, Token & RO, Value_P B)
377 {
378    Log(LOG_UserFunction__enter_leave)
379       {
380         CERR << "Function " << get_name() << " calls " << __FUNCTION__ << "(";
381         print_val_or_fun(CERR, LO) << ", ";
382         print_val_or_fun(CERR, RO) << ", "
383              << Token(TOK_APL_VALUE1, B) << ")" << endl;
384       }
385 
386    if (!header.RO())    SYNTAX_ERROR;   // not defined as dyadic operator
387 
388    Workspace::push_SI(this, LOC);
389 
390    if (header.Z())         header.Z()->push();
391    if (header.A())         header.A()->push();
392 
393    if (LO.is_function())   header.LO()->push_function(LO.get_function());
394    else                    header.LO()->push_value(LO.get_apl_val());
395    if (RO.is_function())   header.RO()->push_function(RO.get_function());
396    else                    header.RO()->push_value(RO.get_apl_val());
397    if (header.X())         header.X() ->push();
398    header                        .B() ->push_value(B);
399 
400    header.eval_common();
401 
402    return Token(TOK_SI_PUSHED);
403 }
404 //-----------------------------------------------------------------------------
405 Token
eval_LRXB(Token & LO,Token & RO,Value_P X,Value_P B)406 UserFunction::eval_LRXB(Token & LO, Token & RO, Value_P X, Value_P B)
407 {
408    Log(LOG_UserFunction__enter_leave)
409       {
410         CERR << "Function " << get_name() << " calls " << __FUNCTION__ << "(";
411         print_val_or_fun(CERR, LO) << ", ";
412         print_val_or_fun(CERR, RO) << ", "
413              << Token(TOK_APL_VALUE1, X) << ", "
414              << Token(TOK_APL_VALUE1, B) << ")" << endl;
415       }
416 
417    if (!header.RO())   SYNTAX_ERROR;   // not defined as dyadic operator
418    if (!header.X())    AXIS_ERROR;
419 
420    Workspace::push_SI(this, LOC);
421 
422    if (header.Z())         header.Z()->push();
423    if (header.A())         header.A()->push();
424    if (LO.is_function())   header.LO()->push_function(LO.get_function());
425    else                    header.LO()->push_value(LO.get_apl_val());
426    if (RO.is_function())   header.RO()->push_function(RO.get_function());
427    if (header.X())         header.X()->push_value(X);
428    else                    header.RO()->push_value(RO.get_apl_val());
429    header                        .B()->push_value(B);
430 
431    header.eval_common();
432 
433    return Token(TOK_SI_PUSHED);
434 }
435 //-----------------------------------------------------------------------------
436 Token
eval_ALRB(Value_P A,Token & LO,Token & RO,Value_P B)437 UserFunction::eval_ALRB(Value_P A, Token & LO, Token & RO, Value_P B)
438 {
439    Log(LOG_UserFunction__enter_leave)
440       {
441         CERR << "Function " << get_name() << " calls " << __FUNCTION__ << "("
442              << Token(TOK_APL_VALUE1, A) << ", " << endl;
443         print_val_or_fun(CERR, LO) << ", ";
444         print_val_or_fun(CERR, RO) << ", "
445              << Token(TOK_APL_VALUE1, B) << ")" << endl;
446       }
447 
448    if (!header.RO())    SYNTAX_ERROR;   // defined monadic op called dyadically
449    if (!header.A())    VALENCE_ERROR;   // monadic
450 
451    Workspace::push_SI(this, LOC);
452 
453    if (header.Z())         header.Z()->push();
454 
455    header                        .A()->push_value(A);
456    if (LO.is_function())   header.LO()->push_function(LO.get_function());
457    else                    header.LO()->push_value(LO.get_apl_val());
458    if (RO.is_function())   header.RO()->push_function(RO.get_function());
459    else                    header.RO()->push_value(RO.get_apl_val());
460    if (header.X())         header.X()->push();
461    header                        .B()->push_value(B);
462 
463    header.eval_common();
464 
465    return Token(TOK_SI_PUSHED);
466 }
467 //-----------------------------------------------------------------------------
468 Token
eval_ALRXB(Value_P A,Token & LO,Token & RO,Value_P X,Value_P B)469 UserFunction::eval_ALRXB(Value_P A, Token & LO, Token & RO,
470                          Value_P X, Value_P B)
471 {
472    Log(LOG_UserFunction__enter_leave)
473       {
474         CERR << "Function " << get_name() << " calls " << __FUNCTION__ << "("
475              << Token(TOK_APL_VALUE1, A) << ", " << endl;
476         print_val_or_fun(CERR, LO) << ", ";
477         print_val_or_fun(CERR, RO) << ", "
478              << Token(TOK_APL_VALUE1, X) << ", "
479              << Token(TOK_APL_VALUE1, B) << ")" << endl;
480       }
481 
482    if (!header.RO())   SYNTAX_ERROR;   // defined monadic op called dyadically
483    if (!header.A())    VALENCE_ERROR;   // monadic
484    if (!header.X())    AXIS_ERROR;
485 
486    Workspace::push_SI(this, LOC);
487 
488    if (header.Z())         header.Z()->push();
489    header                        .A()->push_value(A);
490    if (LO.is_function())   header.LO()->push_function(LO.get_function());
491    else                    header.LO()->push_value(LO.get_apl_val());
492    if (RO.is_function())   header.RO()->push_function(RO.get_function());
493    else                    header.RO()->push_value(RO.get_apl_val());
494    if (header.X())         header.X()->push_value(X);
495    header                        .B()->push_value(B);
496 
497    header.eval_common();
498 
499    return Token(TOK_SI_PUSHED);
500 }
501 //-----------------------------------------------------------------------------
502 Token
eval_fill_B(Value_P B)503 UserFunction::eval_fill_B(Value_P B)
504 {
505 Value_P Z = B->clone(LOC);
506    return Token(TOK_APL_VALUE1, Z);
507 }
508 //-----------------------------------------------------------------------------
509 Token
eval_fill_AB(Value_P A,Value_P B)510 UserFunction::eval_fill_AB(Value_P A, Value_P B)
511 {
512 Value_P Z = B->clone(LOC);
513    return Token(TOK_APL_VALUE1, Z);
514 }
515 //-----------------------------------------------------------------------------
516 void
set_locked_error_info(Error & error) const517 UserFunction::set_locked_error_info(Error & error) const
518 {
519 UCS_string message_2(UTF8_string(error.get_error_line_2()));
520 
521 #define SHORT 0
522    if (header.A())
523       {
524 #if SHORT
525         message_2.append(header.A()->get_name());
526         message_2.append(UNI_ASCII_SPACE);
527 #else
528         Value_P val_A = header.A()->get_value();
529         if (!!val_A)
530            {
531              PrintContext pctx(PR_BOXED_GRAPHIC);
532              PrintBuffer pb(*val_A, pctx, 0);
533              message_2.append(UCS_string(pb, 1, DEFAULT_Quad_PW));
534              message_2.append(UNI_ASCII_SPACE);
535            }
536 #endif
537       }
538 
539    message_2.append(header.get_name());
540 
541    if (header.B())
542       {
543 #if SHORT
544         message_2.append(UNI_ASCII_SPACE);
545         message_2.append(header.B()->get_name());
546 #else
547         Value_P val_B = header.B()->get_value();
548         if (!!val_B)
549            {
550              message_2.append(UNI_ASCII_SPACE);
551              PrintContext pctx(PR_APL_FUN);
552              PrintBuffer pb(*val_B, pctx, 0);
553              message_2.append(UCS_string(pb, 1, DEFAULT_Quad_PW));
554            }
555 #endif
556       }
557 
558    {
559      UTF8_string utf(message_2);
560      error.set_error_line_2(utf.c_str());
561    }
562 
563    error.set_right_caret(error.get_left_caret() + message_2.size() - 7);
564 }
565 //-----------------------------------------------------------------------------
566 void
set_trace_stop(std::vector<Function_Line> & lines,bool stop)567 UserFunction::set_trace_stop(std::vector<Function_Line> & lines, bool stop)
568 {
569    // Sort lines
570    //
571 std::vector<bool> ts_lines;
572 
573    loop(ts, line_starts.size())   ts_lines.push_back(false);
574    loop(ll, lines.size())
575       {
576         Function_Line line = lines[ll];
577         if (line >= 1 && line < int(line_starts.size()))
578            ts_lines[line] = true;
579       }
580 
581    if (stop)
582       {
583         stop_lines.clear();
584         loop(ts, line_starts.size())
585            {
586              if (ts_lines[ts])
587                 stop_lines.push_back(Function_Line(ts));
588            }
589       }
590    else
591       {
592         trace_lines.clear();
593         loop(ts, line_starts.size())
594            {
595              if (ts_lines[ts])
596                 trace_lines.push_back(Function_Line(ts));
597            }
598       }
599 
600    parse_body(LOC, false, false);
601 }
602 //-----------------------------------------------------------------------------
603 ErrorCode
transform_multi_line_strings()604 UserFunction::transform_multi_line_strings()
605 {
606   /* convert a set of lines like
607 
608      [k+1] PREFIX "L1
609      [k+2] L2 ...
610      [k+N] Ln" SUFFIX
611 
612     into:
613 
614     [k+1] PREFIX "L1" "L2" ... "LN" SUFFIX
615     [...] (empty)
616     [k+N] (empty)
617    */
618 
619 enum Line_status
620    {
621      Function_header = 0,
622      APL_text        = 1,
623      Start_of_string = 2,
624      Inside_string   = 3,
625      End_of_string   = 4
626    };
627 
628 char status[get_text_size()];   status[0] = Function_header;
629 Line_status current = APL_text;
630 
631    // determine line status of each line...
632    //
633    for (int l = 1; l < get_text_size(); ++l)
634        {
635          const int count = get_text(l).double_quote_count(false);
636          if (count & 1)   // start or end of string
637             {
638               if (current == APL_text)   // start of multi-line string
639                  {
640                     status[l] = Start_of_string;
641                     current = Inside_string;
642                  }
643               else                       // end of multi-line string
644                  {
645                     status[l] = End_of_string;
646                     current = APL_text;
647                  }
648             }
649          else              // no status change
650             {
651               status[l] = current;
652             }
653        }
654 
655    if (current == Start_of_string || current == Inside_string)
656       {
657          // multi-line string started but not ended.
658          //
659          return E_DEFN_ERROR;
660       }
661 
662    // modify lines...
663    //
664    for (int l = 1; l < get_text_size();)
665        {
666          if (status[l] == APL_text)   { ++l;   continue; }
667          const int start = l;
668          Assert1(status[l] == Start_of_string);
669          UCS_string accu = get_text(l++);
670          accu << "\" ";
671          while (status[l] == Inside_string)
672                {
673                  accu << " \"" << get_text(l).do_escape(true) << "\"";
674                  clear_text(l++);
675                }
676          Assert(status[l] == End_of_string);
677          accu << "\"" << get_text(l);
678          set_text(start, accu);
679          clear_text(l++);
680        }
681 
682    // remove trailing empty lines...
683    //
684    while (get_text_size() && get_text(get_text_size() - 1).size() == 0)
685          text.pop_back();
686 
687 // CERR << endl;
688 // loop(l, get_text_size())   CERR << "[" << l << "]  " << get_text(l) << endl;
689 
690    return E_NO_ERROR;
691 }
692 //-----------------------------------------------------------------------------
693 ErrorCode
transform_multi_line_strings_3()694 UserFunction::transform_multi_line_strings_3()
695 {
696   /* convert a set of lines like
697 
698      [k+1] PREFIX """
699      [k+2] L2 ...
700      [k+N] """
701 
702     into:
703 
704     [k+1] PREFIX "L2" ... "LN"
705     [...] (empty)
706     [k+N] (empty)
707 
708      In contrast to transform_multi_line_strings(), line k+N may only
709      consist of spaces and the terminating """.
710    */
711 
712 enum Line_status
713    {
714      Function_header = 0,
715      APL_text        = 1,
716      Start_of_string = 2,
717      Inside_string   = 3,
718      End_of_string   = 4
719    };
720 
721 char status[get_text_size()];   status[0] = Function_header;
722 Line_status current = APL_text;
723 
724    // determine line status of each line...
725    //
726    for (ShapeItem l = 1; l < get_text_size(); ++l)
727        {
728          const UCS_string & line = get_text(l);
729          const ShapeItem len = line.size();
730          if (line.ends_with("\"\"\""))
731             {
732               if (current == APL_text)   // start of multi-line string
733                  {
734                     status[l] = Start_of_string;
735                     current = Inside_string;
736                  }
737               else                       // end of multi-line string
738                  {
739                    bool blanks_only = true;
740                    for (ShapeItem c = 0; c < (len - 3); ++c)
741                        {
742                           if (line[c] != UNI_ASCII_SPACE &&
743                               line[c] != UNI_ASCII_HT)   blanks_only = false;
744                        }
745 
746                     if (blanks_only)
747                        {
748                          status[l] = End_of_string;
749                          current = APL_text;
750                        }
751                  }
752             }
753          else              // no status change
754             {
755               status[l] = current;
756             }
757        }
758 
759    if (current == Start_of_string || current == Inside_string)
760       {
761          // multi-line string started but not ended.
762          //
763          return E_DEFN_ERROR;
764       }
765 
766    // modify lines...
767    //
768    for (int l = 1; l < get_text_size(); )
769        {
770          if (status[l] == APL_text)   { ++l;   continue; }
771          const int start = l;
772          Assert1(status[l] == Start_of_string);
773          UCS_string accu = get_text(l++);
774          accu.resize(accu.size() - 3);   // remove trailing """
775          accu << " ";
776          while (status[l] == Inside_string)
777                {
778                  accu << " \"" << get_text(l).do_escape(true) << "\"";
779                  clear_text(l++);
780                }
781          Assert(status[l] == End_of_string);
782          set_text(start, accu);
783          clear_text(l++);
784        }
785 
786    // remove trailing empty lines...
787    //
788    while (get_text_size() && get_text(get_text_size() - 1).size() == 0)
789          text.pop_back();
790 
791 // CERR << endl;
792 // loop(l, get_text_size())   CERR << "[" << l << "]  " << get_text(l) << endl;
793 
794    return E_NO_ERROR;
795 }
796 //-----------------------------------------------------------------------------
797 void
parse_body(const char * loc,bool tolerant,bool macro)798 UserFunction::parse_body(const char * loc, bool tolerant, bool macro)
799 {
800    line_starts.clear();
801    line_starts.push_back(Function_PC_0);   // will be set later.
802 
803 UCS_string_vector original_text;
804    //
805    // The text is modified for parsing but restored afterwards so that e,g,
806    // ∇FUN[⎕]∇ shows the text enterd by the user.
807    //
808    // original_text is only set if text was modified.
809    //
810    clear_body();
811 
812    if (uprefs.multi_line_strings_3)   // new-style multi-line strings allowed
813       {
814         for (int l = 1; l < get_text_size(); ++l)
815             {
816               const UCS_string & line = get_text(l);
817               const ShapeItem len = line.size();
818               if (len >= 3 &&
819                   line[len - 1] == UNI_ASCII_DOUBLE_QUOTE &&
820                   line[len - 2] == UNI_ASCII_DOUBLE_QUOTE &&
821                   line[len - 3] == UNI_ASCII_DOUBLE_QUOTE)
822                  {
823                    original_text = text;
824                    const ErrorCode ec = transform_multi_line_strings_3();
825                    if (ec)
826                       {
827                         text = original_text;   // restore function text
828                         error_line = l;
829                         return;
830                       }
831 
832                    break;
833                  }
834             }
835       }
836 
837    if (uprefs.multi_line_strings)   // old-style multi-line strings allowed
838       {
839         for (int l = 1; l < get_text_size(); ++l)
840             {
841               if (get_text(l).double_quote_count(false) & 1)
842                  {
843                    original_text = text;
844                    const ErrorCode ec = transform_multi_line_strings();
845                    if (ec)
846                       {
847                         text = original_text;   // restore function text
848                         error_line = l;
849                         return;
850                       }
851 
852                    break;
853                  }
854             }
855       }
856 
857    for (int l = 1; l < get_text_size(); ++l)
858       {
859         bool stop_line = false;
860         loop(s, stop_lines.size())
861            {
862              if (stop_lines[s] == l)
863                 {
864                   stop_line = true;
865                   break;
866                 }
867            }
868 
869         bool trace_line = false;
870         loop(t, trace_lines.size())
871            {
872              if (trace_lines[t] == l)
873                 {
874                   trace_line = true;
875                   break;
876                 }
877            }
878 
879         error_line = l;   // assume error
880         line_starts.push_back(Function_PC(body.size()));
881 
882         if (stop_line)
883            {
884              body.push_back(Token(TOK_STOP_LINE));
885              const int64_t tr = 0;
886              body.push_back(Token(TOK_END, tr));
887            }
888 
889         const UCS_string & line = get_text(l);
890         ErrorCode ec = E_SYNTAX_ERROR;
891         try {
892               ec = parse_body_line(Function_Line(l), line, trace_line,
893                                    tolerant, loc, macro);
894 
895               if (tolerant && ec != E_NO_ERROR)
896                  {
897                    UCS_string new_line = "## ";
898                    new_line.append(line);
899                    text[l] = new_line;
900                    CERR << "WARNING: SYNTAX ERROR in function "
901                         << header.get_name() << endl;
902                  }
903             }
904         catch(const Error & err)
905             {
906               return;
907             }
908       }
909 
910    error_line = -1;   // OK
911    setup_lambdas();
912 
913    Log(LOG_UserFunction__fix)
914       {
915         CERR << "body.size() is " << body.size() << endl
916              << "line_starts.size() is " << line_starts.size() <<endl;
917       }
918 
919    // let [0] be the end of the function.
920    line_starts[0] = Function_PC(body.size());
921 
922    if (header.Z())   body.push_back(Token(TOK_RETURN_SYMBOL, header.Z()));
923    else              body.push_back(Token(TOK_RETURN_VOID));
924 
925    // restore the original text (before multi-line expansion)
926    if (original_text.size())   text = original_text;
927 }
928 //-----------------------------------------------------------------------------
929 UserFunction *
load(const char * workspace,const char * function)930 UserFunction::load(const char * workspace, const char * function)
931 {
932 UserFunction * fun = 0;
933 
934    try
935       {
936         load(workspace, function, fun);
937       }
938    catch (Error & err)
939       {
940         delete fun;
941 
942         err.print(CERR, LOC);
943       }
944    catch (...)
945       {
946         delete fun;
947         CERR << "Caught some exception\n";
948         return 0;
949       }
950 
951    return fun;
952 }
953 //-----------------------------------------------------------------------------
954 void
load(const char * workspace,const char * function,UserFunction * & fun)955 UserFunction::load(const char * workspace, const char * function,
956                    UserFunction * & fun)
957 {
958 char filename[FILENAME_MAX + 10];
959    snprintf(filename, FILENAME_MAX + 5,
960             "workspaces/%s/%s.fun", workspace, function);
961 
962    if (strlen(filename) > FILENAME_MAX)
963       {
964         CERR << "file name '" << filename << "' is too long" << endl;
965         throw_apl_error(E_SYS_LIMIT_FILENAME, LOC);
966       }
967 
968 int in = open(filename, O_RDONLY);
969    if (in == -1)
970       {
971         CERR << "Can't open() workspace file '"
972              << filename << "': " << strerror(errno) << endl;
973         throw_apl_error(E_WS_OPEN, LOC);
974       }
975 
976 struct stat st;
977    if (fstat(in, &st) == -1)
978       {
979         CERR << "Can't fstat() workspace file '"
980              << filename << "': " << strerror(errno) << endl;
981         close(in);
982         throw_apl_error(E_WS_FSTAT, LOC);
983       }
984 
985 off_t len = st.st_size;
986 void * start = mmap(0, len, PROT_READ, MAP_SHARED, in, 0);
987 
988    if (start == reinterpret_cast<const void *>(-1))
989       {
990         CERR << "Can't mmap() workspace file '"
991              << filename << "': " << strerror(errno) << endl;
992         close(in);
993         throw_apl_error(E_WS_MMAP, LOC);
994       }
995 
996 UTF8_string utf(utf8P(start), len);
997 
998    // skip trailing \r and \n.
999    //
1000    while (utf.size() &&
1001           (utf.back() == '\r' || utf.back() == '\n'))   utf.pop_back();
1002 
1003    munmap(start, st.st_size);
1004    close(in);
1005 
1006 UCS_string ucs(utf);
1007 int error_line = -1;
1008    fun = fix(ucs, error_line, false, LOC, filename, false);
1009 }
1010 //-----------------------------------------------------------------------------
1011 Function_PC
pc_for_line(Function_Line line) const1012 UserFunction::pc_for_line(Function_Line line) const
1013 {
1014    if (line < 1 || line >= int(line_starts.size()))
1015       return Function_PC(body.size() - 1);
1016 
1017    return line_starts[line];
1018 }
1019 //-----------------------------------------------------------------------------
1020 UserFunction *
fix(const UCS_string & text,int & err_line,bool keep_existing,const char * loc,const UTF8_string & creator,bool tolerant)1021 UserFunction::fix(const UCS_string & text, int & err_line,
1022                   bool keep_existing, const char * loc,
1023                   const UTF8_string & creator, bool tolerant)
1024 {
1025    Log(LOG_UserFunction__fix)
1026       {
1027         CERR << "fix pmode=user function:" << endl << text << endl
1028              <<  "------------------- UserFunction::fix() --" << endl;
1029       }
1030 
1031 UserFunction * ufun = new UserFunction(text, loc, creator, tolerant, false);
1032 const char * info = ufun->get_error_info();
1033    err_line = ufun->get_error_line();
1034 
1035 const bool bad_function = info || err_line != -1;
1036    if (bad_function)   // something went wrong
1037       {
1038         if (info)
1039            {
1040              Log(LOG_UserFunction__fix)   CERR << "Error: " << info << endl;
1041              MORE_ERROR() << info;
1042            }
1043          else info = "Error";
1044 
1045          if (err_line == 0)
1046            {
1047              MORE_ERROR() << info << "in function header";
1048              Log(LOG_UserFunction__fix) CERR << "Bad header line" <<  endl;
1049            }
1050          else if (err_line > 0)
1051            {
1052              UCS_string & more = MORE_ERROR();
1053              more << info << " in function line [" << err_line << "] of:\n";
1054              loop(l, ufun->get_text_size())
1055                 more << "[" << l << "] " << ufun->get_text(l) << "\n";
1056 
1057              Log(LOG_UserFunction__fix)
1058                 CERR << "Bad function line: " << err_line << endl;
1059            }
1060 
1061         delete ufun;
1062         return 0;
1063       }
1064 
1065 Symbol * symbol = Workspace::lookup_symbol(ufun->header.get_name());
1066 Function * old_function = symbol->get_function();
1067    if (old_function && keep_existing)
1068       {
1069         err_line = 0;
1070         delete ufun;
1071         return 0;
1072       }
1073 
1074    // check that the function can be defined (e.g. is not on the )SI stack)
1075    //
1076    if (old_function && symbol->cant_be_defined())
1077       {
1078         err_line = 0;
1079         delete ufun;
1080         return 0;
1081       }
1082 
1083    if (old_function)
1084       {
1085         const UserFunction * old_ufun = old_function->get_ufun1();
1086         Assert(old_ufun);
1087         delete old_ufun;
1088       }
1089 
1090    // bind function to symbol
1091    //
1092    if (ufun->header.LO())   ufun->header.FUN()->set_nc(NC_OPERATOR, ufun);
1093    else                     ufun->header.FUN()->set_nc(NC_FUNCTION, ufun);
1094 
1095    Log(LOG_UserFunction__fix)
1096       {
1097         CERR << " addr " << voidP(ufun) << endl;
1098         ufun->print(CERR);
1099       }
1100 
1101    return ufun;
1102 }
1103 //-----------------------------------------------------------------------------
1104 UserFunction *
fix_lambda(Symbol & var,const UCS_string & text)1105 UserFunction::fix_lambda(Symbol & var, const UCS_string & text)
1106 {
1107    /* Example: consider {⍺+⍵;LOCAL}
1108 
1109       text a normal (non-lambda) function like:
1110 
1111       λ←⍺ λ1 ⍵;LOCAL
1112       λ← ⍺+⍵
1113 
1114        which has a slightly different lambda body like:
1115 
1116        λ← ⍺+⍵;LOCAL
1117 
1118     */
1119 int signature = SIG_FUN | SIG_Z;
1120 int t = 0;
1121 
1122 ShapeItem semi = -1;
1123    while (t < text.size())
1124        {
1125          switch(text[t++])
1126             {
1127               case UNI_CHI:             signature |= SIG_X;    continue;
1128               case UNI_OMEGA:           signature |= SIG_B;    continue;
1129               case UNI_ALPHA_UNDERBAR:  signature |= SIG_LO;   continue;
1130               case UNI_OMEGA_UNDERBAR:  signature |= SIG_RO;   continue;
1131               case UNI_ALPHA:           signature |= SIG_A;    continue;
1132 
1133               case UNI_ASCII_SEMICOLON: if (semi == -1)   semi = t - 1;
1134                                         continue;
1135 
1136               case UNI_ASCII_LF:        break;   // header done
1137               default:                  continue;
1138             }
1139 
1140          break;   // header done
1141        }
1142 
1143    // discard leading spaces
1144    //
1145    while (t < text.size() && text[t] == UNI_ASCII_SPACE)   ++t;
1146 
1147 UCS_string body_text;
1148    for (; t < text.size(); ++t)   body_text.append(text[t]);
1149 
1150    while (body_text.back() == UNI_ASCII_LF)  body_text.pop_back();
1151 
1152 Token_string body;
1153    {
1154      Token ret_lambda(TOK_RETURN_SYMBOL, &Workspace::get_v_LAMBDA());
1155      body.push_back(ret_lambda);
1156      const int64_t trace = 0;
1157      Token tok_endl(TOK_ENDL, trace);
1158      body.push_back(tok_endl);
1159    }
1160 
1161    if (semi != -1)
1162       {
1163         for (ShapeItem s = semi; text[s] != UNI_ASCII_LF; ++s)
1164            {
1165              body_text.append(text[s]);
1166            }
1167       }
1168 
1169 const Parser parser(PM_FUNCTION, LOC, false);
1170 const ErrorCode ec = parser.parse(body_text, body);
1171    if (ec)
1172       {
1173         CERR << "Parsing '" << body_text << "' failed" << endl;
1174         return 0;
1175       }
1176 
1177 UserFunction * ufun = new UserFunction(Fun_signature(signature), 0,
1178                                        body_text, body);
1179    return ufun;
1180 }
1181 //-----------------------------------------------------------------------------
1182 void
destroy()1183 UserFunction::destroy()
1184 {
1185    // delete will call ~Executable(), which releases the values owned by body.
1186    //
1187    if (is_lambda())   decrement_refcount(LOC);
1188    else               delete this;
1189 }
1190 //-----------------------------------------------------------------------------
1191 bool
pushes_sym(const Symbol * sym) const1192 UserFunction::pushes_sym(const Symbol * sym) const
1193 {
1194    if (sym == header.Z())   return true;
1195    if (sym == header.A())   return true;
1196    if (sym == header.LO())   return true;
1197    if (sym == header.X())   return true;
1198    if (sym == header.RO())   return true;
1199    if (sym == header.B())   return true;
1200 
1201    loop(l, local_var_count())
1202        {
1203          if (sym == get_local_var(l))   return true;
1204        }
1205 
1206    return false;
1207 }
1208 //-----------------------------------------------------------------------------
1209 void
help(ostream & out) const1210 UserFunction::help(ostream & out) const
1211 {
1212    CERR << "    Header: " << get_text(0) << endl;
1213 
1214    if (is_lambda())
1215       {
1216          UCS_string body(get_text(1), 2, get_text(1).size() - 2);
1217          CERR << "Lambda: { " << body << " ";
1218          loop(v, local_var_count())
1219             {
1220               const Symbol & sym = *get_local_var(v);
1221               CERR << ";" << sym.get_name();
1222             }
1223          CERR << " }" << endl;
1224          return;
1225       }
1226 
1227 bool got_lamps = false;
1228 bool toronto = false;
1229 const UCS_string two_lamps(UTF8_string("⍝⍝"));
1230    for (int l = 1; l < get_text_size(); ++l)
1231        {
1232          UCS_string line(get_text(l));
1233          line.remove_leading_and_trailing_whitespaces();
1234          if (line.size() < 2)          continue;   // too short
1235 
1236          if (line[0] != UNI_COMMENT)   // not a comment
1237             {
1238               toronto = false;
1239               continue;
1240             }
1241 
1242          const bool double_lamps = line[1] == UNI_COMMENT;   // ⍝⍝ line
1243          if (line[1] == UNI_ASCII_FULLSTOP)                  // ⍝. line
1244             {
1245               toronto = true;
1246             }
1247 
1248          if (double_lamps || toronto)
1249             {
1250               got_lamps = true;
1251               CERR << "    " << line << endl;
1252             }
1253        }
1254 
1255    if (!got_lamps)   CERR << "    (no ⍝⍝ or ⍝. comment lines)" << endl;
1256 }
1257 //-----------------------------------------------------------------------------
1258 ostream &
print(ostream & out) const1259 UserFunction::print(ostream & out) const
1260 {
1261    out << header.get_name();
1262    return out;
1263 
1264 /*
1265    out << "Function header:" << endl;
1266    if (header.Z())     out << "Result:         " << *header.Z()   << endl;
1267    if (header.A())     out << "Left Argument:  " << *header.A()   << endl;
1268    if (header.LO())    out << "Left Op Arg:    " << *header.LO()  << endl;
1269                        out << "Function:       " << header.get_name() << endl;
1270    if (header.RO())    out << "Right Op Arg:   " << *header.RO()  << endl;
1271    if (header.B())     out << "Right Argument: " << *header.B()   << endl;
1272    return Executable::print(out);
1273 */
1274 }
1275 //-----------------------------------------------------------------------------
1276 void
print_properties(ostream & out,int indent) const1277 UserFunction::print_properties(ostream & out, int indent) const
1278 {
1279    header.print_properties(out, indent);
1280 UCS_string ind(indent, UNI_ASCII_SPACE);
1281    out << ind << "Body Lines:      " << line_starts.size() << endl
1282        << ind << "Creator:         " << get_creator()      << endl
1283        << ind << "Body: " << body << endl;
1284 }
1285 //-----------------------------------------------------------------------------
1286 UCS_string
get_name_and_line(Function_PC pc) const1287 UserFunction::get_name_and_line(Function_PC pc) const
1288 {
1289 UCS_string ret = header.get_name();
1290    if (ret.size() && ret[0] == UNI_LAMBDA)
1291       {
1292         UCS_string name = Workspace::find_lambda_name(this);
1293         if (name.size())   ret = name;
1294       }
1295    ret.append(UNI_ASCII_L_BRACK);
1296 
1297    // pc may point to the next token already. If that is the case then
1298    // we go back one token.
1299    //
1300    if (pc > 0 && body[pc - 1].get_Class() == TC_END)   pc = Function_PC(pc - 1);
1301 
1302 const Function_Line line = get_line(pc);
1303    ret.append_number(line);
1304    ret.append(UNI_ASCII_R_BRACK);
1305    return ret;
1306 }
1307 //-----------------------------------------------------------------------------
1308 Function_Line
get_line(Function_PC pc) const1309 UserFunction::get_line(Function_PC pc) const
1310 {
1311    Assert(pc >= -1);
1312    if (pc < 0)   pc = Function_PC_0;
1313 
1314    // search line_starts backwards until a line with non-greater pc is found.
1315    //
1316    for (int l = line_starts.size() - 1; l > 0; --l)
1317        {
1318          if (line_starts[l] <= pc)   return Function_Line(l);
1319        }
1320 
1321    return Function_Line_1;
1322 }
1323 //-----------------------------------------------------------------------------
1324 UCS_string
canonical(bool with_lines) const1325 UserFunction::canonical(bool with_lines) const
1326 {
1327 UCS_string ucs;
1328    loop(t, text.size())
1329       {
1330         if (with_lines)   ucs.append(line_prefix(Function_Line(t)));
1331         ucs.append(text[t]);
1332         ucs.append(UNI_ASCII_LF);
1333       }
1334 
1335    return ucs;
1336 }
1337 //-----------------------------------------------------------------------------
1338 void
adjust_line_starts()1339 UserFunction::adjust_line_starts()
1340 {
1341    // this function is called from Executable::setup_lambdas() just before
1342    // Parser::remove_void_token(body) in order to adjust line_starts
1343    //
1344 std::vector<ShapeItem> gaps;
1345    gaps.reserve(line_starts.size());   // count TOK_VOID in every line
1346    loop(ls, line_starts.size())
1347       {
1348          gaps.push_back(0);
1349          if (ls == 0)   continue;   // function header (has no TOK_VOID)
1350 
1351          const ShapeItem from = line_starts[ls];
1352          ShapeItem to = body.size();    // end of function (for last line)
1353          if (ls < (ShapeItem(line_starts.size()) - 1))
1354             to = line_starts[ls + 1];
1355 
1356         for (ShapeItem b = from; b < to; ++b)
1357             {
1358              if (body[b].get_tag() == TOK_VOID)   ++gaps.back();
1359             }
1360       }
1361 
1362 int total_gaps = 0;
1363    loop(ls, line_starts.size())
1364        {
1365           line_starts[ls] = Function_PC(line_starts[ls] - total_gaps);
1366           total_gaps += gaps[ls];
1367        }
1368 }
1369 //-----------------------------------------------------------------------------
1370 Function_PC
line_start(Function_Line line) const1371 UserFunction::line_start(Function_Line line) const
1372 {
1373    if (line < 0 || size_t(line) >= line_starts.size())
1374       {
1375         Q1(line)
1376         Q1(line_starts.size())
1377         Assert(0);
1378       }
1379 
1380    return line_starts[line];
1381 }
1382 //-----------------------------------------------------------------------------
1383 ostream &
print_val_or_fun(ostream & out,Token & tok)1384 UserFunction::print_val_or_fun(ostream & out, Token & tok)
1385 {
1386    if (tok.is_function())         out << *tok.get_function();
1387    else if (tok.is_apl_val())     out << tok;
1388    else if (tok.is_void())        out << "((VOID))";
1389    else                           FIXME;
1390 
1391    return out;
1392 }
1393 //-----------------------------------------------------------------------------
1394 UCS_string
line_prefix(Function_Line l) const1395 UserFunction::line_prefix(Function_Line l) const
1396 {
1397 char cc[40];
1398    if      (text.size() > 100)   snprintf(cc, sizeof(cc), "[%3d] ", l);
1399    else if (text.size() > 10)    snprintf(cc, sizeof(cc), "[%2d] ", l);
1400    else                          snprintf(cc, sizeof(cc), "[%d] ",  l);
1401    return UCS_string(cc);
1402 }
1403 //-----------------------------------------------------------------------------
1404