1 /*
2     This file is part of GNU APL, a free implementation of the
3     ISO/IEC Standard 13751, "Programming Language APL, Extended"
4 
5     Copyright (C) 2008-2017  Dr. Jürgen Sauermann
6 
7     This program is free software: you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation, either version 3 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #ifndef __TOKEN_HH_DEFINED__
22 #define __TOKEN_HH_DEFINED__
23 
24 #include <ostream>
25 #include <vector>
26 
27 #include "Avec.hh"
28 #include "Common.hh"
29 #include "Error_macros.hh"
30 #include "Id.hh"
31 #include "TokenEnums.hh"
32 #include "Value.hh"
33 
34 class Function;
35 class IndexExpr;
36 class Symbol;
37 class Value;
38 class Workspace;
39 
40 /**
    A Token, consisting of a \b tag and a \b value. The \b tag (actually
    already (tag & TV_MASK)) identifies the type of the \b value.
43  */
44 /// One atom of an APL function or expression
45 class Token
46 {
47 public:
48    /// Construct a VOID token. VOID token are used for two purposes: (1) to
49    /// fill positions when e.g. 3 tokens were replaced by 2 tokens during
50    /// parsing, and (2) as return values of user defined functions that
51    /// do not return values.
Token()52    Token()
53    : tag(TOK_VOID) { value.int_vals[0] = 0; }
54 
55    /// copy constructor
56    Token(const Token & other);
57 
58    /// copy \b src into \b this token. leaving APL value pointer in
59    /// src (if any) and add events as needed
60    void copy_1(const Token & src, const char * loc);
61 
62    /// move mutable \b src into \b this token. clears APL value pointer in
63    /// src (if any) and add events as needed
64    void move_1(Token & src, const char * loc);
65 
66    /// move const \b src into \b this token. clears APL value pointer in
67    /// src (if any) and add events as needed
68    void move_2(const Token & src, const char * loc);
69 
70    /// Construct a token without a value
Token(TokenTag tg)71    Token(TokenTag tg)
72    : tag(tg) { Assert(get_ValueType() == TV_NONE);   value.int_vals[0] = 0; }
73 
74    /// Construct a token for a \b Function.
Token(TokenTag tg,Function * fun)75    Token(TokenTag tg, Function * fun)
76    : tag(tg) { Assert(get_ValueType() == TV_FUN);   value.function = fun; }
77 
78    /// Construct a token for a \b line number
Token(TokenTag tg,Function_Line line)79    Token(TokenTag tg, Function_Line line)
80    : tag(tg) { Assert(tg == TOK_LINE);   value.fun_line = line; }
81 
82    /// Construct a token for an \b error code
Token(TokenTag tg,ErrorCode ec)83    Token(TokenTag tg, ErrorCode ec)
84    : tag(tg) { Assert(tg == TOK_ERROR);   value.int_vals[0] = ec; }
85 
86    /// Construct a token for a \b Symbol
Token(TokenTag tg,Symbol * sp)87    Token(TokenTag tg, Symbol * sp)
88    : tag(tg) { Assert(get_ValueType() == TV_SYM);  value.sym_ptr = sp; }
89 
90    /// Construct a token with tag tg for a UNICODE character. The tag is
91    /// defined in Avec.def. This token in temporary in the sense that
92    /// get_ValueType() can be anything rather than TV_CHAR. The value
93    /// for the token (if any) will be added later (after parsing it).
Token(TokenTag tg,Unicode uni)94    Token(TokenTag tg, Unicode uni)
95    : tag(tg) { value.char_val = uni; }
96 
97    /// Construct a token for a single integer value.
Token(TokenTag tg,int64_t ival)98    Token(TokenTag tg, int64_t ival)
99    : tag(tg) { value.int_vals[0] = ival; }
100 
101    /// Construct a token for a single floating point value.
Token(TokenTag tg,APL_Float flt)102    Token(TokenTag tg, APL_Float flt)
103    : tag(tg) { value.float_vals[0] = flt; }
104 
105    /// Construct a token for a single complex value.
Token(TokenTag tg,APL_Float r,APL_Float i)106    Token(TokenTag tg, APL_Float r, APL_Float i)
107    : tag(tg)
108      {
109        value.float_vals[0] = r;
110        value.float_vals[1] = i;
111      }
112 
113    /// Construct a token for an APL value.
Token(TokenTag tg,Value_P vp)114    Token(TokenTag tg, Value_P vp)
115    : tag(tg)
116    { Assert1(get_ValueType() == TV_VAL);
117      Assert(!!vp);   new (&value.apl_val) Value_P(vp); }
118 
119    /// Construct a token for an index
120    Token(TokenTag tg, IndexExpr & idx);
121 
122    /// destructor
~Token()123    ~Token()
124      { extract_apl_val("~Token()");  }
125 
126    /// swap this and \b other
Hswap(Token & other)127    inline void Hswap(Token & other)
128       { ::Hswap(tag, other.tag);
129         ::Hswap(value.int_vals[0], other.value.int_vals[0]);
130         ::Hswap(value.int_vals[1], other.value.int_vals[1]);
131       }
132 
133    /// return the TokenValueType of this token.
get_ValueType() const134    TokenValueType get_ValueType() const
135       { return TokenValueType(tag & TV_MASK); }
136 
137    /// return the TokenClass of this token.
get_Class() const138    TokenClass get_Class() const
139       { return TokenClass(tag & TC_MASK); }
140 
141    /// return the Id of this token.
get_Id() const142    Id get_Id() const
143       { return Id(tag >> 16); }
144 
145    /// return the tag of this token
get_tag() const146    const TokenTag get_tag() const   { return tag; }
147 
148    /// return the Unicode value of this token
get_char_val() const149    Unicode get_char_val() const
150       { Assert(get_ValueType() == TV_CHAR);   return value.char_val; }
151 
152    /// return the integer value of this token
get_int_val() const153    int64_t get_int_val() const
154       { Assert(get_ValueType() == TV_INT);   return value.int_vals[0]; }
155 
156    /// return the second integer value of this token
get_int_val2() const157    int64_t get_int_val2() const
158       { return value.int_vals[1]; }
159 
160    /// return the error code value of this token
get_ErrorCode() const161    ErrorCode get_ErrorCode() const
162       { Assert1(get_tag() == TOK_ERROR);
163         Assert1(get_ValueType() == TV_INT);
164         return ErrorCode(value.int_vals[0]); }
165 
166    /// set the integer value of this token
set_int_val(int64_t val)167    void set_int_val(int64_t val)
168       { Assert(get_ValueType() == TV_INT);   value.int_vals[0] = val; }
169 
170    /// set the second integer value of this token
set_int_val2(int64_t val)171    void set_int_val2(int64_t val)
172       { value.int_vals[1] = val; }
173 
174    /// return the float value of this token
get_flt_val() const175    APL_Float get_flt_val() const
176       { Assert(get_ValueType() == TV_FLT);   return value.float_vals[0]; }
177 
178    /// return the complex real value of this token
get_cpx_real() const179    APL_Float get_cpx_real() const
180       { Assert(get_ValueType() == TV_CPX);   return value.float_vals[0]; }
181 
182    /// return the complex imag value of this token
get_cpx_imag() const183    APL_Float get_cpx_imag() const
184       { Assert(get_ValueType() == TV_CPX);   return value.float_vals[1]; }
185 
186    /// return the Symbol * value of this token
get_sym_ptr() const187    Symbol * get_sym_ptr() const
188       { Assert(get_ValueType() == TV_SYM);   return value.sym_ptr; }
189 
190    /// return the Function_Line value of this token
get_fun_line() const191    Function_Line get_fun_line() const
192       { Assert(get_ValueType() == TV_LIN);   return value.fun_line; }
193 
194    /// return true iff \b this token has no value
is_void() const195    bool is_void() const
196       { return (get_ValueType() == TV_NONE); }
197 
198    /// return true iff \b this token is an apl value
is_apl_val() const199    bool is_apl_val() const
200       { return (get_ValueType() == TV_VAL); }
201 
202    /// return the Value_P value of this token. The token could be TOK_NO_VALUE;
203    /// in that case VALUE_ERROR is thrown.
get_apl_val() const204    Value_P get_apl_val() const
205       { if (is_apl_val())   return value._apl_val();   VALUE_ERROR; }
206 
207    /// return the address of the Value_P value of this token.
get_apl_valp() const208    Value_P * get_apl_valp() const
209       { if (is_apl_val())   return &value._apl_val();   VALUE_ERROR; }
210 
211    /// clear this token, properly clearing Value token
clear(const char * loc)212    void clear(const char * loc)
213       {
214          if (is_apl_val())   value.apl_val.reset();
215          new (this) Token();
216       }
217 
218    /// return the axis specification of this token (expect non-zero axes)
get_nonzero_axes() const219    Value_P get_nonzero_axes() const
220       { Assert1(!!value.apl_val && (get_tag() == TOK_AXES));
221         return value._apl_val(); }
222 
223    /// return the axis specification of this token
get_axes() const224    Value_P get_axes() const
225       { Assert1(get_tag() == TOK_AXES);  return value._apl_val(); }
226 
227    /// set the Value_P value of this token
set_apl_val(Value_P val)228    void set_apl_val(Value_P val)
229       { Assert(get_ValueType() == TV_VAL);   value._apl_val() = val; }
230 
231    /// return the IndexExpr value of this token
get_index_val() const232    IndexExpr & get_index_val() const
233       { Assert(get_ValueType() == TV_INDEX);   return *value.index_val; }
234 
235    /// return true if \b this token is a function (or operator)
is_function() const236    bool is_function() const
237       { return (get_ValueType() == TV_FUN); }
238 
239    /// return the Function * value of this token
get_function() const240    Function * get_function() const
241       { if (!is_function())   SYNTAX_ERROR;   return value.function; }
242 
243    /// return value usage counter
244    int value_use_count() const;
245 
246    /// clear the Value_P value (if any) of this token, updating
247    /// its refcount as needed
248    void extract_apl_val(const char * loc);
249 
250    /// clear the Value_P (if any) without updating its refcount. Return
251    /// the old Value * that was overridden
252    Value * extract_and_keep(const char * loc);
253 
254    /// change the tag (within the same TokenValueType)
255    void ChangeTag(TokenTag new_tag);
256 
257    /// helper function to print a function.
258    ostream & print_function(ostream & out) const;
259 
260    /// helper function to print an APL value
261    ostream & print_value(ostream & out) const;
262 
263    /// show trace output for this token
264    void show_trace(ostream & out, const UCS_string & fun_name,
265                    Function_Line line) const;
266 
267    /// the Quad_CR representation of the token.
268    UCS_string canonical(PrintStyle style) const;
269 
270    /// the tag in readable form (TOK_...)
271    UCS_string tag_name() const;
272 
273    /// print the token to \b out in the format used by print_error_info().
274    /// return the number of characters printed.
275    int error_info(UCS_string & out) const;
276 
277    /// copy src to \b this token, updating ref counts for APL values
278    void copy_N(const Token & src);
279 
280    /// return a brief token class name for debugging purposes
281    static const char * short_class_name(TokenClass cls);
282 
283    /// the optional value of the token.
284    union sval
285       {
286         Unicode         char_val;        ///< the Unicode for CTV_CHARTV_
287         APL_Integer     int_vals[2];     ///< the integer for TV_INT
288         APL_Float_Base  float_vals[2];   ///< the doubles for TV_FLT and TV_CPX
289         Symbol        * sym_ptr;         ///< the symbol for TV_SYM
290         Function_Line   fun_line;        ///< the function line for TV_LIN
291         IndexExpr     * index_val;       ///< the index for TV_INDEX
292         Function      * function;        ///< the function for TV_FUN
293         Value_P_Base    apl_val;         ///< the APL value for TV_VAL
294 
295         /// a shortcut for accessing apl_val
_apl_val() const296         Value_P & _apl_val() const
297            { return reinterpret_cast<Value_P &>
298                     (const_cast<Value_P_Base &>(apl_val)); }
299       };
300 
301    /// the name of \b tc
302    static const char * class_name(TokenClass tc);
303 
304 protected:
305    /// The tag indicating the type of \b this token
306    TokenTag tag;
307 
308    /// The value of \b this token
309    sval value;
310 
311    /// helper function to print Quad-function (system function or variable).
312    ostream & print_quad(ostream & out) const;
313 };
314 //-----------------------------------------------------------------------------
315 /// A sequence of Token
316 class Token_string : public  std::vector<Token>
317 {
318 public:
319    /// construct an empty string
Token_string()320    Token_string()   {}
321 
322    /// construct a string of \b len Token, starting at \b data.
Token_string(const Token * data,ShapeItem len)323    Token_string(const Token * data, ShapeItem len)
324       { loop(l, len)   push_back(data[l]); }
325 
326    /// construct a string of \b len Token from another token string
Token_string(const Token_string & other,uint32_t pos,uint32_t len)327    Token_string(const Token_string & other, uint32_t pos, uint32_t len)
328       { loop(l, len)   push_back(other[pos++]); }
329 
330    /// reversde the token order from \b from to \b to (including)
331    void reverse_from_to(ShapeItem from, ShapeItem to);
332 
333    /// print this token string
334    void print(ostream & out, bool details) const;
335 
336 private:
337    /// prevent accidental copying
338    Token_string & operator =(const Token_string & other);
339 };
340 //-----------------------------------------------------------------------------
/** a token with its location information. For tokens copied from a function
    body: low = high = PC. For tokens from a reduction, low is the low location
    of the first token and high is the high location of the last token of the
    token range that led to e.g. a result token.
    A Token and its position (in a Token_string)
 */
347 /// A Token and its location information (position in a Token_string)
348 struct Token_loc
349 {
350    /// constructor: invalid Token_loc
Token_locToken_loc351    Token_loc()
352    : pc(Function_PC_invalid)
353    {}
354 
355    /// constructor: invalid Token with valid loc
Token_locToken_loc356    Token_loc(Function_PC _pc)
357    : pc(_pc)
358    {}
359 
360    /// constructor: valid Token with valid loc
Token_locToken_loc361    Token_loc(const Token & t, Function_PC _pc)
362    : tok(t),
363      pc(_pc)
364    {}
365 
366    /// copy this Token_loc to \b other
copyToken_loc367    void copy(const Token_loc & other, const char * loc)
368       {
369         pc = other.pc;
370         tok.copy_1(other.tok, loc);
371       }
372 
373    /// the token
374    Token tok;
375 
376    /// the PC of the leftmost (highest PC) token
377    Function_PC pc;
378 };
379 //-----------------------------------------------------------------------------
380 
381 #endif // __TOKEN_HH_DEFINED__
382