1 /*===========================================================================
2  *
3  *                            PUBLIC DOMAIN NOTICE
4  *               National Center for Biotechnology Information
5  *
6  *  This software/database is a "United States Government Work" under the
7  *  terms of the United States Copyright Act.  It was written as part of
8  *  the author's official duties as a United States Government employee and
9  *  thus cannot be copyrighted.  This software/database is freely available
10  *  to the public for use. The National Library of Medicine and the U.S.
11  *  Government have not placed any restriction on its use or reproduction.
12  *
13  *  Although all reasonable efforts have been taken to ensure the accuracy
14  *  and reliability of the software and data, the NLM and the U.S.
15  *  Government do not and cannot warrant the performance or results that
16  *  may be obtained by using this software or data. The NLM and the U.S.
17  *  Government disclaim all warranties, express or implied, including
18  *  warranties of performance, merchantability or fitness for any particular
19  *  purpose.
20  *
21  *  Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  */
26 
27 #include "AST.hpp"
28 
29 #include <strtol.h>
30 #include <ctype.h>
31 #include <os-native.h>
32 
33 #include <klib/symbol.h>
34 #include <klib/printf.h>
35 #include <klib/rc.h>
36 
37 #include "../vdb/schema-parse.h"
38 #include "../vdb/dbmgr-priv.h"
39 #include "../vdb/schema-expr.h"
40 
41 using namespace ncbi::SchemaParser;
42 #define YYDEBUG 1
43 #include "schema-ast-tokens.h"
44 
45 #include "ASTBuilder.hpp"
46 
47 using namespace std;
48 
49 // AST
50 
AST()51 AST :: AST ()
52 : ParseTree ( Token ( PT_EMPTY ) )
53 {
54 }
55 
AST(Token::TokenType p_tokenType)56 AST :: AST ( Token :: TokenType p_tokenType ) // no-value token
57 : ParseTree ( Token ( p_tokenType ) )
58 {
59 }
60 
AST(const Token * p_token)61 AST :: AST ( const Token * p_token )
62 : ParseTree ( * p_token )
63 {
64 }
65 
AST(const Token * p_token,AST * p_child1)66 AST :: AST ( const Token * p_token, AST * p_child1 )
67 : ParseTree ( * p_token )
68 {
69     assert ( p_child1 != 0 );
70     AddNode ( p_child1 );
71 }
72 
AST(const Token * p_token,AST * p_child1,AST * p_child2)73 AST :: AST ( const Token * p_token, AST * p_child1, AST * p_child2 )
74 : ParseTree ( * p_token )
75 {
76     assert ( p_child1 != 0 );
77     AddNode ( p_child1 );
78     assert ( p_child2 != 0 );
79     AddNode ( p_child2 );
80 }
81 
AST(const Token * p_token,AST * p_child1,AST * p_child2,AST * p_child3)82 AST :: AST ( const Token * p_token, AST * p_child1, AST * p_child2, AST * p_child3 )
83 : ParseTree ( * p_token )
84 {
85     assert ( p_child1 != 0 );
86     AddNode ( p_child1 );
87     assert ( p_child2 != 0 );
88     AddNode ( p_child2 );
89     assert ( p_child3 != 0 );
90     AddNode ( p_child3 );
91 }
92 
AST(const Token * p_token,AST * p_child1,AST * p_child2,AST * p_child3,AST * p_child4,AST * p_child5)93 AST :: AST ( const Token * p_token,
94              AST * p_child1,
95              AST * p_child2,
96              AST * p_child3,
97              AST * p_child4,
98              AST * p_child5 )
99 : ParseTree ( * p_token )
100 {
101     assert ( p_child1 != 0 );
102     AddNode ( p_child1 );
103     assert ( p_child2 != 0 );
104     AddNode ( p_child2 );
105     assert ( p_child3 != 0 );
106     AddNode ( p_child3 );
107     assert ( p_child4 != 0 );
108     AddNode ( p_child4 );
109     assert ( p_child5 != 0 );
110     AddNode ( p_child5 );
111 }
112 
AST(const Token * p_token,AST * p_child1,AST * p_child2,AST * p_child3,AST * p_child4)113 AST :: AST ( const Token * p_token,
114              AST * p_child1,
115              AST * p_child2,
116              AST * p_child3,
117              AST * p_child4 )
118 : ParseTree ( * p_token )
119 {
120     assert ( p_child1 != 0 );
121     AddNode ( p_child1 );
122     assert ( p_child2 != 0 );
123     AddNode ( p_child2 );
124     assert ( p_child3 != 0 );
125     AddNode ( p_child3 );
126     assert ( p_child4 != 0 );
127     AddNode ( p_child4 );
128 }
129 
AST(const Token * p_token,AST * p_child1,AST * p_child2,AST * p_child3,AST * p_child4,AST * p_child5,AST * p_child6)130 AST :: AST ( const Token * p_token,
131              AST * p_child1,
132              AST * p_child2,
133              AST * p_child3,
134              AST * p_child4,
135              AST * p_child5,
136              AST * p_child6 )
137 : ParseTree ( * p_token )
138 {
139     assert ( p_child1 != 0 );
140     AddNode ( p_child1 );
141     assert ( p_child2 != 0 );
142     AddNode ( p_child2 );
143     assert ( p_child3 != 0 );
144     AddNode ( p_child3 );
145     assert ( p_child4 != 0 );
146     AddNode ( p_child4 );
147     assert ( p_child5 != 0 );
148     AddNode ( p_child5 );
149     assert ( p_child6 != 0 );
150     AddNode ( p_child6 );
151 }
152 
153 void
AddNode(AST * p_child)154 AST :: AddNode ( AST * p_child )
155 {
156     AddChild ( p_child );
157 }
158 
159 void
AddNode(const Token * p_child)160 AST :: AddNode ( const Token * p_child )
161 {
162     AddChild ( new AST ( p_child ) );
163 }
164 
165 // AST_FQN
166 
AST_FQN(const Token * p_token)167 AST_FQN :: AST_FQN ( const Token* p_token )
168 :   AST ( p_token ),
169     m_version ( 0 )
170 {
171     assert ( p_token -> GetType () == PT_IDENT );
172 }
173 
174 uint32_t
NamespaceCount() const175 AST_FQN :: NamespaceCount() const
176 {
177     uint32_t count = ChildrenCount ();
178     return count > 0 ? ChildrenCount () - 1 : 0;
179 }
180 
181 void
GetIdentifier(String & p_str) const182 AST_FQN :: GetIdentifier ( String & p_str ) const
183 {
184     uint32_t count = ChildrenCount ();
185     if ( count > 0 )
186     {
187         StringInitCString ( & p_str, GetChild ( count - 1 ) -> GetTokenValue () );
188     }
189     else
190     {
191         CONST_STRING ( & p_str, "" );
192     }
193 }
194 
195 void
GetFullName(char * p_buf,size_t p_bufSize) const196 AST_FQN :: GetFullName ( char* p_buf, size_t p_bufSize ) const
197 {
198     GetPartialName ( p_buf, p_bufSize, ChildrenCount () );
199 }
200 
201 void
GetPartialName(char * p_buf,size_t p_bufSize,uint32_t p_lastMember) const202 AST_FQN :: GetPartialName ( char* p_buf, size_t p_bufSize, uint32_t p_lastMember ) const
203 {
204     uint32_t count = ChildrenCount ();
205     if ( p_lastMember < count )
206     {
207         count = p_lastMember + 1;
208     }
209     size_t offset = 0;
210     for ( uint32_t i = 0 ; i < count; ++ i )
211     {
212         size_t num_writ;
213         rc_t rc = string_printf ( p_buf + offset, p_bufSize - offset - 1, & num_writ, "%s%s",
214                                   GetChild ( i ) -> GetTokenValue (),
215                                   i == count - 1 ? "" : ":" );
216         offset += num_writ;
217         if ( rc != 0 )
218         {
219             break;
220         }
221     }
222 
223     p_buf [ p_bufSize - 1 ] = 0;
224 }
225 
226 void
SetVersion(const char * p_version)227 AST_FQN :: SetVersion ( const char* p_version )
228 {   // assume the token comes from a scanner which guarantees correctness
229     assert ( p_version != 0 );
230     assert ( p_version [ 0 ] == '#' );
231     const char* str = p_version + 1;
232     uint32_t len = string_measure ( str, 0 );
233     const char *dot = string_chr ( str, len, '.' );
234     m_version = strtou32 ( str, 0, 10 ) << 24;
235     if ( dot != 0 )
236     {
237         str = dot + 1;
238         len = string_measure ( str, 0 );
239         dot = string_chr ( str, len, '.' );
240         m_version |= strtou32 ( str, 0, 10 ) << 16;
241         if ( dot != 0 )
242         {
243             m_version |= strtou32 ( dot + 1, 0, 10 );
244         }
245     }
246 }
247 
248 AST_FQN *
ToFQN(AST * p_ast)249 ncbi :: SchemaParser :: ToFQN ( AST * p_ast)
250 {
251     assert ( p_ast != 0 );
252     AST_FQN * ret = dynamic_cast < AST_FQN * > ( p_ast );
253     assert ( ret != 0 );
254     return ret;
255 }
256 
257 const AST_FQN *
ToFQN(const AST * p_ast)258 ncbi :: SchemaParser :: ToFQN ( const AST * p_ast)
259 {
260     assert ( p_ast != 0 );
261     const AST_FQN * ret = dynamic_cast < const AST_FQN * > ( p_ast );
262     assert ( ret != 0 );
263     return ret;
264 }
265 
266 // AST_Expr
267 
AST_Expr(const Token * p_token)268 AST_Expr :: AST_Expr ( const Token* p_token )
269 : AST ( p_token )
270 {
271 }
272 
AST_Expr(AST_FQN * p_fqn)273 AST_Expr :: AST_Expr ( AST_FQN* p_fqn )
274 : AST ( & p_fqn -> GetToken () )
275 {
276     AddNode ( p_fqn );
277 }
278 
AST_Expr(AST_Expr * p_fqn)279 AST_Expr :: AST_Expr ( AST_Expr* p_fqn )
280 {
281     AddNode ( p_fqn );
282 }
283 
AST_Expr(Token::TokenType p_type)284 AST_Expr :: AST_Expr ( Token :: TokenType p_type )    // '@' etc
285 {
286     SetToken ( Token ( p_type ) );
287 }
288 
289 SExpression *
EvaluateConst(ASTBuilder & p_builder) const290 AST_Expr :: EvaluateConst ( ASTBuilder & p_builder ) const
291 {
292     SExpression * ret = MakeExpression ( p_builder );
293     if ( ret != 0 )
294     {
295         switch ( ret -> var )
296         {
297         case eConstExpr:
298             break;
299         case eVectorExpr:
300             // MakeVectorConstant() makes sure all elements are const
301             break;
302         default:
303             p_builder . ReportError ( GetLocation (), "Not a constant expression" );
304             SExpressionWhack ( ret );
305             ret = 0;
306             break;
307         }
308     }
309     return ret;
310 }
311 
312 static
SSymExprMake(ASTBuilder & p_builder,uint32_t p_type,const KSymbol * p_sym)313 SExpression * SSymExprMake ( ASTBuilder & p_builder, uint32_t p_type, const KSymbol* p_sym )
314 {
315     SSymExpr *x = p_builder . Alloc < SSymExpr > ();
316     if ( x == 0 )
317     {
318         return 0;
319     }
320 
321     x -> dad . var = p_type;
322     atomic32_set ( & x -> dad . refcount, 1 );
323     x -> _sym = p_sym;
324     x -> alt = false;
325 
326     return & x -> dad;
327 }
328 
329 SExpression *
MakeSymExpr(ASTBuilder & p_builder,const KSymbol * p_sym) const330 AST_Expr :: MakeSymExpr ( ASTBuilder & p_builder, const KSymbol* p_sym ) const
331 {
332     if ( p_sym != 0 )
333     {
334         switch ( p_sym -> type )
335         {
336         case eSchemaParam :
337         case eFactParam :
338             return SSymExprMake ( p_builder, eIndirectExpr, p_sym );
339         case eProduction:
340             return SSymExprMake ( p_builder, eProdExpr, p_sym );
341         case eFuncParam :
342             return SSymExprMake ( p_builder, eParamExpr, p_sym );
343         case eColumn :
344             return SSymExprMake ( p_builder, eColExpr, p_sym );
345         case ePhysMember:
346             return SSymExprMake ( p_builder, ePhysExpr, p_sym );
347         case eForward:
348         case eVirtual:
349             return SSymExprMake ( p_builder, eFwdExpr, p_sym );
350         case eConstant:
351         {
352             const SConstant * cnst = reinterpret_cast < const SConstant * > ( p_sym -> u . obj );
353             assert ( cnst -> expr != NULL );
354             atomic32_inc ( & const_cast < SExpression * > ( cnst -> expr ) -> refcount );
355             return const_cast < SExpression * > ( cnst -> expr );
356         }
357         case eFunction :
358             p_builder . ReportError ( GetLocation (), "Function expressions are not yet implemented" );
359             break;
360         default:
361             p_builder . ReportError ( GetLocation (), "Object cannot be used in this context", p_sym -> name );
362             break;
363         }
364     }
365     return 0;
366 }
367 
/* hex_to_int
 *  converts one hexadecimal digit character to its numeric value
 *  'c' must already be known to be a hex digit ( 0-9, a-f, A-F )
 */
static
unsigned int CC hex_to_int ( char c )
{
    int value;
    if ( c <= '9' )
    {
        value = c - '0';
    }
    else if ( c >= 'a' )
    {
        value = c - 'a' + 10;
    }
    else
    {
        value = c - 'A' + 10;
    }

    assert ( 0 <= value && value < 16 );
    return value;
}
386 
387 SExpression *
MakeUnsigned(ASTBuilder & p_builder) const388 AST_Expr :: MakeUnsigned ( ASTBuilder & p_builder ) const
389 {
390     assert ( GetTokenType () == PT_UINT );
391     SConstExpr * x = p_builder . Alloc < SConstExpr > ( sizeof * x - sizeof x -> u + sizeof x -> u . u64 [ 0 ] );
392     if ( x != 0 )
393     {
394         assert ( ChildrenCount () == 1 );
395         const char * val = GetChild ( 0 ) -> GetTokenValue ();
396         uint64_t i64 = 0;
397         switch ( GetChild ( 0 ) -> GetTokenType () )
398         {
399         case DECIMAL:
400             {
401                 uint32_t i = 0;
402                 while ( val [ i ] != 0 )
403                 {
404                     i64 *= 10;
405                     i64 += val [ i ] - '0';
406                     ++ i;
407                 }
408             }
409             break;
410         case HEX:
411             {
412                 uint32_t i = 2;
413                 while ( val [ i ] != 0 )
414                 {
415                     i64 <<= 4;
416                     i64 += hex_to_int ( val [ i ] );
417                     ++ i;
418                 }
419             }
420             break;
421         case OCTAL:
422             {
423                 uint32_t i = 1;
424                 while ( val [ i ] != 0 )
425                 {
426                     i64 <<= 3;
427                     i64 += val [ i ] - '0';
428                     ++ i;
429                 }
430             }
431             break;
432         default:
433             assert ( 0 );
434         }
435 
436         x -> u . u64 [ 0 ] = i64;
437         x -> dad . var = eConstExpr;
438         atomic32_set ( & x -> dad . refcount, 1 );
439         x -> td . type_id = p_builder . IntrinsicTypeId ( "U64" );
440         x -> td . dim = 1;
441 
442         return & x -> dad;
443     }
444     return 0;
445 }
446 
447 SExpression *
MakeFloat(ASTBuilder & p_builder) const448 AST_Expr :: MakeFloat ( ASTBuilder & p_builder ) const
449 {
450     assert ( GetTokenType () == FLOAT_ || GetTokenType () == EXP_FLOAT );
451     SConstExpr * x = p_builder . Alloc < SConstExpr > ( sizeof * x - sizeof x -> u + sizeof x -> u . u64 [ 0 ] );
452     if ( x != 0 )
453     {
454         const char * val = GetTokenValue ();
455         char * end;
456         double f64 = strtod ( val, & end );
457         if ( ( end - val ) != ( int ) string_size ( val ) )
458         {
459             p_builder . ReportError ( GetLocation (), "Invalid floating point constant" );
460             return 0;
461         }
462 
463         x -> u . f64 [ 0 ] = f64;
464         x -> dad . var = eConstExpr;
465         atomic32_set ( & x -> dad . refcount, 1 );
466         x -> td . type_id = p_builder . IntrinsicTypeId ( "F64" );
467         x -> td . dim = 1;
468 
469         return & x -> dad;
470     }
471     return 0;
472 }
473 
474 SExpression *
MakeString(ASTBuilder & p_builder) const475 AST_Expr :: MakeString ( ASTBuilder & p_builder ) const
476 {
477     assert ( GetTokenType () == STRING );
478     const char * val = GetTokenValue ();
479     size_t size = string_size ( val ) - 2; // minus quotes
480     SConstExpr * x = p_builder . Alloc < SConstExpr > ( sizeof * x - sizeof x -> u + size + 1 );
481     if ( x != 0 )
482     {
483         string_copy ( x -> u . utf8, size + 1, val + 1, size ); // add 1 for NUL
484         x -> dad . var = eConstExpr;
485         atomic32_set ( & x -> dad . refcount, 1 );
486         x -> td . type_id = p_builder . IntrinsicTypeId ( "ascii" );
487         x -> td . dim = ( uint32_t ) size;
488 
489         return & x -> dad;
490     }
491     return 0;
492 }
493 
494 SExpression *
MakeEscapedString(ASTBuilder & p_builder) const495 AST_Expr :: MakeEscapedString ( ASTBuilder & p_builder ) const
496 {
497     assert ( GetTokenType () == ESCAPED_STRING );
498     const char * val = GetTokenValue ();
499     size_t size = string_size ( val ) - 2; // minus quotes
500     SConstExpr * x = p_builder . Alloc < SConstExpr > ( sizeof * x - sizeof x -> u + size + 1 );
501     if ( x != 0 )
502     {
503         char * buffer = x -> u . utf8;
504         uint32_t j = 0 ;
505         uint32_t i = 1; // skip the opening quote
506         while ( i <= size )
507         {
508             if ( val [ i ] == '\\' )
509             {
510                 ++ i;
511                 if ( i > size )
512                 {
513                     break;
514                 }
515 
516                 switch ( val [ i ] )
517                 {
518                     /* control characters */
519                 case 'n':
520                     buffer [ j ] = '\n';
521                     break;
522                 case 't':
523                     buffer [ j ] = '\t';
524                     break;
525                 case 'r':
526                     buffer [ j ] = '\r';
527                     break;
528                 case '0':
529                     buffer [ j ] = '\0';
530                     break;
531 
532                 case 'a':
533                     buffer [ j ] = '\a';
534                     break;
535                 case 'b':
536                     buffer [ j ] = '\b';
537                     break;
538                 case 'v':
539                     buffer [ j ] = '\v';
540                     break;
541                 case 'f':
542                     buffer [ j ] = '\f';
543                     break;
544 
545                 case 'x': case 'X':
546                     /* expect 2 additional hex characters */
547                     if ( ( i + 2 ) < size &&
548                         isxdigit ( val [ i + 1 ] ) &&
549                         isxdigit ( val [ i + 2 ] ) )
550                     {
551                         /* go ahead and convert */
552                         buffer [ j ] = ( char )
553                             ( ( hex_to_int ( val [ i + 1 ] ) << 4 ) |
554                                 hex_to_int ( val [ i + 2 ] ) );
555                         i += 2;
556                         break;
557                     }
558                     /* no break */
559 
560                     /* just quote self */
561                 default:
562                     buffer [ j ] = val [ i ];
563                 }
564             }
565             else
566             {
567                 buffer [ j ] = val [ i ];
568             }
569 
570             ++ j;
571             ++ i;
572         }
573         buffer [ j ] = 0;
574 
575         x -> dad . var = eConstExpr;
576         atomic32_set ( & x -> dad . refcount, 1 );
577         x -> td . type_id = p_builder . IntrinsicTypeId ( "ascii" );
578         x -> td . dim = j;
579 
580         return & x -> dad;
581     }
582     return 0;
583 }
584 
585 SExpression *
MakeVectorConstant(ASTBuilder & p_builder) const586 AST_Expr :: MakeVectorConstant ( ASTBuilder & p_builder ) const
587 {
588     assert ( GetTokenType () == PT_CONSTVECT );
589     SVectExpr * x = p_builder . Alloc < SVectExpr > ();
590     if ( x != 0 )
591     {
592         x -> dad . var = eVectorExpr;
593         atomic32_set ( & x -> dad . refcount, 1 );
594         VectorInit ( & x -> expr, 0, 16 );
595 
596         assert ( ChildrenCount () == 1 );
597         const AST & values = * GetChild ( 0 );
598         uint32_t count = values . ChildrenCount ();
599         bool good = true;
600         for ( uint32_t i = 0 ; i != count; ++i )
601         {
602             SExpression * vx = ToExpr ( values . GetChild ( i ) ) -> EvaluateConst ( p_builder );
603             if ( vx == 0 )
604             {
605                 good = false;
606                 break;
607             }
608             if ( vx -> var == eVectorExpr )
609             {
610                 p_builder . ReportError ( GetLocation (), "Nested vector constants are not allowed" );
611                 good = false;
612                 break;
613             }
614             if ( ! p_builder . VectorAppend ( x -> expr, 0, vx ) )
615             {
616                 SExpressionWhack ( vx );
617                 good = false;
618                 break;
619             }
620         }
621 
622         if ( good )
623         {
624             return & x -> dad;
625         }
626 
627         SExpressionWhack ( & x -> dad );
628     }
629     return 0;
630 }
631 
632 SExpression *
MakeBool(ASTBuilder & p_builder) const633 AST_Expr :: MakeBool ( ASTBuilder & p_builder ) const
634 {
635     SConstExpr * x = p_builder . Alloc < SConstExpr > ( sizeof * x - sizeof x -> u + sizeof x -> u . b [ 0 ] );
636     if ( x != 0 )
637     {
638         x -> u . b [ 0 ] = GetTokenType () == KW_true;
639         x -> dad . var = eConstExpr;
640         atomic32_set ( & x -> dad . refcount, 1 );
641         x -> td . type_id = p_builder . IntrinsicTypeId ( "bool" );
642         x -> td . dim = 1;
643 
644         return & x -> dad;
645     }
646     return 0;
647 }
648 
// Builds the expression for a unary minus ( PT_NEGATE ) applied to the only child.
//  - constant operand: the constant is negated in place and the same
//    expression object is returned
//  - indirect ( parameter ) operand: a new eNegateExpr wrapping the operand
//    is returned
//  - any other operand: an error is reported
// Returns 0 on any failure; in that case the operand expression is released.
SExpression *
AST_Expr :: MakeNegate ( ASTBuilder & p_builder ) const
{
    assert ( GetTokenType () == PT_NEGATE );
    assert ( ChildrenCount () == 1 );

    SExpression * xp = ToExpr ( GetChild ( 0 ) ) -> MakeExpression ( p_builder );
    if ( xp != 0 )
    {
        switch ( xp -> var )
        {
        case eConstExpr:
            {
                // NOTE(review): the constant is modified in place; if xp is an expression
                // shared with a named constant ( see MakeSymExpr, eConstant ), that
                // constant would be altered too - confirm this is intended
                SConstExpr * cx = reinterpret_cast < SConstExpr * > ( xp );
                if ( cx -> td . dim < 2 )
                {
                    const SDatatype *dt = VSchemaFindTypeid ( p_builder . GetSchema(), cx -> td . type_id );
                    if ( dt != NULL )
                    {
                        // caches for the signed intrinsic type ids, passed to VSchemaCacheIntrinsicTypeId
                        static atomic32_t s_I8_id;
                        static atomic32_t s_I16_id;
                        static atomic32_t s_I32_id;
                        static atomic32_t s_I64_id;

                        switch ( dt -> domain )
                        {
                        case vtdUint:
                            // an unsigned constant is first retyped as the signed type of the same size ...
                            switch ( dt -> size )
                            {
                            case 8:
                                cx -> td . type_id  = VSchemaCacheIntrinsicTypeId ( p_builder . GetSchema(), & s_I8_id, "I8" );
                                break;
                            case 16:
                                cx -> td . type_id  = VSchemaCacheIntrinsicTypeId ( p_builder . GetSchema(), & s_I16_id, "I16" );
                                break;
                            case 32:
                                cx -> td . type_id  = VSchemaCacheIntrinsicTypeId ( p_builder . GetSchema(), & s_I32_id, "I32" );
                                break;
                            case 64:
                                cx -> td . type_id  = VSchemaCacheIntrinsicTypeId ( p_builder . GetSchema(), & s_I64_id, "I64" );
                                break;
                            }
                            /* no break */
                            // ... and then deliberately falls through to be negated as a signed integer
                        case vtdInt:
                            switch ( dt -> size )
                            {
                            case 8:
                                cx -> u . i8 [ 0 ] = - cx -> u . i8 [ 0 ];
                                break;
                            case 16:
                                cx -> u . i16 [ 0 ] = - cx -> u . i16 [ 0 ];
                                break;
                            case 32:
                                cx -> u . i32 [ 0 ] = - cx -> u . i32 [ 0 ];
                                break;
                            case 64:
                                cx -> u . i64 [ 0 ] = - cx -> u . i64 [ 0 ];
                                break;
                            }
                            break;
                        case vtdFloat:
                            switch ( dt -> size )
                            {
                            case 32:
                                cx -> u . f32 [ 0 ] = - cx -> u . f32 [ 0 ];
                                break;
                            case 64:
                                cx -> u . f64 [ 0 ] = - cx -> u . f64 [ 0 ];
                                break;
                            }
                            break;
                        }

                        // NOTE(review): a constant of any other domain, or of a size not listed
                        // above, is returned unchanged and without an error
                        return xp;
                    }
                    // const expression of an unknown type, must have reported an error already
                }
                else
                {
                    p_builder . ReportError ( GetLocation (), "Negation applied to a non-scalar" );
                }
            }
            break; // failure: xp is released below

        case eIndirectExpr:
            {   /* if type is known, at least verify domain */
                const SSymExpr * sx = ( const SSymExpr* ) xp;
                const SExpression * td = ( ( const SIndirectConst* ) sx -> _sym -> u . obj ) -> td;
                if ( td != NULL )
                {
                    const STypeExpr *tx = ( const STypeExpr* ) td;
                    if ( tx-> dad . var == eTypeExpr && tx -> resolved )
                    {
                        /* cannot have formats, but this is verified elsewhere */
                        if ( tx -> fd . fmt == 0 && tx -> fd . td . dim < 2 )
                        {
                            /* determine domain */
                            const SDatatype *dt = VSchemaFindTypeid ( p_builder . GetSchema(), tx -> fd . td . type_id );
                            if ( dt != NULL && dt -> domain == vtdUint )
                            {
                                // the error is reported, but the negate expression is still created below
                                p_builder . ReportError ( GetLocation (), "Negation applied to an unsigned integer" );
                            }
                        }
                    }
                }

                // wrap the operand; the new expression keeps the pointer to xp
                SUnaryExpr * x = p_builder . Alloc < SUnaryExpr > ();
                if ( x != 0 )
                {
                    x -> dad . var = eNegateExpr;
                    atomic32_set ( & x -> dad . refcount, 1 );
                    x -> expr = xp;
                    return & x -> dad;
                }
            }
            break; // allocation failed: xp is released below

        default:
            p_builder . ReportError ( GetLocation (), "Negation applied to a non-const operand" );
            break;
        }
        // reached only on failure: the operand is not returned or stored anywhere
        SExpressionWhack ( xp );
    }

    return 0;
}
775 
776 SExpression *
MakeCast(ASTBuilder & p_builder) const777 AST_Expr :: MakeCast ( ASTBuilder & p_builder ) const
778 {
779     assert ( GetTokenType () == PT_CASTEXPR );
780     assert ( ChildrenCount () == 2 );
781 
782     STypeExpr * type = p_builder . MakeTypeExpr ( * GetChild ( 0 ) );
783     if ( type != 0 )
784     {
785         SExpression * expr = ToExpr ( GetChild ( 1 ) ) -> MakeExpression ( p_builder );
786         if ( expr != 0 )
787         {
788             SBinExpr * x = p_builder . Alloc < SBinExpr > ();
789             if ( x != 0 )
790             {
791                 x -> dad . var = eCastExpr;
792                 atomic32_set ( & x -> dad . refcount, 1 );
793                 x -> left = & type -> dad;
794                 x -> right = expr;
795                 return & x -> dad;
796             }
797             SExpressionWhack ( expr );
798         }
799         SExpressionWhack ( & type -> dad );
800     }
801 
802     return 0;
803 }
804 
805 static
806 SExpression *
SMembExprMake(ASTBuilder & p_builder,const KSymbol * p_obj,const KSymbol * p_mem,const SExpression * p_rowId)807 SMembExprMake ( ASTBuilder & p_builder, const KSymbol* p_obj, const KSymbol* p_mem, const SExpression * p_rowId )
808 {
809     SMembExpr *x = p_builder . Alloc < SMembExpr > ();
810     if ( x == 0 )
811     {
812         return 0;
813     }
814 
815     x -> dad . var = eMembExpr;
816     atomic32_set ( & x -> dad . refcount, 1 );
817 
818     x -> view = p_builder . GetView ();
819 
820     // link to the corresponding parameter of the current view
821     uint32_t start = VectorStart ( & x -> view -> params );
822     uint32_t count = VectorLength ( & x -> view -> params );
823     for ( uint32_t i = 0; i < count; ++i )
824     {
825         if ( VectorGet ( & x -> view -> params, start + i ) == p_obj )
826         {
827             x -> paramId = start + i;
828             break;
829         }
830     }
831 
832     x -> member = p_mem;
833     x -> rowId = p_rowId;
834 
835     return & x -> dad;
836 }
837 
838 static
839 SExpression *
MakeSMembExpr(ASTBuilder & p_builder,const AST & p_struc,const AST & p_member,const AST_Expr * p_rowId=0)840 MakeSMembExpr ( ASTBuilder & p_builder, const AST & p_struc, const AST & p_member, const AST_Expr * p_rowId = 0 )
841 {
842     assert ( p_struc . GetTokenType () == PT_IDENT );
843     assert ( p_struc . ChildrenCount () == 1 );
844     assert ( p_member . GetTokenType () == PT_IDENT );
845     assert ( p_member . ChildrenCount () == 1 );
846 
847     const KSymbol * sym = p_builder . Resolve ( p_struc . GetChild ( 0 ) -> GetLocation (),
848                                                 p_struc . GetChild ( 0 ) -> GetTokenValue (),
849                                                 true );
850     if ( sym != 0 )
851     {
852         const SExpression * rowId = 0;
853         if ( p_rowId != 0 )
854         {
855             rowId = p_rowId -> MakeExpression ( p_builder );
856             if ( rowId == 0 )
857             {
858                 return 0;
859             }
860         }
861 
862         switch ( sym -> type )
863         {
864         case eTable:
865             {
866                 const STable * t = static_cast < const STable * > ( sym -> u . obj );
867                 // find member . GetChild ( 0 ) in t -> scope
868                 String memName;
869                 StringInitCString ( & memName, p_member . GetChild ( 0 ) -> GetTokenValue () );
870                 const KSymbol * mem = ( const KSymbol* ) BSTreeFind ( & t -> scope, & memName, KSymbolCmp );
871                 if ( mem != 0 )
872                 {
873                     assert ( mem -> type == eColumn || mem -> type == eProduction );
874                     return SMembExprMake ( p_builder, sym, mem, rowId );
875                 }
876                 else
877                 {
878                     p_builder . ReportError ( p_member . GetLocation (), "Column/production not found", memName );
879                 }
880             }
881             break;
882         case eView:
883             {
884                 const SView * v = static_cast < const SView * > ( sym -> u . obj );
885                 // find member . GetChild ( 0 ) in v -> scope
886                 String memName;
887                 StringInitCString ( & memName, p_member . GetChild ( 0 ) -> GetTokenValue () );
888                 const KSymbol * mem = ( const KSymbol* ) BSTreeFind ( & v -> scope, & memName, KSymbolCmp );
889                 if ( mem != 0 )
890                 {
891                     assert ( mem -> type == eColumn || mem -> type == eProduction );
892                     return SMembExprMake ( p_builder, sym, mem, rowId );
893                 }
894                 else
895                 {
896                     p_builder . ReportError ( p_member . GetLocation (), "Column/production not found", memName );
897                 }
898             }
899             break;
900     default:
901             //error
902             break;
903         }
904     }
905 
906     return 0;
907 }
908 
909 SExpression *
MakeMember(ASTBuilder & p_builder) const910 AST_Expr :: MakeMember ( ASTBuilder & p_builder ) const
911 {
912     assert ( GetTokenType () == PT_MEMBEREXPR );
913     assert ( ChildrenCount () == 2 ); // ident, ident
914     return MakeSMembExpr ( p_builder, * GetChild ( 0 ), * GetChild ( 1 ) );
915 }
916 
917 SExpression *
MakeJoin(ASTBuilder & p_builder) const918 AST_Expr :: MakeJoin ( ASTBuilder & p_builder ) const
919 {
920     assert ( GetTokenType () == PT_JOINEXPR );
921     assert ( ChildrenCount () == 3 ); // ident, rowid-expr, ident
922     return MakeSMembExpr ( p_builder,
923                            * GetChild ( 0 ),
924                            * GetChild ( 2 ),
925                            ToExpr ( GetChild ( 1 ) ) );
926 }
927 
928 SExpression *
MakeExpression(ASTBuilder & p_builder) const929 AST_Expr :: MakeExpression ( ASTBuilder & p_builder ) const
930 {
931     switch ( GetTokenType () )
932     {
933     case PT_EMPTY: // expr [ | expr | ... ]
934         {
935             uint32_t count = ChildrenCount ();
936             assert ( count > 0 );
937 
938             SExpression * xp = ToExpr (  GetChild ( 0 ) ) -> MakeExpression ( p_builder );
939             for ( uint32_t i = 0; i < count - 1; ++i )
940             {
941                 SBinExpr * x = p_builder . Alloc < SBinExpr > ();
942                 if ( x == NULL )
943                 {
944                     SExpressionWhack ( xp );
945                     return 0;
946                 }
947                 x -> dad . var = eCondExpr;
948                 atomic32_set ( & x -> dad . refcount, 1 );
949                 x -> left = xp;
950                 x -> right = ToExpr (  GetChild ( i + 1 ) ) -> MakeExpression ( p_builder );
951                 if ( x -> right == 0 )
952                 {
953                     SExpressionWhack ( xp );
954                     return 0;
955                 }
956                 xp = & x -> dad;
957             }
958 
959             return xp;
960         }
961 
962     case PT_UINT:
963         return MakeUnsigned ( p_builder );
964 
965     case FLOAT_:
966     case EXP_FLOAT:
967         return MakeFloat ( p_builder );
968 
969     case STRING:
970         return MakeString ( p_builder );
971     case ESCAPED_STRING:
972         return MakeEscapedString ( p_builder );
973 
974     case PT_CONSTVECT:
975         return MakeVectorConstant ( p_builder );
976 
977     case KW_true:
978     case KW_false:
979         return MakeBool ( p_builder );
980 
981     case PT_IDENT:
982         {
983             const AST_FQN * fqn = ToFQN ( GetChild ( 0 ) );
984             if ( fqn != 0 )
985             {
986                 const KSymbol * sym = p_builder . Resolve ( * fqn, false );
987                 if ( sym != 0 )
988                 {
989                     return MakeSymExpr ( p_builder, sym );
990                 }
991                 else
992                 {   // Resolve() has created all the required namespaces; create the identifier as a forward reference
993                     SSymExpr * x = p_builder . Alloc < SSymExpr > ();
994                     x -> _sym = p_builder . CreateFqnSymbol ( * fqn, eForward, NULL );
995                     if (x -> _sym != 0 )
996                     {
997                         x -> dad . var = eFwdExpr;
998                         atomic32_set ( & x -> dad . refcount, 1 );
999                         x -> alt = false;
1000                         return & x -> dad;
1001                     }
1002                     SExpressionWhack ( & x -> dad );
1003                 }
1004             }
1005         }
1006 
1007     case PHYSICAL_IDENTIFIER_1_0 :
1008         {
1009             const KSymbol * sym = p_builder . Resolve ( GetLocation (), GetTokenValue (), false );
1010             if ( sym != 0 )
1011             {
1012                 return MakeSymExpr ( p_builder, sym );
1013             }
1014             else
1015             {
1016                 SSymExpr * x = p_builder . Alloc < SSymExpr > ();
1017                 x -> _sym = p_builder . CreateConstSymbol ( GetTokenValue (), eForward, NULL );
1018                 if (x -> _sym != 0 )
1019                 {
1020                     x -> dad . var = eFwdExpr;
1021                     atomic32_set ( & x -> dad . refcount, 1 );
1022                     x -> alt = false;
1023                     return & x -> dad;
1024                 }
1025                 SExpressionWhack ( & x -> dad );
1026             }
1027         }
1028         break;
1029 
1030     case '@':
1031         return MakeSymExpr ( p_builder, p_builder . Resolve ( GetLocation (), "@" ) );
1032 
1033     case PT_FUNCEXPR:
1034         {   // schema_parms_opt fqn_opt_vers factory_parms_opt func_parms_opt
1035             assert ( ChildrenCount () == 4 );
1036 
1037             SFuncExpr * fx = p_builder . Alloc < SFuncExpr > ();
1038             if ( fx != 0 )
1039             {
1040                 /* initialize */
1041                 fx -> dad . var = eFuncExpr;
1042                 atomic32_set ( & fx -> dad . refcount, 1 );
1043                 fx -> func = NULL;
1044                 VectorInit ( & fx -> schem, 0, 4 );
1045                 VectorInit ( & fx -> pfact, 0, 8 );
1046                 VectorInit ( & fx -> pfunc, 0, 8 );
1047                 fx -> version = 0;
1048                 fx -> version_requested = false;
1049                 fx -> untyped = false;
1050 
1051                 if ( p_builder . FillSchemaParms ( * GetChild ( 0 ), fx -> schem ) &&
1052                      p_builder . FillFactoryParms ( * GetChild ( 2 ), fx -> pfact ) &&
1053                      p_builder . FillArguments ( * GetChild ( 3 ), fx -> pfunc) )
1054                 {
1055                     assert ( GetChild ( 1 ) -> GetTokenType () == PT_IDENT );
1056                     const AST_FQN & fqn = * ToFQN ( GetChild ( 1 ) );
1057                     const KSymbol * sym = p_builder . Resolve ( fqn, true );
1058                     if ( sym != 0 )
1059                     {
1060                         const SNameOverload * vf = static_cast < const SNameOverload * > ( sym -> u . obj );
1061                         switch ( vf -> name -> type )
1062                         {
1063                         case eScriptFunc:
1064                             fx -> dad . var = eScriptExpr;
1065                             // fall through
1066                         case eFunction:
1067                             fx -> func = static_cast < const SFunction * > ( p_builder . SelectVersion ( fqn, * sym, SFunctionCmp, & fx -> version ) );
1068                             if ( fx -> func != 0 )
1069                             {
1070                                 fx -> version_requested = fx -> version != 0;
1071                                 return & fx -> dad;
1072                             }
1073                             break;
1074 
1075                         default:
1076                             p_builder . ReportError ( "Not a function", fqn );
1077                             break;
1078                         }
1079                     }
1080                 }
1081                 SExpressionWhack ( & fx -> dad );
1082             }
1083         }
1084         break;
1085 
1086     case PT_NEGATE:
1087         return MakeNegate ( p_builder );
1088 
1089     case PT_CASTEXPR:
1090         return MakeCast ( p_builder );
1091 
1092     case PT_MEMBEREXPR:
1093         return MakeMember ( p_builder );
1094 
1095     case PT_JOINEXPR:
1096         return MakeJoin ( p_builder );
1097 
1098     default:
1099         p_builder . ReportError ( GetLocation (), "Not yet implemented" );
1100         break;
1101     }
1102     return 0;
1103 }
1104 
1105 AST_Expr  *
ToExpr(AST * p_ast)1106 ncbi :: SchemaParser :: ToExpr ( AST * p_ast)
1107 {
1108     assert ( p_ast != 0 );
1109     AST_Expr * ret = dynamic_cast < AST_Expr * > ( p_ast );
1110     assert ( ret != 0 );
1111     return ret;
1112 }
1113 
1114 const AST_Expr *
ToExpr(const AST * p_ast)1115 ncbi :: SchemaParser :: ToExpr ( const AST * p_ast)
1116 {
1117     assert ( p_ast != 0 );
1118     const AST_Expr * ret = dynamic_cast < const AST_Expr * > ( p_ast );
1119     assert ( ret != 0 );
1120     return ret;
1121 }
1122