1 /*-----------------------------------------------------------------------
2 
3 File  : cio_scanner.c
4 
5 Author: Stephan Schulz
6 
7 Contents
8 
9   Implementation of the scanner.
10 
11   Copyright 1998, 1999 by the author.
12   This code is released under the GNU General Public Licence and
13   the GNU Lesser General Public License.
14   See the file COPYING in the main E directory for details.
15   Run "eprover -h" for contact information.
16 
17 Changes
18 
19 <1> Sun Aug 31 13:31:42 MET DST 1997
20     New
21 
22 -----------------------------------------------------------------------*/
23 
24 #include "cio_scanner.h"
25 
26 
27 
28 /*---------------------------------------------------------------------*/
29 /*                        Global Variables                             */
30 /*---------------------------------------------------------------------*/
31 
32 static TokenRepCell token_print_rep[] =
33 {
34    {NoToken,      "No token (probably EOF)"},
35    {WhiteSpace,   "White space (spaces, tabs, newlines...)"},
36    {Comment,      "Comment"},
37    {Ident,        "Identifier not terminating in a number"},
38    {Idnum,        "Identifier terminating in a number"},
39    {SemIdent,     "Interpreted function/predicate name ('$name')"},
40    {String,       "String enclosed in double quotes (\"\")"},
41    {SQString,     "String enclosed in single quote ('')"},
42    {PosInt,       "Integer (sequence of decimal digits) "
43                   "convertable to an 'unsigned long'"},
44    /* May need LargePosInt here... */
45    {OpenBracket,  "Opening bracket ('(')"},
46    {CloseBracket, "Closing bracket (')')"},
47    {OpenCurly,    "Opening curly brace ('{')"},
48    {CloseCurly,   "Closing curly brace ('}')"},
49    {OpenSquare,   "Opening square brace ('[')"},
50    {CloseSquare,  "Closing square brace (']')"},
51    {LesserSign,   "\"Lesser than\" sign ('<')"},
52    {GreaterSign,  "\"Greater than\" sign ('>')"},
53    {EqualSign,    "Equal Predicate/Sign ('=')"},
54    {NegEqualSign, "Negated Equal Predicate ('!=')"},
55    {TildeSign,    "Tilde ('~')"},
56    {Exclamation,  "Exclamation mark ('!')"},
57    {QuestionMark, "Question mark ('?')"},
58    {Comma,        "Comma (',')"},
59    {Semicolon,    "Semicolon (';')"},
60    {Colon,        "Colon (':')"},
61    {Hyphen,       "Hyphen ('-')"},
62    {Plus,         "Plus sign ('+')"},
63    {Mult,         "Multiplication sign ('*')"},
64    {Fullstop,     "Fullstop ('.')"},
65    {Dollar,       "Dollar sign ('$')"},
66    {Slash,        "Slash ('/')"},
67    {Pipe,         "Vertical bar ('|')"},
68    {Ampersand,    "Ampersand ('&')"},
69    {FOFLRImpl,    "Implication/LRArrow ('=>')"},
70    {FOFRLImpl,    "Back Implicatin/RLArrow ('<=')"},
71    {FOFEquiv,     "Equivalence/Double arrow ('<=>')"},
72    {FOFXor,       "Negated Equivalence/Xor ('<~>')"},
73    {FOFNand,      "Nand ('~&')"},
74    {FOFNor,       "Nor ('~|'')"},
75    {NoToken,      NULL}
76 
77 };
78 
79 /*---------------------------------------------------------------------*/
80 /*                      Forward Declarations                           */
81 /*---------------------------------------------------------------------*/
82 
83 
84 /*---------------------------------------------------------------------*/
85 /*                         Internal Functions                          */
86 /*---------------------------------------------------------------------*/
87 
88 
89 /*-----------------------------------------------------------------------
90 //
91 // Function: scan_white()
92 //
93 //   Scan a continous sequence of white space characters.
94 //
95 // Global Variables: -
96 //
97 // Side Effects    : Reads input, changes the scanner state, may cause
98 //                   memory operations by by calling DStrAppendChar().
99 //
100 /----------------------------------------------------------------------*/
101 
scan_white(Scanner_p in)102 static void scan_white(Scanner_p in)
103 {
104    AktToken(in)->tok = WhiteSpace;
105    while(isspace(CurrChar(in)))
106    {
107       DStrAppendChar(AktToken(in)->literal, CurrChar(in));
108       NextChar(in);
109    }
110 }
111 
112 
113 /*-----------------------------------------------------------------------
114 //
115 // Function: scan_ident()
116 //
117 //   Scan an identifier, d.h. an ident or an idnum. Also used for
118 //   completing SemIdents.
119 //
120 // Global Variables: -
121 //
122 // Side Effects    : As for scan_white()
123 //
124 /----------------------------------------------------------------------*/
125 
scan_ident(Scanner_p in)126 static void scan_ident(Scanner_p in)
127 {
128    long numstart = 0,
129         i;
130 
131    for(i=0; isidchar(CurrChar(in)); i++)
132    {
133       if(!numstart && isdigit(CurrChar(in)))
134       {
135     numstart = i;
136       }
137       else if(!isdigit(CurrChar(in)))
138       {
139     numstart = 0;
140       }
141       DStrAppendChar(AktToken(in)->literal, CurrChar(in));
142       NextChar(in);
143    }
144    if(numstart)
145    {
146       AktToken(in)->tok = Idnum;
147       AktToken(in)->numval =
148     strtol(DStrView(AktToken(in)->literal)+numstart, NULL, 10);
149       /* Errors are intentionally ignored to allow arbitrary
150     identifiers */
151    }
152    else
153    {
154       AktToken(in)->tok = Ident;
155       AktToken(in)->numval = 0;
156    }
157 }
158 
159 
160 /*-----------------------------------------------------------------------
161 //
162 // Function: void scan_int()
163 //
164 //   Scan an unsigned integer, i.e. a sequence of digits. If this
165 //   cannot be parsed as an int, it will be interpreted as an
166 //   identifier.
167 //
168 // Global Variables: -
169 //
170 // Side Effects    : As for scan_white(), error if int is to big for
171 //                   unsigned long
172 //
173 /----------------------------------------------------------------------*/
174 
scan_int(Scanner_p in)175 static void scan_int(Scanner_p in)
176 {
177    AktToken(in)->tok = PosInt;
178 
179    while(isdigit(CurrChar(in)))
180    {
181       DStrAppendChar(AktToken(in)->literal, CurrChar(in));
182       NextChar(in);
183    }
184    errno = 0;
185    AktToken(in)->numval =
186       strtol(DStrView(AktToken(in)->literal), NULL, 10);
187    /* strtoul is not available on all systems....*/
188 
189    if(errno)
190    {
191       static char buff[10];
192       char* term=strncpy(buff, DStrView(AktToken(in)->literal), 9);
193       *term = '\0';
194       strtol(buff, NULL, 10);
195       Warning("Number truncated while reading %s. If this happens on 32 bit systems while parsing internal strings, it is harmless an can be ignored",  DStrView(AktToken(in)->literal));
196    }
197 }
198 
199 
200 /*-----------------------------------------------------------------------
201 //
202 // Function: scan_line_comment()
203 //
204 //   Scan a comment starting with # or %.
205 //
206 // Global Variables: -
207 //
208 // Side Effects    : As scan_white()
209 //
210 /----------------------------------------------------------------------*/
211 
scan_line_comment(Scanner_p in)212 static void scan_line_comment(Scanner_p in)
213 {
214    AktToken(in)->tok = Comment;
215 
216    while((CurrChar(in)) != '\n' && (CurrChar(in)!=EOF))
217    {
218       DStrAppendChar(AktToken(in)->literal, CurrChar(in));
219       NextChar(in);
220    }
221    DStrAppendChar(AktToken(in)->literal, '\n');
222    NextChar(in); /* Should be harmless even at EOF */
223 }
224 
225 
226 /*-----------------------------------------------------------------------
227 //
228 // Function: void scan_C_comment()
229 //
230 //   Scan a comment in C-Style.
231 //
232 // Global Variables: -
233 //
234 // Side Effects    : As scan_white()
235 //
236 /----------------------------------------------------------------------*/
237 
scan_C_comment(Scanner_p in)238 static void scan_C_comment(Scanner_p in)
239 {
240    AktToken(in)->tok = Comment;
241 
242    while(!((CurrChar(in) == '*') && (LookChar(in,1) == '/')))
243    {
244       DStrAppendChar(AktToken(in)->literal, CurrChar(in));
245       NextChar(in);
246    }
247    DStrAppendChar(AktToken(in)->literal, CurrChar(in));
248    NextChar(in);
249    DStrAppendChar(AktToken(in)->literal, CurrChar(in));
250    NextChar(in);
251 }
252 
253 
254 /*-----------------------------------------------------------------------
255 //
256 // Function: scan_string()
257 //
258 //   Scan a string (enclosed in "" or ''). Only the value of the strings,
259 //   not the delimiter is stored.
260 //
261 // Global Variables: -
262 //
263 // Side Effects    : As scan_white()
264 //
265 /----------------------------------------------------------------------*/
266 
scan_string(Scanner_p in,char delim)267 static void scan_string(Scanner_p in, char delim)
268 {
269    bool escape = false;
270 
271    AktToken(in)->tok = (delim=='\'')?SQString:String;
272 
273    DStrAppendChar(AktToken(in)->literal, CurrChar(in));
274    NextChar(in);
275    while(escape || (CurrChar(in) != delim))
276    {
277       if(!isprint(CurrChar(in)))
278       {
279     AktTokenError(in,
280              "Non-printable character in string constant",
281              false);
282       }
283       if(CurrChar(in)=='\\')
284       {
285          escape = !escape;
286       }
287       else
288       {
289          escape = false;
290       }
291       DStrAppendChar(AktToken(in)->literal, CurrChar(in));
292       NextChar(in);
293    }
294    DStrAppendChar(AktToken(in)->literal, CurrChar(in));
295    NextChar(in);
296 }
297 
298 
299 /*-----------------------------------------------------------------------
300 //
301 // Function: scan_token()
302 //
303 //   Scans a token into AktToken(in). Does _not_ move the
304 //   AktToken-pointer - this is done only for real (i.e. non white,
305 //   non-comment) tokens in the function NextToken().
306 //   The function assumes that *AktToken(in) is an initialized
307 //   TokenCell which does not contain any outside references.
308 //
309 //
310 // Global Variables: -
311 //
312 // Side Effects    : Reads input, may cause memory operations via
313 //                   DStr-Functions.
314 //
315 /----------------------------------------------------------------------*/
316 
scan_token(Scanner_p in)317 static Token_p scan_token(Scanner_p in)
318 {
319    DStrReset(AktToken(in)->literal);
320    DStrReleaseRef(AktToken(in)->source);
321 
322    AktToken(in)->source      = DStrGetRef(Source(in));
323    AktToken(in)->stream_type = SourceType(in);
324    AktToken(in)->line        = CurrLine(in);
325    AktToken(in)->column      = CurrColumn(in);
326 
327    if(!ischar(CurrChar(in)))
328    {
329       AktToken(in)->tok = NoToken;
330    }
331    else if(isspace(CurrChar(in)))
332    {
333       scan_white(in);
334    }
335    else if(isstartidchar(CurrChar(in)))
336    {
337       scan_ident(in);
338    }
339    else if(isdigit(CurrChar(in)))
340    {
341       scan_int(in);
342    }
343    else if(isstartcomment(CurrChar(in)))
344    {
345       scan_line_comment(in);
346    }
347    else if(CurrChar(in)=='/' && LookChar(in,1) == '*')
348    {
349       scan_C_comment(in);
350    }
351    else if((CurrChar(in)=='"') || (CurrChar(in)=='\''))
352    {
353       scan_string(in, CurrChar(in));
354    }
355    else if((CurrChar(in)=='$') && isidchar(LookChar(in,1)))
356    {
357       DStrAppendChar(AktToken(in)->literal, CurrChar(in));
358       NextChar(in);
359       scan_ident(in);
360       AktToken(in)->tok = SemIdent;
361    }
362    else
363    {
364       switch(CurrChar(in))
365       {
366       case '(':
367             AktToken(in)->tok = OpenBracket;
368             break;
369       case ')':
370             AktToken(in)->tok = CloseBracket;
371             break;
372       case '{':
373             AktToken(in)->tok = OpenCurly;
374             break;
375       case '}':
376             AktToken(in)->tok = CloseCurly;
377             break;
378       case '[':
379             AktToken(in)->tok = OpenSquare;
380             break;
381       case ']':
382             AktToken(in)->tok = CloseSquare;
383             break;
384       case '<':
385             if((LookChar(in,1) == '~' && (LookChar(in,2) == '>')))
386             {
387                DStrAppendChar(AktToken(in)->literal, CurrChar(in));
388                NextChar(in);
389                DStrAppendChar(AktToken(in)->literal, CurrChar(in));
390                NextChar(in);
391                AktToken(in)->tok = FOFXor;
392             }
393             else if(LookChar(in,1) == '=')
394             {
395                DStrAppendChar(AktToken(in)->literal, CurrChar(in));
396                NextChar(in);
397                if(LookChar(in,1) == '>')
398                {
399                   DStrAppendChar(AktToken(in)->literal, CurrChar(in));
400                   NextChar(in);
401                   AktToken(in)->tok = FOFEquiv;
402                }
403                else
404                {
405                   AktToken(in)->tok = FOFRLImpl;
406                }
407             }
408             else
409             {
410                AktToken(in)->tok = LesserSign;
411                break;
412             }
413             break;
414       case '>':
415             AktToken(in)->tok = GreaterSign;
416             break;
417       case '=':
418             if(LookChar(in,1) == '>')
419             {
420                DStrAppendChar(AktToken(in)->literal, CurrChar(in));
421                NextChar(in);
422                AktToken(in)->tok = FOFLRImpl;
423             }
424             else
425             {
426                AktToken(in)->tok = EqualSign;
427             }
428             break;
429       case '~':
430             switch(LookChar(in,1))
431             {
432             case '|':
433                   DStrAppendChar(AktToken(in)->literal, CurrChar(in));
434                   NextChar(in);
435                   AktToken(in)->tok = FOFNor;
436                   break;
437              case '&':
438                   DStrAppendChar(AktToken(in)->literal, CurrChar(in));
439                   NextChar(in);
440                   AktToken(in)->tok = FOFNand;
441                   break;
442 
443             default:
444                   AktToken(in)->tok = TildeSign;
445                   break;
446             }
447             break;
448       case '!':
449             if(LookChar(in,1) == '=')
450             {
451                DStrAppendChar(AktToken(in)->literal, CurrChar(in));
452                NextChar(in);
453                AktToken(in)->tok = NegEqualSign;
454             }
455             else
456             {
457                AktToken(in)->tok = Exclamation;
458             }
459             break;
460       case '?':
461             AktToken(in)->tok = QuestionMark;
462             break;
463       case ',':
464             AktToken(in)->tok = Comma;
465             break;
466       case ';':
467             AktToken(in)->tok = Semicolon;
468             break;
469       case ':':
470             AktToken(in)->tok = Colon;
471             break;
472       case '-':
473             AktToken(in)->tok = Hyphen;
474             break;
475       case '+':
476             AktToken(in)->tok = Plus;
477             break;
478       case '*':
479             AktToken(in)->tok = Mult;
480             break;
481       case '.':
482             AktToken(in)->tok = Fullstop;
483             break;
484       case '|':
485        AktToken(in)->tok = Pipe;
486        break;
487       case '/':
488        AktToken(in)->tok = Slash;
489        break;
490       case '&':
491        AktToken(in)->tok = Ampersand;
492        break;
493       case '$':
494        AktToken(in)->tok = Dollar;
495        break;
496       default:
497        DStrAppendChar(AktToken(in)->literal, CurrChar(in));
498        AktTokenError(in, "Illegal character", false);
499        break;
500       }
501       DStrAppendChar(AktToken(in)->literal, CurrChar(in));
502       NextChar(in);
503    }
504    return AktToken(in);
505 }
506 
507 
508 /*-----------------------------------------------------------------------
509 //
510 // Function: scan_token_follow_includes()
511 //
512 //   Scan a token, follow include directives and pop back empty input
513 /    streams.
514 //
515 // Global Variables: -
516 //
517 // Side Effects    : Read input, manipulate streams.
518 //
519 /----------------------------------------------------------------------*/
520 
scan_token_follow_includes(Scanner_p in)521 Token_p scan_token_follow_includes(Scanner_p in)
522 {
523    scan_token(in);
524    if(in->include_key && (TestInpId(in, in->include_key)))
525    {
526       DStr_p name = DStrAlloc();
527       char*  tptp_source;
528 
529       tptp_source = getenv("TPTP");
530       if(tptp_source)
531       {
532          DStrAppendStr(name, tptp_source);
533          if(DStrLen(name) && (DStrView(name)[DStrLen(name)-1] !='/'))
534          {
535             DStrAppendChar(name,'/');
536          }
537       }
538 
539       scan_token(in);
540       CheckInpTok(in, OpenBracket);
541       scan_token(in);
542       CheckInpTok(in, Identifier|String|SQString);
543       if(TestInpTok(in, Identifier))
544       {
545          DStrAppendDStr(name, AktToken(in)->literal);
546       }
547       else
548       {
549          DStrAppendStr(name, DStrView(AktToken(in)->literal)+1);
550          DStrDeleteLastChar(name);
551       }
552       OpenStackedInput(&(in->source), StreamTypeFile,
553              DStrView(name), true);
554       DStrFree(name);
555       scan_token_follow_includes(in);
556    }
557    else if(in->include_key && TestInpTok(in, NoToken))
558    {
559       if((in->source)->next)
560       {
561     CloseStackedInput(&(in->source));
562          scan_token(in);
563          CheckInpTok(in, CloseBracket);
564          scan_token(in);
565          CheckInpTok(in, Fullstop);
566     scan_token_follow_includes(in);
567       }
568    }
569    return AktToken(in);
570 }
571 
572 /*-----------------------------------------------------------------------
573 //
574 // Function: scan_real_token()
575 //
576 //   Scan tokens until a real token (i.e. not a SkipToken has been
577 //   scanned.
578 //
579 // Global Variables: -
580 //
581 // Side Effects    : Reads input, manipulates streams.
582 //
583 /----------------------------------------------------------------------*/
584 
scan_real_token(Scanner_p in)585 static Token_p scan_real_token(Scanner_p in)
586 {
587    AktToken(in)->skipped = false;
588    DStrReset(AktToken(in)->comment);
589 
590    scan_token_follow_includes(in);
591 
592    while(TestTok(AktToken(in), SkipToken))
593    {
594       AktToken(in)->skipped = true;
595       if(!in->ignore_comments && TestInpTok(in, Comment))
596       {
597     DStrAppendDStr(AktToken(in)->comment, AktToken(in)->literal);
598       }
599       scan_token_follow_includes(in);
600    }
601    return AktToken(in);
602 }
603 
604 
605 /*-----------------------------------------------------------------------
606 //
607 // Function: str_n_element()
608 //
609 //   Test whether the len lenght start of str is contained in the set
610 //   id of strings (encoded in a single string with elements separated
611 //   by |).
612 //
613 // Global Variables: -
614 //
615 // Side Effects    : -
616 //
617 /----------------------------------------------------------------------*/
618 
str_n_element(char * str,char * ids,int len)619 bool str_n_element(char* str, char* ids, int len)
620 {
621    //printf("# str_n_element(%s, %s) = ", str, ids);
622 
623    if(strncmp(str, ids, len)==0)
624    {
625       ids+=len;
626       if(!*ids || (*ids=='|'))
627       {
628          //printf("true\n");
629          return true;
630       }
631    }
632    while(*ids && (*ids != '|'))
633    {
634       ids++;
635    }
636    if(!*ids)
637    {
638       //printf("false\n");
639       return false;
640    }
641    ids++;
642 
643    return str_n_element(str, ids, len);
644 }
645 
646 
647 /*---------------------------------------------------------------------*/
648 /*                         Exported Functions                          */
649 /*---------------------------------------------------------------------*/
650 
651 
652 /*-----------------------------------------------------------------------
653 //
654 // Function: PosRep()
655 //
656 //   Return a pointer to a description of a position in a file. The
657 //   description is valid until the function is called the next time.
658 //
659 // Global Variables: -
660 //
661 // Side Effects    : Sets static variable
662 //
663 /----------------------------------------------------------------------*/
664 
PosRep(StreamType type,DStr_p source,long line,long column)665 char* PosRep(StreamType type, DStr_p source, long line, long column)
666 {
667    static char buff[MAX_ERRMSG_LEN];
668    char        tmp_str[MAX_ERRMSG_LEN];
669 
670 
671    if(type == StreamTypeFile)
672    {
673       assert(strlen(DStrView(source))<=MAXPATHLEN);
674 
675       sprintf(buff, "%s:%ld:(Column %ld):",
676          DStrView(source), line, column);
677    }
678    else
679    {
680       tmp_str[0] = '\0';
681       strcat(tmp_str, type);
682       strcat(tmp_str, ": \"");
683       strncat(tmp_str, DStrView(source), MAXPATHLEN-4);
684       if(strlen(DStrView(source))>MAXPATHLEN-4)
685       {
686     strcat(tmp_str, "...");
687       }
688       strcat(tmp_str, "\"");
689       sprintf(buff, "%s:%ld:(Column %ld):", tmp_str, line, column);
690    }
691 
692    return buff;
693 }
694 
695 
696 /*-----------------------------------------------------------------------
697 //
698 // Function: TokenPosRep()
699 //
700 //   Return a pointer to a description of the position of a token. The
701 //   description is valid until the function or PosRep() is called the
702 //   next time.
703 //
704 // Global Variables: -
705 //
706 // Side Effects    : By PosRep()
707 //
708 /----------------------------------------------------------------------*/
709 
TokenPosRep(Token_p token)710 char* TokenPosRep(Token_p token)
711 {
712   return PosRep(token->stream_type, token->source, token->line,
713       token->column);
714 }
715 
716 
717 /*-----------------------------------------------------------------------
718 //
719 // Function: DescribeToken()
720 //
721 //   Return a pointer to a description of the set of tokens described
722 //   by tok. The caller has to free the space of this description!
723 //
724 // Global Variables: token_print_rep
725 //
726 // Side Effects    : Memory operations
727 //
728 /----------------------------------------------------------------------*/
729 
DescribeToken(TokenType tok)730 char* DescribeToken(TokenType tok)
731 {
732    char*  help;
733    int    i;
734    DStr_p res = DStrAlloc();
735    bool   found = false;
736 
737    for(i=0; token_print_rep[i].rep; i++)
738    {
739       if(tok & token_print_rep[i].key)
740       {
741     DStrAppendStr(res, found ? " or " : "");
742     DStrAppendStr(res, token_print_rep[i].rep);
743     found = true;
744       }
745    }
746    if(!found)
747    {
748       DStrAppendStr(res, token_print_rep[0].rep);
749    }
750    help = DStrCopy(res);
751    DStrFree(res);
752 
753    return help;
754 }
755 
756 /*-----------------------------------------------------------------------
757 //
758 // Function: PrintToken()
759 //
760 //   Print a token (probably only for debugging purposes...
761 //
762 // Global Variables: -
763 //
764 // Side Effects    : Output
765 //
766 /----------------------------------------------------------------------*/
767 
PrintToken(FILE * out,Token_p token)768 void PrintToken(FILE* out, Token_p token)
769 {
770    char* handle;
771 
772    handle = DescribeToken(token->tok);
773    fprintf(out, "Token:    %d = %s\n", (int)token->tok, handle);
774    FREE(handle);
775    fprintf(out, "Position: %s   ", TokenPosRep(token));
776    fprintf(out, "Literal:  %s\n", token->literal?DStrView(token->literal):"");
777    fprintf(out, "Numval:   %6lu   Skipped:  %s\n", token->numval,
778       token->skipped ? "true" : "false");
779    fprintf(out, "Comment:  %s\n", DStrView(token->comment));
780 }
781 
782 
783 /*-----------------------------------------------------------------------
784 //
785 // Function: CreateScanner()
786 //
787 //   Create a new, initialized scanner from which tokens can be read
788 //   immediately.
789 //
790 // Global Variables: -
791 //
792 // Side Effects    : Opens files, memory operations, reads input.
793 //
794 /----------------------------------------------------------------------*/
795 
CreateScanner(StreamType type,char * name,bool ignore_comments,char * default_dir)796 Scanner_p CreateScanner(StreamType type, char *name, bool
797          ignore_comments, char *default_dir)
798 {
799    Scanner_p handle;
800    Stream_p  stream;
801    char      *tmp_name;
802 
803 
804    handle = ScannerCellAlloc();
805    handle->source = NULL;
806    handle->default_dir = DStrAlloc();
807    handle->accu = DStrAlloc();
808    handle->ignore_comments = ignore_comments;
809    handle->include_key = NULL;
810    handle->format = LOPFormat;
811 
812    if((type == StreamTypeFile && strcmp(name,"-")==0)||
813       (type != StreamTypeFile))
814    {
815       stream = OpenStackedInput(&handle->source, type, name, true);
816       assert(stream);
817    }
818    else
819    {
820       assert(type == StreamTypeFile);
821       if(FileNameIsAbsolute(name))
822       {
823          stream = OpenStackedInput(&handle->source, type, name, true);
824          tmp_name = FileNameDirName(name);
825          DStrAppendStr(handle->default_dir, tmp_name);
826          FREE(tmp_name);
827          assert(stream);
828       }
829       else
830       {
831          DStr_p full_file_name = DStrAlloc();
832 
833          if(default_dir)
834          {
835             DStrAppendStr(handle->default_dir, default_dir);
836             assert(!DStrLen(handle->default_dir)||
837                    DStrLastChar(handle->default_dir) =='/');
838          }
839          tmp_name = FileNameDirName(name);
840          DStrAppendStr(handle->default_dir, tmp_name);
841          assert(DStrLen(handle->default_dir)==0 ||
842                 DStrLastChar(handle->default_dir) =='/');
843          FREE(tmp_name);
844          tmp_name = FileNameBaseName(name);
845          DStrAppendStr(full_file_name,
846                        DStrView(handle->default_dir));
847          DStrAppendStr(full_file_name,
848                        tmp_name);
849          FREE(tmp_name);
850          stream = OpenStackedInput(&handle->source, type,
851                                    DStrView(full_file_name), !TPTP_dir);
852          if(!stream)
853          {
854             assert(TPTP_dir);
855             DStrSet(handle->default_dir, TPTP_dir);
856             tmp_name = FileNameDirName(name);
857             DStrAppendStr(handle->default_dir, tmp_name);
858             FREE(tmp_name);
859             tmp_name = FileNameBaseName(name);
860             DStrSet(full_file_name,
861                     DStrView(handle->default_dir));
862             DStrAppendStr(full_file_name,
863                           tmp_name);
864             FREE(tmp_name);
865             stream = OpenStackedInput(&handle->source, type,
866                                       DStrView(full_file_name), true);
867          }
868          DStrFree(full_file_name);
869       }
870    }
871 
872    for(handle->current = 0; handle->current < MAXTOKENLOOKAHEAD;
873      handle->current++)
874    {
875       handle->tok_sequence[handle->current].tok = NoToken;
876       handle->tok_sequence[handle->current].literal = DStrAlloc();
877       handle->tok_sequence[handle->current].comment = DStrAlloc();
878       handle->tok_sequence[handle->current].source      = NULL;
879       handle->tok_sequence[handle->current].stream_type = NULL;
880       scan_real_token(handle);
881    }
882    handle->current = 0;
883    handle->include_pos = NULL;
884    return handle;
885 }
886 
887 /*-----------------------------------------------------------------------
888 //
889 // Function: DestroyScanner()
890 //
891 //   Ensure that the scanner is disposed of cleanly, all files are
892 //   closed and all memory/references are released.
893 //
894 // Global Variables: -
895 //
896 // Side Effects    : Closes file, may cause error if input stack is
897 //                   not empty
898 //
899 /----------------------------------------------------------------------*/
900 
DestroyScanner(Scanner_p junk)901 void DestroyScanner(Scanner_p  junk)
902 {
903    assert(junk);
904    for(junk->current = 0; junk->current < MAXTOKENLOOKAHEAD;
905      junk->current++)
906    {
907       DStrFree(junk->tok_sequence[junk->current].literal);
908       DStrFree(junk->tok_sequence[junk->current].comment);
909       DStrReleaseRef(junk->tok_sequence[junk->current].source);
910    }
911    assert(junk->source);
912    CloseStackedInput(&junk->source);
913    assert(!junk->source);
914    DStrFree(junk->default_dir);
915    DStrFree(junk->accu);
916    if(junk->include_pos)
917    {
918       FREE(junk->include_pos);
919    }
920    ScannerCellFree(junk);
921 }
922 
923 
924 /*-----------------------------------------------------------------------
925 //
926 // Function: ScannerSetFormat()
927 //
928 //   Set the format of the scanner (in particular, guess a format if
929 //
930 // Global Variables:
931 //
932 // Side Effects    :
933 //
934 /----------------------------------------------------------------------*/
935 
ScannerSetFormat(Scanner_p scanner,IOFormat fmt)936 void ScannerSetFormat(Scanner_p scanner, IOFormat fmt)
937 {
938    if(fmt == AutoFormat)
939    {
940       if(TestInpId(scanner, "fof|cnf|tff|include"))
941       {
942          //printf("# TSTP!\n");
943          fmt = TSTPFormat;
944       }
945       else if(TestInpId(scanner, "input_clause|input_formula"))
946       {
947          //printf("# TPTP!\n");
948          fmt = TPTPFormat;
949       }
950       else
951       {
952          //printf("# LOP!\n");
953          fmt = LOPFormat;
954       }
955    }
956    scanner->format = fmt;
957 }
958 
959 
960 /*-----------------------------------------------------------------------
961 //
962 // Function: TestTok()
963 //
964 //   Compares the type of the given token with a list of possible
965 //   tokens. Possibilities are values of type TokenType, possibly
966 //   combined with the bitwise or operator '|'. The test is true if
967 //   the given token matches at least one type from the list.
968 //
969 // Global Variables: -
970 //
971 // Side Effects    : -
972 //
973 /----------------------------------------------------------------------*/
974 
TestTok(Token_p akt,TokenType toks)975 bool TestTok(Token_p akt, TokenType toks)
976 {
977    /* PrintToken(stdout, akt); */
978    return (akt->tok & toks)!=0;
979 }
980 
981 /*-----------------------------------------------------------------------
982 //
983 // Function: TestId()
984 //
985 //   Test whether a given token is of type identifier and is one of
986 //   a set of possible alternatives. This set is given as a single
987 //   C-String, alternatives are separated by the '|' character.
988 //
989 //
990 // Global Variables: -
991 //
992 // Side Effects    : -
993 //
994 /----------------------------------------------------------------------*/
995 
TestId(Token_p akt,char * ids)996 bool TestId(Token_p akt, char* ids)
997 {
998    if(!TestTok(akt, Identifier))
999    {
1000       return false;
1001    }
1002    return str_n_element(DStrView(akt->literal), ids,
1003          DStrLen(akt->literal));
1004 }
1005 
1006 
1007 /*-----------------------------------------------------------------------
1008 //
1009 // Function: TestIdNum()
1010 //
1011 //   As TestId(), but take only the non-numerical-part into account.
1012 //
1013 // Global Variables: -
1014 //
1015 // Side Effects    : -
1016 //
1017 /----------------------------------------------------------------------*/
1018 
TestIdnum(Token_p akt,char * ids)1019 bool TestIdnum(Token_p akt, char* ids)
1020 {
1021    int  i, len=0;
1022    char c;
1023 
1024    if(!TestTok(akt, Idnum))
1025    {
1026       return false;
1027    }
1028    for(i=0; (c=DStrView(akt->literal)[i]); i++)
1029    {
1030       if(!len && isdigit(DStrView(akt->literal)[i]))
1031       {
1032     len = i;
1033       }
1034       else if(!isdigit(DStrView(akt->literal)[i]))
1035       {
1036     len = 0;
1037       }
1038    }
1039    return str_n_element(DStrView(akt->literal), ids, len);
1040 }
1041 
1042 
1043 
1044 /*-----------------------------------------------------------------------
1045 //
1046 // Function: AktTokenError()
1047 //
1048 //   Produce a syntax error at the current token with the given
1049 //   message.
1050 //
1051 // Global Variables: -
1052 //
1053 // Side Effects    : Terminates programm
1054 //
1055 /----------------------------------------------------------------------*/
1056 
AktTokenError(Scanner_p in,char * msg,bool syserr)1057 void AktTokenError(Scanner_p in, char* msg, bool syserr)
1058 {
1059    DStr_p err = DStrAlloc();
1060 
1061    DStrAppendStr(err, TokenPosRep(AktToken(in)));
1062    DStrAppendStr(err, "(just read '");
1063    DStrAppendDStr(err, AktToken(in)->literal);
1064    DStrAppendStr(err, "'): ");
1065    DStrAppendStr(err, msg);
1066    if(syserr)
1067    {
1068       SysError(DStrView(err), SYNTAX_ERROR);
1069    }
1070    else
1071    {
1072       Error(DStrView(err), SYNTAX_ERROR);
1073    }
1074    DStrFree(err); /* Just for symmetry reasons */
1075 }
1076 
1077 
1078 /*-----------------------------------------------------------------------
1079 //
1080 // Function: CheckInpTok()
1081 //
1082 //   Check whether AktTok(in) is of one of the desired types. Produce
1083 //   error if not.
1084 //
1085 // Global Variables: -
1086 //
1087 // Side Effects    : Memory operations, may terminate program.
1088 //
1089 /----------------------------------------------------------------------*/
1090 
CheckInpTok(Scanner_p in,TokenType toks)1091 void CheckInpTok(Scanner_p in, TokenType toks)
1092 {
1093    if(!TestInpTok(in, toks))
1094    {
1095       char* tmp;
1096 
1097       DStrReset(in->accu);
1098       tmp = DescribeToken(toks);
1099       DStrAppendStr(in->accu, tmp);
1100       FREE(tmp);
1101       DStrAppendStr(in->accu, " expected, but ");
1102       tmp = DescribeToken(AktToken(in)->tok);
1103       DStrAppendStr(in->accu, tmp);
1104       FREE(tmp);
1105       DStrAppendStr(in->accu, " read ");
1106       AktTokenError(in, DStrView(in->accu), false);
1107    }
1108 }
1109 
1110 
1111 /*-----------------------------------------------------------------------
1112 //
1113 // Function: CheckInpTokNoSkip()
1114 //
1115 //   As CheckInpTok(), but produce an error if SkipTokens were
1116 //   present.
1117 //
1118 // Global Variables: -
1119 //
1120 // Side Effects    : As CheckInpTok()
1121 //
1122 /----------------------------------------------------------------------*/
1123 
CheckInpTokNoSkip(Scanner_p in,TokenType toks)1124 void CheckInpTokNoSkip(Scanner_p in, TokenType toks)
1125 {
1126    if(AktToken(in)->skipped)
1127    {
1128       char* tmp;
1129 
1130       DStrReset(in->accu);
1131       tmp = DescribeToken(toks);
1132       DStrAppendStr(in->accu, tmp);
1133       FREE(tmp);
1134       DStrAppendStr(in->accu, " expected, but ");
1135       tmp = DescribeToken(SkipToken);
1136       DStrAppendStr(in->accu, tmp);
1137       FREE(tmp);
1138       DStrAppendStr(in->accu, " read ");
1139       AktTokenError(in, DStrView(in->accu), false);
1140    }
1141    CheckInpTok(in, toks);
1142 }
1143 
1144 
1145 /*-----------------------------------------------------------------------
1146 //
1147 // Function:  CheckInpId()
1148 //
1149 //   Check whether AktToken(in) is an identifier with the desired
1150 //   value. Produce error if not.
1151 //
1152 // Global Variables: -
1153 //
1154 // Side Effects    : Memory operations, may terminate program.
1155 //
1156 /----------------------------------------------------------------------*/
1157 
CheckInpId(Scanner_p in,char * ids)1158 void CheckInpId(Scanner_p in, char* ids)
1159 {
1160    if(!TestInpId(in, ids))
1161    {
1162       char* tmp;
1163 
1164       DStrSet(in->accu, "Identifier (");
1165       DStrAppendStr(in->accu, ids);
1166       DStrAppendStr(in->accu, ") expected, but ");
1167       tmp = DescribeToken(AktToken(in)->tok);
1168       DStrAppendStr(in->accu, tmp);
1169       FREE(tmp);
1170       DStrAppendStr(in->accu, "('");
1171       DStrAppendStr(in->accu, DStrView(AktToken(in)->literal));
1172       DStrAppendStr(in->accu, "') read ");
1173       AktTokenError(in, DStrView(in->accu), false);
1174    }
1175 }
1176 
1177 /*-----------------------------------------------------------------------
1178 //
1179 // Function: NextToken()
1180 //
1181 //   Read a new token, switch to the next token in the queue.
1182 //
1183 // Global Variables: -
1184 //
1185 // Side Effects    : Only by calling scan_real_token()
1186 //
1187 /----------------------------------------------------------------------*/
1188 
NextToken(Scanner_p in)1189 void NextToken(Scanner_p in)
1190 {
1191    scan_real_token(in);
1192    in->current = TOKENREALPOS(in->current+1);
1193    /*
1194    printf("Current token:\n");
1195    PrintToken(stdout, AktToken(in));
1196    printf("Next token:\n");
1197    PrintToken(stdout, LookToken(in,1));
1198    printf("SuperNext token:\n");
1199    PrintToken(stdout, LookToken(in,2));
1200    printf("\n");*/
1201 }
1202 
1203 
1204 /*-----------------------------------------------------------------------
1205 //
1206 // Function: ScannerParseInclude()
1207 //
1208 //   Parse a TPTP-Style include statement. Return a scanner for the
1209 //   included file, and put (optional) selected names into
1210 //   name_selector. If the file name is in skip_includes, skip the
1211 //   rest and return NULL.
1212 //
1213 // Global Variables: -
1214 //
1215 // Side Effects    : Reads input.
1216 //
1217 /----------------------------------------------------------------------*/
1218 
ScannerParseInclude(Scanner_p in,StrTree_p * name_selector,StrTree_p * skip_includes)1219 Scanner_p ScannerParseInclude(Scanner_p in, StrTree_p *name_selector,
1220                               StrTree_p *skip_includes)
1221 {
1222    Scanner_p new_scanner = NULL;
1223    char* name;
1224    char* pos_rep;
1225 
1226    pos_rep = SecureStrdup(TokenPosRep(AktToken(in)));
1227    AcceptInpId(in, "include");
1228    AcceptInpTok(in, OpenBracket);
1229    CheckInpTok(in, SQString);
1230    name = DStrCopyCore(AktToken(in)->literal);
1231 
1232    if(!StrTreeFind(skip_includes, name))
1233    {
1234       new_scanner = CreateScanner(StreamTypeFile, name,
1235                                   in->ignore_comments,
1236                                   ScannerGetDefaultDir(in));
1237       ScannerSetFormat(new_scanner, ScannerGetFormat(in));
1238       new_scanner->include_pos = pos_rep;
1239    }
1240    else
1241    {
1242       FREE(pos_rep);
1243    }
1244    FREE(name);
1245    NextToken(in);
1246 
1247    if(TestInpTok(in, Comma))
1248    {
1249       IntOrP dummy;
1250 
1251       dummy.i_val = 0;
1252       NextToken(in);
1253       CheckInpTok(in, Name|PosInt|OpenSquare);
1254 
1255       if(TestInpTok(in, Name|PosInt))
1256       {
1257          StrTreeStore(name_selector, DStrView(AktToken(in)->literal),
1258                       dummy, dummy);
1259          NextToken(in);
1260       }
1261       else
1262       {
1263          AcceptInpTok(in, OpenSquare);
1264          if(!TestInpTok(in, CloseSquare))
1265          {
1266             StrTreeStore(name_selector, DStrView(AktToken(in)->literal),
1267                          dummy, dummy);
1268             AcceptInpTok(in, Name|PosInt);
1269             while(TestInpTok(in, Comma))
1270             {
1271                NextToken(in);
1272                StrTreeStore(name_selector, DStrView(AktToken(in)->literal),
1273                             dummy, dummy);
1274                AcceptInpTok(in, Name|PosInt);
1275             }
1276          }
1277          else /* Empty list - insert full dummy */
1278          {
1279             dummy.i_val = 1;
1280             StrTreeStore(name_selector, "** Not a legal name**",
1281                          dummy, dummy);
1282 
1283          }
1284          AcceptInpTok(in, CloseSquare);
1285       }
1286    }
1287    AcceptInpTok(in, CloseBracket);
1288    AcceptInpTok(in, Fullstop);
1289 
1290    return new_scanner;
1291 }
1292 
1293 
1294 /*---------------------------------------------------------------------*/
1295 /*                        End of File                                  */
1296 /*---------------------------------------------------------------------*/
1297