1 /*-----------------------------------------------------------------------
2
3 File : cio_scanner.c
4
5 Author: Stephan Schulz
6
7 Contents
8
9 Implementation of the scanner.
10
11 Copyright 1998, 1999 by the author.
12 This code is released under the GNU General Public Licence and
13 the GNU Lesser General Public License.
14 See the file COPYING in the main E directory for details.
15 Run "eprover -h" for contact information.
16
17 Changes
18
19 <1> Sun Aug 31 13:31:42 MET DST 1997
20 New
21
22 -----------------------------------------------------------------------*/
23
24 #include "cio_scanner.h"
25
26
27
28 /*---------------------------------------------------------------------*/
29 /* Global Variables */
30 /*---------------------------------------------------------------------*/
31
32 static TokenRepCell token_print_rep[] =
33 {
34 {NoToken, "No token (probably EOF)"},
35 {WhiteSpace, "White space (spaces, tabs, newlines...)"},
36 {Comment, "Comment"},
37 {Ident, "Identifier not terminating in a number"},
38 {Idnum, "Identifier terminating in a number"},
39 {SemIdent, "Interpreted function/predicate name ('$name')"},
40 {String, "String enclosed in double quotes (\"\")"},
41 {SQString, "String enclosed in single quote ('')"},
42 {PosInt, "Integer (sequence of decimal digits) "
43 "convertable to an 'unsigned long'"},
44 /* May need LargePosInt here... */
45 {OpenBracket, "Opening bracket ('(')"},
46 {CloseBracket, "Closing bracket (')')"},
47 {OpenCurly, "Opening curly brace ('{')"},
48 {CloseCurly, "Closing curly brace ('}')"},
49 {OpenSquare, "Opening square brace ('[')"},
50 {CloseSquare, "Closing square brace (']')"},
51 {LesserSign, "\"Lesser than\" sign ('<')"},
52 {GreaterSign, "\"Greater than\" sign ('>')"},
53 {EqualSign, "Equal Predicate/Sign ('=')"},
54 {NegEqualSign, "Negated Equal Predicate ('!=')"},
55 {TildeSign, "Tilde ('~')"},
56 {Exclamation, "Exclamation mark ('!')"},
57 {QuestionMark, "Question mark ('?')"},
58 {Comma, "Comma (',')"},
59 {Semicolon, "Semicolon (';')"},
60 {Colon, "Colon (':')"},
61 {Hyphen, "Hyphen ('-')"},
62 {Plus, "Plus sign ('+')"},
63 {Mult, "Multiplication sign ('*')"},
64 {Fullstop, "Fullstop ('.')"},
65 {Dollar, "Dollar sign ('$')"},
66 {Slash, "Slash ('/')"},
67 {Pipe, "Vertical bar ('|')"},
68 {Ampersand, "Ampersand ('&')"},
69 {FOFLRImpl, "Implication/LRArrow ('=>')"},
70 {FOFRLImpl, "Back Implicatin/RLArrow ('<=')"},
71 {FOFEquiv, "Equivalence/Double arrow ('<=>')"},
72 {FOFXor, "Negated Equivalence/Xor ('<~>')"},
73 {FOFNand, "Nand ('~&')"},
74 {FOFNor, "Nor ('~|'')"},
75 {NoToken, NULL}
76
77 };
78
79 /*---------------------------------------------------------------------*/
80 /* Forward Declarations */
81 /*---------------------------------------------------------------------*/
82
83
84 /*---------------------------------------------------------------------*/
85 /* Internal Functions */
86 /*---------------------------------------------------------------------*/
87
88
89 /*-----------------------------------------------------------------------
90 //
91 // Function: scan_white()
92 //
93 // Scan a continous sequence of white space characters.
94 //
95 // Global Variables: -
96 //
97 // Side Effects : Reads input, changes the scanner state, may cause
98 // memory operations by by calling DStrAppendChar().
99 //
100 /----------------------------------------------------------------------*/
101
scan_white(Scanner_p in)102 static void scan_white(Scanner_p in)
103 {
104 AktToken(in)->tok = WhiteSpace;
105 while(isspace(CurrChar(in)))
106 {
107 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
108 NextChar(in);
109 }
110 }
111
112
113 /*-----------------------------------------------------------------------
114 //
115 // Function: scan_ident()
116 //
117 // Scan an identifier, d.h. an ident or an idnum. Also used for
118 // completing SemIdents.
119 //
120 // Global Variables: -
121 //
122 // Side Effects : As for scan_white()
123 //
124 /----------------------------------------------------------------------*/
125
scan_ident(Scanner_p in)126 static void scan_ident(Scanner_p in)
127 {
128 long numstart = 0,
129 i;
130
131 for(i=0; isidchar(CurrChar(in)); i++)
132 {
133 if(!numstart && isdigit(CurrChar(in)))
134 {
135 numstart = i;
136 }
137 else if(!isdigit(CurrChar(in)))
138 {
139 numstart = 0;
140 }
141 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
142 NextChar(in);
143 }
144 if(numstart)
145 {
146 AktToken(in)->tok = Idnum;
147 AktToken(in)->numval =
148 strtol(DStrView(AktToken(in)->literal)+numstart, NULL, 10);
149 /* Errors are intentionally ignored to allow arbitrary
150 identifiers */
151 }
152 else
153 {
154 AktToken(in)->tok = Ident;
155 AktToken(in)->numval = 0;
156 }
157 }
158
159
160 /*-----------------------------------------------------------------------
161 //
162 // Function: void scan_int()
163 //
164 // Scan an unsigned integer, i.e. a sequence of digits. If this
165 // cannot be parsed as an int, it will be interpreted as an
166 // identifier.
167 //
168 // Global Variables: -
169 //
170 // Side Effects : As for scan_white(), error if int is to big for
171 // unsigned long
172 //
173 /----------------------------------------------------------------------*/
174
scan_int(Scanner_p in)175 static void scan_int(Scanner_p in)
176 {
177 AktToken(in)->tok = PosInt;
178
179 while(isdigit(CurrChar(in)))
180 {
181 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
182 NextChar(in);
183 }
184 errno = 0;
185 AktToken(in)->numval =
186 strtol(DStrView(AktToken(in)->literal), NULL, 10);
187 /* strtoul is not available on all systems....*/
188
189 if(errno)
190 {
191 static char buff[10];
192 char* term=strncpy(buff, DStrView(AktToken(in)->literal), 9);
193 *term = '\0';
194 strtol(buff, NULL, 10);
195 Warning("Number truncated while reading %s. If this happens on 32 bit systems while parsing internal strings, it is harmless an can be ignored", DStrView(AktToken(in)->literal));
196 }
197 }
198
199
200 /*-----------------------------------------------------------------------
201 //
202 // Function: scan_line_comment()
203 //
204 // Scan a comment starting with # or %.
205 //
206 // Global Variables: -
207 //
208 // Side Effects : As scan_white()
209 //
210 /----------------------------------------------------------------------*/
211
scan_line_comment(Scanner_p in)212 static void scan_line_comment(Scanner_p in)
213 {
214 AktToken(in)->tok = Comment;
215
216 while((CurrChar(in)) != '\n' && (CurrChar(in)!=EOF))
217 {
218 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
219 NextChar(in);
220 }
221 DStrAppendChar(AktToken(in)->literal, '\n');
222 NextChar(in); /* Should be harmless even at EOF */
223 }
224
225
226 /*-----------------------------------------------------------------------
227 //
228 // Function: void scan_C_comment()
229 //
230 // Scan a comment in C-Style.
231 //
232 // Global Variables: -
233 //
234 // Side Effects : As scan_white()
235 //
236 /----------------------------------------------------------------------*/
237
scan_C_comment(Scanner_p in)238 static void scan_C_comment(Scanner_p in)
239 {
240 AktToken(in)->tok = Comment;
241
242 while(!((CurrChar(in) == '*') && (LookChar(in,1) == '/')))
243 {
244 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
245 NextChar(in);
246 }
247 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
248 NextChar(in);
249 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
250 NextChar(in);
251 }
252
253
254 /*-----------------------------------------------------------------------
255 //
256 // Function: scan_string()
257 //
258 // Scan a string (enclosed in "" or ''). Only the value of the strings,
259 // not the delimiter is stored.
260 //
261 // Global Variables: -
262 //
263 // Side Effects : As scan_white()
264 //
265 /----------------------------------------------------------------------*/
266
scan_string(Scanner_p in,char delim)267 static void scan_string(Scanner_p in, char delim)
268 {
269 bool escape = false;
270
271 AktToken(in)->tok = (delim=='\'')?SQString:String;
272
273 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
274 NextChar(in);
275 while(escape || (CurrChar(in) != delim))
276 {
277 if(!isprint(CurrChar(in)))
278 {
279 AktTokenError(in,
280 "Non-printable character in string constant",
281 false);
282 }
283 if(CurrChar(in)=='\\')
284 {
285 escape = !escape;
286 }
287 else
288 {
289 escape = false;
290 }
291 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
292 NextChar(in);
293 }
294 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
295 NextChar(in);
296 }
297
298
299 /*-----------------------------------------------------------------------
300 //
301 // Function: scan_token()
302 //
303 // Scans a token into AktToken(in). Does _not_ move the
304 // AktToken-pointer - this is done only for real (i.e. non white,
305 // non-comment) tokens in the function NextToken().
306 // The function assumes that *AktToken(in) is an initialized
307 // TokenCell which does not contain any outside references.
308 //
309 //
310 // Global Variables: -
311 //
312 // Side Effects : Reads input, may cause memory operations via
313 // DStr-Functions.
314 //
315 /----------------------------------------------------------------------*/
316
scan_token(Scanner_p in)317 static Token_p scan_token(Scanner_p in)
318 {
319 DStrReset(AktToken(in)->literal);
320 DStrReleaseRef(AktToken(in)->source);
321
322 AktToken(in)->source = DStrGetRef(Source(in));
323 AktToken(in)->stream_type = SourceType(in);
324 AktToken(in)->line = CurrLine(in);
325 AktToken(in)->column = CurrColumn(in);
326
327 if(!ischar(CurrChar(in)))
328 {
329 AktToken(in)->tok = NoToken;
330 }
331 else if(isspace(CurrChar(in)))
332 {
333 scan_white(in);
334 }
335 else if(isstartidchar(CurrChar(in)))
336 {
337 scan_ident(in);
338 }
339 else if(isdigit(CurrChar(in)))
340 {
341 scan_int(in);
342 }
343 else if(isstartcomment(CurrChar(in)))
344 {
345 scan_line_comment(in);
346 }
347 else if(CurrChar(in)=='/' && LookChar(in,1) == '*')
348 {
349 scan_C_comment(in);
350 }
351 else if((CurrChar(in)=='"') || (CurrChar(in)=='\''))
352 {
353 scan_string(in, CurrChar(in));
354 }
355 else if((CurrChar(in)=='$') && isidchar(LookChar(in,1)))
356 {
357 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
358 NextChar(in);
359 scan_ident(in);
360 AktToken(in)->tok = SemIdent;
361 }
362 else
363 {
364 switch(CurrChar(in))
365 {
366 case '(':
367 AktToken(in)->tok = OpenBracket;
368 break;
369 case ')':
370 AktToken(in)->tok = CloseBracket;
371 break;
372 case '{':
373 AktToken(in)->tok = OpenCurly;
374 break;
375 case '}':
376 AktToken(in)->tok = CloseCurly;
377 break;
378 case '[':
379 AktToken(in)->tok = OpenSquare;
380 break;
381 case ']':
382 AktToken(in)->tok = CloseSquare;
383 break;
384 case '<':
385 if((LookChar(in,1) == '~' && (LookChar(in,2) == '>')))
386 {
387 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
388 NextChar(in);
389 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
390 NextChar(in);
391 AktToken(in)->tok = FOFXor;
392 }
393 else if(LookChar(in,1) == '=')
394 {
395 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
396 NextChar(in);
397 if(LookChar(in,1) == '>')
398 {
399 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
400 NextChar(in);
401 AktToken(in)->tok = FOFEquiv;
402 }
403 else
404 {
405 AktToken(in)->tok = FOFRLImpl;
406 }
407 }
408 else
409 {
410 AktToken(in)->tok = LesserSign;
411 break;
412 }
413 break;
414 case '>':
415 AktToken(in)->tok = GreaterSign;
416 break;
417 case '=':
418 if(LookChar(in,1) == '>')
419 {
420 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
421 NextChar(in);
422 AktToken(in)->tok = FOFLRImpl;
423 }
424 else
425 {
426 AktToken(in)->tok = EqualSign;
427 }
428 break;
429 case '~':
430 switch(LookChar(in,1))
431 {
432 case '|':
433 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
434 NextChar(in);
435 AktToken(in)->tok = FOFNor;
436 break;
437 case '&':
438 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
439 NextChar(in);
440 AktToken(in)->tok = FOFNand;
441 break;
442
443 default:
444 AktToken(in)->tok = TildeSign;
445 break;
446 }
447 break;
448 case '!':
449 if(LookChar(in,1) == '=')
450 {
451 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
452 NextChar(in);
453 AktToken(in)->tok = NegEqualSign;
454 }
455 else
456 {
457 AktToken(in)->tok = Exclamation;
458 }
459 break;
460 case '?':
461 AktToken(in)->tok = QuestionMark;
462 break;
463 case ',':
464 AktToken(in)->tok = Comma;
465 break;
466 case ';':
467 AktToken(in)->tok = Semicolon;
468 break;
469 case ':':
470 AktToken(in)->tok = Colon;
471 break;
472 case '-':
473 AktToken(in)->tok = Hyphen;
474 break;
475 case '+':
476 AktToken(in)->tok = Plus;
477 break;
478 case '*':
479 AktToken(in)->tok = Mult;
480 break;
481 case '.':
482 AktToken(in)->tok = Fullstop;
483 break;
484 case '|':
485 AktToken(in)->tok = Pipe;
486 break;
487 case '/':
488 AktToken(in)->tok = Slash;
489 break;
490 case '&':
491 AktToken(in)->tok = Ampersand;
492 break;
493 case '$':
494 AktToken(in)->tok = Dollar;
495 break;
496 default:
497 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
498 AktTokenError(in, "Illegal character", false);
499 break;
500 }
501 DStrAppendChar(AktToken(in)->literal, CurrChar(in));
502 NextChar(in);
503 }
504 return AktToken(in);
505 }
506
507
508 /*-----------------------------------------------------------------------
509 //
510 // Function: scan_token_follow_includes()
511 //
512 // Scan a token, follow include directives and pop back empty input
513 / streams.
514 //
515 // Global Variables: -
516 //
517 // Side Effects : Read input, manipulate streams.
518 //
519 /----------------------------------------------------------------------*/
520
scan_token_follow_includes(Scanner_p in)521 Token_p scan_token_follow_includes(Scanner_p in)
522 {
523 scan_token(in);
524 if(in->include_key && (TestInpId(in, in->include_key)))
525 {
526 DStr_p name = DStrAlloc();
527 char* tptp_source;
528
529 tptp_source = getenv("TPTP");
530 if(tptp_source)
531 {
532 DStrAppendStr(name, tptp_source);
533 if(DStrLen(name) && (DStrView(name)[DStrLen(name)-1] !='/'))
534 {
535 DStrAppendChar(name,'/');
536 }
537 }
538
539 scan_token(in);
540 CheckInpTok(in, OpenBracket);
541 scan_token(in);
542 CheckInpTok(in, Identifier|String|SQString);
543 if(TestInpTok(in, Identifier))
544 {
545 DStrAppendDStr(name, AktToken(in)->literal);
546 }
547 else
548 {
549 DStrAppendStr(name, DStrView(AktToken(in)->literal)+1);
550 DStrDeleteLastChar(name);
551 }
552 OpenStackedInput(&(in->source), StreamTypeFile,
553 DStrView(name), true);
554 DStrFree(name);
555 scan_token_follow_includes(in);
556 }
557 else if(in->include_key && TestInpTok(in, NoToken))
558 {
559 if((in->source)->next)
560 {
561 CloseStackedInput(&(in->source));
562 scan_token(in);
563 CheckInpTok(in, CloseBracket);
564 scan_token(in);
565 CheckInpTok(in, Fullstop);
566 scan_token_follow_includes(in);
567 }
568 }
569 return AktToken(in);
570 }
571
572 /*-----------------------------------------------------------------------
573 //
574 // Function: scan_real_token()
575 //
576 // Scan tokens until a real token (i.e. not a SkipToken has been
577 // scanned.
578 //
579 // Global Variables: -
580 //
581 // Side Effects : Reads input, manipulates streams.
582 //
583 /----------------------------------------------------------------------*/
584
scan_real_token(Scanner_p in)585 static Token_p scan_real_token(Scanner_p in)
586 {
587 AktToken(in)->skipped = false;
588 DStrReset(AktToken(in)->comment);
589
590 scan_token_follow_includes(in);
591
592 while(TestTok(AktToken(in), SkipToken))
593 {
594 AktToken(in)->skipped = true;
595 if(!in->ignore_comments && TestInpTok(in, Comment))
596 {
597 DStrAppendDStr(AktToken(in)->comment, AktToken(in)->literal);
598 }
599 scan_token_follow_includes(in);
600 }
601 return AktToken(in);
602 }
603
604
605 /*-----------------------------------------------------------------------
606 //
607 // Function: str_n_element()
608 //
609 // Test whether the len lenght start of str is contained in the set
610 // id of strings (encoded in a single string with elements separated
611 // by |).
612 //
613 // Global Variables: -
614 //
615 // Side Effects : -
616 //
617 /----------------------------------------------------------------------*/
618
str_n_element(char * str,char * ids,int len)619 bool str_n_element(char* str, char* ids, int len)
620 {
621 //printf("# str_n_element(%s, %s) = ", str, ids);
622
623 if(strncmp(str, ids, len)==0)
624 {
625 ids+=len;
626 if(!*ids || (*ids=='|'))
627 {
628 //printf("true\n");
629 return true;
630 }
631 }
632 while(*ids && (*ids != '|'))
633 {
634 ids++;
635 }
636 if(!*ids)
637 {
638 //printf("false\n");
639 return false;
640 }
641 ids++;
642
643 return str_n_element(str, ids, len);
644 }
645
646
647 /*---------------------------------------------------------------------*/
648 /* Exported Functions */
649 /*---------------------------------------------------------------------*/
650
651
652 /*-----------------------------------------------------------------------
653 //
654 // Function: PosRep()
655 //
656 // Return a pointer to a description of a position in a file. The
657 // description is valid until the function is called the next time.
658 //
659 // Global Variables: -
660 //
661 // Side Effects : Sets static variable
662 //
663 /----------------------------------------------------------------------*/
664
PosRep(StreamType type,DStr_p source,long line,long column)665 char* PosRep(StreamType type, DStr_p source, long line, long column)
666 {
667 static char buff[MAX_ERRMSG_LEN];
668 char tmp_str[MAX_ERRMSG_LEN];
669
670
671 if(type == StreamTypeFile)
672 {
673 assert(strlen(DStrView(source))<=MAXPATHLEN);
674
675 sprintf(buff, "%s:%ld:(Column %ld):",
676 DStrView(source), line, column);
677 }
678 else
679 {
680 tmp_str[0] = '\0';
681 strcat(tmp_str, type);
682 strcat(tmp_str, ": \"");
683 strncat(tmp_str, DStrView(source), MAXPATHLEN-4);
684 if(strlen(DStrView(source))>MAXPATHLEN-4)
685 {
686 strcat(tmp_str, "...");
687 }
688 strcat(tmp_str, "\"");
689 sprintf(buff, "%s:%ld:(Column %ld):", tmp_str, line, column);
690 }
691
692 return buff;
693 }
694
695
696 /*-----------------------------------------------------------------------
697 //
698 // Function: TokenPosRep()
699 //
700 // Return a pointer to a description of the position of a token. The
701 // description is valid until the function or PosRep() is called the
702 // next time.
703 //
704 // Global Variables: -
705 //
706 // Side Effects : By PosRep()
707 //
708 /----------------------------------------------------------------------*/
709
TokenPosRep(Token_p token)710 char* TokenPosRep(Token_p token)
711 {
712 return PosRep(token->stream_type, token->source, token->line,
713 token->column);
714 }
715
716
717 /*-----------------------------------------------------------------------
718 //
719 // Function: DescribeToken()
720 //
721 // Return a pointer to a description of the set of tokens described
722 // by tok. The caller has to free the space of this description!
723 //
724 // Global Variables: token_print_rep
725 //
726 // Side Effects : Memory operations
727 //
728 /----------------------------------------------------------------------*/
729
DescribeToken(TokenType tok)730 char* DescribeToken(TokenType tok)
731 {
732 char* help;
733 int i;
734 DStr_p res = DStrAlloc();
735 bool found = false;
736
737 for(i=0; token_print_rep[i].rep; i++)
738 {
739 if(tok & token_print_rep[i].key)
740 {
741 DStrAppendStr(res, found ? " or " : "");
742 DStrAppendStr(res, token_print_rep[i].rep);
743 found = true;
744 }
745 }
746 if(!found)
747 {
748 DStrAppendStr(res, token_print_rep[0].rep);
749 }
750 help = DStrCopy(res);
751 DStrFree(res);
752
753 return help;
754 }
755
756 /*-----------------------------------------------------------------------
757 //
758 // Function: PrintToken()
759 //
760 // Print a token (probably only for debugging purposes...
761 //
762 // Global Variables: -
763 //
764 // Side Effects : Output
765 //
766 /----------------------------------------------------------------------*/
767
PrintToken(FILE * out,Token_p token)768 void PrintToken(FILE* out, Token_p token)
769 {
770 char* handle;
771
772 handle = DescribeToken(token->tok);
773 fprintf(out, "Token: %d = %s\n", (int)token->tok, handle);
774 FREE(handle);
775 fprintf(out, "Position: %s ", TokenPosRep(token));
776 fprintf(out, "Literal: %s\n", token->literal?DStrView(token->literal):"");
777 fprintf(out, "Numval: %6lu Skipped: %s\n", token->numval,
778 token->skipped ? "true" : "false");
779 fprintf(out, "Comment: %s\n", DStrView(token->comment));
780 }
781
782
783 /*-----------------------------------------------------------------------
784 //
785 // Function: CreateScanner()
786 //
787 // Create a new, initialized scanner from which tokens can be read
788 // immediately.
789 //
790 // Global Variables: -
791 //
792 // Side Effects : Opens files, memory operations, reads input.
793 //
794 /----------------------------------------------------------------------*/
795
CreateScanner(StreamType type,char * name,bool ignore_comments,char * default_dir)796 Scanner_p CreateScanner(StreamType type, char *name, bool
797 ignore_comments, char *default_dir)
798 {
799 Scanner_p handle;
800 Stream_p stream;
801 char *tmp_name;
802
803
804 handle = ScannerCellAlloc();
805 handle->source = NULL;
806 handle->default_dir = DStrAlloc();
807 handle->accu = DStrAlloc();
808 handle->ignore_comments = ignore_comments;
809 handle->include_key = NULL;
810 handle->format = LOPFormat;
811
812 if((type == StreamTypeFile && strcmp(name,"-")==0)||
813 (type != StreamTypeFile))
814 {
815 stream = OpenStackedInput(&handle->source, type, name, true);
816 assert(stream);
817 }
818 else
819 {
820 assert(type == StreamTypeFile);
821 if(FileNameIsAbsolute(name))
822 {
823 stream = OpenStackedInput(&handle->source, type, name, true);
824 tmp_name = FileNameDirName(name);
825 DStrAppendStr(handle->default_dir, tmp_name);
826 FREE(tmp_name);
827 assert(stream);
828 }
829 else
830 {
831 DStr_p full_file_name = DStrAlloc();
832
833 if(default_dir)
834 {
835 DStrAppendStr(handle->default_dir, default_dir);
836 assert(!DStrLen(handle->default_dir)||
837 DStrLastChar(handle->default_dir) =='/');
838 }
839 tmp_name = FileNameDirName(name);
840 DStrAppendStr(handle->default_dir, tmp_name);
841 assert(DStrLen(handle->default_dir)==0 ||
842 DStrLastChar(handle->default_dir) =='/');
843 FREE(tmp_name);
844 tmp_name = FileNameBaseName(name);
845 DStrAppendStr(full_file_name,
846 DStrView(handle->default_dir));
847 DStrAppendStr(full_file_name,
848 tmp_name);
849 FREE(tmp_name);
850 stream = OpenStackedInput(&handle->source, type,
851 DStrView(full_file_name), !TPTP_dir);
852 if(!stream)
853 {
854 assert(TPTP_dir);
855 DStrSet(handle->default_dir, TPTP_dir);
856 tmp_name = FileNameDirName(name);
857 DStrAppendStr(handle->default_dir, tmp_name);
858 FREE(tmp_name);
859 tmp_name = FileNameBaseName(name);
860 DStrSet(full_file_name,
861 DStrView(handle->default_dir));
862 DStrAppendStr(full_file_name,
863 tmp_name);
864 FREE(tmp_name);
865 stream = OpenStackedInput(&handle->source, type,
866 DStrView(full_file_name), true);
867 }
868 DStrFree(full_file_name);
869 }
870 }
871
872 for(handle->current = 0; handle->current < MAXTOKENLOOKAHEAD;
873 handle->current++)
874 {
875 handle->tok_sequence[handle->current].tok = NoToken;
876 handle->tok_sequence[handle->current].literal = DStrAlloc();
877 handle->tok_sequence[handle->current].comment = DStrAlloc();
878 handle->tok_sequence[handle->current].source = NULL;
879 handle->tok_sequence[handle->current].stream_type = NULL;
880 scan_real_token(handle);
881 }
882 handle->current = 0;
883 handle->include_pos = NULL;
884 return handle;
885 }
886
887 /*-----------------------------------------------------------------------
888 //
889 // Function: DestroyScanner()
890 //
891 // Ensure that the scanner is disposed of cleanly, all files are
892 // closed and all memory/references are released.
893 //
894 // Global Variables: -
895 //
896 // Side Effects : Closes file, may cause error if input stack is
897 // not empty
898 //
899 /----------------------------------------------------------------------*/
900
DestroyScanner(Scanner_p junk)901 void DestroyScanner(Scanner_p junk)
902 {
903 assert(junk);
904 for(junk->current = 0; junk->current < MAXTOKENLOOKAHEAD;
905 junk->current++)
906 {
907 DStrFree(junk->tok_sequence[junk->current].literal);
908 DStrFree(junk->tok_sequence[junk->current].comment);
909 DStrReleaseRef(junk->tok_sequence[junk->current].source);
910 }
911 assert(junk->source);
912 CloseStackedInput(&junk->source);
913 assert(!junk->source);
914 DStrFree(junk->default_dir);
915 DStrFree(junk->accu);
916 if(junk->include_pos)
917 {
918 FREE(junk->include_pos);
919 }
920 ScannerCellFree(junk);
921 }
922
923
924 /*-----------------------------------------------------------------------
925 //
926 // Function: ScannerSetFormat()
927 //
928 // Set the format of the scanner (in particular, guess a format if
929 //
930 // Global Variables:
931 //
932 // Side Effects :
933 //
934 /----------------------------------------------------------------------*/
935
ScannerSetFormat(Scanner_p scanner,IOFormat fmt)936 void ScannerSetFormat(Scanner_p scanner, IOFormat fmt)
937 {
938 if(fmt == AutoFormat)
939 {
940 if(TestInpId(scanner, "fof|cnf|tff|include"))
941 {
942 //printf("# TSTP!\n");
943 fmt = TSTPFormat;
944 }
945 else if(TestInpId(scanner, "input_clause|input_formula"))
946 {
947 //printf("# TPTP!\n");
948 fmt = TPTPFormat;
949 }
950 else
951 {
952 //printf("# LOP!\n");
953 fmt = LOPFormat;
954 }
955 }
956 scanner->format = fmt;
957 }
958
959
960 /*-----------------------------------------------------------------------
961 //
962 // Function: TestTok()
963 //
964 // Compares the type of the given token with a list of possible
965 // tokens. Possibilities are values of type TokenType, possibly
966 // combined with the bitwise or operator '|'. The test is true if
967 // the given token matches at least one type from the list.
968 //
969 // Global Variables: -
970 //
971 // Side Effects : -
972 //
973 /----------------------------------------------------------------------*/
974
TestTok(Token_p akt,TokenType toks)975 bool TestTok(Token_p akt, TokenType toks)
976 {
977 /* PrintToken(stdout, akt); */
978 return (akt->tok & toks)!=0;
979 }
980
981 /*-----------------------------------------------------------------------
982 //
983 // Function: TestId()
984 //
985 // Test whether a given token is of type identifier and is one of
986 // a set of possible alternatives. This set is given as a single
987 // C-String, alternatives are separated by the '|' character.
988 //
989 //
990 // Global Variables: -
991 //
992 // Side Effects : -
993 //
994 /----------------------------------------------------------------------*/
995
TestId(Token_p akt,char * ids)996 bool TestId(Token_p akt, char* ids)
997 {
998 if(!TestTok(akt, Identifier))
999 {
1000 return false;
1001 }
1002 return str_n_element(DStrView(akt->literal), ids,
1003 DStrLen(akt->literal));
1004 }
1005
1006
1007 /*-----------------------------------------------------------------------
1008 //
1009 // Function: TestIdNum()
1010 //
1011 // As TestId(), but take only the non-numerical-part into account.
1012 //
1013 // Global Variables: -
1014 //
1015 // Side Effects : -
1016 //
1017 /----------------------------------------------------------------------*/
1018
TestIdnum(Token_p akt,char * ids)1019 bool TestIdnum(Token_p akt, char* ids)
1020 {
1021 int i, len=0;
1022 char c;
1023
1024 if(!TestTok(akt, Idnum))
1025 {
1026 return false;
1027 }
1028 for(i=0; (c=DStrView(akt->literal)[i]); i++)
1029 {
1030 if(!len && isdigit(DStrView(akt->literal)[i]))
1031 {
1032 len = i;
1033 }
1034 else if(!isdigit(DStrView(akt->literal)[i]))
1035 {
1036 len = 0;
1037 }
1038 }
1039 return str_n_element(DStrView(akt->literal), ids, len);
1040 }
1041
1042
1043
1044 /*-----------------------------------------------------------------------
1045 //
1046 // Function: AktTokenError()
1047 //
1048 // Produce a syntax error at the current token with the given
1049 // message.
1050 //
1051 // Global Variables: -
1052 //
1053 // Side Effects : Terminates programm
1054 //
1055 /----------------------------------------------------------------------*/
1056
AktTokenError(Scanner_p in,char * msg,bool syserr)1057 void AktTokenError(Scanner_p in, char* msg, bool syserr)
1058 {
1059 DStr_p err = DStrAlloc();
1060
1061 DStrAppendStr(err, TokenPosRep(AktToken(in)));
1062 DStrAppendStr(err, "(just read '");
1063 DStrAppendDStr(err, AktToken(in)->literal);
1064 DStrAppendStr(err, "'): ");
1065 DStrAppendStr(err, msg);
1066 if(syserr)
1067 {
1068 SysError(DStrView(err), SYNTAX_ERROR);
1069 }
1070 else
1071 {
1072 Error(DStrView(err), SYNTAX_ERROR);
1073 }
1074 DStrFree(err); /* Just for symmetry reasons */
1075 }
1076
1077
1078 /*-----------------------------------------------------------------------
1079 //
1080 // Function: CheckInpTok()
1081 //
1082 // Check whether AktTok(in) is of one of the desired types. Produce
1083 // error if not.
1084 //
1085 // Global Variables: -
1086 //
1087 // Side Effects : Memory operations, may terminate program.
1088 //
1089 /----------------------------------------------------------------------*/
1090
CheckInpTok(Scanner_p in,TokenType toks)1091 void CheckInpTok(Scanner_p in, TokenType toks)
1092 {
1093 if(!TestInpTok(in, toks))
1094 {
1095 char* tmp;
1096
1097 DStrReset(in->accu);
1098 tmp = DescribeToken(toks);
1099 DStrAppendStr(in->accu, tmp);
1100 FREE(tmp);
1101 DStrAppendStr(in->accu, " expected, but ");
1102 tmp = DescribeToken(AktToken(in)->tok);
1103 DStrAppendStr(in->accu, tmp);
1104 FREE(tmp);
1105 DStrAppendStr(in->accu, " read ");
1106 AktTokenError(in, DStrView(in->accu), false);
1107 }
1108 }
1109
1110
1111 /*-----------------------------------------------------------------------
1112 //
1113 // Function: CheckInpTokNoSkip()
1114 //
1115 // As CheckInpTok(), but produce an error if SkipTokens were
1116 // present.
1117 //
1118 // Global Variables: -
1119 //
1120 // Side Effects : As CheckInpTok()
1121 //
1122 /----------------------------------------------------------------------*/
1123
CheckInpTokNoSkip(Scanner_p in,TokenType toks)1124 void CheckInpTokNoSkip(Scanner_p in, TokenType toks)
1125 {
1126 if(AktToken(in)->skipped)
1127 {
1128 char* tmp;
1129
1130 DStrReset(in->accu);
1131 tmp = DescribeToken(toks);
1132 DStrAppendStr(in->accu, tmp);
1133 FREE(tmp);
1134 DStrAppendStr(in->accu, " expected, but ");
1135 tmp = DescribeToken(SkipToken);
1136 DStrAppendStr(in->accu, tmp);
1137 FREE(tmp);
1138 DStrAppendStr(in->accu, " read ");
1139 AktTokenError(in, DStrView(in->accu), false);
1140 }
1141 CheckInpTok(in, toks);
1142 }
1143
1144
1145 /*-----------------------------------------------------------------------
1146 //
1147 // Function: CheckInpId()
1148 //
1149 // Check whether AktToken(in) is an identifier with the desired
1150 // value. Produce error if not.
1151 //
1152 // Global Variables: -
1153 //
1154 // Side Effects : Memory operations, may terminate program.
1155 //
1156 /----------------------------------------------------------------------*/
1157
CheckInpId(Scanner_p in,char * ids)1158 void CheckInpId(Scanner_p in, char* ids)
1159 {
1160 if(!TestInpId(in, ids))
1161 {
1162 char* tmp;
1163
1164 DStrSet(in->accu, "Identifier (");
1165 DStrAppendStr(in->accu, ids);
1166 DStrAppendStr(in->accu, ") expected, but ");
1167 tmp = DescribeToken(AktToken(in)->tok);
1168 DStrAppendStr(in->accu, tmp);
1169 FREE(tmp);
1170 DStrAppendStr(in->accu, "('");
1171 DStrAppendStr(in->accu, DStrView(AktToken(in)->literal));
1172 DStrAppendStr(in->accu, "') read ");
1173 AktTokenError(in, DStrView(in->accu), false);
1174 }
1175 }
1176
1177 /*-----------------------------------------------------------------------
1178 //
1179 // Function: NextToken()
1180 //
1181 // Read a new token, switch to the next token in the queue.
1182 //
1183 // Global Variables: -
1184 //
1185 // Side Effects : Only by calling scan_real_token()
1186 //
1187 /----------------------------------------------------------------------*/
1188
NextToken(Scanner_p in)1189 void NextToken(Scanner_p in)
1190 {
1191 scan_real_token(in);
1192 in->current = TOKENREALPOS(in->current+1);
1193 /*
1194 printf("Current token:\n");
1195 PrintToken(stdout, AktToken(in));
1196 printf("Next token:\n");
1197 PrintToken(stdout, LookToken(in,1));
1198 printf("SuperNext token:\n");
1199 PrintToken(stdout, LookToken(in,2));
1200 printf("\n");*/
1201 }
1202
1203
1204 /*-----------------------------------------------------------------------
1205 //
1206 // Function: ScannerParseInclude()
1207 //
1208 // Parse a TPTP-Style include statement. Return a scanner for the
1209 // included file, and put (optional) selected names into
1210 // name_selector. If the file name is in skip_includes, skip the
1211 // rest and return NULL.
1212 //
1213 // Global Variables: -
1214 //
1215 // Side Effects : Reads input.
1216 //
1217 /----------------------------------------------------------------------*/
1218
ScannerParseInclude(Scanner_p in,StrTree_p * name_selector,StrTree_p * skip_includes)1219 Scanner_p ScannerParseInclude(Scanner_p in, StrTree_p *name_selector,
1220 StrTree_p *skip_includes)
1221 {
1222 Scanner_p new_scanner = NULL;
1223 char* name;
1224 char* pos_rep;
1225
1226 pos_rep = SecureStrdup(TokenPosRep(AktToken(in)));
1227 AcceptInpId(in, "include");
1228 AcceptInpTok(in, OpenBracket);
1229 CheckInpTok(in, SQString);
1230 name = DStrCopyCore(AktToken(in)->literal);
1231
1232 if(!StrTreeFind(skip_includes, name))
1233 {
1234 new_scanner = CreateScanner(StreamTypeFile, name,
1235 in->ignore_comments,
1236 ScannerGetDefaultDir(in));
1237 ScannerSetFormat(new_scanner, ScannerGetFormat(in));
1238 new_scanner->include_pos = pos_rep;
1239 }
1240 else
1241 {
1242 FREE(pos_rep);
1243 }
1244 FREE(name);
1245 NextToken(in);
1246
1247 if(TestInpTok(in, Comma))
1248 {
1249 IntOrP dummy;
1250
1251 dummy.i_val = 0;
1252 NextToken(in);
1253 CheckInpTok(in, Name|PosInt|OpenSquare);
1254
1255 if(TestInpTok(in, Name|PosInt))
1256 {
1257 StrTreeStore(name_selector, DStrView(AktToken(in)->literal),
1258 dummy, dummy);
1259 NextToken(in);
1260 }
1261 else
1262 {
1263 AcceptInpTok(in, OpenSquare);
1264 if(!TestInpTok(in, CloseSquare))
1265 {
1266 StrTreeStore(name_selector, DStrView(AktToken(in)->literal),
1267 dummy, dummy);
1268 AcceptInpTok(in, Name|PosInt);
1269 while(TestInpTok(in, Comma))
1270 {
1271 NextToken(in);
1272 StrTreeStore(name_selector, DStrView(AktToken(in)->literal),
1273 dummy, dummy);
1274 AcceptInpTok(in, Name|PosInt);
1275 }
1276 }
1277 else /* Empty list - insert full dummy */
1278 {
1279 dummy.i_val = 1;
1280 StrTreeStore(name_selector, "** Not a legal name**",
1281 dummy, dummy);
1282
1283 }
1284 AcceptInpTok(in, CloseSquare);
1285 }
1286 }
1287 AcceptInpTok(in, CloseBracket);
1288 AcceptInpTok(in, Fullstop);
1289
1290 return new_scanner;
1291 }
1292
1293
1294 /*---------------------------------------------------------------------*/
1295 /* End of File */
1296 /*---------------------------------------------------------------------*/
1297