1 /***************************************************************************
2 * aGrUM modified frames and atg files for cocoR
3 * Copyright (c) 2005-2021 by Christophe GONZALES(_at_AMU) and Pierre-Henri WUILLEMIN(_at_LIP6) *
4 * info_at_agrum_dot_org
5 ***************************************************************************/
6 /*----------------------------------------------------------------------
7 Compiler Generator Coco/R,
8 Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
9 extended by M. Loeberbauer & A. Woess, Univ. of Linz
10 ported to C++ by Csaba Balazs, University of Szeged
11 with improvements by Pat Terry, Rhodes University
12
13 This program is free software; you can redistribute it and/or modify it
14 under the terms of the GNU General Public License as published by the
15 Free Software Foundation; either version 2, or (at your option) any
16 later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21 for more details.
22
23 You should have received a copy of the GNU General Public License along
24 with this program; if not, write to the Free Software Foundation, Inc.,
25 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
26
27 As an exception, it is allowed to write an extension of Coco/R that is
28 used as a plugin in non-free software.
29
30 If not otherwise stated, any source code generated by Coco/R (other than
31 Coco/R itself) does not fall under the GNU General Public License.
32 -----------------------------------------------------------------------*/
33
34
35 #include <memory.h>
36 #include <string.h>
37 #include "Scanner.h"
38 #include <agrum/tools/core/cocoR/common.h>
39
40 namespace gum {
41 namespace formula {
42
43
Token()44 Token::Token() {
45 kind = 0;
46 pos = 0;
47 col = 0;
48 line = 0;
49 val = nullptr;
50 next = nullptr;
51 }
52
~Token()53 Token::~Token() {
54 coco_string_delete( val );
55 }
56
Buffer(FILE * s,bool isUserStream)57 Buffer::Buffer( FILE* s, bool isUserStream ) {
58 // ensure binary read on windows
59 #if _MSC_VER >= 1300
60 _setmode( _fileno( s ), _O_BINARY );
61 #endif
62 stream = s; this->isUserStream = isUserStream;
63
64 if ( CanSeek() ) {
65 fseek( s, 0, SEEK_END );
66 fileLen = ftell( s );
67 fseek( s, 0, SEEK_SET );
68 bufLen = ( fileLen < MAX_BUFFER_LENGTH ) ? fileLen : MAX_BUFFER_LENGTH;
69 bufStart = INT_MAX; // nothing in the buffer so far
70 } else {
71 fileLen = bufLen = bufStart = 0;
72 }
73
74 bufCapacity = ( bufLen>0 ) ? bufLen : MIN_BUFFER_LENGTH;
75 buf = new unsigned char[bufCapacity];
76
77 if ( fileLen > 0 ) SetPos( 0 ); // setup buffer to position 0 (start)
78 else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid
79
80 if ( bufLen == fileLen && CanSeek() ) Close();
81 }
82
Buffer(Buffer * b)83 Buffer::Buffer( Buffer* b ) {
84 buf = b->buf;
85 bufCapacity = b->bufCapacity;
86 b->buf = nullptr;
87 bufStart = b->bufStart;
88 bufLen = b->bufLen;
89 fileLen = b->fileLen;
90 bufPos = b->bufPos;
91 stream = b->stream;
92 b->stream = nullptr;
93 isUserStream = b->isUserStream;
94 }
95
Buffer(const unsigned char * buf,int len)96 Buffer::Buffer( const unsigned char* buf, int len ) {
97 this->isUserStream = false;
98 this->buf = new unsigned char[len];
99 memcpy( this->buf, buf, len*sizeof( unsigned char ) );
100 bufStart = 0;
101 bufCapacity = bufLen = len;
102 fileLen = len;
103 bufPos = 0;
104 stream = nullptr;
105 }
106
~Buffer()107 Buffer::~Buffer() {
108 Close();
109
110 if ( buf != nullptr ) {
111 delete [] buf;
112 buf = nullptr;
113 }
114 }
115
Close()116 void Buffer::Close() {
117 if ( !isUserStream && stream != nullptr ) {
118 fclose( stream );
119 stream = nullptr;
120 }
121 }
122
GetPercent()123 int Buffer::GetPercent() {
124 return ( int )( ( 100.0*GetPos() )/fileLen );
125 }
126
Read()127 int Buffer::Read() {
128 if ( bufPos < bufLen ) {
129 return buf[bufPos++];
130 } else if ( GetPos() < fileLen ) {
131
132 SetPos( GetPos() ); // shift buffer start to Pos
133 return buf[bufPos++];
134 } else if ( ( stream != nullptr ) && !CanSeek() && ( ReadNextStreamChunk() > 0 ) ) {
135 return buf[bufPos++];
136 } else {
137 return EoF;
138 }
139 }
140
Peek()141 int Buffer::Peek() {
142 int curPos = GetPos();
143 int ch = Read();
144 SetPos( curPos );
145 return ch;
146 }
147
148 // beg .. begin, zero-based, inclusive, in byte
149 // end .. end, zero-based, exclusive, in byte
GetString(int beg,int end)150 wchar_t* Buffer::GetString( int beg, int end ) {
151 int len = 0;
152 wchar_t* buf = new wchar_t[end - beg];
153 int oldPos = GetPos();
154 SetPos( beg );
155
156 while ( GetPos() < end ) buf[len++] = ( wchar_t ) Read();
157
158 SetPos( oldPos );
159 wchar_t* res = coco_string_create( buf, 0, len );
160 coco_string_delete( buf );
161 return res;
162 }
163
GetPos()164 int Buffer::GetPos() {
165 return bufPos + bufStart;
166 }
167
SetPos(int value)168 void Buffer::SetPos( int value ) {
169 if ( ( value >= fileLen ) && ( stream != nullptr ) && !CanSeek() ) {
170 // Wanted position is after buffer and the stream
171 // is not seek-able e.g. network or console,
172 // thus we have to read the stream manually till
173 // the wanted position is in sight.
174 while ( ( value >= fileLen ) && ( ReadNextStreamChunk() > 0 ) );
175 }
176
177 if ( ( value < 0 ) || ( value > fileLen ) ) {
178 wprintf( L"--- buffer out of bounds access, position: %d\n", value );
179 exit( 1 );
180 }
181
182 if ( ( value >= bufStart ) && ( value < ( bufStart + bufLen ) ) ) { // already in buffer
183 bufPos = value - bufStart;
184 } else if ( stream != nullptr ) { // must be swapped in
185 fseek( stream, value, SEEK_SET );
186 bufLen = (int)fread( buf, int(sizeof( unsigned char )), bufCapacity, stream );
187 bufStart = value; bufPos = 0;
188 } else {
189 bufPos = fileLen - bufStart; // make Pos return fileLen
190 }
191 }
192
193 // Read the next chunk of bytes from the stream, increases the buffer
194 // if needed and updates the fields fileLen and bufLen.
195 // Returns the number of bytes read.
ReadNextStreamChunk()196 int Buffer::ReadNextStreamChunk() {
197 int free = bufCapacity - bufLen;
198
199 if ( free == 0 ) {
200 // in the case of a growing input stream
201 // we can neither seek in the stream, nor can we
202 // foresee the maximum length, thus we must adapt
203 // the buffer size on demand.
204 bufCapacity = bufLen * 2;
205 unsigned char* newBuf = new unsigned char[bufCapacity];
206 memcpy( newBuf, buf, bufLen*sizeof( unsigned char ) );
207 delete [] buf;
208 buf = newBuf;
209 free = bufLen;
210 }
211
212 int read = (int)fread( buf + bufLen, int(sizeof( unsigned char )), free, stream );
213
214 if ( read > 0 ) {
215 fileLen = bufLen = ( bufLen + read );
216 return read;
217 }
218
219 // end of stream reached
220 return 0;
221 }
222
CanSeek()223 bool Buffer::CanSeek() {
224 return ( stream != nullptr ) && ( ftell( stream ) != -1 );
225 }
226
Read()227 int UTF8Buffer::Read() {
228 int ch;
229
230 do {
231 ch = Buffer::Read();
232 // until we find a utf8 start (0xxxxxxx or 11xxxxxx)
233 } while ( ( ch >= 128 ) && ( ( ch & 0xC0 ) != 0xC0 ) && ( ch != EoF ) );
234
235 if ( ch < 128 || ch == EoF ) {
236 // nothing to do, first 127 chars are the same in ascii and utf8
237 // 0xxxxxxx or end of file character
238 } else if ( ( ch & 0xF0 ) == 0xF0 ) {
239 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
240 int c1 = ch & 0x07; ch = Buffer::Read();
241 int c2 = ch & 0x3F; ch = Buffer::Read();
242 int c3 = ch & 0x3F; ch = Buffer::Read();
243 int c4 = ch & 0x3F;
244 ch = ( ( ( ( ( c1 << 6 ) | c2 ) << 6 ) | c3 ) << 6 ) | c4;
245 } else if ( ( ch & 0xE0 ) == 0xE0 ) {
246 // 1110xxxx 10xxxxxx 10xxxxxx
247 int c1 = ch & 0x0F; ch = Buffer::Read();
248 int c2 = ch & 0x3F; ch = Buffer::Read();
249 int c3 = ch & 0x3F;
250 ch = ( ( ( c1 << 6 ) | c2 ) << 6 ) | c3;
251 } else if ( ( ch & 0xC0 ) == 0xC0 ) {
252 // 110xxxxx 10xxxxxx
253 int c1 = ch & 0x1F; ch = Buffer::Read();
254 int c2 = ch & 0x3F;
255 ch = ( c1 << 6 ) | c2;
256 }
257
258 return ch;
259 }
260
Scanner(const unsigned char * buf,int len,std::string filename,bool trace)261 Scanner::Scanner(const unsigned char* buf, int len, std::string filename, bool trace) {
262 buffer = new Buffer( buf, len );
263 _filenamne_=widen( filename.c_str() );
264 _trace_=trace;
265 Init();
266 }
267
Scanner(const char * fileName,bool trace)268 Scanner::Scanner( const char* fileName,bool trace ) {
269 Load( widen( std::string( fileName ) ).c_str() );
270 _trace_=trace;
271 }
272
Scanner(const wchar_t * fileName,bool trace)273 Scanner::Scanner( const wchar_t* fileName,bool trace ) {
274 Load( fileName );
275 _trace_=trace;
276 }
277
Load(const wchar_t * fileName)278 void Scanner::Load( const wchar_t* fileName ) {
279 FILE* stream;
280 char* chFileName = coco_string_create_char( fileName );
281
282 if ( ( stream = fopen( chFileName, "rb" ) ) == nullptr ) {
283 std::string s( "No such file : " ); s+=chFileName;
284 GUM_ERROR( gum::IOError,s )
285 }
286
287 coco_string_delete( chFileName );
288 buffer = new Buffer( stream, false );
289 _filenamne_=std::wstring( fileName );
290 Init();
291 }
292
Scanner(FILE * s,bool trace)293 Scanner::Scanner( FILE* s,bool trace ) {
294 buffer = new Buffer( s, true );
295 _filenamne_=L"FILE";
296 Init();
297 _trace_=trace;
298 }
299
~Scanner()300 Scanner::~Scanner() {
301 char* cur = ( char* ) firstHeap;
302
303 while ( cur != nullptr ) {
304 cur = *( char** )( cur + HEAP_BLOCK_SIZE );
305 free( firstHeap );
306 firstHeap = cur;
307 }
308
309 if ( tval ) delete [] tval;
310
311 if ( buffer ) delete buffer;
312 }
313
Init()314 void Scanner::Init() {
315 percent=-1;
316 EOL = '\n';
317 eofSym = 0;
318 maxT = 10;
319 noSym = 10;
320 int i;
321 for (i = 48; i <= 57; ++i) start.set(i, 7);
322 for (i = 65; i <= 90; ++i) start.set(i, 6);
323 for (i = 95; i <= 95; ++i) start.set(i, 6);
324 for (i = 97; i <= 122; ++i) start.set(i, 6);
325 start.set(43, 8);
326 start.set(45, 9);
327 for (i = 42; i <= 42; ++i) start.set(i, 4);
328 for (i = 47; i <= 47; ++i) start.set(i, 4);
329 for (i = 60; i <= 60; ++i) start.set(i, 4);
330 for (i = 62; i <= 62; ++i) start.set(i, 4);
331 for (i = 94; i <= 94; ++i) start.set(i, 4);
332 start.set(10, 5);
333 start.set(40, 13);
334 start.set(41, 14);
335 start.set(44, 15);
336 start.set(Buffer::EoF, -1);
337
338
339 tvalLength = 128;
340 tval = new wchar_t[tvalLength]; // text of current token
341
342 // HEAP_BLOCK_SIZE byte heap + pointer to next heap block
343 heap = malloc( HEAP_BLOCK_SIZE + sizeof( void* ) );
344 firstHeap = heap;
345 heapEnd = ( void** )( ( ( char* ) heap ) + HEAP_BLOCK_SIZE );
346 *heapEnd = 0;
347 heapTop = heap;
348
349 if ( sizeof( Token ) > HEAP_BLOCK_SIZE ) {
350 wprintf( L"--- Too small HEAP_BLOCK_SIZE\n" );
351 exit( 1 );
352 }
353
354 pos = -1; line = 1; col = 0; charPos = -1;
355 oldEols = 0;
356 NextCh();
357
358 if ( ch == 0xEF ) { // check optional byte order mark for UTF-8
359 NextCh(); int ch1 = ch;
360 NextCh(); int ch2 = ch;
361
362 if ( ch1 != 0xBB || ch2 != 0xBF ) {
363 wprintf( L"Illegal byte order mark at start of file" );
364 exit( 1 );
365 }
366
367 Buffer* oldBuf = buffer;
368 buffer = new UTF8Buffer( buffer ); col = 0; charPos = -1;
369 delete oldBuf; oldBuf = nullptr;
370 NextCh();
371 }
372
373
374 pt = tokens = CreateToken(); // first token is a dummy
375 }
376
NextCh()377 void Scanner::NextCh() {
378 if ( oldEols > 0 ) { ch = EOL; oldEols--; }
379 else {
380 pos = buffer->GetPos();
381 ch = buffer->Read();
382 int p=buffer->GetPercent();
383
384 if ( ch==Buffer::EoF ) {
385 GUM_EMIT1( onLoad,200 );
386 } else {
387 if ( percent<p ) {
388 percent=p;
389 GUM_EMIT1( onLoad,percent );
390 }
391 }
392
393 col++; charPos++;
394
395 // replace isolated '\r' by '\n' in order to make
396 // eol handling uniform across Windows, Unix and Mac
397 if ( ch == L'\r' && buffer->Peek() != L'\n' ) ch = EOL;
398
399 if ( ch == EOL ) { /*if ( _trace_) std::cout<<line<<std::endl;*/ line++; col = 0; }
400 }
401
402
403 }
404
AddCh()405 void Scanner::AddCh() {
406 if ( tlen >= tvalLength ) {
407 tvalLength *= 2;
408 wchar_t* newBuf = new wchar_t[tvalLength];
409 memcpy( newBuf, tval, tlen*sizeof( wchar_t ) );
410 delete [] tval;
411 tval = newBuf;
412 }
413
414 if ( ch != Buffer::EoF ) {
415 tval[tlen++] = ch;
416 NextCh();
417 }
418 }
419
420
421
CreateHeapBlock()422 void Scanner::CreateHeapBlock() {
423 void* newHeap;
424 char* cur = ( char* ) firstHeap;
425
426 while ( ( ( char* ) tokens < cur ) || ( ( char* ) tokens > ( cur + HEAP_BLOCK_SIZE ) ) ) {
427 cur = *( ( char** )( cur + HEAP_BLOCK_SIZE ) );
428 free( firstHeap );
429 firstHeap = cur;
430 }
431
432 // HEAP_BLOCK_SIZE byte heap + pointer to next heap block
433 newHeap = malloc( HEAP_BLOCK_SIZE + sizeof( void* ) );
434 *heapEnd = newHeap;
435 heapEnd = ( void** )( ( ( char* ) newHeap ) + HEAP_BLOCK_SIZE );
436 *heapEnd = 0;
437 heap = newHeap;
438 heapTop = heap;
439 }
440
CreateToken()441 Token* Scanner::CreateToken() {
442 Token* t;
443
444 if ( ( ( char* ) heapTop + ( int ) sizeof( Token ) ) >= ( char* ) heapEnd ) {
445 CreateHeapBlock();
446 }
447
448 t = ( Token* ) heapTop;
449 heapTop = ( void* )( ( char* ) heapTop + sizeof( Token ) );
450 t->val = nullptr;
451 t->next = nullptr;
452 return t;
453 }
454
AppendVal(Token * t)455 void Scanner::AppendVal( Token* t ) {
456 int reqMem = ( tlen + 1 ) * sizeof( wchar_t );
457
458 if ( ( ( char* ) heapTop + reqMem ) >= ( char* ) heapEnd ) {
459 if ( reqMem > HEAP_BLOCK_SIZE ) {
460 wprintf( L"--- Too long token value\n" );
461 exit( 1 );
462 }
463
464 CreateHeapBlock();
465 }
466
467 t->val = ( wchar_t* ) heapTop;
468 heapTop = ( void* )( ( char* ) heapTop + reqMem );
469
470 wcsncpy( t->val, tval, tlen );
471 t->val[tlen] = L'\0';
472 }
473
// Scans and returns the next token from the input.
// Skips blanks (space, codes 9, 10 and 13), creates a token stamped with the
// current position, then runs a state machine started from the state that
// the `start` table associates with the current character. recKind/recEnd
// remember the last position at which a complete token had been recognized,
// so that a failed longer match can fall back to it (see case 0).
// The token text collected in tval is finally copied into the token.
Token* Scanner::NextToken() {
  while ( ch == ' ' ||
          (ch >= 9 && ch <= 10) || ch == 13
        ) NextCh();
  // NOTE(review): code 10 is skipped by the loop above although Init() maps
  // it to start state 5, so case 5 looks unreachable from here — confirm.


  int recKind = noSym;
  int recEnd = pos;
  t = CreateToken();
  t->pos = pos; t->col = col; t->line = line; t->charPos = charPos;
  int state = start.state( ch );
  tlen = 0; AddCh();

  switch ( state ) {
    case -1: { t->kind = eofSym; break; } // NextCh already done

    // fallback: if a shorter token was recognized earlier, cut the text back
    // to it and reposition the scanner right behind it; otherwise the token
    // gets kind noSym
    case 0: {
      case_0:

      if ( recKind != noSym ) {
        tlen = recEnd - t->pos;
        SetScannerBehindT();
      }

      t->kind = recKind; break;
    } // NextCh already done

    // after 'E'/'e': a digit or a sign must follow
    case 1:
      case_1:
      if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_3;}
      else if (ch == L'+' || ch == L'-') {AddCh(); goto case_2;}
      else {goto case_0;}
    // after the sign of an exponent: a digit must follow
    case 2:
      case_2:
      if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_3;}
      else {goto case_0;}
    // exponent digits: token of kind 3
    case 3:
      case_3:
      recEnd = pos; recKind = 3;
      if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_3;}
      else {t->kind = 3; break;}
    // single-character token of kind 4
    case 4:
      {t->kind = 4; break;}
    // single-character token of kind 5
    case 5:
      {t->kind = 5; break;}
    // letters, digits, '_' and '.' following a first character: kind 6
    case 6:
      case_6:
      recEnd = pos; recKind = 6;
      if (ch == L'.' || (ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_6;}
      else {t->kind = 6; break;}
    // digit sequence: kind 1, possibly continued by '.' or an exponent
    case 7:
      case_7:
      recEnd = pos; recKind = 1;
      if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_7;}
      else if (ch == L'.') {AddCh(); goto case_10;}
      else if (ch == L'E' || ch == L'e') {AddCh(); goto case_1;}
      else {t->kind = 1; break;}
    // start state registered for code 43 ('+') in Init(): kind 4 on its own,
    // or the beginning of a signed number when a digit follows
    case 8:
      recEnd = pos; recKind = 4;
      if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_11;}
      else {t->kind = 4; break;}
    // start state registered for code 45 ('-') in Init(): same handling as case 8
    case 9:
      recEnd = pos; recKind = 4;
      if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_11;}
      else {t->kind = 4; break;}
    // after '.': at least one digit must follow
    case 10:
      case_10:
      if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_12;}
      else {goto case_0;}
    // digits after a leading sign; recKind is not updated here, so when
    // neither '.' nor an exponent follows, case_0 falls back to the sign
    // alone (kind 4) and the digits are rescanned as a separate token
    case 11:
      case_11:
      if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_11;}
      else if (ch == L'.') {AddCh(); goto case_10;}
      else if (ch == L'E' || ch == L'e') {AddCh(); goto case_1;}
      else {goto case_0;}
    // digits after '.': kind 2, possibly continued by an exponent
    case 12:
      case_12:
      recEnd = pos; recKind = 2;
      if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_12;}
      else if (ch == L'E' || ch == L'e') {AddCh(); goto case_1;}
      else {t->kind = 2; break;}
    // single-character tokens of kinds 7, 8 and 9 (start states registered
    // for codes 40, 41 and 44 in Init())
    case 13:
      {t->kind = 7; break;}
    case 14:
      {t->kind = 8; break;}
    case 15:
      {t->kind = 9; break;}

  }

  AppendVal( t );
  return t;
}
567
SetScannerBehindT()568 void Scanner::SetScannerBehindT() {
569 buffer->SetPos( t->pos );
570 NextCh();
571 line = t->line; col = t->col; charPos = t->charPos;
572
573 for ( int i = 0; i < tlen; i++ ) NextCh();
574 }
575
576 // get the next token (possibly a token already seen during peeking)
Scan()577 Token* Scanner::Scan() {
578 if ( tokens->next == nullptr ) {
579 return pt = tokens = NextToken();
580 } else {
581 pt = tokens = tokens->next;
582 return tokens;
583 }
584 }
585
586 // peek for the next token, ignore pragmas
Peek()587 Token* Scanner::Peek() {
588 do {
589 if ( pt->next == nullptr ) {
590 pt->next = NextToken();
591 }
592
593 pt = pt->next;
594 } while ( pt->kind > maxT ); // skip pragmas
595
596 return pt;
597 }
598
599 // make sure that peeking starts at the current scan position
ResetPeek()600 void Scanner::ResetPeek() {
601 pt = tokens;
602 }
603
604 } // namespace
605 } // namespace
606
607
608
609