1 /***************************************************************************
2  *  aGrUM modified frames and atg files for cocoR
3  *   Copyright (c) 2005-2021 by Christophe GONZALES(_at_AMU) and Pierre-Henri WUILLEMIN(_at_LIP6)  *
4  *   info_at_agrum_dot_org
5 ***************************************************************************/
6 /*----------------------------------------------------------------------
7 Compiler Generator Coco/R,
8 Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
9 extended by M. Loeberbauer & A. Woess, Univ. of Linz
10 ported to C++ by Csaba Balazs, University of Szeged
11 with improvements by Pat Terry, Rhodes University
12 
13 This program is free software; you can redistribute it and/or modify it
14 under the terms of the GNU General Public License as published by the
15 Free Software Foundation; either version 2, or (at your option) any
16 later version.
17 
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
21 for more details.
22 
23 You should have received a copy of the GNU General Public License along
24 with this program; if not, write to the Free Software Foundation, Inc.,
25 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
26 
27 As an exception, it is allowed to write an extension of Coco/R that is
28 used as a plugin in non-free software.
29 
30 If not otherwise stated, any source code generated by Coco/R (other than
31 Coco/R itself) does not fall under the GNU General Public License.
32 -----------------------------------------------------------------------*/
33 
34 
35 #include <memory.h>
36 #include <string.h>
37 #include "Scanner.h"
38 #include <agrum/tools/core/cocoR/common.h>
39 
40    namespace gum {
41 namespace formula {
42 
43 
Token()44 Token::Token() {
45   kind = 0;
46   pos  = 0;
47   col  = 0;
48   line = 0;
49   val  = nullptr;
50   next = nullptr;
51 }
52 
~Token()53 Token::~Token() {
54   coco_string_delete( val );
55 }
56 
Buffer(FILE * s,bool isUserStream)57 Buffer::Buffer( FILE* s, bool isUserStream ) {
58 // ensure binary read on windows
59 #if _MSC_VER >= 1300
60   _setmode( _fileno( s ), _O_BINARY );
61 #endif
62   stream = s; this->isUserStream = isUserStream;
63 
64   if ( CanSeek() ) {
65     fseek( s, 0, SEEK_END );
66     fileLen = ftell( s );
67     fseek( s, 0, SEEK_SET );
68     bufLen = ( fileLen < MAX_BUFFER_LENGTH ) ? fileLen : MAX_BUFFER_LENGTH;
69     bufStart = INT_MAX; // nothing in the buffer so far
70   } else {
71     fileLen = bufLen = bufStart = 0;
72   }
73 
74   bufCapacity = ( bufLen>0 ) ? bufLen : MIN_BUFFER_LENGTH;
75   buf = new unsigned char[bufCapacity];
76 
77   if ( fileLen > 0 ) SetPos( 0 );      // setup  buffer to position 0 (start)
78   else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid
79 
80   if ( bufLen == fileLen && CanSeek() ) Close();
81 }
82 
Buffer(Buffer * b)83 Buffer::Buffer( Buffer* b ) {
84   buf = b->buf;
85   bufCapacity = b->bufCapacity;
86   b->buf = nullptr;
87   bufStart = b->bufStart;
88   bufLen = b->bufLen;
89   fileLen = b->fileLen;
90   bufPos = b->bufPos;
91   stream = b->stream;
92   b->stream = nullptr;
93   isUserStream = b->isUserStream;
94 }
95 
Buffer(const unsigned char * buf,int len)96 Buffer::Buffer( const unsigned char* buf, int len ) {
97   this->isUserStream = false;
98   this->buf = new unsigned char[len];
99   memcpy( this->buf, buf, len*sizeof( unsigned char ) );
100   bufStart = 0;
101   bufCapacity = bufLen = len;
102   fileLen = len;
103   bufPos = 0;
104   stream = nullptr;
105 }
106 
~Buffer()107 Buffer::~Buffer() {
108   Close();
109 
110   if ( buf != nullptr ) {
111     delete [] buf;
112     buf = nullptr;
113   }
114 }
115 
Close()116 void Buffer::Close() {
117   if ( !isUserStream && stream != nullptr ) {
118     fclose( stream );
119     stream = nullptr;
120   }
121 }
122 
GetPercent()123 int Buffer::GetPercent() {
124   return ( int )( ( 100.0*GetPos() )/fileLen );
125 }
126 
Read()127 int Buffer::Read() {
128   if ( bufPos < bufLen ) {
129     return buf[bufPos++];
130   } else if ( GetPos() < fileLen ) {
131 
132     SetPos( GetPos() ); // shift buffer start to Pos
133     return buf[bufPos++];
134   } else if ( ( stream != nullptr ) && !CanSeek() && ( ReadNextStreamChunk() > 0 ) ) {
135     return buf[bufPos++];
136   } else {
137     return EoF;
138   }
139 }
140 
Peek()141 int Buffer::Peek() {
142   int curPos = GetPos();
143   int ch = Read();
144   SetPos( curPos );
145   return ch;
146 }
147 
148 // beg .. begin, zero-based, inclusive, in byte
149 // end .. end, zero-based, exclusive, in byte
GetString(int beg,int end)150 wchar_t* Buffer::GetString( int beg, int end ) {
151   int len = 0;
152   wchar_t* buf = new wchar_t[end - beg];
153   int oldPos = GetPos();
154   SetPos( beg );
155 
156   while ( GetPos() < end ) buf[len++] = ( wchar_t ) Read();
157 
158   SetPos( oldPos );
159   wchar_t* res = coco_string_create( buf, 0, len );
160   coco_string_delete( buf );
161   return res;
162 }
163 
GetPos()164 int Buffer::GetPos() {
165   return bufPos + bufStart;
166 }
167 
SetPos(int value)168 void Buffer::SetPos( int value ) {
169   if ( ( value >= fileLen ) && ( stream != nullptr ) && !CanSeek() ) {
170     // Wanted position is after buffer and the stream
171     // is not seek-able e.g. network or console,
172     // thus we have to read the stream manually till
173     // the wanted position is in sight.
174     while ( ( value >= fileLen ) && ( ReadNextStreamChunk() > 0 ) );
175   }
176 
177   if ( ( value < 0 ) || ( value > fileLen ) ) {
178     wprintf( L"--- buffer out of bounds access, position: %d\n", value );
179     exit( 1 );
180   }
181 
182   if ( ( value >= bufStart ) && ( value < ( bufStart + bufLen ) ) ) { // already in buffer
183     bufPos = value - bufStart;
184   } else if ( stream != nullptr ) { // must be swapped in
185     fseek( stream, value, SEEK_SET );
186     bufLen = (int)fread( buf, int(sizeof( unsigned char )), bufCapacity, stream );
187     bufStart = value; bufPos = 0;
188   } else {
189     bufPos = fileLen - bufStart; // make Pos return fileLen
190   }
191 }
192 
193 // Read the next chunk of bytes from the stream, increases the buffer
194 // if needed and updates the fields fileLen and bufLen.
195 // Returns the number of bytes read.
ReadNextStreamChunk()196 int Buffer::ReadNextStreamChunk() {
197   int free = bufCapacity - bufLen;
198 
199   if ( free == 0 ) {
200     // in the case of a growing input stream
201     // we can neither seek in the stream, nor can we
202     // foresee the maximum length, thus we must adapt
203     // the buffer size on demand.
204     bufCapacity = bufLen * 2;
205     unsigned char* newBuf = new unsigned char[bufCapacity];
206     memcpy( newBuf, buf, bufLen*sizeof( unsigned char ) );
207     delete [] buf;
208     buf = newBuf;
209     free = bufLen;
210   }
211 
212   int read = (int)fread( buf + bufLen, int(sizeof( unsigned char )), free, stream );
213 
214   if ( read > 0 ) {
215     fileLen = bufLen = ( bufLen + read );
216     return read;
217   }
218 
219   // end of stream reached
220   return 0;
221 }
222 
CanSeek()223 bool Buffer::CanSeek() {
224   return ( stream != nullptr ) && ( ftell( stream ) != -1 );
225 }
226 
Read()227 int UTF8Buffer::Read() {
228   int ch;
229 
230   do {
231     ch = Buffer::Read();
232     // until we find a utf8 start (0xxxxxxx or 11xxxxxx)
233   } while ( ( ch >= 128 ) && ( ( ch & 0xC0 ) != 0xC0 ) && ( ch != EoF ) );
234 
235   if ( ch < 128 || ch == EoF ) {
236     // nothing to do, first 127 chars are the same in ascii and utf8
237     // 0xxxxxxx or end of file character
238   } else if ( ( ch & 0xF0 ) == 0xF0 ) {
239     // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
240     int c1 = ch & 0x07; ch = Buffer::Read();
241     int c2 = ch & 0x3F; ch = Buffer::Read();
242     int c3 = ch & 0x3F; ch = Buffer::Read();
243     int c4 = ch & 0x3F;
244     ch = ( ( ( ( ( c1 << 6 ) | c2 ) << 6 ) | c3 ) << 6 ) | c4;
245   } else if ( ( ch & 0xE0 ) == 0xE0 ) {
246     // 1110xxxx 10xxxxxx 10xxxxxx
247     int c1 = ch & 0x0F; ch = Buffer::Read();
248     int c2 = ch & 0x3F; ch = Buffer::Read();
249     int c3 = ch & 0x3F;
250     ch = ( ( ( c1 << 6 ) | c2 ) << 6 ) | c3;
251   } else if ( ( ch & 0xC0 ) == 0xC0 ) {
252     // 110xxxxx 10xxxxxx
253     int c1 = ch & 0x1F; ch = Buffer::Read();
254     int c2 = ch & 0x3F;
255     ch = ( c1 << 6 ) | c2;
256   }
257 
258   return ch;
259 }
260 
Scanner(const unsigned char * buf,int len,std::string filename,bool trace)261 Scanner::Scanner(const unsigned char* buf, int len, std::string filename, bool trace) {
262   buffer = new Buffer( buf, len );
263    _filenamne_=widen( filename.c_str() );
264    _trace_=trace;
265   Init();
266 }
267 
Scanner(const char * fileName,bool trace)268 Scanner::Scanner( const char* fileName,bool trace ) {
269   Load( widen( std::string( fileName ) ).c_str() );
270    _trace_=trace;
271 }
272 
Scanner(const wchar_t * fileName,bool trace)273 Scanner::Scanner( const wchar_t* fileName,bool trace ) {
274   Load( fileName );
275    _trace_=trace;
276 }
277 
Load(const wchar_t * fileName)278 void Scanner::Load( const wchar_t* fileName ) {
279   FILE* stream;
280   char* chFileName = coco_string_create_char( fileName );
281 
282   if ( ( stream = fopen( chFileName, "rb" ) ) == nullptr ) {
283     std::string s( "No such file : " ); s+=chFileName;
284     GUM_ERROR( gum::IOError,s )
285   }
286 
287   coco_string_delete( chFileName );
288   buffer = new Buffer( stream, false );
289    _filenamne_=std::wstring( fileName );
290   Init();
291 }
292 
Scanner(FILE * s,bool trace)293 Scanner::Scanner( FILE* s,bool trace ) {
294   buffer = new Buffer( s, true );
295    _filenamne_=L"FILE";
296   Init();
297    _trace_=trace;
298 }
299 
~Scanner()300 Scanner::~Scanner() {
301   char* cur = ( char* ) firstHeap;
302 
303   while ( cur != nullptr ) {
304     cur = *( char** )( cur + HEAP_BLOCK_SIZE );
305     free( firstHeap );
306     firstHeap = cur;
307   }
308 
309   if ( tval ) delete [] tval;
310 
311   if ( buffer ) delete buffer;
312 }
313 
Init()314 void Scanner::Init() {
315   percent=-1;
316   EOL    = '\n';
317   eofSym = 0;
318   	maxT = 10;
319 	noSym = 10;
320 	int i;
321 	for (i = 48; i <= 57; ++i) start.set(i, 7);
322 	for (i = 65; i <= 90; ++i) start.set(i, 6);
323 	for (i = 95; i <= 95; ++i) start.set(i, 6);
324 	for (i = 97; i <= 122; ++i) start.set(i, 6);
325 	start.set(43, 8);
326 	start.set(45, 9);
327 	for (i = 42; i <= 42; ++i) start.set(i, 4);
328 	for (i = 47; i <= 47; ++i) start.set(i, 4);
329 	for (i = 60; i <= 60; ++i) start.set(i, 4);
330 	for (i = 62; i <= 62; ++i) start.set(i, 4);
331 	for (i = 94; i <= 94; ++i) start.set(i, 4);
332 	start.set(10, 5);
333 	start.set(40, 13);
334 	start.set(41, 14);
335 	start.set(44, 15);
336 		start.set(Buffer::EoF, -1);
337 
338 
339   tvalLength = 128;
340   tval = new wchar_t[tvalLength]; // text of current token
341 
342   // HEAP_BLOCK_SIZE byte heap + pointer to next heap block
343   heap = malloc( HEAP_BLOCK_SIZE + sizeof( void* ) );
344   firstHeap = heap;
345   heapEnd = ( void** )( ( ( char* ) heap ) + HEAP_BLOCK_SIZE );
346   *heapEnd = 0;
347   heapTop = heap;
348 
349   if ( sizeof( Token ) > HEAP_BLOCK_SIZE ) {
350     wprintf( L"--- Too small HEAP_BLOCK_SIZE\n" );
351     exit( 1 );
352   }
353 
354   pos = -1; line = 1; col = 0; charPos = -1;
355   oldEols = 0;
356   NextCh();
357 
358   if ( ch == 0xEF ) { // check optional byte order mark for UTF-8
359     NextCh(); int ch1 = ch;
360     NextCh(); int ch2 = ch;
361 
362     if ( ch1 != 0xBB || ch2 != 0xBF ) {
363       wprintf( L"Illegal byte order mark at start of file" );
364       exit( 1 );
365     }
366 
367     Buffer* oldBuf = buffer;
368     buffer = new UTF8Buffer( buffer ); col = 0; charPos = -1;
369     delete oldBuf; oldBuf = nullptr;
370     NextCh();
371   }
372 
373 
374   pt = tokens = CreateToken(); // first token is a dummy
375 }
376 
NextCh()377 void Scanner::NextCh() {
378   if ( oldEols > 0 ) { ch = EOL; oldEols--; }
379   else {
380     pos = buffer->GetPos();
381     ch = buffer->Read();
382     int p=buffer->GetPercent();
383 
384     if ( ch==Buffer::EoF ) {
385       GUM_EMIT1( onLoad,200 );
386     } else {
387       if ( percent<p ) {
388         percent=p;
389         GUM_EMIT1( onLoad,percent );
390       }
391     }
392 
393     col++; charPos++;
394 
395     // replace isolated '\r' by '\n' in order to make
396     // eol handling uniform across Windows, Unix and Mac
397     if ( ch == L'\r' && buffer->Peek() != L'\n' ) ch = EOL;
398 
399     if ( ch == EOL ) { /*if ( _trace_) std::cout<<line<<std::endl;*/ line++; col = 0; }
400   }
401 
402 
403 }
404 
AddCh()405 void Scanner::AddCh() {
406   if ( tlen >= tvalLength ) {
407     tvalLength *= 2;
408     wchar_t* newBuf = new wchar_t[tvalLength];
409     memcpy( newBuf, tval, tlen*sizeof( wchar_t ) );
410     delete [] tval;
411     tval = newBuf;
412   }
413 
414   if ( ch != Buffer::EoF ) {
415     		tval[tlen++] = ch;
416     NextCh();
417   }
418 }
419 
420 
421 
CreateHeapBlock()422 void Scanner::CreateHeapBlock() {
423   void* newHeap;
424   char* cur = ( char* ) firstHeap;
425 
426   while ( ( ( char* ) tokens < cur ) || ( ( char* ) tokens > ( cur + HEAP_BLOCK_SIZE ) ) ) {
427     cur = *( ( char** )( cur + HEAP_BLOCK_SIZE ) );
428     free( firstHeap );
429     firstHeap = cur;
430   }
431 
432   // HEAP_BLOCK_SIZE byte heap + pointer to next heap block
433   newHeap = malloc( HEAP_BLOCK_SIZE + sizeof( void* ) );
434   *heapEnd = newHeap;
435   heapEnd = ( void** )( ( ( char* ) newHeap ) + HEAP_BLOCK_SIZE );
436   *heapEnd = 0;
437   heap = newHeap;
438   heapTop = heap;
439 }
440 
CreateToken()441 Token* Scanner::CreateToken() {
442   Token* t;
443 
444   if ( ( ( char* ) heapTop + ( int ) sizeof( Token ) ) >= ( char* ) heapEnd ) {
445     CreateHeapBlock();
446   }
447 
448   t = ( Token* ) heapTop;
449   heapTop = ( void* )( ( char* ) heapTop + sizeof( Token ) );
450   t->val = nullptr;
451   t->next = nullptr;
452   return t;
453 }
454 
AppendVal(Token * t)455 void Scanner::AppendVal( Token* t ) {
456   int reqMem = ( tlen + 1 ) * sizeof( wchar_t );
457 
458   if ( ( ( char* ) heapTop + reqMem ) >= ( char* ) heapEnd ) {
459     if ( reqMem > HEAP_BLOCK_SIZE ) {
460       wprintf( L"--- Too long token value\n" );
461       exit( 1 );
462     }
463 
464     CreateHeapBlock();
465   }
466 
467   t->val = ( wchar_t* ) heapTop;
468   heapTop = ( void* )( ( char* ) heapTop + reqMem );
469 
470   wcsncpy( t->val, tval, tlen );
471   t->val[tlen] = L'\0';
472 }
473 
// Scans and returns the next token from the input.
// Skips blanks, tabs, line feeds and carriage returns, then runs a state
// machine that starts in the state registered for the first character (see
// the start table filled in Init()). Whenever an accepting state is reached,
// recKind/recEnd remember it so that a failed longer match can fall back to
// it (case_0). The token text is stored through AppendVal() before returning.
Token* Scanner::NextToken() {
  // skip ' ', characters 9..10 and 13
  while ( ch == ' ' ||
          			(ch >= 9 && ch <= 10) || ch == 13
        ) NextCh();


  // last accepting state seen so far: none yet
  int recKind = noSym;
  int recEnd = pos;
  t = CreateToken();
  t->pos = pos; t->col = col; t->line = line; t->charPos = charPos;
  int state = start.state( ch );
  // the first character always becomes part of the token text
  tlen = 0; AddCh();

  switch ( state ) {
    // state registered for Buffer::EoF
    case -1: { t->kind = eofSym; break; } // NextCh already done

    // no transition possible: fall back to the last accepting state if any
    // (shorten the text and reposition the scanner), else kind is noSym
    case 0: {
case_0:

      if ( recKind != noSym ) {
        tlen = recEnd - t->pos;
        SetScannerBehindT();
      }

      t->kind = recKind; break;
    } // NextCh already done

    		// after 'E'/'e': expects a digit or a sign
    		case 1:
			case_1:
			if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_3;}
			else if (ch == L'+' || ch == L'-') {AddCh(); goto case_2;}
			else {goto case_0;}
		// after the exponent sign: a digit is required
		case 2:
			case_2:
			if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_3;}
			else {goto case_0;}
		// exponent digits: accepts kind 3
		case 3:
			case_3:
			recEnd = pos; recKind = 3;
			if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_3;}
			else {t->kind = 3; break;}
		// single-character token of kind 4
		case 4:
			{t->kind = 4; break;}
		// single-character token of kind 5
		case 5:
			{t->kind = 5; break;}
		// letters, digits, '_' and '.': accepts kind 6
		case 6:
			case_6:
			recEnd = pos; recKind = 6;
			if (ch == L'.' || (ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_6;}
			else {t->kind = 6; break;}
		// run of digits: accepts kind 1; '.' or an exponent may follow
		case 7:
			case_7:
			recEnd = pos; recKind = 1;
			if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_7;}
			else if (ch == L'.') {AddCh(); goto case_10;}
			else if (ch == L'E' || ch == L'e') {AddCh(); goto case_1;}
			else {t->kind = 1; break;}
		// start state 8 (registered for character 43, '+'): kind 4 alone,
		// or continues with digits in state 11
		case 8:
			recEnd = pos; recKind = 4;
			if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_11;}
			else {t->kind = 4; break;}
		// start state 9 (registered for character 45, '-'): same shape as state 8
		case 9:
			recEnd = pos; recKind = 4;
			if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_11;}
			else {t->kind = 4; break;}
		// after '.': a digit is required
		case 10:
			case_10:
			if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_12;}
			else {goto case_0;}
		// digits after a leading sign: not accepting by itself, so a plain
		// signed digit run falls back to the kind-4 sign recorded in state 8/9
		case 11:
			case_11:
			if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_11;}
			else if (ch == L'.') {AddCh(); goto case_10;}
			else if (ch == L'E' || ch == L'e') {AddCh(); goto case_1;}
			else {goto case_0;}
		// digits after '.': accepts kind 2; an exponent may follow
		case 12:
			case_12:
			recEnd = pos; recKind = 2;
			if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_12;}
			else if (ch == L'E' || ch == L'e') {AddCh(); goto case_1;}
			else {t->kind = 2; break;}
		// single-character tokens of kind 7, 8 and 9
		case 13:
			{t->kind = 7; break;}
		case 14:
			{t->kind = 8; break;}
		case 15:
			{t->kind = 9; break;}

  }

  // copy the collected text into the token
  AppendVal( t );
  return t;
}
567 
SetScannerBehindT()568 void Scanner::SetScannerBehindT() {
569   buffer->SetPos( t->pos );
570   NextCh();
571   line = t->line; col = t->col; charPos = t->charPos;
572 
573   for ( int i = 0; i < tlen; i++ ) NextCh();
574 }
575 
576 // get the next token (possibly a token already seen during peeking)
Scan()577 Token* Scanner::Scan() {
578   if ( tokens->next == nullptr ) {
579     return pt = tokens = NextToken();
580   } else {
581     pt = tokens = tokens->next;
582     return tokens;
583   }
584 }
585 
586 // peek for the next token, ignore pragmas
Peek()587 Token* Scanner::Peek() {
588   do {
589     if ( pt->next == nullptr ) {
590       pt->next = NextToken();
591     }
592 
593     pt = pt->next;
594   } while ( pt->kind > maxT ); // skip pragmas
595 
596   return pt;
597 }
598 
599 // make sure that peeking starts at the current scan position
ResetPeek()600 void Scanner::ResetPeek() {
601   pt = tokens;
602 }
603 
604 } // namespace
605 } // namespace
606 
607 
608 
609