1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <basiccharclass.hxx>
21 #include <scanner.hxx>
22 #include <sbintern.hxx>
23 #include <runtime.hxx>
24 
25 #include <basic/sberrors.hxx>
26 #include <i18nlangtag/lang.h>
27 #include <svl/zforlist.hxx>
28 #include <rtl/character.hxx>
29 
SbiScanner(const OUString & rBuf,StarBASIC * p)30 SbiScanner::SbiScanner( const OUString& rBuf, StarBASIC* p ) : aBuf( rBuf )
31 {
32     pBasic   = p;
33     nLineIdx = -1;
34     nVal     = 0;
35     eScanType = SbxVARIANT;
36     nErrors  = 0;
37     nBufPos  = 0;
38     nSavedCol1 = 0;
39     nColLock = 0;
40     nLine    = 0;
41     nCol1    = 0;
42     nCol2    = 0;
43     nCol     = 0;
44     bError   =
45     bAbort   =
46     bSpaces  =
47     bNumber  =
48     bSymbol  =
49     bCompatible =
50     bVBASupportOn =
51     bInStatement =
52     bPrevLineExtentsComment = false;
53     bHash    = true;
54     nSaveLineIdx = -1;
55 }
56 
LockColumn()57 void SbiScanner::LockColumn()
58 {
59     if( !nColLock++ )
60         nSavedCol1 = nCol1;
61 }
62 
UnlockColumn()63 void SbiScanner::UnlockColumn()
64 {
65     if( nColLock )
66         nColLock--;
67 }
68 
GenError(ErrCode code)69 void SbiScanner::GenError( ErrCode code )
70 {
71     if( GetSbData()->bBlockCompilerError )
72     {
73         bAbort = true;
74         return;
75     }
76     if( !bError )
77     {
78         bool bRes = true;
79         // report only one error per statement
80         bError = true;
81         if( pBasic )
82         {
83             // in case of EXPECTED or UNEXPECTED it always refers
84             // to the last token, so take the Col1 over
85             sal_Int32 nc = nColLock ? nSavedCol1 : nCol1;
86             if ( code.anyOf(
87                     ERRCODE_BASIC_EXPECTED,
88                     ERRCODE_BASIC_UNEXPECTED,
89                     ERRCODE_BASIC_SYMBOL_EXPECTED,
90                     ERRCODE_BASIC_LABEL_EXPECTED) )
91             {
92                     nc = nCol1;
93                     if( nc > nCol2 ) nCol2 = nc;
94             }
95             bRes = pBasic->CError( code, aError, nLine, nc, nCol2 );
96         }
97         bAbort = bAbort || !bRes  || ( code == ERRCODE_BASIC_NO_MEMORY || code == ERRCODE_BASIC_PROG_TOO_LARGE );
98     }
99     nErrors++;
100 }
101 
102 
103 // used by SbiTokenizer::MayBeLabel() to detect a label
DoesColonFollow()104 bool SbiScanner::DoesColonFollow()
105 {
106     if(nCol < aLine.getLength() && aLine[nCol] == ':')
107     {
108         ++nLineIdx; ++nCol;
109         return true;
110     }
111     else
112         return false;
113 }
114 
115 // test for legal suffix
GetSuffixType(sal_Unicode c)116 static SbxDataType GetSuffixType( sal_Unicode c )
117 {
118     switch (c)
119     {
120     case '%':
121         return SbxINTEGER;
122     case '&':
123         return SbxLONG;
124     case '!':
125         return SbxSINGLE;
126     case '#':
127         return SbxDOUBLE;
128     case '@':
129         return SbxCURRENCY;
130     case '$':
131         return SbxSTRING;
132     default:
133         return SbxVARIANT;
134     }
135 }
136 
// NextSym() reads the next symbol into the variables aSym, nVal and eScanType;
// its return value is false at EOF or on errors
139 #define BUF_SIZE 80
140 
scanAlphanumeric()141 void SbiScanner::scanAlphanumeric()
142 {
143     sal_Int32 n = nCol;
144     while(nCol < aLine.getLength() && (BasicCharClass::isAlphaNumeric(aLine[nCol], bCompatible) || aLine[nCol] == '_'))
145     {
146         ++nLineIdx;
147         ++nCol;
148     }
149     aSym = aLine.copy(n, nCol - n);
150 }
151 
scanGoto()152 void SbiScanner::scanGoto()
153 {
154     sal_Int32 n = nCol;
155     while(n < aLine.getLength() && BasicCharClass::isWhitespace(aLine[n]))
156         ++n;
157 
158     if(n + 1 < aLine.getLength())
159     {
160         OUString aTemp = aLine.copy(n, 2);
161         if(aTemp.equalsIgnoreAsciiCase("to"))
162         {
163             aSym = "goto";
164             nLineIdx += n + 2 - nCol;
165             nCol = n + 2;
166         }
167     }
168 }
169 
readLine()170 bool SbiScanner::readLine()
171 {
172     if(nBufPos >= aBuf.getLength())
173         return false;
174 
175     sal_Int32 n = nBufPos;
176     sal_Int32 nLen = aBuf.getLength();
177 
178     while(n < nLen && aBuf[n] != '\r' && aBuf[n] != '\n')
179         ++n;
180 
181     // Trim trailing whitespace
182     sal_Int32 nEnd = n;
183     while(nBufPos < nEnd && BasicCharClass::isWhitespace(aBuf[nEnd - 1]))
184         --nEnd;
185 
186     aLine = aBuf.copy(nBufPos, nEnd - nBufPos);
187 
188     // Fast-forward past the line ending
189     if(n + 1 < nLen && aBuf[n] == '\r' && aBuf[n + 1] == '\n')
190         n += 2;
191     else if(n < nLen)
192         ++n;
193 
194     nBufPos = n;
195     nLineIdx = 0;
196 
197     ++nLine;
198     nCol = nCol1 = nCol2 = 0;
199     nColLock = 0;
200 
201     return true;
202 }
203 
NextSym()204 bool SbiScanner::NextSym()
205 {
206     // memorize for the EOLN-case
207     sal_Int32 nOldLine = nLine;
208     sal_Int32 nOldCol1 = nCol1;
209     sal_Int32 nOldCol2 = nCol2;
210     sal_Unicode buf[ BUF_SIZE ], *p = buf;
211 
212     eScanType = SbxVARIANT;
213     aSym.clear();
214     bHash = bSymbol = bNumber = bSpaces = false;
215     bool bCompilerDirective = false;
216 
217     // read in line?
218     if (nLineIdx == -1)
219     {
220         if(!readLine())
221             return false;
222 
223         nOldLine = nLine;
224         nOldCol1 = nOldCol2 = 0;
225     }
226 
227     const sal_Int32 nLineIdxScanStart = nLineIdx;
228 
229     if(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol]))
230     {
231         bSpaces = true;
232         while(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol]))
233         {
234             ++nLineIdx;
235             ++nCol;
236         }
237     }
238 
239     nCol1 = nCol;
240 
241     // only blank line?
242     if(nCol >= aLine.getLength())
243         goto eoln;
244 
245     if( bPrevLineExtentsComment )
246         goto PrevLineCommentLbl;
247 
248     if(nCol < aLine.getLength() && aLine[nCol] == '#')
249     {
250         sal_Int32 nLineTempIdx = nLineIdx;
251         do
252         {
253             nLineTempIdx++;
254         } while (nLineTempIdx < aLine.getLength() && !BasicCharClass::isWhitespace(aLine[nLineTempIdx])
255             && aLine[nLineTempIdx] != '#' && aLine[nLineTempIdx] != ',');
256         // leave it if it is a date literal - it will be handled later
257         if (nLineTempIdx >= aLine.getLength() || aLine[nLineTempIdx] != '#')
258         {
259             ++nLineIdx;
260             ++nCol;
261             //ignore compiler directives (# is first non-space character)
262             if (nOldCol2 == 0)
263                 bCompilerDirective = true;
264             else
265                 bHash = true;
266         }
267     }
268 
269     // copy character if symbol
270     if(nCol < aLine.getLength() && (BasicCharClass::isAlpha(aLine[nCol], bCompatible) || aLine[nCol] == '_'))
271     {
272         // if there's nothing behind '_' , it's the end of a line!
273         if(nCol + 1 == aLine.getLength() && aLine[nCol] == '_')
274         {
275             // Note that nCol is not incremented here...
276             ++nLineIdx;
277             goto eoln;
278         }
279 
280         bSymbol = true;
281 
282         scanAlphanumeric();
283 
284         // Special handling for "go to"
285         if(nCol < aLine.getLength() && bCompatible && aSym.equalsIgnoreAsciiCase("go"))
286             scanGoto();
287 
288         // replace closing '_' by space when end of line is following
289         // (wrong line continuation otherwise)
290         if (nCol == aLine.getLength() && aLine[nCol - 1] == '_')
291         {
292             // We are going to modify a potentially shared string, so force
293             // a copy, so that aSym is not modified by the following operation
294             OUString aSymCopy( aSym.getStr(), aSym.getLength() );
295             aSym = aSymCopy;
296 
297             // HACK: modifying a potentially shared string here!
298             const_cast<sal_Unicode*>(aLine.getStr())[nLineIdx - 1] = ' ';
299         }
300 
301         // type recognition?
302         // don't test the exclamation mark
303         // if there's a symbol behind it
304         else if((nCol >= aLine.getLength() || aLine[nCol] != '!') ||
305                 (nCol + 1 >= aLine.getLength() || !BasicCharClass::isAlpha(aLine[nCol + 1], bCompatible)))
306         {
307             if(nCol < aLine.getLength())
308             {
309                 SbxDataType t(GetSuffixType(aLine[nCol]));
310                 if( t != SbxVARIANT )
311                 {
312                     eScanType = t;
313                     ++nLineIdx;
314                     ++nCol;
315                 }
316             }
317         }
318     }
319 
320     // read in and convert if number
321     else if((nCol < aLine.getLength() && rtl::isAsciiDigit(aLine[nCol])) ||
322             (nCol + 1 < aLine.getLength() && aLine[nCol] == '.' && rtl::isAsciiDigit(aLine[nCol + 1])))
323     {
324         short exp = 0;
325         short dec = 0;
326         eScanType = SbxDOUBLE;
327         bool bScanError = false;
328         bool bBufOverflow = false;
329         // All this because of 'D' or 'd' floating point type, sigh...
330         while(!bScanError && nCol < aLine.getLength() && strchr("0123456789.DEde", aLine[nCol]))
331         {
332             // from 4.1.1996: buffer full? -> go on scanning empty
333             if( (p-buf) == (BUF_SIZE-1) )
334             {
335                 bBufOverflow = true;
336                 ++nLineIdx;
337                 ++nCol;
338                 continue;
339             }
340             // point or exponent?
341             if(aLine[nCol] == '.')
342             {
343                 if( ++dec > 1 )
344                     bScanError = true;
345                 else
346                     *p++ = '.';
347             }
348             else if(strchr("DdEe", aLine[nCol]))
349             {
350                 if (++exp > 1)
351                     bScanError = true;
352                 else
353                 {
354                     *p++ = 'E';
355                     if (nCol + 1 < aLine.getLength() && (aLine[nCol+1] == '+' || aLine[nCol+1] == '-'))
356                     {
357                         ++nLineIdx;
358                         ++nCol;
359                         if( (p-buf) == (BUF_SIZE-1) )
360                         {
361                             bBufOverflow = true;
362                             continue;
363                         }
364                         *p++ = aLine[nCol];
365                     }
366                 }
367             }
368             else
369             {
370                 *p++ = aLine[nCol];
371             }
372             ++nLineIdx;
373             ++nCol;
374         }
375         *p = 0;
376         aSym = p; bNumber = true;
377 
378         // For bad characters, scan and parse errors generate only one error.
379         ErrCode nError = ERRCODE_NONE;
380         if (bScanError)
381         {
382             --nLineIdx;
383             --nCol;
384             aError = OUString( aLine[nCol]);
385             nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER;
386         }
387 
388         rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
389         const sal_Unicode* pParseEnd = buf;
390         nVal = rtl_math_uStringToDouble( buf, buf+(p-buf), '.', ',', &eStatus, &pParseEnd );
391         if (pParseEnd != buf+(p-buf))
392         {
393             // e.g. "12e" or "12e+", or with bScanError "12d"+"E".
394             sal_Int32 nChars = buf+(p-buf) - pParseEnd;
395             nLineIdx -= nChars;
396             nCol -= nChars;
397             // For bScanError, nLineIdx and nCol were already decremented, just
398             // add that character to the parse end.
399             if (bScanError)
400                 ++nChars;
401             // Copy error position from original string, not the buffer
402             // replacement where "12dE" => "12EE".
403             aError = aLine.copy( nCol, nChars);
404             nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER;
405         }
406         else if (eStatus != rtl_math_ConversionStatus_Ok)
407         {
408             // Keep the scan error and character at position, if any.
409             if (!nError)
410                 nError = ERRCODE_BASIC_MATH_OVERFLOW;
411         }
412 
413         if (nError)
414             GenError( nError );
415 
416         if( !dec && !exp )
417         {
418             if( nVal >= SbxMININT && nVal <= SbxMAXINT )
419                 eScanType = SbxINTEGER;
420             else if( nVal >= SbxMINLNG && nVal <= SbxMAXLNG )
421                     eScanType = SbxLONG;
422         }
423 
424         if( bBufOverflow )
425             GenError( ERRCODE_BASIC_MATH_OVERFLOW );
426 
427         // type recognition?
428         if( nCol < aLine.getLength() )
429         {
430             SbxDataType t(GetSuffixType(aLine[nCol]));
431             if( t != SbxVARIANT )
432             {
433                 eScanType = t;
434                 ++nLineIdx;
435                 ++nCol;
436             }
437        }
438     }
439 
440     // Hex/octal number? Read in and convert:
441     else if(aLine.getLength() - nCol > 1 && aLine[nCol] == '&')
442     {
443         ++nLineIdx; ++nCol;
444         sal_Unicode base = 16;
445         sal_Unicode xch  = aLine[nCol];
446         ++nLineIdx; ++nCol;
447         switch( rtl::toAsciiUpperCase( xch ) )
448         {
449             case 'O':
450                 base = 8;
451                 break;
452             case 'H':
453                 break;
454             default :
455                 // treated as an operator
456                 --nLineIdx; --nCol; nCol1 = nCol-1;
457                 aSym = "&";
458                 return true;
459         }
460         bNumber = true;
461         // Hex literals are signed Integers ( as defined by basic
462         // e.g. -2,147,483,648 through 2,147,483,647 (signed)
463         sal_uInt64 lu = 0;
464         bool bOverflow = false;
465         while(nCol < aLine.getLength() && BasicCharClass::isAlphaNumeric(aLine[nCol], false))
466         {
467             sal_Unicode ch = rtl::toAsciiUpperCase(aLine[nCol]);
468             ++nLineIdx; ++nCol;
469             if( ((base == 16 ) && rtl::isAsciiHexDigit( ch ) ) ||
470                      ((base == 8) && rtl::isAsciiOctalDigit( ch )))
471             {
472                 int i = ch  - '0';
473                 if( i > 9 ) i -= 7;
474                 lu = ( lu * base ) + i;
475                 if( lu > SAL_MAX_UINT32 )
476                 {
477                     bOverflow = true;
478                 }
479             }
480             else
481             {
482                 aError = OUString(ch);
483                 GenError( ERRCODE_BASIC_BAD_CHAR_IN_NUMBER );
484             }
485         }
486         if(nCol < aLine.getLength() && aLine[nCol] == '&')
487         {
488             ++nLineIdx;
489             ++nCol;
490         }
491         // tdf#62326 - If the value of the hex string lies within the range of 0x8000 (SbxMAXINT + 1)
492         // and 0xFFFF (SbxMAXUINT) inclusive, cast the value to 16 bit in order to get
493         // signed integers, e.g., SbxMININT through SbxMAXINT
494         sal_Int32 ls = (lu > SbxMAXINT && lu <= SbxMAXUINT) ? static_cast<sal_Int16>(lu) : static_cast<sal_Int32>(lu);
495         nVal = static_cast<double>(ls);
496         eScanType = ( ls >= SbxMININT && ls <= SbxMAXINT ) ? SbxINTEGER : SbxLONG;
497         if( bOverflow )
498             GenError( ERRCODE_BASIC_MATH_OVERFLOW );
499     }
500 
501     // Strings:
502     else if (nLineIdx < aLine.getLength() && (aLine[nLineIdx] == '"' || aLine[nLineIdx] == '['))
503     {
504         sal_Unicode cSep = aLine[nLineIdx];
505         if( cSep == '[' )
506         {
507             bSymbol = true;
508             cSep = ']';
509         }
510         sal_Int32 n = nCol + 1;
511         while (nLineIdx < aLine.getLength())
512         {
513             do
514             {
515                 nLineIdx++;
516                 nCol++;
517             }
518             while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != cSep));
519             if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == cSep)
520             {
521                 nLineIdx++; nCol++;
522                 if (nLineIdx >= aLine.getLength() || aLine[nLineIdx] != cSep || cSep == ']')
523                 {
524                     // If VBA Interop then doesn't eat the [] chars
525                     if ( cSep == ']' && bVBASupportOn )
526                         aSym = aLine.copy( n - 1, nCol - n  + 1);
527                     else
528                         aSym = aLine.copy( n, nCol - n - 1 );
529                     // get out duplicate string delimiters
530                     OUStringBuffer aSymBuf(aSym.getLength());
531                     for ( sal_Int32 i = 0, len = aSym.getLength(); i < len; ++i )
532                     {
533                         aSymBuf.append( aSym[i] );
534                         if ( aSym[i] == cSep && ( i+1 < len ) && aSym[i+1] == cSep )
535                             ++i;
536                     }
537                     aSym = aSymBuf.makeStringAndClear();
538                     if( cSep != ']' )
539                         eScanType = SbxSTRING;
540                     break;
541                 }
542             }
543             else
544             {
545                 aError = OUString(cSep);
546                 GenError( ERRCODE_BASIC_EXPECTED );
547             }
548         }
549     }
550 
551     // Date:
552     else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#')
553     {
554         sal_Int32 n = nCol + 1;
555         do
556         {
557             nLineIdx++;
558             nCol++;
559         }
560         while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != '#'));
561         if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#')
562         {
563             nLineIdx++; nCol++;
564             aSym = aLine.copy( n, nCol - n - 1 );
565 
566             // parse date literal
567             std::shared_ptr<SvNumberFormatter> pFormatter;
568             if (GetSbData()->pInst)
569             {
570                 pFormatter = GetSbData()->pInst->GetNumberFormatter();
571             }
572             else
573             {
574                 sal_uInt32 nDummy;
575                 pFormatter = SbiInstance::PrepareNumberFormatter( nDummy, nDummy, nDummy );
576             }
577             sal_uInt32 nIndex = pFormatter->GetStandardIndex( LANGUAGE_ENGLISH_US);
578             bool bSuccess = pFormatter->IsNumberFormat(aSym, nIndex, nVal);
579             if( bSuccess )
580             {
581                 SvNumFormatType nType_ = pFormatter->GetType(nIndex);
582                 if( !(nType_ & SvNumFormatType::DATE) )
583                     bSuccess = false;
584             }
585 
586             if (!bSuccess)
587                 GenError( ERRCODE_BASIC_CONVERSION );
588 
589             bNumber = true;
590             eScanType = SbxDOUBLE;
591         }
592         else
593         {
594             aError = OUString('#');
595             GenError( ERRCODE_BASIC_EXPECTED );
596         }
597     }
598     // invalid characters:
599     else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] >= 0x7F)
600     {
601         GenError( ERRCODE_BASIC_SYNTAX ); nLineIdx++; nCol++;
602     }
603     // other groups:
604     else
605     {
606         sal_Int32 n = 1;
607         auto nChar = nLineIdx < aLine.getLength() ? aLine[nLineIdx] : 0;
608         ++nLineIdx;
609         if (nLineIdx < aLine.getLength())
610         {
611             switch (nChar)
612             {
613                 case '<': if( aLine[nLineIdx] == '>' || aLine[nLineIdx] == '=' ) n = 2; break;
614                 case '>': if( aLine[nLineIdx] == '=' ) n = 2; break;
615                 case ':': if( aLine[nLineIdx] == '=' ) n = 2; break;
616             }
617         }
618         aSym = aLine.copy(nCol, std::min(n, aLine.getLength() - nCol));
619         nLineIdx += n-1; nCol = nCol + n;
620     }
621 
622     nCol2 = nCol-1;
623 
624 PrevLineCommentLbl:
625 
626     if( bPrevLineExtentsComment || (eScanType != SbxSTRING &&
627                                     ( bCompilerDirective ||
628                                       aSym.startsWith("'") ||
629                                       aSym.equalsIgnoreAsciiCase( "REM" ) ) ) )
630     {
631         bPrevLineExtentsComment = false;
632         aSym = "REM";
633         sal_Int32 nLen = aLine.getLength() - nLineIdx;
634         if( bCompatible && aLine[nLineIdx + nLen - 1] == '_' && aLine[nLineIdx + nLen - 2] == ' ' )
635             bPrevLineExtentsComment = true;
636         nCol2 = nCol2 + nLen;
637         nLineIdx = -1;
638     }
639 
640     if (nLineIdx == nLineIdxScanStart)
641     {
642         GenError( ERRCODE_BASIC_SYMBOL_EXPECTED );
643         return false;
644     }
645 
646     return true;
647 
648 
649 eoln:
650     if( nCol && aLine[--nLineIdx] == '_' )
651     {
652         nLineIdx = -1;
653         bool bRes = NextSym();
654         if( aSym.startsWith(".") )
655         {
656             // object _
657             //    .Method
658             // ^^^  <- spaces is legal in MSO VBA
659             bSpaces = false;
660         }
661         return bRes;
662     }
663     else
664     {
665         nLineIdx = -1;
666         nLine = nOldLine;
667         nCol1 = nOldCol1;
668         nCol2 = nOldCol2;
669         aSym = "\n";
670         nColLock = 0;
671         return true;
672     }
673 }
674 
675 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
676