1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <basiccharclass.hxx>
21 #include <scanner.hxx>
22 #include <sbintern.hxx>
23 #include <runtime.hxx>
24 
25 #include <basic/sberrors.hxx>
26 #include <i18nlangtag/lang.h>
27 #include <svl/zforlist.hxx>
28 #include <rtl/character.hxx>
29 
SbiScanner(const OUString & rBuf,StarBASIC * p)30 SbiScanner::SbiScanner(const OUString& rBuf, StarBASIC* p)
31     : aBuf(rBuf)
32     , nLineIdx(-1)
33     , nSaveLineIdx(-1)
34     , pBasic(p)
35     , eScanType(SbxVARIANT)
36     , nVal(0)
37     , nSavedCol1(0)
38     , nCol(0)
39     , nErrors(0)
40     , nColLock(0)
41     , nBufPos(0)
42     , nLine(0)
43     , nCol1(0)
44     , nCol2(0)
45     , bSymbol(false)
46     , bNumber(false)
47     , bSpaces(false)
48     , bAbort(false)
49     , bHash(true)
50     , bError(false)
51     , bCompatible(false)
52     , bVBASupportOn(false)
53     , bPrevLineExtentsComment(false)
54     , bClosingUnderscore(false)
55     , bInStatement(false)
56 {
57 }
58 
LockColumn()59 void SbiScanner::LockColumn()
60 {
61     if( !nColLock++ )
62         nSavedCol1 = nCol1;
63 }
64 
UnlockColumn()65 void SbiScanner::UnlockColumn()
66 {
67     if( nColLock )
68         nColLock--;
69 }
70 
GenError(ErrCode code)71 void SbiScanner::GenError( ErrCode code )
72 {
73     if( GetSbData()->bBlockCompilerError )
74     {
75         bAbort = true;
76         return;
77     }
78     if( !bError )
79     {
80         bool bRes = true;
81         // report only one error per statement
82         bError = true;
83         if( pBasic )
84         {
85             // in case of EXPECTED or UNEXPECTED it always refers
86             // to the last token, so take the Col1 over
87             sal_Int32 nc = nColLock ? nSavedCol1 : nCol1;
88             if ( code.anyOf(
89                     ERRCODE_BASIC_EXPECTED,
90                     ERRCODE_BASIC_UNEXPECTED,
91                     ERRCODE_BASIC_SYMBOL_EXPECTED,
92                     ERRCODE_BASIC_LABEL_EXPECTED) )
93             {
94                     nc = nCol1;
95                     if( nc > nCol2 ) nCol2 = nc;
96             }
97             bRes = pBasic->CError( code, aError, nLine, nc, nCol2 );
98         }
99         bAbort = bAbort || !bRes  || ( code == ERRCODE_BASIC_NO_MEMORY || code == ERRCODE_BASIC_PROG_TOO_LARGE );
100     }
101     nErrors++;
102 }
103 
104 
105 // used by SbiTokenizer::MayBeLabel() to detect a label
DoesColonFollow()106 bool SbiScanner::DoesColonFollow()
107 {
108     if(nCol < aLine.getLength() && aLine[nCol] == ':')
109     {
110         ++nLineIdx; ++nCol;
111         return true;
112     }
113     else
114         return false;
115 }
116 
117 // test for legal suffix
GetSuffixType(sal_Unicode c)118 static SbxDataType GetSuffixType( sal_Unicode c )
119 {
120     switch (c)
121     {
122     case '%':
123         return SbxINTEGER;
124     case '&':
125         return SbxLONG;
126     case '!':
127         return SbxSINGLE;
128     case '#':
129         return SbxDOUBLE;
130     case '@':
131         return SbxCURRENCY;
132     case '$':
133         return SbxSTRING;
134     default:
135         return SbxVARIANT;
136     }
137 }
138 
// SbiScanner::NextSym() (below) reads the next symbol into the variables
// aSym, nVal and eScanType; its return value is false at EOF or errors.
//
// BUF_SIZE is the number of sal_Unicode elements in the local buffer in which
// NextSym() collects the characters of a decimal number literal; one element
// is reserved for the terminating 0.
#define BUF_SIZE 80
142 
scanAlphanumeric()143 void SbiScanner::scanAlphanumeric()
144 {
145     sal_Int32 n = nCol;
146     while(nCol < aLine.getLength() && (BasicCharClass::isAlphaNumeric(aLine[nCol], bCompatible) || aLine[nCol] == '_'))
147     {
148         ++nLineIdx;
149         ++nCol;
150     }
151     aSym = aLine.copy(n, nCol - n);
152 }
153 
scanGoto()154 void SbiScanner::scanGoto()
155 {
156     sal_Int32 n = nCol;
157     while(n < aLine.getLength() && BasicCharClass::isWhitespace(aLine[n]))
158         ++n;
159 
160     if(n + 1 < aLine.getLength())
161     {
162         OUString aTemp = aLine.copy(n, 2);
163         if(aTemp.equalsIgnoreAsciiCase("to"))
164         {
165             aSym = "goto";
166             nLineIdx += n + 2 - nCol;
167             nCol = n + 2;
168         }
169     }
170 }
171 
readLine()172 bool SbiScanner::readLine()
173 {
174     if(nBufPos >= aBuf.getLength())
175         return false;
176 
177     sal_Int32 n = nBufPos;
178     sal_Int32 nLen = aBuf.getLength();
179 
180     while(n < nLen && aBuf[n] != '\r' && aBuf[n] != '\n')
181         ++n;
182 
183     // Trim trailing whitespace
184     sal_Int32 nEnd = n;
185     while(nBufPos < nEnd && BasicCharClass::isWhitespace(aBuf[nEnd - 1]))
186         --nEnd;
187 
188     aLine = aBuf.copy(nBufPos, nEnd - nBufPos);
189 
190     // Fast-forward past the line ending
191     if(n + 1 < nLen && aBuf[n] == '\r' && aBuf[n + 1] == '\n')
192         n += 2;
193     else if(n < nLen)
194         ++n;
195 
196     nBufPos = n;
197     nLineIdx = 0;
198 
199     ++nLine;
200     nCol = nCol1 = nCol2 = 0;
201     nColLock = 0;
202 
203     return true;
204 }
205 
NextSym()206 bool SbiScanner::NextSym()
207 {
208     // memorize for the EOLN-case
209     sal_Int32 nOldLine = nLine;
210     sal_Int32 nOldCol1 = nCol1;
211     sal_Int32 nOldCol2 = nCol2;
212     sal_Unicode buf[ BUF_SIZE ], *p = buf;
213 
214     eScanType = SbxVARIANT;
215     aSym.clear();
216     bHash = bSymbol = bNumber = bSpaces = false;
217     bool bCompilerDirective = false;
218 
219     // read in line?
220     if (nLineIdx == -1)
221     {
222         if(!readLine())
223             return false;
224 
225         nOldLine = nLine;
226         nOldCol1 = nOldCol2 = 0;
227     }
228 
229     const sal_Int32 nLineIdxScanStart = nLineIdx;
230 
231     if(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol]))
232     {
233         bSpaces = true;
234         while(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol]))
235         {
236             ++nLineIdx;
237             ++nCol;
238         }
239     }
240 
241     nCol1 = nCol;
242 
243     // only blank line?
244     if(nCol >= aLine.getLength())
245         goto eoln;
246 
247     if( bPrevLineExtentsComment )
248         goto PrevLineCommentLbl;
249 
250     if(nCol < aLine.getLength() && aLine[nCol] == '#')
251     {
252         sal_Int32 nLineTempIdx = nLineIdx;
253         do
254         {
255             nLineTempIdx++;
256         } while (nLineTempIdx < aLine.getLength() && !BasicCharClass::isWhitespace(aLine[nLineTempIdx])
257             && aLine[nLineTempIdx] != '#' && aLine[nLineTempIdx] != ',');
258         // leave it if it is a date literal - it will be handled later
259         if (nLineTempIdx >= aLine.getLength() || aLine[nLineTempIdx] != '#')
260         {
261             ++nLineIdx;
262             ++nCol;
263             //ignore compiler directives (# is first non-space character)
264             if (nOldCol2 == 0)
265                 bCompilerDirective = true;
266             else
267                 bHash = true;
268         }
269     }
270 
271     // copy character if symbol
272     if(nCol < aLine.getLength() && (BasicCharClass::isAlpha(aLine[nCol], bCompatible) || aLine[nCol] == '_'))
273     {
274         // if there's nothing behind '_' , it's the end of a line!
275         if(nCol + 1 == aLine.getLength() && aLine[nCol] == '_')
276         {
277             // Note that nCol is not incremented here...
278             ++nLineIdx;
279             goto eoln;
280         }
281 
282         bSymbol = true;
283 
284         scanAlphanumeric();
285 
286         // Special handling for "go to"
287         if(nCol < aLine.getLength() && bCompatible && aSym.equalsIgnoreAsciiCase("go"))
288             scanGoto();
289 
290         // tdf#125637 - check for closing underscore
291         if (nCol == aLine.getLength() && aLine[nCol - 1] == '_')
292         {
293             bClosingUnderscore = true;
294         }
295         // type recognition?
296         // don't test the exclamation mark
297         // if there's a symbol behind it
298         else if((nCol >= aLine.getLength() || aLine[nCol] != '!') ||
299                 (nCol + 1 >= aLine.getLength() || !BasicCharClass::isAlpha(aLine[nCol + 1], bCompatible)))
300         {
301             if(nCol < aLine.getLength())
302             {
303                 SbxDataType t(GetSuffixType(aLine[nCol]));
304                 if( t != SbxVARIANT )
305                 {
306                     eScanType = t;
307                     ++nLineIdx;
308                     ++nCol;
309                 }
310             }
311         }
312     }
313 
314     // read in and convert if number
315     else if((nCol < aLine.getLength() && rtl::isAsciiDigit(aLine[nCol])) ||
316             (nCol + 1 < aLine.getLength() && aLine[nCol] == '.' && rtl::isAsciiDigit(aLine[nCol + 1])))
317     {
318         short exp = 0;
319         short dec = 0;
320         eScanType = SbxDOUBLE;
321         bool bScanError = false;
322         bool bBufOverflow = false;
323         // All this because of 'D' or 'd' floating point type, sigh...
324         while(!bScanError && nCol < aLine.getLength() && strchr("0123456789.DEde", aLine[nCol]))
325         {
326             // from 4.1.1996: buffer full? -> go on scanning empty
327             if( (p-buf) == (BUF_SIZE-1) )
328             {
329                 bBufOverflow = true;
330                 ++nLineIdx;
331                 ++nCol;
332                 continue;
333             }
334             // point or exponent?
335             if(aLine[nCol] == '.')
336             {
337                 if( ++dec > 1 )
338                     bScanError = true;
339                 else
340                     *p++ = '.';
341             }
342             else if(strchr("DdEe", aLine[nCol]))
343             {
344                 if (++exp > 1)
345                     bScanError = true;
346                 else
347                 {
348                     *p++ = 'E';
349                     if (nCol + 1 < aLine.getLength() && (aLine[nCol+1] == '+' || aLine[nCol+1] == '-'))
350                     {
351                         ++nLineIdx;
352                         ++nCol;
353                         if( (p-buf) == (BUF_SIZE-1) )
354                         {
355                             bBufOverflow = true;
356                             continue;
357                         }
358                         *p++ = aLine[nCol];
359                     }
360                 }
361             }
362             else
363             {
364                 *p++ = aLine[nCol];
365             }
366             ++nLineIdx;
367             ++nCol;
368         }
369         *p = 0;
370         aSym = p; bNumber = true;
371 
372         // For bad characters, scan and parse errors generate only one error.
373         ErrCode nError = ERRCODE_NONE;
374         if (bScanError)
375         {
376             --nLineIdx;
377             --nCol;
378             aError = OUString( aLine[nCol]);
379             nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER;
380         }
381 
382         rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
383         const sal_Unicode* pParseEnd = buf;
384         nVal = rtl_math_uStringToDouble( buf, buf+(p-buf), '.', ',', &eStatus, &pParseEnd );
385         if (pParseEnd != buf+(p-buf))
386         {
387             // e.g. "12e" or "12e+", or with bScanError "12d"+"E".
388             sal_Int32 nChars = buf+(p-buf) - pParseEnd;
389             nLineIdx -= nChars;
390             nCol -= nChars;
391             // For bScanError, nLineIdx and nCol were already decremented, just
392             // add that character to the parse end.
393             if (bScanError)
394                 ++nChars;
395             // Copy error position from original string, not the buffer
396             // replacement where "12dE" => "12EE".
397             aError = aLine.copy( nCol, nChars);
398             nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER;
399         }
400         else if (eStatus != rtl_math_ConversionStatus_Ok)
401         {
402             // Keep the scan error and character at position, if any.
403             if (!nError)
404                 nError = ERRCODE_BASIC_MATH_OVERFLOW;
405         }
406 
407         if (nError)
408             GenError( nError );
409 
410         if( !dec && !exp )
411         {
412             if( nVal >= SbxMININT && nVal <= SbxMAXINT )
413                 eScanType = SbxINTEGER;
414             else if( nVal >= SbxMINLNG && nVal <= SbxMAXLNG )
415                     eScanType = SbxLONG;
416         }
417 
418         if( bBufOverflow )
419             GenError( ERRCODE_BASIC_MATH_OVERFLOW );
420 
421         // type recognition?
422         if( nCol < aLine.getLength() )
423         {
424             SbxDataType t(GetSuffixType(aLine[nCol]));
425             if( t != SbxVARIANT )
426             {
427                 eScanType = t;
428                 ++nLineIdx;
429                 ++nCol;
430             }
431             // tdf#130476 - don't allow String trailing data type character with numbers
432             if ( t == SbxSTRING )
433             {
434                 GenError( ERRCODE_BASIC_SYNTAX );
435             }
436        }
437     }
438 
439     // Hex/octal number? Read in and convert:
440     else if(aLine.getLength() - nCol > 1 && aLine[nCol] == '&')
441     {
442         ++nLineIdx; ++nCol;
443         sal_Unicode base = 16;
444         sal_Unicode xch  = aLine[nCol];
445         ++nLineIdx; ++nCol;
446         switch( rtl::toAsciiUpperCase( xch ) )
447         {
448             case 'O':
449                 base = 8;
450                 break;
451             case 'H':
452                 break;
453             default :
454                 // treated as an operator
455                 --nLineIdx; --nCol; nCol1 = nCol-1;
456                 aSym = "&";
457                 return true;
458         }
459         bNumber = true;
460         // Hex literals are signed Integers ( as defined by basic
461         // e.g. -2,147,483,648 through 2,147,483,647 (signed)
462         sal_uInt64 lu = 0;
463         bool bOverflow = false;
464         while(nCol < aLine.getLength() && BasicCharClass::isAlphaNumeric(aLine[nCol], false))
465         {
466             sal_Unicode ch = rtl::toAsciiUpperCase(aLine[nCol]);
467             ++nLineIdx; ++nCol;
468             if( ((base == 16 ) && rtl::isAsciiHexDigit( ch ) ) ||
469                      ((base == 8) && rtl::isAsciiOctalDigit( ch )))
470             {
471                 int i = ch  - '0';
472                 if( i > 9 ) i -= 7;
473                 lu = ( lu * base ) + i;
474                 if( lu > SAL_MAX_UINT32 )
475                 {
476                     bOverflow = true;
477                 }
478             }
479             else
480             {
481                 aError = OUString(ch);
482                 GenError( ERRCODE_BASIC_BAD_CHAR_IN_NUMBER );
483             }
484         }
485 
486         // tdf#130476 - take into account trailing data type characters
487         if( nCol < aLine.getLength() )
488         {
489             SbxDataType t(GetSuffixType(aLine[nCol]));
490             if( t != SbxVARIANT )
491             {
492                 eScanType = t;
493                 ++nLineIdx;
494                 ++nCol;
495             }
496             // tdf#130476 - don't allow String trailing data type character with numbers
497             if ( t == SbxSTRING )
498             {
499                 GenError( ERRCODE_BASIC_SYNTAX );
500             }
501         }
502 
503         // tdf#130476 - take into account trailing data type characters
504         switch ( eScanType )
505         {
506             case SbxINTEGER:
507                 nVal = static_cast<double>( static_cast<sal_Int16>(lu) );
508                 if ( lu > SbxMAXUINT )
509                 {
510                     bOverflow = true;
511                 }
512                 break;
513             case SbxLONG: nVal = static_cast<double>( static_cast<sal_Int32>(lu) ); break;
514             case SbxVARIANT:
515             {
516                 // tdf#62326 - If the value of the hex string without explicit type character lies within
517                 // the range of 0x8000 (SbxMAXINT + 1) and 0xFFFF (SbxMAXUINT) inclusive, cast the value
518                 // to 16 bit in order to get signed integers, e.g., SbxMININT through SbxMAXINT
519                 sal_Int32 ls = (lu > SbxMAXINT && lu <= SbxMAXUINT) ? static_cast<sal_Int16>(lu) : static_cast<sal_Int32>(lu);
520                 eScanType = ( ls >= SbxMININT && ls <= SbxMAXINT ) ? SbxINTEGER : SbxLONG;
521                 nVal = static_cast<double>(ls);
522                 break;
523             }
524             default:
525                 nVal = static_cast<double>(lu);
526                 break;
527         }
528         if( bOverflow )
529             GenError( ERRCODE_BASIC_MATH_OVERFLOW );
530     }
531 
532     // Strings:
533     else if (nLineIdx < aLine.getLength() && (aLine[nLineIdx] == '"' || aLine[nLineIdx] == '['))
534     {
535         sal_Unicode cSep = aLine[nLineIdx];
536         if( cSep == '[' )
537         {
538             bSymbol = true;
539             cSep = ']';
540         }
541         sal_Int32 n = nCol + 1;
542         while (nLineIdx < aLine.getLength())
543         {
544             do
545             {
546                 nLineIdx++;
547                 nCol++;
548             }
549             while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != cSep));
550             if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == cSep)
551             {
552                 nLineIdx++; nCol++;
553                 if (nLineIdx >= aLine.getLength() || aLine[nLineIdx] != cSep || cSep == ']')
554                 {
555                     // If VBA Interop then doesn't eat the [] chars
556                     if ( cSep == ']' && bVBASupportOn )
557                         aSym = aLine.copy( n - 1, nCol - n  + 1);
558                     else
559                         aSym = aLine.copy( n, nCol - n - 1 );
560                     // get out duplicate string delimiters
561                     OUStringBuffer aSymBuf(aSym.getLength());
562                     for ( sal_Int32 i = 0, len = aSym.getLength(); i < len; ++i )
563                     {
564                         aSymBuf.append( aSym[i] );
565                         if ( aSym[i] == cSep && ( i+1 < len ) && aSym[i+1] == cSep )
566                             ++i;
567                     }
568                     aSym = aSymBuf.makeStringAndClear();
569                     if( cSep != ']' )
570                         eScanType = SbxSTRING;
571                     break;
572                 }
573             }
574             else
575             {
576                 aError = OUString(cSep);
577                 GenError( ERRCODE_BASIC_EXPECTED );
578             }
579         }
580     }
581 
582     // Date:
583     else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#')
584     {
585         sal_Int32 n = nCol + 1;
586         do
587         {
588             nLineIdx++;
589             nCol++;
590         }
591         while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != '#'));
592         if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#')
593         {
594             nLineIdx++; nCol++;
595             aSym = aLine.copy( n, nCol - n - 1 );
596 
597             // parse date literal
598             std::shared_ptr<SvNumberFormatter> pFormatter;
599             if (GetSbData()->pInst)
600             {
601                 pFormatter = GetSbData()->pInst->GetNumberFormatter();
602             }
603             else
604             {
605                 sal_uInt32 nDummy;
606                 pFormatter = SbiInstance::PrepareNumberFormatter( nDummy, nDummy, nDummy );
607             }
608             sal_uInt32 nIndex = pFormatter->GetStandardIndex( LANGUAGE_ENGLISH_US);
609             bool bSuccess = pFormatter->IsNumberFormat(aSym, nIndex, nVal);
610             if( bSuccess )
611             {
612                 SvNumFormatType nType_ = pFormatter->GetType(nIndex);
613                 if( !(nType_ & SvNumFormatType::DATE) )
614                     bSuccess = false;
615             }
616 
617             if (!bSuccess)
618                 GenError( ERRCODE_BASIC_CONVERSION );
619 
620             bNumber = true;
621             eScanType = SbxDOUBLE;
622         }
623         else
624         {
625             aError = OUString('#');
626             GenError( ERRCODE_BASIC_EXPECTED );
627         }
628     }
629     // invalid characters:
630     else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] >= 0x7F)
631     {
632         GenError( ERRCODE_BASIC_SYNTAX ); nLineIdx++; nCol++;
633     }
634     // other groups:
635     else
636     {
637         sal_Int32 n = 1;
638         auto nChar = nLineIdx < aLine.getLength() ? aLine[nLineIdx] : 0;
639         ++nLineIdx;
640         if (nLineIdx < aLine.getLength())
641         {
642             switch (nChar)
643             {
644                 case '<': if( aLine[nLineIdx] == '>' || aLine[nLineIdx] == '=' ) n = 2; break;
645                 case '>': if( aLine[nLineIdx] == '=' ) n = 2; break;
646                 case ':': if( aLine[nLineIdx] == '=' ) n = 2; break;
647             }
648         }
649         aSym = aLine.copy(nCol, std::min(n, aLine.getLength() - nCol));
650         nLineIdx += n-1; nCol = nCol + n;
651     }
652 
653     nCol2 = nCol-1;
654 
655 PrevLineCommentLbl:
656 
657     if( bPrevLineExtentsComment || (eScanType != SbxSTRING &&
658                                     ( bCompilerDirective ||
659                                       aSym.startsWith("'") ||
660                                       aSym.equalsIgnoreAsciiCase( "REM" ) ) ) )
661     {
662         bPrevLineExtentsComment = false;
663         aSym = "REM";
664         sal_Int32 nLen = aLine.getLength() - nLineIdx;
665         if( bCompatible && aLine[nLineIdx + nLen - 1] == '_' && aLine[nLineIdx + nLen - 2] == ' ' )
666             bPrevLineExtentsComment = true;
667         nCol2 = nCol2 + nLen;
668         nLineIdx = -1;
669     }
670 
671     if (nLineIdx == nLineIdxScanStart)
672     {
673         GenError( ERRCODE_BASIC_SYMBOL_EXPECTED );
674         return false;
675     }
676 
677     return true;
678 
679 
680 eoln:
681     if (nCol && aLine[--nLineIdx] == '_' && !bClosingUnderscore)
682     {
683         nLineIdx = -1;
684         bool bRes = NextSym();
685         if( aSym.startsWith(".") )
686         {
687             // object _
688             //    .Method
689             // ^^^  <- spaces is legal in MSO VBA
690             bSpaces = false;
691         }
692         return bRes;
693     }
694     else
695     {
696         nLineIdx = -1;
697         nLine = nOldLine;
698         nCol1 = nOldCol1;
699         nCol2 = nOldCol2;
700         aSym = "\n";
701         nColLock = 0;
702         bClosingUnderscore = false;
703         return true;
704     }
705 }
706 
707 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
708