1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include <basiccharclass.hxx>
21 #include <scanner.hxx>
22 #include <sbintern.hxx>
23 #include <runtime.hxx>
24
25 #include <basic/sberrors.hxx>
26 #include <i18nlangtag/lang.h>
27 #include <svl/zforlist.hxx>
28 #include <rtl/character.hxx>
29
SbiScanner(const OUString & rBuf,StarBASIC * p)30 SbiScanner::SbiScanner(const OUString& rBuf, StarBASIC* p)
31 : aBuf(rBuf)
32 , nLineIdx(-1)
33 , nSaveLineIdx(-1)
34 , pBasic(p)
35 , eScanType(SbxVARIANT)
36 , nVal(0)
37 , nSavedCol1(0)
38 , nCol(0)
39 , nErrors(0)
40 , nColLock(0)
41 , nBufPos(0)
42 , nLine(0)
43 , nCol1(0)
44 , nCol2(0)
45 , bSymbol(false)
46 , bNumber(false)
47 , bSpaces(false)
48 , bAbort(false)
49 , bHash(true)
50 , bError(false)
51 , bCompatible(false)
52 , bVBASupportOn(false)
53 , bPrevLineExtentsComment(false)
54 , bClosingUnderscore(false)
55 , bInStatement(false)
56 {
57 }
58
LockColumn()59 void SbiScanner::LockColumn()
60 {
61 if( !nColLock++ )
62 nSavedCol1 = nCol1;
63 }
64
UnlockColumn()65 void SbiScanner::UnlockColumn()
66 {
67 if( nColLock )
68 nColLock--;
69 }
70
GenError(ErrCode code)71 void SbiScanner::GenError( ErrCode code )
72 {
73 if( GetSbData()->bBlockCompilerError )
74 {
75 bAbort = true;
76 return;
77 }
78 if( !bError )
79 {
80 bool bRes = true;
81 // report only one error per statement
82 bError = true;
83 if( pBasic )
84 {
85 // in case of EXPECTED or UNEXPECTED it always refers
86 // to the last token, so take the Col1 over
87 sal_Int32 nc = nColLock ? nSavedCol1 : nCol1;
88 if ( code.anyOf(
89 ERRCODE_BASIC_EXPECTED,
90 ERRCODE_BASIC_UNEXPECTED,
91 ERRCODE_BASIC_SYMBOL_EXPECTED,
92 ERRCODE_BASIC_LABEL_EXPECTED) )
93 {
94 nc = nCol1;
95 if( nc > nCol2 ) nCol2 = nc;
96 }
97 bRes = pBasic->CError( code, aError, nLine, nc, nCol2 );
98 }
99 bAbort = bAbort || !bRes || ( code == ERRCODE_BASIC_NO_MEMORY || code == ERRCODE_BASIC_PROG_TOO_LARGE );
100 }
101 nErrors++;
102 }
103
104
105 // used by SbiTokenizer::MayBeLabel() to detect a label
DoesColonFollow()106 bool SbiScanner::DoesColonFollow()
107 {
108 if(nCol < aLine.getLength() && aLine[nCol] == ':')
109 {
110 ++nLineIdx; ++nCol;
111 return true;
112 }
113 else
114 return false;
115 }
116
117 // test for legal suffix
GetSuffixType(sal_Unicode c)118 static SbxDataType GetSuffixType( sal_Unicode c )
119 {
120 switch (c)
121 {
122 case '%':
123 return SbxINTEGER;
124 case '&':
125 return SbxLONG;
126 case '!':
127 return SbxSINGLE;
128 case '#':
129 return SbxDOUBLE;
130 case '@':
131 return SbxCURRENCY;
132 case '$':
133 return SbxSTRING;
134 default:
135 return SbxVARIANT;
136 }
137 }
138
// SbiScanner::NextSym() reads the next symbol into the members aSym, nVal
// and eScanType. It returns false at the end of the source buffer or when
// no symbol could be scanned.
//
// BUF_SIZE is the capacity, in UTF-16 code units including the terminating
// zero, of the scratch buffer NextSym() uses to collect a numeric literal.
constexpr int BUF_SIZE = 80;
142
scanAlphanumeric()143 void SbiScanner::scanAlphanumeric()
144 {
145 sal_Int32 n = nCol;
146 while(nCol < aLine.getLength() && (BasicCharClass::isAlphaNumeric(aLine[nCol], bCompatible) || aLine[nCol] == '_'))
147 {
148 ++nLineIdx;
149 ++nCol;
150 }
151 aSym = aLine.copy(n, nCol - n);
152 }
153
scanGoto()154 void SbiScanner::scanGoto()
155 {
156 sal_Int32 n = nCol;
157 while(n < aLine.getLength() && BasicCharClass::isWhitespace(aLine[n]))
158 ++n;
159
160 if(n + 1 < aLine.getLength())
161 {
162 OUString aTemp = aLine.copy(n, 2);
163 if(aTemp.equalsIgnoreAsciiCase("to"))
164 {
165 aSym = "goto";
166 nLineIdx += n + 2 - nCol;
167 nCol = n + 2;
168 }
169 }
170 }
171
readLine()172 bool SbiScanner::readLine()
173 {
174 if(nBufPos >= aBuf.getLength())
175 return false;
176
177 sal_Int32 n = nBufPos;
178 sal_Int32 nLen = aBuf.getLength();
179
180 while(n < nLen && aBuf[n] != '\r' && aBuf[n] != '\n')
181 ++n;
182
183 // Trim trailing whitespace
184 sal_Int32 nEnd = n;
185 while(nBufPos < nEnd && BasicCharClass::isWhitespace(aBuf[nEnd - 1]))
186 --nEnd;
187
188 aLine = aBuf.copy(nBufPos, nEnd - nBufPos);
189
190 // Fast-forward past the line ending
191 if(n + 1 < nLen && aBuf[n] == '\r' && aBuf[n + 1] == '\n')
192 n += 2;
193 else if(n < nLen)
194 ++n;
195
196 nBufPos = n;
197 nLineIdx = 0;
198
199 ++nLine;
200 nCol = nCol1 = nCol2 = 0;
201 nColLock = 0;
202
203 return true;
204 }
205
NextSym()206 bool SbiScanner::NextSym()
207 {
208 // memorize for the EOLN-case
209 sal_Int32 nOldLine = nLine;
210 sal_Int32 nOldCol1 = nCol1;
211 sal_Int32 nOldCol2 = nCol2;
212 sal_Unicode buf[ BUF_SIZE ], *p = buf;
213
214 eScanType = SbxVARIANT;
215 aSym.clear();
216 bHash = bSymbol = bNumber = bSpaces = false;
217 bool bCompilerDirective = false;
218
219 // read in line?
220 if (nLineIdx == -1)
221 {
222 if(!readLine())
223 return false;
224
225 nOldLine = nLine;
226 nOldCol1 = nOldCol2 = 0;
227 }
228
229 const sal_Int32 nLineIdxScanStart = nLineIdx;
230
231 if(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol]))
232 {
233 bSpaces = true;
234 while(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol]))
235 {
236 ++nLineIdx;
237 ++nCol;
238 }
239 }
240
241 nCol1 = nCol;
242
243 // only blank line?
244 if(nCol >= aLine.getLength())
245 goto eoln;
246
247 if( bPrevLineExtentsComment )
248 goto PrevLineCommentLbl;
249
250 if(nCol < aLine.getLength() && aLine[nCol] == '#')
251 {
252 sal_Int32 nLineTempIdx = nLineIdx;
253 do
254 {
255 nLineTempIdx++;
256 } while (nLineTempIdx < aLine.getLength() && !BasicCharClass::isWhitespace(aLine[nLineTempIdx])
257 && aLine[nLineTempIdx] != '#' && aLine[nLineTempIdx] != ',');
258 // leave it if it is a date literal - it will be handled later
259 if (nLineTempIdx >= aLine.getLength() || aLine[nLineTempIdx] != '#')
260 {
261 ++nLineIdx;
262 ++nCol;
263 //ignore compiler directives (# is first non-space character)
264 if (nOldCol2 == 0)
265 bCompilerDirective = true;
266 else
267 bHash = true;
268 }
269 }
270
271 // copy character if symbol
272 if(nCol < aLine.getLength() && (BasicCharClass::isAlpha(aLine[nCol], bCompatible) || aLine[nCol] == '_'))
273 {
274 // if there's nothing behind '_' , it's the end of a line!
275 if(nCol + 1 == aLine.getLength() && aLine[nCol] == '_')
276 {
277 // Note that nCol is not incremented here...
278 ++nLineIdx;
279 goto eoln;
280 }
281
282 bSymbol = true;
283
284 scanAlphanumeric();
285
286 // Special handling for "go to"
287 if(nCol < aLine.getLength() && bCompatible && aSym.equalsIgnoreAsciiCase("go"))
288 scanGoto();
289
290 // tdf#125637 - check for closing underscore
291 if (nCol == aLine.getLength() && aLine[nCol - 1] == '_')
292 {
293 bClosingUnderscore = true;
294 }
295 // type recognition?
296 // don't test the exclamation mark
297 // if there's a symbol behind it
298 else if((nCol >= aLine.getLength() || aLine[nCol] != '!') ||
299 (nCol + 1 >= aLine.getLength() || !BasicCharClass::isAlpha(aLine[nCol + 1], bCompatible)))
300 {
301 if(nCol < aLine.getLength())
302 {
303 SbxDataType t(GetSuffixType(aLine[nCol]));
304 if( t != SbxVARIANT )
305 {
306 eScanType = t;
307 ++nLineIdx;
308 ++nCol;
309 }
310 }
311 }
312 }
313
314 // read in and convert if number
315 else if((nCol < aLine.getLength() && rtl::isAsciiDigit(aLine[nCol])) ||
316 (nCol + 1 < aLine.getLength() && aLine[nCol] == '.' && rtl::isAsciiDigit(aLine[nCol + 1])))
317 {
318 short exp = 0;
319 short dec = 0;
320 eScanType = SbxDOUBLE;
321 bool bScanError = false;
322 bool bBufOverflow = false;
323 // All this because of 'D' or 'd' floating point type, sigh...
324 while(!bScanError && nCol < aLine.getLength() && strchr("0123456789.DEde", aLine[nCol]))
325 {
326 // from 4.1.1996: buffer full? -> go on scanning empty
327 if( (p-buf) == (BUF_SIZE-1) )
328 {
329 bBufOverflow = true;
330 ++nLineIdx;
331 ++nCol;
332 continue;
333 }
334 // point or exponent?
335 if(aLine[nCol] == '.')
336 {
337 if( ++dec > 1 )
338 bScanError = true;
339 else
340 *p++ = '.';
341 }
342 else if(strchr("DdEe", aLine[nCol]))
343 {
344 if (++exp > 1)
345 bScanError = true;
346 else
347 {
348 *p++ = 'E';
349 if (nCol + 1 < aLine.getLength() && (aLine[nCol+1] == '+' || aLine[nCol+1] == '-'))
350 {
351 ++nLineIdx;
352 ++nCol;
353 if( (p-buf) == (BUF_SIZE-1) )
354 {
355 bBufOverflow = true;
356 continue;
357 }
358 *p++ = aLine[nCol];
359 }
360 }
361 }
362 else
363 {
364 *p++ = aLine[nCol];
365 }
366 ++nLineIdx;
367 ++nCol;
368 }
369 *p = 0;
370 aSym = p; bNumber = true;
371
372 // For bad characters, scan and parse errors generate only one error.
373 ErrCode nError = ERRCODE_NONE;
374 if (bScanError)
375 {
376 --nLineIdx;
377 --nCol;
378 aError = OUString( aLine[nCol]);
379 nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER;
380 }
381
382 rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
383 const sal_Unicode* pParseEnd = buf;
384 nVal = rtl_math_uStringToDouble( buf, buf+(p-buf), '.', ',', &eStatus, &pParseEnd );
385 if (pParseEnd != buf+(p-buf))
386 {
387 // e.g. "12e" or "12e+", or with bScanError "12d"+"E".
388 sal_Int32 nChars = buf+(p-buf) - pParseEnd;
389 nLineIdx -= nChars;
390 nCol -= nChars;
391 // For bScanError, nLineIdx and nCol were already decremented, just
392 // add that character to the parse end.
393 if (bScanError)
394 ++nChars;
395 // Copy error position from original string, not the buffer
396 // replacement where "12dE" => "12EE".
397 aError = aLine.copy( nCol, nChars);
398 nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER;
399 }
400 else if (eStatus != rtl_math_ConversionStatus_Ok)
401 {
402 // Keep the scan error and character at position, if any.
403 if (!nError)
404 nError = ERRCODE_BASIC_MATH_OVERFLOW;
405 }
406
407 if (nError)
408 GenError( nError );
409
410 if( !dec && !exp )
411 {
412 if( nVal >= SbxMININT && nVal <= SbxMAXINT )
413 eScanType = SbxINTEGER;
414 else if( nVal >= SbxMINLNG && nVal <= SbxMAXLNG )
415 eScanType = SbxLONG;
416 }
417
418 if( bBufOverflow )
419 GenError( ERRCODE_BASIC_MATH_OVERFLOW );
420
421 // type recognition?
422 if( nCol < aLine.getLength() )
423 {
424 SbxDataType t(GetSuffixType(aLine[nCol]));
425 if( t != SbxVARIANT )
426 {
427 eScanType = t;
428 ++nLineIdx;
429 ++nCol;
430 }
431 // tdf#130476 - don't allow String trailing data type character with numbers
432 if ( t == SbxSTRING )
433 {
434 GenError( ERRCODE_BASIC_SYNTAX );
435 }
436 }
437 }
438
439 // Hex/octal number? Read in and convert:
440 else if(aLine.getLength() - nCol > 1 && aLine[nCol] == '&')
441 {
442 ++nLineIdx; ++nCol;
443 sal_Unicode base = 16;
444 sal_Unicode xch = aLine[nCol];
445 ++nLineIdx; ++nCol;
446 switch( rtl::toAsciiUpperCase( xch ) )
447 {
448 case 'O':
449 base = 8;
450 break;
451 case 'H':
452 break;
453 default :
454 // treated as an operator
455 --nLineIdx; --nCol; nCol1 = nCol-1;
456 aSym = "&";
457 return true;
458 }
459 bNumber = true;
460 // Hex literals are signed Integers ( as defined by basic
461 // e.g. -2,147,483,648 through 2,147,483,647 (signed)
462 sal_uInt64 lu = 0;
463 bool bOverflow = false;
464 while(nCol < aLine.getLength() && BasicCharClass::isAlphaNumeric(aLine[nCol], false))
465 {
466 sal_Unicode ch = rtl::toAsciiUpperCase(aLine[nCol]);
467 ++nLineIdx; ++nCol;
468 if( ((base == 16 ) && rtl::isAsciiHexDigit( ch ) ) ||
469 ((base == 8) && rtl::isAsciiOctalDigit( ch )))
470 {
471 int i = ch - '0';
472 if( i > 9 ) i -= 7;
473 lu = ( lu * base ) + i;
474 if( lu > SAL_MAX_UINT32 )
475 {
476 bOverflow = true;
477 }
478 }
479 else
480 {
481 aError = OUString(ch);
482 GenError( ERRCODE_BASIC_BAD_CHAR_IN_NUMBER );
483 }
484 }
485
486 // tdf#130476 - take into account trailing data type characters
487 if( nCol < aLine.getLength() )
488 {
489 SbxDataType t(GetSuffixType(aLine[nCol]));
490 if( t != SbxVARIANT )
491 {
492 eScanType = t;
493 ++nLineIdx;
494 ++nCol;
495 }
496 // tdf#130476 - don't allow String trailing data type character with numbers
497 if ( t == SbxSTRING )
498 {
499 GenError( ERRCODE_BASIC_SYNTAX );
500 }
501 }
502
503 // tdf#130476 - take into account trailing data type characters
504 switch ( eScanType )
505 {
506 case SbxINTEGER:
507 nVal = static_cast<double>( static_cast<sal_Int16>(lu) );
508 if ( lu > SbxMAXUINT )
509 {
510 bOverflow = true;
511 }
512 break;
513 case SbxLONG: nVal = static_cast<double>( static_cast<sal_Int32>(lu) ); break;
514 case SbxVARIANT:
515 {
516 // tdf#62326 - If the value of the hex string without explicit type character lies within
517 // the range of 0x8000 (SbxMAXINT + 1) and 0xFFFF (SbxMAXUINT) inclusive, cast the value
518 // to 16 bit in order to get signed integers, e.g., SbxMININT through SbxMAXINT
519 sal_Int32 ls = (lu > SbxMAXINT && lu <= SbxMAXUINT) ? static_cast<sal_Int16>(lu) : static_cast<sal_Int32>(lu);
520 eScanType = ( ls >= SbxMININT && ls <= SbxMAXINT ) ? SbxINTEGER : SbxLONG;
521 nVal = static_cast<double>(ls);
522 break;
523 }
524 default:
525 nVal = static_cast<double>(lu);
526 break;
527 }
528 if( bOverflow )
529 GenError( ERRCODE_BASIC_MATH_OVERFLOW );
530 }
531
532 // Strings:
533 else if (nLineIdx < aLine.getLength() && (aLine[nLineIdx] == '"' || aLine[nLineIdx] == '['))
534 {
535 sal_Unicode cSep = aLine[nLineIdx];
536 if( cSep == '[' )
537 {
538 bSymbol = true;
539 cSep = ']';
540 }
541 sal_Int32 n = nCol + 1;
542 while (nLineIdx < aLine.getLength())
543 {
544 do
545 {
546 nLineIdx++;
547 nCol++;
548 }
549 while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != cSep));
550 if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == cSep)
551 {
552 nLineIdx++; nCol++;
553 if (nLineIdx >= aLine.getLength() || aLine[nLineIdx] != cSep || cSep == ']')
554 {
555 // If VBA Interop then doesn't eat the [] chars
556 if ( cSep == ']' && bVBASupportOn )
557 aSym = aLine.copy( n - 1, nCol - n + 1);
558 else
559 aSym = aLine.copy( n, nCol - n - 1 );
560 // get out duplicate string delimiters
561 OUStringBuffer aSymBuf(aSym.getLength());
562 for ( sal_Int32 i = 0, len = aSym.getLength(); i < len; ++i )
563 {
564 aSymBuf.append( aSym[i] );
565 if ( aSym[i] == cSep && ( i+1 < len ) && aSym[i+1] == cSep )
566 ++i;
567 }
568 aSym = aSymBuf.makeStringAndClear();
569 if( cSep != ']' )
570 eScanType = SbxSTRING;
571 break;
572 }
573 }
574 else
575 {
576 aError = OUString(cSep);
577 GenError( ERRCODE_BASIC_EXPECTED );
578 }
579 }
580 }
581
582 // Date:
583 else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#')
584 {
585 sal_Int32 n = nCol + 1;
586 do
587 {
588 nLineIdx++;
589 nCol++;
590 }
591 while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != '#'));
592 if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#')
593 {
594 nLineIdx++; nCol++;
595 aSym = aLine.copy( n, nCol - n - 1 );
596
597 // parse date literal
598 std::shared_ptr<SvNumberFormatter> pFormatter;
599 if (GetSbData()->pInst)
600 {
601 pFormatter = GetSbData()->pInst->GetNumberFormatter();
602 }
603 else
604 {
605 sal_uInt32 nDummy;
606 pFormatter = SbiInstance::PrepareNumberFormatter( nDummy, nDummy, nDummy );
607 }
608 sal_uInt32 nIndex = pFormatter->GetStandardIndex( LANGUAGE_ENGLISH_US);
609 bool bSuccess = pFormatter->IsNumberFormat(aSym, nIndex, nVal);
610 if( bSuccess )
611 {
612 SvNumFormatType nType_ = pFormatter->GetType(nIndex);
613 if( !(nType_ & SvNumFormatType::DATE) )
614 bSuccess = false;
615 }
616
617 if (!bSuccess)
618 GenError( ERRCODE_BASIC_CONVERSION );
619
620 bNumber = true;
621 eScanType = SbxDOUBLE;
622 }
623 else
624 {
625 aError = OUString('#');
626 GenError( ERRCODE_BASIC_EXPECTED );
627 }
628 }
629 // invalid characters:
630 else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] >= 0x7F)
631 {
632 GenError( ERRCODE_BASIC_SYNTAX ); nLineIdx++; nCol++;
633 }
634 // other groups:
635 else
636 {
637 sal_Int32 n = 1;
638 auto nChar = nLineIdx < aLine.getLength() ? aLine[nLineIdx] : 0;
639 ++nLineIdx;
640 if (nLineIdx < aLine.getLength())
641 {
642 switch (nChar)
643 {
644 case '<': if( aLine[nLineIdx] == '>' || aLine[nLineIdx] == '=' ) n = 2; break;
645 case '>': if( aLine[nLineIdx] == '=' ) n = 2; break;
646 case ':': if( aLine[nLineIdx] == '=' ) n = 2; break;
647 }
648 }
649 aSym = aLine.copy(nCol, std::min(n, aLine.getLength() - nCol));
650 nLineIdx += n-1; nCol = nCol + n;
651 }
652
653 nCol2 = nCol-1;
654
655 PrevLineCommentLbl:
656
657 if( bPrevLineExtentsComment || (eScanType != SbxSTRING &&
658 ( bCompilerDirective ||
659 aSym.startsWith("'") ||
660 aSym.equalsIgnoreAsciiCase( "REM" ) ) ) )
661 {
662 bPrevLineExtentsComment = false;
663 aSym = "REM";
664 sal_Int32 nLen = aLine.getLength() - nLineIdx;
665 if( bCompatible && aLine[nLineIdx + nLen - 1] == '_' && aLine[nLineIdx + nLen - 2] == ' ' )
666 bPrevLineExtentsComment = true;
667 nCol2 = nCol2 + nLen;
668 nLineIdx = -1;
669 }
670
671 if (nLineIdx == nLineIdxScanStart)
672 {
673 GenError( ERRCODE_BASIC_SYMBOL_EXPECTED );
674 return false;
675 }
676
677 return true;
678
679
680 eoln:
681 if (nCol && aLine[--nLineIdx] == '_' && !bClosingUnderscore)
682 {
683 nLineIdx = -1;
684 bool bRes = NextSym();
685 if( aSym.startsWith(".") )
686 {
687 // object _
688 // .Method
689 // ^^^ <- spaces is legal in MSO VBA
690 bSpaces = false;
691 }
692 return bRes;
693 }
694 else
695 {
696 nLineIdx = -1;
697 nLine = nOldLine;
698 nCol1 = nOldCol1;
699 nCol2 = nOldCol2;
700 aSym = "\n";
701 nColLock = 0;
702 bClosingUnderscore = false;
703 return true;
704 }
705 }
706
707 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
708