1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include <basiccharclass.hxx>
21 #include <scanner.hxx>
22 #include <sbintern.hxx>
23 #include <runtime.hxx>
24
25 #include <basic/sberrors.hxx>
26 #include <i18nlangtag/lang.h>
27 #include <svl/zforlist.hxx>
28 #include <rtl/character.hxx>
29
SbiScanner(const OUString & rBuf,StarBASIC * p)30 SbiScanner::SbiScanner( const OUString& rBuf, StarBASIC* p ) : aBuf( rBuf )
31 {
32 pBasic = p;
33 nLineIdx = -1;
34 nVal = 0;
35 eScanType = SbxVARIANT;
36 nErrors = 0;
37 nBufPos = 0;
38 nSavedCol1 = 0;
39 nColLock = 0;
40 nLine = 0;
41 nCol1 = 0;
42 nCol2 = 0;
43 nCol = 0;
44 bError =
45 bAbort =
46 bSpaces =
47 bNumber =
48 bSymbol =
49 bCompatible =
50 bVBASupportOn =
51 bInStatement =
52 bPrevLineExtentsComment = false;
53 bHash = true;
54 nSaveLineIdx = -1;
55 }
56
LockColumn()57 void SbiScanner::LockColumn()
58 {
59 if( !nColLock++ )
60 nSavedCol1 = nCol1;
61 }
62
UnlockColumn()63 void SbiScanner::UnlockColumn()
64 {
65 if( nColLock )
66 nColLock--;
67 }
68
GenError(ErrCode code)69 void SbiScanner::GenError( ErrCode code )
70 {
71 if( GetSbData()->bBlockCompilerError )
72 {
73 bAbort = true;
74 return;
75 }
76 if( !bError )
77 {
78 bool bRes = true;
79 // report only one error per statement
80 bError = true;
81 if( pBasic )
82 {
83 // in case of EXPECTED or UNEXPECTED it always refers
84 // to the last token, so take the Col1 over
85 sal_Int32 nc = nColLock ? nSavedCol1 : nCol1;
86 if ( code.anyOf(
87 ERRCODE_BASIC_EXPECTED,
88 ERRCODE_BASIC_UNEXPECTED,
89 ERRCODE_BASIC_SYMBOL_EXPECTED,
90 ERRCODE_BASIC_LABEL_EXPECTED) )
91 {
92 nc = nCol1;
93 if( nc > nCol2 ) nCol2 = nc;
94 }
95 bRes = pBasic->CError( code, aError, nLine, nc, nCol2 );
96 }
97 bAbort = bAbort || !bRes || ( code == ERRCODE_BASIC_NO_MEMORY || code == ERRCODE_BASIC_PROG_TOO_LARGE );
98 }
99 nErrors++;
100 }
101
102
103 // used by SbiTokenizer::MayBeLabel() to detect a label
DoesColonFollow()104 bool SbiScanner::DoesColonFollow()
105 {
106 if(nCol < aLine.getLength() && aLine[nCol] == ':')
107 {
108 ++nLineIdx; ++nCol;
109 return true;
110 }
111 else
112 return false;
113 }
114
115 // test for legal suffix
GetSuffixType(sal_Unicode c)116 static SbxDataType GetSuffixType( sal_Unicode c )
117 {
118 switch (c)
119 {
120 case '%':
121 return SbxINTEGER;
122 case '&':
123 return SbxLONG;
124 case '!':
125 return SbxSINGLE;
126 case '#':
127 return SbxDOUBLE;
128 case '@':
129 return SbxCURRENCY;
130 case '$':
131 return SbxSTRING;
132 default:
133 return SbxVARIANT;
134 }
135 }
136
// NextSym() reads the next symbol into the variables aSym, nVal and eScanType;
// its return value is false at EOF or on errors
139 #define BUF_SIZE 80
140
scanAlphanumeric()141 void SbiScanner::scanAlphanumeric()
142 {
143 sal_Int32 n = nCol;
144 while(nCol < aLine.getLength() && (BasicCharClass::isAlphaNumeric(aLine[nCol], bCompatible) || aLine[nCol] == '_'))
145 {
146 ++nLineIdx;
147 ++nCol;
148 }
149 aSym = aLine.copy(n, nCol - n);
150 }
151
scanGoto()152 void SbiScanner::scanGoto()
153 {
154 sal_Int32 n = nCol;
155 while(n < aLine.getLength() && BasicCharClass::isWhitespace(aLine[n]))
156 ++n;
157
158 if(n + 1 < aLine.getLength())
159 {
160 OUString aTemp = aLine.copy(n, 2);
161 if(aTemp.equalsIgnoreAsciiCase("to"))
162 {
163 aSym = "goto";
164 nLineIdx += n + 2 - nCol;
165 nCol = n + 2;
166 }
167 }
168 }
169
readLine()170 bool SbiScanner::readLine()
171 {
172 if(nBufPos >= aBuf.getLength())
173 return false;
174
175 sal_Int32 n = nBufPos;
176 sal_Int32 nLen = aBuf.getLength();
177
178 while(n < nLen && aBuf[n] != '\r' && aBuf[n] != '\n')
179 ++n;
180
181 // Trim trailing whitespace
182 sal_Int32 nEnd = n;
183 while(nBufPos < nEnd && BasicCharClass::isWhitespace(aBuf[nEnd - 1]))
184 --nEnd;
185
186 aLine = aBuf.copy(nBufPos, nEnd - nBufPos);
187
188 // Fast-forward past the line ending
189 if(n + 1 < nLen && aBuf[n] == '\r' && aBuf[n + 1] == '\n')
190 n += 2;
191 else if(n < nLen)
192 ++n;
193
194 nBufPos = n;
195 nLineIdx = 0;
196
197 ++nLine;
198 nCol = nCol1 = nCol2 = 0;
199 nColLock = 0;
200
201 return true;
202 }
203
NextSym()204 bool SbiScanner::NextSym()
205 {
206 // memorize for the EOLN-case
207 sal_Int32 nOldLine = nLine;
208 sal_Int32 nOldCol1 = nCol1;
209 sal_Int32 nOldCol2 = nCol2;
210 sal_Unicode buf[ BUF_SIZE ], *p = buf;
211
212 eScanType = SbxVARIANT;
213 aSym.clear();
214 bHash = bSymbol = bNumber = bSpaces = false;
215 bool bCompilerDirective = false;
216
217 // read in line?
218 if (nLineIdx == -1)
219 {
220 if(!readLine())
221 return false;
222
223 nOldLine = nLine;
224 nOldCol1 = nOldCol2 = 0;
225 }
226
227 const sal_Int32 nLineIdxScanStart = nLineIdx;
228
229 if(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol]))
230 {
231 bSpaces = true;
232 while(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol]))
233 {
234 ++nLineIdx;
235 ++nCol;
236 }
237 }
238
239 nCol1 = nCol;
240
241 // only blank line?
242 if(nCol >= aLine.getLength())
243 goto eoln;
244
245 if( bPrevLineExtentsComment )
246 goto PrevLineCommentLbl;
247
248 if(nCol < aLine.getLength() && aLine[nCol] == '#')
249 {
250 sal_Int32 nLineTempIdx = nLineIdx;
251 do
252 {
253 nLineTempIdx++;
254 } while (nLineTempIdx < aLine.getLength() && !BasicCharClass::isWhitespace(aLine[nLineTempIdx])
255 && aLine[nLineTempIdx] != '#' && aLine[nLineTempIdx] != ',');
256 // leave it if it is a date literal - it will be handled later
257 if (nLineTempIdx >= aLine.getLength() || aLine[nLineTempIdx] != '#')
258 {
259 ++nLineIdx;
260 ++nCol;
261 //ignore compiler directives (# is first non-space character)
262 if (nOldCol2 == 0)
263 bCompilerDirective = true;
264 else
265 bHash = true;
266 }
267 }
268
269 // copy character if symbol
270 if(nCol < aLine.getLength() && (BasicCharClass::isAlpha(aLine[nCol], bCompatible) || aLine[nCol] == '_'))
271 {
272 // if there's nothing behind '_' , it's the end of a line!
273 if(nCol + 1 == aLine.getLength() && aLine[nCol] == '_')
274 {
275 // Note that nCol is not incremented here...
276 ++nLineIdx;
277 goto eoln;
278 }
279
280 bSymbol = true;
281
282 scanAlphanumeric();
283
284 // Special handling for "go to"
285 if(nCol < aLine.getLength() && bCompatible && aSym.equalsIgnoreAsciiCase("go"))
286 scanGoto();
287
288 // replace closing '_' by space when end of line is following
289 // (wrong line continuation otherwise)
290 if (nCol == aLine.getLength() && aLine[nCol - 1] == '_')
291 {
292 // We are going to modify a potentially shared string, so force
293 // a copy, so that aSym is not modified by the following operation
294 OUString aSymCopy( aSym.getStr(), aSym.getLength() );
295 aSym = aSymCopy;
296
297 // HACK: modifying a potentially shared string here!
298 const_cast<sal_Unicode*>(aLine.getStr())[nLineIdx - 1] = ' ';
299 }
300
301 // type recognition?
302 // don't test the exclamation mark
303 // if there's a symbol behind it
304 else if((nCol >= aLine.getLength() || aLine[nCol] != '!') ||
305 (nCol + 1 >= aLine.getLength() || !BasicCharClass::isAlpha(aLine[nCol + 1], bCompatible)))
306 {
307 if(nCol < aLine.getLength())
308 {
309 SbxDataType t(GetSuffixType(aLine[nCol]));
310 if( t != SbxVARIANT )
311 {
312 eScanType = t;
313 ++nLineIdx;
314 ++nCol;
315 }
316 }
317 }
318 }
319
320 // read in and convert if number
321 else if((nCol < aLine.getLength() && rtl::isAsciiDigit(aLine[nCol])) ||
322 (nCol + 1 < aLine.getLength() && aLine[nCol] == '.' && rtl::isAsciiDigit(aLine[nCol + 1])))
323 {
324 short exp = 0;
325 short dec = 0;
326 eScanType = SbxDOUBLE;
327 bool bScanError = false;
328 bool bBufOverflow = false;
329 // All this because of 'D' or 'd' floating point type, sigh...
330 while(!bScanError && nCol < aLine.getLength() && strchr("0123456789.DEde", aLine[nCol]))
331 {
332 // from 4.1.1996: buffer full? -> go on scanning empty
333 if( (p-buf) == (BUF_SIZE-1) )
334 {
335 bBufOverflow = true;
336 ++nLineIdx;
337 ++nCol;
338 continue;
339 }
340 // point or exponent?
341 if(aLine[nCol] == '.')
342 {
343 if( ++dec > 1 )
344 bScanError = true;
345 else
346 *p++ = '.';
347 }
348 else if(strchr("DdEe", aLine[nCol]))
349 {
350 if (++exp > 1)
351 bScanError = true;
352 else
353 {
354 *p++ = 'E';
355 if (nCol + 1 < aLine.getLength() && (aLine[nCol+1] == '+' || aLine[nCol+1] == '-'))
356 {
357 ++nLineIdx;
358 ++nCol;
359 if( (p-buf) == (BUF_SIZE-1) )
360 {
361 bBufOverflow = true;
362 continue;
363 }
364 *p++ = aLine[nCol];
365 }
366 }
367 }
368 else
369 {
370 *p++ = aLine[nCol];
371 }
372 ++nLineIdx;
373 ++nCol;
374 }
375 *p = 0;
376 aSym = p; bNumber = true;
377
378 // For bad characters, scan and parse errors generate only one error.
379 ErrCode nError = ERRCODE_NONE;
380 if (bScanError)
381 {
382 --nLineIdx;
383 --nCol;
384 aError = OUString( aLine[nCol]);
385 nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER;
386 }
387
388 rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
389 const sal_Unicode* pParseEnd = buf;
390 nVal = rtl_math_uStringToDouble( buf, buf+(p-buf), '.', ',', &eStatus, &pParseEnd );
391 if (pParseEnd != buf+(p-buf))
392 {
393 // e.g. "12e" or "12e+", or with bScanError "12d"+"E".
394 sal_Int32 nChars = buf+(p-buf) - pParseEnd;
395 nLineIdx -= nChars;
396 nCol -= nChars;
397 // For bScanError, nLineIdx and nCol were already decremented, just
398 // add that character to the parse end.
399 if (bScanError)
400 ++nChars;
401 // Copy error position from original string, not the buffer
402 // replacement where "12dE" => "12EE".
403 aError = aLine.copy( nCol, nChars);
404 nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER;
405 }
406 else if (eStatus != rtl_math_ConversionStatus_Ok)
407 {
408 // Keep the scan error and character at position, if any.
409 if (!nError)
410 nError = ERRCODE_BASIC_MATH_OVERFLOW;
411 }
412
413 if (nError)
414 GenError( nError );
415
416 if( !dec && !exp )
417 {
418 if( nVal >= SbxMININT && nVal <= SbxMAXINT )
419 eScanType = SbxINTEGER;
420 else if( nVal >= SbxMINLNG && nVal <= SbxMAXLNG )
421 eScanType = SbxLONG;
422 }
423
424 if( bBufOverflow )
425 GenError( ERRCODE_BASIC_MATH_OVERFLOW );
426
427 // type recognition?
428 if( nCol < aLine.getLength() )
429 {
430 SbxDataType t(GetSuffixType(aLine[nCol]));
431 if( t != SbxVARIANT )
432 {
433 eScanType = t;
434 ++nLineIdx;
435 ++nCol;
436 }
437 }
438 }
439
440 // Hex/octal number? Read in and convert:
441 else if(aLine.getLength() - nCol > 1 && aLine[nCol] == '&')
442 {
443 ++nLineIdx; ++nCol;
444 sal_Unicode base = 16;
445 sal_Unicode xch = aLine[nCol];
446 ++nLineIdx; ++nCol;
447 switch( rtl::toAsciiUpperCase( xch ) )
448 {
449 case 'O':
450 base = 8;
451 break;
452 case 'H':
453 break;
454 default :
455 // treated as an operator
456 --nLineIdx; --nCol; nCol1 = nCol-1;
457 aSym = "&";
458 return true;
459 }
460 bNumber = true;
461 // Hex literals are signed Integers ( as defined by basic
462 // e.g. -2,147,483,648 through 2,147,483,647 (signed)
463 sal_uInt64 lu = 0;
464 bool bOverflow = false;
465 while(nCol < aLine.getLength() && BasicCharClass::isAlphaNumeric(aLine[nCol], false))
466 {
467 sal_Unicode ch = rtl::toAsciiUpperCase(aLine[nCol]);
468 ++nLineIdx; ++nCol;
469 if( ((base == 16 ) && rtl::isAsciiHexDigit( ch ) ) ||
470 ((base == 8) && rtl::isAsciiOctalDigit( ch )))
471 {
472 int i = ch - '0';
473 if( i > 9 ) i -= 7;
474 lu = ( lu * base ) + i;
475 if( lu > SAL_MAX_UINT32 )
476 {
477 bOverflow = true;
478 }
479 }
480 else
481 {
482 aError = OUString(ch);
483 GenError( ERRCODE_BASIC_BAD_CHAR_IN_NUMBER );
484 }
485 }
486 if(nCol < aLine.getLength() && aLine[nCol] == '&')
487 {
488 ++nLineIdx;
489 ++nCol;
490 }
491 // tdf#62326 - If the value of the hex string lies within the range of 0x8000 (SbxMAXINT + 1)
492 // and 0xFFFF (SbxMAXUINT) inclusive, cast the value to 16 bit in order to get
493 // signed integers, e.g., SbxMININT through SbxMAXINT
494 sal_Int32 ls = (lu > SbxMAXINT && lu <= SbxMAXUINT) ? static_cast<sal_Int16>(lu) : static_cast<sal_Int32>(lu);
495 nVal = static_cast<double>(ls);
496 eScanType = ( ls >= SbxMININT && ls <= SbxMAXINT ) ? SbxINTEGER : SbxLONG;
497 if( bOverflow )
498 GenError( ERRCODE_BASIC_MATH_OVERFLOW );
499 }
500
501 // Strings:
502 else if (nLineIdx < aLine.getLength() && (aLine[nLineIdx] == '"' || aLine[nLineIdx] == '['))
503 {
504 sal_Unicode cSep = aLine[nLineIdx];
505 if( cSep == '[' )
506 {
507 bSymbol = true;
508 cSep = ']';
509 }
510 sal_Int32 n = nCol + 1;
511 while (nLineIdx < aLine.getLength())
512 {
513 do
514 {
515 nLineIdx++;
516 nCol++;
517 }
518 while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != cSep));
519 if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == cSep)
520 {
521 nLineIdx++; nCol++;
522 if (nLineIdx >= aLine.getLength() || aLine[nLineIdx] != cSep || cSep == ']')
523 {
524 // If VBA Interop then doesn't eat the [] chars
525 if ( cSep == ']' && bVBASupportOn )
526 aSym = aLine.copy( n - 1, nCol - n + 1);
527 else
528 aSym = aLine.copy( n, nCol - n - 1 );
529 // get out duplicate string delimiters
530 OUStringBuffer aSymBuf(aSym.getLength());
531 for ( sal_Int32 i = 0, len = aSym.getLength(); i < len; ++i )
532 {
533 aSymBuf.append( aSym[i] );
534 if ( aSym[i] == cSep && ( i+1 < len ) && aSym[i+1] == cSep )
535 ++i;
536 }
537 aSym = aSymBuf.makeStringAndClear();
538 if( cSep != ']' )
539 eScanType = SbxSTRING;
540 break;
541 }
542 }
543 else
544 {
545 aError = OUString(cSep);
546 GenError( ERRCODE_BASIC_EXPECTED );
547 }
548 }
549 }
550
551 // Date:
552 else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#')
553 {
554 sal_Int32 n = nCol + 1;
555 do
556 {
557 nLineIdx++;
558 nCol++;
559 }
560 while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != '#'));
561 if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#')
562 {
563 nLineIdx++; nCol++;
564 aSym = aLine.copy( n, nCol - n - 1 );
565
566 // parse date literal
567 std::shared_ptr<SvNumberFormatter> pFormatter;
568 if (GetSbData()->pInst)
569 {
570 pFormatter = GetSbData()->pInst->GetNumberFormatter();
571 }
572 else
573 {
574 sal_uInt32 nDummy;
575 pFormatter = SbiInstance::PrepareNumberFormatter( nDummy, nDummy, nDummy );
576 }
577 sal_uInt32 nIndex = pFormatter->GetStandardIndex( LANGUAGE_ENGLISH_US);
578 bool bSuccess = pFormatter->IsNumberFormat(aSym, nIndex, nVal);
579 if( bSuccess )
580 {
581 SvNumFormatType nType_ = pFormatter->GetType(nIndex);
582 if( !(nType_ & SvNumFormatType::DATE) )
583 bSuccess = false;
584 }
585
586 if (!bSuccess)
587 GenError( ERRCODE_BASIC_CONVERSION );
588
589 bNumber = true;
590 eScanType = SbxDOUBLE;
591 }
592 else
593 {
594 aError = OUString('#');
595 GenError( ERRCODE_BASIC_EXPECTED );
596 }
597 }
598 // invalid characters:
599 else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] >= 0x7F)
600 {
601 GenError( ERRCODE_BASIC_SYNTAX ); nLineIdx++; nCol++;
602 }
603 // other groups:
604 else
605 {
606 sal_Int32 n = 1;
607 auto nChar = nLineIdx < aLine.getLength() ? aLine[nLineIdx] : 0;
608 ++nLineIdx;
609 if (nLineIdx < aLine.getLength())
610 {
611 switch (nChar)
612 {
613 case '<': if( aLine[nLineIdx] == '>' || aLine[nLineIdx] == '=' ) n = 2; break;
614 case '>': if( aLine[nLineIdx] == '=' ) n = 2; break;
615 case ':': if( aLine[nLineIdx] == '=' ) n = 2; break;
616 }
617 }
618 aSym = aLine.copy(nCol, std::min(n, aLine.getLength() - nCol));
619 nLineIdx += n-1; nCol = nCol + n;
620 }
621
622 nCol2 = nCol-1;
623
624 PrevLineCommentLbl:
625
626 if( bPrevLineExtentsComment || (eScanType != SbxSTRING &&
627 ( bCompilerDirective ||
628 aSym.startsWith("'") ||
629 aSym.equalsIgnoreAsciiCase( "REM" ) ) ) )
630 {
631 bPrevLineExtentsComment = false;
632 aSym = "REM";
633 sal_Int32 nLen = aLine.getLength() - nLineIdx;
634 if( bCompatible && aLine[nLineIdx + nLen - 1] == '_' && aLine[nLineIdx + nLen - 2] == ' ' )
635 bPrevLineExtentsComment = true;
636 nCol2 = nCol2 + nLen;
637 nLineIdx = -1;
638 }
639
640 if (nLineIdx == nLineIdxScanStart)
641 {
642 GenError( ERRCODE_BASIC_SYMBOL_EXPECTED );
643 return false;
644 }
645
646 return true;
647
648
649 eoln:
650 if( nCol && aLine[--nLineIdx] == '_' )
651 {
652 nLineIdx = -1;
653 bool bRes = NextSym();
654 if( aSym.startsWith(".") )
655 {
656 // object _
657 // .Method
658 // ^^^ <- spaces is legal in MSO VBA
659 bSpaces = false;
660 }
661 return bRes;
662 }
663 else
664 {
665 nLineIdx = -1;
666 nLine = nOldLine;
667 nCol1 = nOldCol1;
668 nCol2 = nOldCol2;
669 aSym = "\n";
670 nColLock = 0;
671 return true;
672 }
673 }
674
675 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
676