1 /* This file is part of the KDE project
2    Copyright (C) 2008-2010 Marijn Kruisselbrink <mkruisselbrink@kde.org>
3    Copyright (C) 2003,2004 Ariya Hidayat <ariya@kde.org>
4    Copyright (C) 2005 Tomas Mecir <mecirt@gmail.com>
5 
6    This library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public
8    License as published by the Free Software Foundation; only
9    version 2 of the License.
10 
11    This library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15 
16    You should have received a copy of the GNU Library General Public License
17    along with this library; see the file COPYING.LIB.  If not, write to
18    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19    Boston, MA 02110-1301, USA.
20 */
21 
22 #include "Formula.h"
23 
24 #include "CalculationSettings.h"
25 #include "Cell.h"
26 #include "CellStorage.h"
27 #include "Function.h"
28 #include "FunctionRepository.h"
29 #include "Sheet.h"
30 #include "Map.h"
31 #include "NamedAreaManager.h"
32 #include "Region.h"
33 #include "Value.h"
34 #include "Util.h"
35 
36 #include "ValueCalc.h"
37 #include "ValueConverter.h"
38 #include "ValueParser.h"
39 
40 #include <limits.h>
41 
42 #include <QStack>
43 #include <QString>
44 #include <QTextStream>
45 
46 #include <klocale.h>
47 
48 #define CALLIGRA_SHEETS_UNICODE_OPERATORS
49 
50 /*
51   To understand how this formula engine works, please refer to the documentation
52   in file DESIGN.html.
53 
54   Useful references:
55    - "Principles of Compiler Design", A.V.Aho, J.D.Ullman, Addison Wesley, 1978
56    - "Writing Interactive Compilers and Interpreters", P.J. Brown,
57      John Wiley and Sons, 1979.
58    - "The Theory and Practice of Compiler Writing", J.Tremblay, P.G.Sorenson,
59      McGraw-Hill, 1985.
60    - "The Java(TM) Virtual Machine Specification", T.Lindholm, F.Yellin,
61      Addison-Wesley, 1997.
62    - "Java Virtual Machine", J.Meyer, T.Downing, O'Reilly, 1997.
63 
64  */
65 
66 
67 /*
68 TODO - features:
69 - handle Intersection
70 - cell reference is made relative (absolute now)
71 - shared formula (different owner, same data)
72 - relative internal representation (independent of owner)
73 - OASIS support
74 TODO - optimizations:
75 - handle initial formula marker = (and +)
76 - reuse constant already in the pool
77 - reuse references already in the pool
78 - expression optimization (e.g. 1+2+A1 becomes 3+A1)
79 */
80 
81 namespace Calligra
82 {
83 namespace Sheets
84 {
85 
86 class Opcode
87 {
88 public:
89 
90     enum { Nop = 0, Load, Ref, Cell, Range, Function, Add, Sub, Neg, Mul, Div,
91            Pow, Concat, Intersect, Not, Equal, Less, Greater, Array, Union
92          };
93 
94     unsigned type;
95     unsigned index;
96 
Opcode()97     Opcode(): type(Nop), index(0) {}
Opcode(unsigned t)98     Opcode(unsigned t): type(t), index(0) {}
Opcode(unsigned t,unsigned i)99     Opcode(unsigned t, unsigned i): type(t), index(i) {}
100 };
101 
102 // used when evaluation formulas
103 struct stackEntry {
resetCalligra::Sheets::stackEntry104     void reset() {
105         row1 = col1 = row2 = col2 = -1;
106         reg = Calligra::Sheets::Region();
107         regIsNamedOrLabeled = false;
108     }
109     Value val;
110     Calligra::Sheets::Region reg;
111     bool regIsNamedOrLabeled;
112     int row1, col1, row2, col2;
113 };
114 
115 class Q_DECL_HIDDEN Formula::Private : public QSharedData
116 {
117 public:
118     Cell cell;
119     Sheet *sheet;
120     mutable bool dirty;
121     mutable bool valid;
122     QString expression;
123     mutable QVector<Opcode> codes;
124     mutable QVector<Value> constants;
125 
126     Value valueOrElement(FuncExtra &fe, const stackEntry& entry) const;
127 };
128 
129 class TokenStack : public QVector<Token>
130 {
131 public:
132     TokenStack();
133     unsigned itemCount() const;
134     void push(const Token& token);
135     Token pop();
136     const Token& top();
137     const Token& top(unsigned index);
138 };
139 
140 } // namespace Sheets
141 } // namespace Calligra
142 
143 using namespace Calligra::Sheets;
144 
145 // for null token
146 const Token Token::null;
147 
148 // helper function: return operator of given token text
149 // e.g. '*' yields Operator::Asterisk, and so on
matchOperator(const QString & text)150 Token::Op Calligra::Sheets::matchOperator(const QString& text)
151 {
152     Token::Op result = Token::InvalidOp;
153 
154     if (text.length() == 1) {
155         QChar p = text[0];
156         switch (p.unicode()) {
157         case '+': result = Token::Plus; break;
158         case '-': result = Token::Minus; break;
159         case '*': result = Token::Asterisk; break;
160         case '/': result = Token::Slash; break;
161         case '^': result = Token::Caret; break;
162         case ',': result = Token::Comma; break;
163         case ';': result = Token::Semicolon; break;
164         case ' ': result = Token::Intersect; break;
165         case '(': result = Token::LeftPar; break;
166         case ')': result = Token::RightPar; break;
167         case '&': result = Token::Ampersand; break;
168         case '=': result = Token::Equal; break;
169         case '<': result = Token::Less; break;
170         case '>': result = Token::Greater; break;
171         case '%': result = Token::Percent; break;
172         case '~': result = Token::Union; break;
173 #ifdef CALLIGRA_SHEETS_INLINE_ARRAYS
174         case '{': result = Token::CurlyBra; break;
175         case '}': result = Token::CurlyKet; break;
176         case '|': result = Token::Pipe; break;
177 #endif
178 #ifdef CALLIGRA_SHEETS_UNICODE_OPERATORS
179         case 0x2212: result = Token::Minus; break;
180         case 0x00D7: result = Token::Asterisk; break;
181         case 0x00F7: result = Token::Slash; break;
182         case 0x2215: result = Token::Slash; break;
183 #endif
184         default : result = Token::InvalidOp; break;
185         }
186     }
187 
188     if (text.length() == 2) {
189         if (text == "<>") result = Token::NotEqual;
190         if (text == "!=") result = Token::NotEqual;
191         if (text == "<=") result = Token::LessEqual;
192         if (text == ">=") result = Token::GreaterEqual;
193         if (text == "==") result = Token::Equal;
194     }
195 
196     return result;
197 }
198 
parseOperator(const QChar * & data,QChar * & out)199 bool Calligra::Sheets::parseOperator(const QChar *&data, QChar *&out)
200 {
201     bool retval = true;
202     switch(data->unicode()) {
203     case '+':
204     case '-':
205     case '*':
206     case '/':
207     case '^':
208     case ',':
209     case ';':
210     case ' ':
211     case '(':
212     case ')':
213     case '&':
214     case '%':
215     case '~':
216 #ifdef CALLIGRA_SHEETS_INLINE_ARRAYS
217     case '{':
218     case '}':
219     case '|':
220 #endif
221 #ifdef CALLIGRA_SHEETS_UNICODE_OPERATORS
222     case 0x2212:
223     case 0x00D7:
224     case 0x00F7:
225     case 0x2215:
226 #endif
227         *out = *data;
228         ++out;
229         ++data;
230         break;
231     case '<':
232         *out = *data;
233         ++out;
234         ++data;
235         if (!data->isNull()) {
236             if (*data == QChar('>', 0) || *data == QChar('=', 0)) {
237                 *out = *data;
238                 ++out;
239                 ++data;
240             }
241         }
242         break;
243     case '>':
244         *out = *data;
245         ++out;
246         ++data;
247         if (!data->isNull() && *data == QChar('=', 0)) {
248             *out = *data;
249             ++out;
250             ++data;
251         }
252         break;
253     case '=':
254         *out++ = *data++;
255         if (!data->isNull() && *data == QChar('=', 0)) {
256             *out++ = *data++;
257         }
258         break;
259     case '!': {
260         const QChar * next = data + 1;
261         if (!next->isNull() && *next == QChar('=', 0)) {
262             *out = *data;
263             ++out;
264             ++data;
265             *out = *data;
266             ++out;
267             ++data;
268         }
269         else {
270             retval = false;
271         }
272     }   break;
273     default:
274         retval = false;
275         break;
276     }
277     return retval;
278 }
279 
280 // helper function: give operator precedence
281 // e.g. '+' is 1 while '*' is 3
opPrecedence(Token::Op op)282 static int opPrecedence(Token::Op op)
283 {
284     int prec = -1;
285     switch (op) {
286     case Token::Percent      : prec = 8; break;
287     case Token::Caret        : prec = 7; break;
288     case Token::Asterisk     : prec = 5; break;
289     case Token::Slash        : prec = 6; break;
290     case Token::Plus         : prec = 3; break;
291     case Token::Minus        : prec = 3; break;
292     case Token::Union        : prec = 2; break;
293     case Token::Ampersand    : prec = 2; break;
294     case Token::Intersect    : prec = 2; break;
295     case Token::Equal        : prec = 1; break;
296     case Token::NotEqual     : prec = 1; break;
297     case Token::Less         : prec = 1; break;
298     case Token::Greater      : prec = 1; break;
299     case Token::LessEqual    : prec = 1; break;
300     case Token::GreaterEqual : prec = 1; break;
301 #ifdef CALLIGRA_SHEETS_INLINE_ARRAYS
302         // FIXME Stefan: I don't know whether zero is right for this case. :-(
303     case Token::CurlyBra     : prec = 0; break;
304     case Token::CurlyKet     : prec = 0; break;
305     case Token::Pipe         : prec = 0; break;
306 #endif
307     case Token::Semicolon    : prec = 0; break;
308     case Token::RightPar     : prec = 0; break;
309     case Token::LeftPar      : prec = -1; break;
310     default: prec = -1; break;
311     }
312     return prec;
313 }
314 
315 // helper function
tokenAsValue(const Token & token)316 static Value tokenAsValue(const Token& token)
317 {
318     Value value;
319     if (token.isBoolean()) value = Value(token.asBoolean());
320     else if (token.isInteger()) value = Value(token.asInteger());
321     else if (token.isFloat()) value = Value(token.asFloat());
322     else if (token.isString()) value = Value(token.asString());
323     else if (token.isError()) {
324         const QString error = token.asError();
325         if (error == Value::errorCIRCLE().errorMessage())
326             value = Value::errorCIRCLE();
327         else if (error == Value::errorDEPEND().errorMessage())
328             value = Value::errorDEPEND();
329         else if (error == Value::errorDIV0().errorMessage())
330             value = Value::errorDIV0();
331         else if (error == Value::errorNA().errorMessage())
332             value = Value::errorNA();
333         else if (error == Value::errorNAME().errorMessage())
334             value = Value::errorNAME();
335         else if (error == Value::errorNUM().errorMessage())
336             value = Value::errorNUM();
337         else if (error == Value::errorNULL().errorMessage())
338             value = Value::errorNULL();
339         else if (error == Value::errorPARSE().errorMessage())
340             value = Value::errorPARSE();
341         else if (error == Value::errorREF().errorMessage())
342             value = Value::errorREF();
343         else if (error == Value::errorVALUE().errorMessage())
344             value = Value::errorVALUE();
345         else {
346             value = Value(Value::Error);
347             value.setError(error);
348         }
349     }
350     return value;
351 }
352 
353 /**********************
354     Token
355  **********************/
356 
357 // creates a token
Token(Type type,const QString & text,int pos)358 Token::Token(Type type, const QString& text, int pos)
359 : m_type(type)
360 , m_text(text)
361 , m_pos(pos)
362 {
363     // the detach is needed as we manipulate the string we use as input afterwards
364     // by writing to QChar * data point which does nto detach automatically.
365     m_text.detach();
366 }
367 
368 // copy constructor
Token(const Token & token)369 Token::Token(const Token& token)
370 : m_type(token.m_type)
371 , m_text(token.m_text)
372 , m_pos(token.m_pos)
373 {
374 }
375 
376 // assignment operator
operator =(const Token & token)377 Token& Token::operator=(const Token & token)
378 {
379     m_type = token.m_type;
380     m_text = token.m_text;
381     m_pos = token.m_pos;
382     return *this;
383 }
384 
asBoolean() const385 bool Token::asBoolean() const
386 {
387     if (!isBoolean()) return false;
388     return m_text.toLower() == "true";
389     // FIXME check also for i18n version
390 }
391 
asInteger() const392 qint64 Token::asInteger() const
393 {
394     if (isInteger()) return m_text.toLongLong();
395     else return 0;
396 }
397 
asFloat() const398 double Token::asFloat() const
399 {
400     if (isFloat()) return m_text.toDouble();
401     else return 0.0;
402 }
403 
asString() const404 QString Token::asString() const
405 {
406     if (!isString()) return QString();
407     QString res = m_text.mid(1, m_text.length() - 2);
408     res = res.replace("\"\"", "\"");   // double quotes to single quotes
409     return res;
410 }
411 
asError() const412 QString Token::asError() const
413 {
414     if (isError())
415         return m_text;
416     else
417         return QString();
418 }
419 
asOperator() const420 Token::Op Token::asOperator() const
421 {
422     if (isOperator()) return matchOperator(m_text);
423     else return InvalidOp;
424 }
425 
sheetName() const426 QString Token::sheetName() const
427 {
428     if (!isCell() && !isRange()) return QString();
429     int i = m_text.indexOf('!');
430     if (i < 0) return QString();
431     QString sheet = m_text.left(i);
432     return sheet;
433 }
434 
description() const435 QString Token::description() const
436 {
437     QString desc;
438 
439     switch (m_type) {
440     case  Boolean:    desc = "Boolean"; break;
441     case  Integer:    desc = "Integer"; break;
442     case  Float:      desc = "Float"; break;
443     case  String:     desc = "String"; break;
444     case  Identifier: desc = "Identifier"; break;
445     case  Cell:       desc = "Cell"; break;
446     case  Range:      desc = "Range"; break;
447     case  Operator:   desc = "Operator"; break;
448     case  Error:      desc = "Error"; break;
449     default:          desc = "Unknown"; break;
450     }
451 
452     while (desc.length() < 10) desc.prepend(' ');
453     desc.prepend("  ");
454     desc.prepend(QString::number(m_pos));
455     desc.append(" : ").append(m_text);
456 
457     return desc;
458 }
459 
460 
461 /**********************
462     TokenStack
463  **********************/
464 
TokenStack()465 TokenStack::TokenStack(): QVector<Token>()
466 {
467 }
468 
itemCount() const469 unsigned TokenStack::itemCount() const
470 {
471     return size();
472 }
473 
push(const Token & token)474 void TokenStack::push(const Token& token)
475 {
476     append(token);
477 }
478 
pop()479 Token TokenStack::pop()
480 {
481     if (!isEmpty()) {
482         Token token = last();
483         pop_back();
484         return token;
485     }
486     return Token();
487 }
488 
top()489 const Token& TokenStack::top()
490 {
491     return top(0);
492 }
493 
top(unsigned index)494 const Token& TokenStack::top(unsigned index)
495 {
496     unsigned top = size();
497     if (top > index)
498         return at(top - index - 1);
499     return Token::null;
500 }
501 
502 
503 /**********************
504     FormulaPrivate
505  **********************/
506 
507 // helper function: return true for valid identifier character
isIdentifier(QChar ch)508 bool Calligra::Sheets::isIdentifier(QChar ch)
509 {
510     switch(ch.unicode()) {
511     case '_':
512     case '$':
513     case '.':
514         return true;
515     default:
516         return ch.isLetter();
517     }
518 }
519 
520 
521 
522 
523 /**********************
524     Formula
525  **********************/
526 
527 // Constructor
528 
Formula(Sheet * sheet,const Cell & cell)529 Formula::Formula(Sheet *sheet, const Cell& cell)
530         : d(new Private)
531 {
532     d->cell = cell;
533     d->sheet = sheet;
534     clear();
535 }
536 
Formula(Sheet * sheet)537 Formula::Formula(Sheet *sheet)
538         : d(new Private)
539 {
540     d->cell = Cell();
541     d->sheet = sheet;
542     clear();
543 }
544 
Formula()545 Formula::Formula()
546         : d(new Private)
547 {
548     d->cell = Cell();
549     d->sheet = 0;
550     clear();
551 }
552 
empty()553 Formula Formula::empty()
554 {
555     static Formula f;
556     return f;
557 }
558 
Formula(const Formula & other)559 Formula::Formula(const Formula& other)
560         : d(other.d)
561 {
562 }
563 
564 // Destructor
565 
~Formula()566 Formula::~Formula()
567 {
568 }
569 
cell() const570 const Cell& Formula::cell() const
571 {
572     return d->cell;
573 }
574 
sheet() const575 Sheet* Formula::sheet() const
576 {
577     return d->sheet;
578 }
579 
580 // Sets a new expression for this formula.
581 // note that both the real lex and parse processes will happen later on
582 // when needed (i.e. "lazy parse"), for example during formula evaluation.
583 
setExpression(const QString & expr)584 void Formula::setExpression(const QString& expr)
585 {
586     d->expression = expr;
587     d->dirty = true;
588     d->valid = false;
589 }
590 
591 // Returns the expression associated with this formula.
592 
expression() const593 QString Formula::expression() const
594 {
595     return d->expression;
596 }
597 
598 // Returns the validity of the formula.
599 // note: empty formula is always invalid.
600 
isValid() const601 bool Formula::isValid() const
602 {
603     if (d->dirty) {
604         KLocale* locale = !d->cell.isNull() ? d->cell.locale() : 0;
605         if ((!locale) && d->sheet)
606             locale = d->sheet->map()->calculationSettings()->locale();
607         Tokens tokens = scan(d->expression, locale);
608 
609         if (tokens.valid())
610             compile(tokens);
611         else
612             d->valid = false;
613     }
614     return d->valid;
615 }
616 
617 // Clears everything, also mark the formula as invalid.
618 
clear()619 void Formula::clear()
620 {
621     d->expression.clear();
622     d->dirty = true;
623     d->valid = false;
624     d->constants.clear();
625     d->codes.clear();
626 }
627 
628 // Returns list of token for the expression.
629 // this triggers again the lexical analysis step. it is however preferable
630 // (even when there's small performance penalty) because otherwise we need to
631 // store parsed tokens all the time which serves no good purpose.
632 
tokens() const633 Tokens Formula::tokens() const
634 {
635     KLocale* locale = !d->cell.isNull() ? d->cell.locale() : 0;
636     if ((!locale) && d->sheet)
637         locale = d->sheet->map()->calculationSettings()->locale();
638     return scan(d->expression, locale);
639 }
640 
scan(const QString & expr,const KLocale * locale) const641 Tokens Formula::scan(const QString &expr, const KLocale* locale) const
642 {
643     // parsing state
644     enum { Start, Finish, InNumber, InDecimal, InExpIndicator, InExponent,
645            InString, InIdentifier, InCell, InRange, InSheetOrAreaName, InError
646          } state;
647 
648     // use locale settings if specified
649     QString thousand = locale ? locale->thousandsSeparator() : "";
650     QString decimal = locale ? locale->decimalSymbol() : ".";
651 
652     const QChar *data = expr.constData();
653 
654     Tokens tokens;
655     if (data->isNull() || *data != QChar('=', 0)) {
656         return tokens;
657     }
658     tokens.reserve(50);
659 
660     ++data;
661     const QChar * const start = data;
662     const QChar * const end = start + expr.length();
663     const QChar *tokenStart = data;
664     const QChar *cellStart = data;
665 
666     state = Start;
667     bool parseError = false;
668 
669     int length = expr.length() * 1.1; // TODO check if that is needed at all
670     QString token(length, QChar());
671     token.reserve(length); // needed to not realloc at the resize at the end
672     QChar * out = token.data();
673     QChar * const outStart = token.data();
674 
675     while (state != Finish && data < end) {
676         switch (state) {
677         case Start:
678             tokenStart = data;
679             // Whitespaces can be used as intersect-operator for two arrays.
680             if (data->isSpace()) {
681                 ++data;
682             }
683             // check for number
684             else if (data->isDigit()) {
685                 state = InNumber;
686                 *out++ = *data++;
687             }
688             // terminator character
689             else if (data->isNull()) {
690                 state = Finish;
691             }
692             else {
693                 switch (data->unicode()) {
694                 case '"': // a string ?
695                     *out++ = *data++;
696                     state = InString;
697                     break;
698                 case '\'': // aposthrophe (') marks sheet name for 3-d cell, e.g 'Sales Q3'!A4, or a named range
699                     ++data;
700                     state = InSheetOrAreaName;
701                     break;
702                 case '#': // error value?
703                     *out++ = *data++;
704                     state = InError;
705                     break;
706                 default:
707                     // decimal dot ?
708                     if (*data == decimal[0]) {
709                         *out++ = *data++;
710                         state = InDecimal;
711                     }
712                     // beginning with alphanumeric ?
713                     // could be identifier, cell, range, or function...
714                     else if (isIdentifier(*data)) {
715                         *out++ = *data++;
716                         state = InIdentifier;
717                     }
718                     else {
719                         // look for operator match
720                         if (parseOperator(data, out)) {
721                             token.resize(out - outStart);
722                             tokens.append(Token(Token::Operator, token, tokenStart - start));
723                             token.resize(length);
724                             out = outStart;
725                         }
726                         else {
727                             // not matched an operator, add an Unknown token and remember we had a parse error
728                             parseError = true;
729                             *out++ = *data++;
730                             token.resize(out - outStart);
731                             tokens.append(Token(Token::Unknown, token, tokenStart - start));
732                             token.resize(length);
733                             out = outStart;
734                         }
735                     }
736                     break;
737                 }
738             }
739             break;
740         case InIdentifier:
741             // consume as long as alpha, dollar sign, underscore, or digit
742             if (isIdentifier(*data) || data->isDigit()) {
743                 *out = *data;
744                 ++out;
745                 ++data;
746             }
747             // a '!' ? then this must be sheet name, e.g "Sheet4!", unless the next character is '='
748             else if (*data == QChar('!', 0) && !(data + 1)->isNull() && *(data + 1) != QChar('=', 0)) {
749                 *out++ = *data++;
750                 cellStart = out;
751                 state = InCell;
752             }
753             // a '(' ? then this must be a function identifier
754             else if (*data == QChar('(', 0)) {
755                 token.resize(out - outStart);
756                 tokens.append(Token(Token::Identifier, token, tokenStart - start));
757                 token.resize(length);
758                 out = outStart;
759                 state = Start;
760             }
761             // we're done with identifier
762             else {
763                 *out = QChar();
764                 // check for cell reference,  e.g A1, VV123, ...
765                 if (Util::isCellReference(token)) {
766                     // so up to now we've got something like A2 or Sheet2!F4
767                     // check for range reference
768                     if (*data == QChar(':', 0)) {
769                         *out++ = *data++;
770                         state = InRange;
771                     }
772                     // we're done with cell reference
773                     else {
774                         token.resize(out - outStart);
775                         tokens.append(Token(Token::Cell, token, tokenStart - start));
776                         token.resize(length);
777                         out = outStart;
778                         state = Start;
779                     }
780                 }
781                 else {
782                     token.resize(out - outStart);
783                     if (isNamedArea(token)) {
784                         tokens.append(Token(Token::Range, token, tokenStart - start));
785                     }
786                     else {
787                         tokens.append(Token(Token::Identifier, token, tokenStart - start));
788                     }
789                     token.resize(length);
790                     out = outStart;
791                     state = Start;
792                 }
793             }
794             break;
795         case InCell:
796             // consume as long as alpha, dollar sign, underscore, or digit
797             if (isIdentifier(*data) || data->isDigit()) {
798                 *out++ = *data++;
799             }
800             else {
801                 *out = QChar();
802                 // check if it's a cell ref like A32, not named area
803                 if (!Util::isCellReference(token, cellStart - outStart)) {
804                     // test failed, means we have something like "Sheet2!TotalSales"
805                     // and not "Sheet2!A2"
806                     // thus, assume so far that it's a named area
807                     token.resize(out - outStart);
808                     tokens.append(Token(Token::Range, token, tokenStart - start));
809                     token.resize(length);
810                     out = outStart;
811                     state = Start;
812                 }
813                 else {
814                     // so up to now we've got something like A2 or Sheet2!F4
815                     // check for range reference
816                     if (*data == QChar(':', 0)) {
817                         *out++ = *data++;
818                         state = InRange;
819                     }
820                     else {
821                         // we're done with cell reference
822                         token.resize(out - outStart);
823                         tokens.append(Token(Token::Cell, token, tokenStart - start));
824                         token.resize(length);
825                         out = outStart;
826                         state = Start;
827                     }
828                 }
829             }
830             break;
831         case InRange:
832             // consume as long as alpha, dollar sign, underscore, or digit or !
833             if (isIdentifier(*data) || data->isDigit() || *data == QChar('!', 0)) {
834                 *out++ = *data++;
835             }
836             // we're done with range reference
837             else {
838                 token.resize(out - outStart);
839                 tokens.append(Token(Token::Range, token, tokenStart - start));
840                 token.resize(length);
841                 out = outStart;
842                 state = Start;
843             }
844             break;
845         case InSheetOrAreaName:
846             // consume until '
847             if (data->isNull()) {
848                 parseError = true;
849                 token.resize(out - outStart);
850                 tokens.append(Token(Token::Unknown, '\'' + token + '\'', tokenStart - start));
851                 state = Start;
852             }
853             else if (*data != QChar('\'', 0)) {
854                 *out++ = *data++;
855             }
856             else {
857                 // eat the aposthrophe itself
858                 ++data;
859                 // must be followed by '!' to be sheet name
860                 if (!data->isNull() && *data == QChar('!', 0)) {
861                     *out++ = *data++;
862                     cellStart = out;
863                     state = InCell;
864                 }
865                 else {
866                     token.resize(out - outStart);
867                     if (isNamedArea(token)) {
868                         tokens.append(Token(Token::Range, token, tokenStart - start));
869                     }
870                     else {
871                         // for compatibility with oocalc (and the openformula spec), don't parse single-quoted
872                         // text as an identifier, instead add an Unknown token and remember we had an error
873                         parseError = true;
874                         tokens.append(Token(Token::Unknown, '\'' + token + '\'', tokenStart - start));
875                     }
876                     token.resize(length);
877                     out = outStart;
878                     state = Start;
879                 }
880             }
881             break;
882         case InNumber:
883             // consume as long as it's digit
884             if (data->isDigit()) {
885                 *out++ = *data++;
886             }
887             // skip thousand separator
888             else if (!thousand.isEmpty() && (*data == thousand[0])) {
889                 ++data;
890             }
891             // convert decimal separator to '.', also support '.' directly
892             // we always support '.' because of bug #98455
893             else if ((!decimal.isEmpty() && (*data == decimal[0])) || *data == QChar('.', 0)) {
894                 *out++ = QChar('.', 0);
895                 ++data;
896                 state = InDecimal;
897             }
898             // exponent ?
899             else if (*data == QChar('E', 0) || *data == QChar('e', 0)) {
900                 *out++ = QChar('E', 0);
901                 ++data;
902                 state = InExpIndicator;
903             }
904             // reference sheet delimiter?
905             else if (*data == QChar('!', 0)) {
906                 *out++ = *data++;
907                 cellStart = out;
908                 state = InCell;
909             }
910             // identifier?
911             else if (isIdentifier(*data)) {
912                 // has to be a sheet or area name then
913                 *out++ = *data++;
914                 state = InIdentifier;
915             }
916             // we're done with integer number
917             else {
918                 token.resize(out - outStart);
919                 tokens.append(Token(Token::Integer, token, tokenStart - start));
920                 token.resize(length);
921                 out = outStart;
922                 state = Start;
923             }
924             break;
925         case InDecimal:
926             // consume as long as it's digit
927             if (data->isDigit()) {
928                 *out++ = *data++;
929             }
930             // exponent ?
931             else if (*data == QChar('E', 0) || *data == QChar('e', 0)) {
932                 *out++ = QChar('E', 0);
933                 ++data;
934                 state = InExpIndicator;
935             }
936             // we're done with floating-point number
937             else {
938                 token.resize(out - outStart);
939                 tokens.append(Token(Token::Float, token, tokenStart - start));
940                 token.resize(length);
941                 out = outStart;
942                 state = Start;
943             }
944             break;
945         case InExpIndicator:
946             // possible + or - right after E, e.g 1.23E+12 or 4.67E-8
947             if (*data == QChar('+', 0) || *data == QChar('-', 0)) {
948                 *out++ = *data++;
949             }
950             // consume as long as it's digit
951             else if (data->isDigit()) {
952                 *out++ = *data++;
953                 state = InExponent;
954             }
955             // invalid thing here
956             else {
957                 parseError = true;
958                 token.resize(out - outStart);
959                 tokens.append(Token(Token::Unknown, token, tokenStart - start));
960                 token.resize(length);
961                 out = outStart;
962                 state = Start;
963             }
964             break;
965         case InExponent:
966             // consume as long as it's digit
967             if (data->isDigit()) {
968                 *out++ = *data++;
969             }
970             // we're done with floating-point number
971             else {
972                 token.resize(out - outStart);
973                 tokens.append(Token(Token::Float, token, tokenStart - start));
974                 token.resize(length);
975                 out = outStart;
976                 state = Start;
977             }
978             break;
979         case InString:
980             // consume until "
981             if (*data != QChar('"', 0)) {
982                 *out++ = *data++;
983             }
984             else {
985                 *out++ = *data++;
986                 // check for escaped ""
987                 if ((!data->isNull()) && (*data == QChar('"', 0))) {
988                     *out++ = *data++;
989                 } else {
990                     token.resize(out - outStart);
991                     tokens.append(Token(Token::String, token, tokenStart - start));
992                     token.resize(length);
993                     out = outStart;
994                     state = Start;
995                 }
996             }
997             break;
998         case InError: {
999             ushort c = data->unicode();
1000             switch (c) {
1001             case '!':
1002             case '?':
1003                 // TODO check if there is at least one char that needs to be there
1004                 *out++ = *data++;
1005                 token.resize(out - outStart);
1006                 tokens.append(Token(Token::Error, token, tokenStart - start));
1007                 token.resize(length);
1008                 out = outStart;
1009                 state = Start;
1010                 break;
1011             case '/':
1012                 *out++ = *data++;
1013                 if (!data->isNull()) {
1014                     bool error = false;
1015                     if (*data >= 'A' && *data <= 'Z') {
1016                         *out++ = *data++;
1017                     }
1018                     else if (*data >= '0' && *data <= '9'){
1019                         *out++ = *data++;
1020                         if (!data->isNull() && (*data == QChar('!', 0) || *data == QChar('?', 0))) {
1021                             *out++ = *data++;
1022                         }
1023                     }
1024                     else {
1025                         error = true;
1026                     }
1027                     if (error) {
1028                         parseError = true;
1029                         token.resize(out - outStart);
1030                         tokens.append(Token(Token::Unknown, token, tokenStart - start));
1031                         token.resize(length);
1032                         out = outStart;
1033                         state = Start;
1034                     }
1035                     else {
1036                         token.resize(out - outStart);
1037                         tokens.append(Token(Token::Error, token, tokenStart - start));
1038                         token.resize(length);
1039                         out = outStart;
1040                         state = Start;
1041                     }
1042                 }
1043                 break;
1044             default:
1045                 if ((c >= 'A' && c <= 'Z') || (c >= '0' && c<= '9')) {
1046                     *out++ = *data++;
1047                 }
1048                 else {
1049                     parseError = true;
1050                     token.resize(out - outStart);
1051                     tokens.append(Token(Token::Unknown, token, tokenStart - start));
1052                     token.resize(length);
1053                     out = outStart;
1054                     state = Start;
1055                 }
1056                 break;
1057             }
1058         }   break;
1059         default:
1060             break;
1061         }
1062     }
1063 
1064     // parse error if any text remains
1065     if (data+1 < end)  {
1066         tokens.append(Token(Token::Unknown, expr.mid(tokenStart - start), tokenStart - start));
1067         parseError = true;
1068     }
1069 
1070     if (parseError)
1071         tokens.setValid(false);
1072     return tokens;
1073 }
1074 
1075 // will affect: dirty, valid, codes, constants
compile(const Tokens & tokens) const1076 void Formula::compile(const Tokens& tokens) const
1077 {
1078     // initialize variables
1079     d->dirty = false;
1080     d->valid = false;
1081     d->codes.clear();
1082     d->constants.clear();
1083 
1084     // sanity check
1085     if (tokens.count() == 0) return;
1086 
1087     TokenStack syntaxStack;
1088     QStack<int> argStack;
1089     unsigned argCount = 1;
1090 
1091     for (int i = 0; i <= tokens.count(); i++) {
1092         // helper token: InvalidOp is end-of-formula
1093         Token token = (i < tokens.count()) ? tokens[i] : Token(Token::Operator);
1094         Token::Type tokenType = token.type();
1095 
1096         // unknown token is invalid
1097         if (tokenType == Token::Unknown) break;
1098 
1099         // are we entering a function ?
1100         // if stack already has: id (
1101         if (syntaxStack.itemCount() >= 2) {
1102             Token par = syntaxStack.top();
1103             Token id = syntaxStack.top(1);
1104             if (par.asOperator() == Token::LeftPar)
1105                 if (id.isIdentifier()) {
1106                     argStack.push(argCount);
1107                     argCount = 1;
1108                 }
1109         }
1110 
1111 #ifdef CALLIGRA_SHEETS_INLINE_ARRAYS
1112         // are we entering an inline array ?
1113         // if stack already has: {
1114         if (syntaxStack.itemCount() >= 1) {
1115             Token bra = syntaxStack.top();
1116             if (bra.asOperator() == Token::CurlyBra) {
1117                 argStack.push(argCount);
1118                 argStack.push(1);   // row count
1119                 argCount = 1;
1120             }
1121         }
1122 #endif
1123 
1124         // for constants, push immediately to stack
1125         // generate code to load from a constant
1126         if ((tokenType == Token::Integer) || (tokenType == Token::Float) ||
1127                 (tokenType == Token::String) || (tokenType == Token::Boolean) ||
1128                 (tokenType == Token::Error)) {
1129             syntaxStack.push(token);
1130             d->constants.append(tokenAsValue(token));
1131             d->codes.append(Opcode(Opcode::Load, d->constants.count() - 1));
1132         }
1133 
1134         // for cell, range, or identifier, push immediately to stack
1135         // generate code to load from reference
1136         if ((tokenType == Token::Cell) || (tokenType == Token::Range) ||
1137                 (tokenType == Token::Identifier)) {
1138             syntaxStack.push(token);
1139             d->constants.append(Value(token.text()));
1140             if (tokenType == Token::Cell)
1141                 d->codes.append(Opcode(Opcode::Cell, d->constants.count() - 1));
1142             else if (tokenType == Token::Range)
1143                 d->codes.append(Opcode(Opcode::Range, d->constants.count() - 1));
1144             else
1145                 d->codes.append(Opcode(Opcode::Ref, d->constants.count() - 1));
1146         }
1147 
1148         // special case for percentage
1149         if (tokenType == Token::Operator)
1150             if (token.asOperator() == Token::Percent)
1151                 if (syntaxStack.itemCount() >= 1)
1152                     if (!syntaxStack.top().isOperator()) {
1153                         d->constants.append(Value(0.01));
1154                         d->codes.append(Opcode(Opcode::Load, d->constants.count() - 1));
1155                         d->codes.append(Opcode(Opcode::Mul));
1156                     }
1157 
1158         // for any other operator, try to apply all parsing rules
1159         if (tokenType == Token::Operator)
1160             if (token.asOperator() != Token::Percent) {
1161                 // repeat until no more rule applies
1162                 for (; ;) {
1163                     bool ruleFound = false;
1164 
1165                     // rule for function arguments, if token is ; or )
1166                     // id ( arg1 ; arg2 -> id ( arg
1167                     if (!ruleFound)
1168                         if (syntaxStack.itemCount() >= 5)
1169                             if ((token.asOperator() == Token::RightPar) ||
1170                                     (token.asOperator() == Token::Semicolon)) {
1171                                 Token arg2 = syntaxStack.top();
1172                                 Token sep = syntaxStack.top(1);
1173                                 Token arg1 = syntaxStack.top(2);
1174                                 Token par = syntaxStack.top(3);
1175                                 Token id = syntaxStack.top(4);
1176                                 if (!arg2.isOperator())
1177                                     if (sep.asOperator() == Token::Semicolon)
1178                                         if (!arg1.isOperator())
1179                                             if (par.asOperator() == Token::LeftPar)
1180                                                 if (id.isIdentifier()) {
1181                                                     ruleFound = true;
1182                                                     syntaxStack.pop();
1183                                                     syntaxStack.pop();
1184                                                     argCount++;
1185                                                 }
1186                             }
1187 
1188                     // rule for empty function arguments, if token is ; or )
1189                     // id ( arg ; -> id ( arg
1190                     if (!ruleFound)
1191                         if (syntaxStack.itemCount() >= 3)
1192                             if ((token.asOperator() == Token::RightPar) ||
1193                                     (token.asOperator() == Token::Semicolon)) {
1194                                 Token sep = syntaxStack.top();
1195                                 Token arg = syntaxStack.top(1);
1196                                 Token par = syntaxStack.top(2);
1197                                 Token id = syntaxStack.top(3);
1198                                 if (sep.asOperator() == Token::Semicolon)
1199                                     if (!arg.isOperator())
1200                                         if (par.asOperator() == Token::LeftPar)
1201                                             if (id.isIdentifier()) {
1202                                                 ruleFound = true;
1203                                                 syntaxStack.pop();
1204                                                 d->constants.append(Value::null());
1205                                                 d->codes.append(Opcode(Opcode::Load, d->constants.count() - 1));
1206                                                 argCount++;
1207                                             }
1208                             }
1209 
1210                     // rule for function last argument:
1211                     //  id ( arg ) -> arg
1212                     if (!ruleFound)
1213                         if (syntaxStack.itemCount() >= 4) {
1214                             Token par2 = syntaxStack.top();
1215                             Token arg = syntaxStack.top(1);
1216                             Token par1 = syntaxStack.top(2);
1217                             Token id = syntaxStack.top(3);
1218                             if (par2.asOperator() == Token::RightPar)
1219                                 if (!arg.isOperator())
1220                                     if (par1.asOperator() == Token::LeftPar)
1221                                         if (id.isIdentifier()) {
1222                                             ruleFound = true;
1223                                             syntaxStack.pop();
1224                                             syntaxStack.pop();
1225                                             syntaxStack.pop();
1226                                             syntaxStack.pop();
1227                                             syntaxStack.push(arg);
1228                                             d->codes.append(Opcode(Opcode::Function, argCount));
1229                                             Q_ASSERT(!argStack.empty());
1230                                             argCount = argStack.empty() ? 0 : argStack.pop();
1231                                         }
1232                         }
1233 
1234                     // rule for function call with parentheses, but without argument
1235                     // e.g. "2*PI()"
1236                     if (!ruleFound)
1237                         if (syntaxStack.itemCount() >= 3) {
1238                             Token par2 = syntaxStack.top();
1239                             Token par1 = syntaxStack.top(1);
1240                             Token id = syntaxStack.top(2);
1241                             if (par2.asOperator() == Token::RightPar)
1242                                 if (par1.asOperator() == Token::LeftPar)
1243                                     if (id.isIdentifier()) {
1244                                         ruleFound = true;
1245                                         syntaxStack.pop();
1246                                         syntaxStack.pop();
1247                                         syntaxStack.pop();
1248                                         syntaxStack.push(Token(Token::Integer));
1249                                         d->codes.append(Opcode(Opcode::Function, 0));
1250                                         Q_ASSERT(!argStack.empty());
1251                                         argCount = argStack.empty() ? 0 : argStack.pop();
1252                                     }
1253                         }
1254 
1255 #ifdef CALLIGRA_SHEETS_INLINE_ARRAYS
1256                     // rule for inline array elements, if token is ; or | or }
1257                     // { arg1 ; arg2 -> { arg
1258                     if (!ruleFound)
1259                         if (syntaxStack.itemCount() >= 4)
1260                             if ((token.asOperator() == Token::Semicolon) ||
1261                                     (token.asOperator() == Token::CurlyKet) ||
1262                                     (token.asOperator() == Token::Pipe)) {
1263                                 Token arg2 = syntaxStack.top();
1264                                 Token sep = syntaxStack.top(1);
1265                                 Token arg1 = syntaxStack.top(2);
1266                                 Token bra = syntaxStack.top(3);
1267                                 if (!arg2.isOperator())
1268                                     if (sep.asOperator() == Token::Semicolon)
1269                                         if (!arg1.isOperator())
1270                                             if (bra.asOperator() == Token::CurlyBra) {
1271                                                 ruleFound = true;
1272                                                 syntaxStack.pop();
1273                                                 syntaxStack.pop();
1274                                                 argCount++;
1275                                             }
1276                             }
1277 
1278                     // rule for last array row element, if token is ; or | or }
1279                     //  { arg1 | arg2 -> { arg
1280                     if (!ruleFound)
1281                         if (syntaxStack.itemCount() >= 4)
1282                             if ((token.asOperator() == Token::Semicolon) ||
1283                                     (token.asOperator() == Token::CurlyKet) ||
1284                                     (token.asOperator() == Token::Pipe)) {
1285                                 Token arg2 = syntaxStack.top();
1286                                 Token sep = syntaxStack.top(1);
1287                                 Token arg1 = syntaxStack.top(2);
1288                                 Token bra = syntaxStack.top(3);
1289                                 if (!arg2.isOperator())
1290                                     if (sep.asOperator() == Token::Pipe)
1291                                         if (!arg1.isOperator())
1292                                             if (bra.asOperator() == Token::CurlyBra) {
1293                                                 ruleFound = true;
1294                                                 syntaxStack.pop();
1295                                                 syntaxStack.pop();
1296                                                 int rowCount = argStack.pop();
1297                                                 argStack.push(++rowCount);
1298                                                 argCount = 1;
1299                                             }
1300                             }
1301 
1302                     // rule for last array element:
1303                     //  { arg } -> arg
1304                     if (!ruleFound)
1305                         if (syntaxStack.itemCount() >= 3) {
1306                             Token ket = syntaxStack.top();
1307                             Token arg = syntaxStack.top(1);
1308                             Token bra = syntaxStack.top(2);
1309                             if (ket.asOperator() == Token::CurlyKet)
1310                                 if (!arg.isOperator())
1311                                     if (bra.asOperator() == Token::CurlyBra) {
1312                                         ruleFound = true;
1313                                         syntaxStack.pop();
1314                                         syntaxStack.pop();
1315                                         syntaxStack.pop();
1316                                         syntaxStack.push(arg);
1317                                         const int rowCount = argStack.pop();
1318                                         d->constants.append(Value((int)argCount));     // cols
1319                                         d->constants.append(Value(rowCount));
1320                                         d->codes.append(Opcode(Opcode::Array, d->constants.count() - 2));
1321                                         Q_ASSERT(!argStack.empty());
1322                                         argCount = argStack.empty() ? 0 : argStack.pop();
1323                                     }
1324                         }
1325 #endif
1326                     // rule for parenthesis:  ( Y ) -> Y
1327                     if (!ruleFound)
1328                         if (syntaxStack.itemCount() >= 3) {
1329                             Token right = syntaxStack.top();
1330                             Token y = syntaxStack.top(1);
1331                             Token left = syntaxStack.top(2);
1332                             if (right.isOperator())
1333                                 if (!y.isOperator())
1334                                     if (left.isOperator())
1335                                         if (right.asOperator() == Token::RightPar)
1336                                             if (left.asOperator() == Token::LeftPar) {
1337                                                 ruleFound = true;
1338                                                 syntaxStack.pop();
1339                                                 syntaxStack.pop();
1340                                                 syntaxStack.pop();
1341                                                 syntaxStack.push(y);
1342                                             }
1343                         }
1344 
1345                     // rule for binary operator:  A (op) B -> A
1346                     // conditions: precedence of op >= precedence of token
1347                     // action: push (op) to result
1348                     // e.g. "A * B" becomes 'A' if token is operator '+'
1349                     if (!ruleFound)
1350                         if (syntaxStack.itemCount() >= 3) {
1351                             Token b = syntaxStack.top();
1352                             Token op = syntaxStack.top(1);
1353                             Token a = syntaxStack.top(2);
1354                             if (!a.isOperator())
1355                                 if (!b.isOperator())
1356                                     if (op.isOperator())
1357                                         if (token.asOperator() != Token::LeftPar)
1358                                             if (opPrecedence(op.asOperator()) >= opPrecedence(token.asOperator())) {
1359                                                 ruleFound = true;
1360                                                 syntaxStack.pop();
1361                                                 syntaxStack.pop();
1362                                                 syntaxStack.pop();
1363                                                 syntaxStack.push(b);
1364                                                 switch (op.asOperator()) {
1365                                                     // simple binary operations
1366                                                 case Token::Plus:         d->codes.append(Opcode::Add); break;
1367                                                 case Token::Minus:        d->codes.append(Opcode::Sub); break;
1368                                                 case Token::Asterisk:     d->codes.append(Opcode::Mul); break;
1369                                                 case Token::Slash:        d->codes.append(Opcode::Div); break;
1370                                                 case Token::Caret:        d->codes.append(Opcode::Pow); break;
1371                                                 case Token::Ampersand:    d->codes.append(Opcode::Concat); break;
1372                                                 case Token::Intersect:    d->codes.append(Opcode::Intersect); break;
1373                                                 case Token::Union:        d->codes.append(Opcode::Union); break;
1374 
1375                                                     // simple value comparisons
1376                                                 case Token::Equal:        d->codes.append(Opcode::Equal); break;
1377                                                 case Token::Less:         d->codes.append(Opcode::Less); break;
1378                                                 case Token::Greater:      d->codes.append(Opcode::Greater); break;
1379 
1380                                                     // NotEqual is Equal, followed by Not
1381                                                 case Token::NotEqual:
1382                                                     d->codes.append(Opcode::Equal);
1383                                                     d->codes.append(Opcode::Not);
1384                                                     break;
1385 
1386                                                     // LessOrEqual is Greater, followed by Not
1387                                                 case Token::LessEqual:
1388                                                     d->codes.append(Opcode::Greater);
1389                                                     d->codes.append(Opcode::Not);
1390                                                     break;
1391 
1392                                                     // GreaterOrEqual is Less, followed by Not
1393                                                 case Token::GreaterEqual:
1394                                                     d->codes.append(Opcode::Less);
1395                                                     d->codes.append(Opcode::Not);
1396                                                     break;
1397                                                 default: break;
1398                                                 };
1399                                             }
1400                         }
1401 
1402                     // rule for unary operator:  (op1) (op2) X -> (op1) X
1403                     // conditions: op2 is unary, token is not '('
1404                     // action: push (op2) to result
1405                     // e.g.  "* - 2" becomes '*'
1406                     if (!ruleFound)
1407                         if (token.asOperator() != Token::LeftPar)
1408                             if (syntaxStack.itemCount() >= 3) {
1409                                 Token x = syntaxStack.top();
1410                                 Token op2 = syntaxStack.top(1);
1411                                 Token op1 = syntaxStack.top(2);
1412                                 if (!x.isOperator())
1413                                     if (op1.isOperator())
1414                                         if (op2.isOperator())
1415                                             if ((op2.asOperator() == Token::Plus) ||
1416                                                     (op2.asOperator() == Token::Minus)) {
1417                                                 ruleFound = true;
1418                                                 syntaxStack.pop();
1419                                                 syntaxStack.pop();
1420                                                 syntaxStack.push(x);
1421                                                 if (op2.asOperator() == Token::Minus)
1422                                                     d->codes.append(Opcode(Opcode::Neg));
1423                                             }
1424                             }
1425 
1426                     // auxiliary rule for unary operator:  (op) X -> X
1427                     // conditions: op is unary, op is first in syntax stack, token is not '('
1428                     // action: push (op) to result
1429                     if (!ruleFound)
1430                         if (token.asOperator() != Token::LeftPar)
1431                             if (syntaxStack.itemCount() == 2) {
1432                                 Token x = syntaxStack.top();
1433                                 Token op = syntaxStack.top(1);
1434                                 if (!x.isOperator())
1435                                     if (op.isOperator())
1436                                         if ((op.asOperator() == Token::Plus) ||
1437                                                 (op.asOperator() == Token::Minus)) {
1438                                             ruleFound = true;
1439                                             syntaxStack.pop();
1440                                             syntaxStack.pop();
1441                                             syntaxStack.push(x);
1442                                             if (op.asOperator() == Token::Minus)
1443                                                 d->codes.append(Opcode(Opcode::Neg));
1444                                         }
1445                             }
1446 
1447                     if (!ruleFound) break;
1448                 }
1449 
1450                 // can't apply rules anymore, push the token
1451                 if (token.asOperator() != Token::Percent)
1452                     syntaxStack.push(token);
1453             }
1454     }
1455 
1456     // syntaxStack must left only one operand and end-of-formula (i.e. InvalidOp)
1457     d->valid = false;
1458     if (syntaxStack.itemCount() == 2)
1459         if (syntaxStack.top().isOperator())
1460             if (syntaxStack.top().asOperator() == Token::InvalidOp)
1461                 if (!syntaxStack.top(1).isOperator())
1462                     d->valid = true;
1463 
1464     // bad parsing ? clean-up everything
1465     if (!d->valid) {
1466         d->constants.clear();
1467         d->codes.clear();
1468     }
1469 }
1470 
isNamedArea(const QString & expr) const1471 bool Formula::isNamedArea(const QString& expr) const
1472 {
1473     return d->sheet ? d->sheet->map()->namedAreaManager()->contains(expr) : false;
1474 }
1475 
1476 
1477 // Evaluates the formula, returns the result.
1478 
1479 // evaluate the cellIndirections
eval(CellIndirection cellIndirections) const1480 Value Formula::eval(CellIndirection cellIndirections) const
1481 {
1482     QHash<Cell, Value> values;
1483     return evalRecursive(cellIndirections, values);
1484 }
1485 
1486 // We need to unroll arrays. Do use the same logic to unroll like OpenOffice.org and Excel are using.
valueOrElement(FuncExtra & fe,const stackEntry & entry) const1487 Value Formula::Private::valueOrElement(FuncExtra &fe, const stackEntry& entry) const
1488 {
1489     const Value& v = entry.val;
1490     const Region& region = entry.reg;
1491     if(v.isArray()) {
1492         if(v.count() == 1) // if there is only one item, use that one
1493             return v.element(0);
1494 
1495         if(region.isValid() && entry.regIsNamedOrLabeled) {
1496             const QPoint position = region.firstRange().topLeft();
1497             const int idx = fe.myrow - position.y(); // do we need to do the same for columns?
1498             if(idx >= 0 && idx < int(v.count()))
1499                 return v.element(idx); // within the range returns the selected element
1500         }
1501     }
1502     return v;
1503 }
1504 
1505 // On OO.org Calc and MS Excel operations done with +, -, * and / do fail if one of the values is
1506 // non-numeric. This differs from formulas like SUM which just ignores non numeric values.
numericOrError(const ValueConverter * converter,const Value & v)1507 Value numericOrError(const ValueConverter* converter, const Value &v)
1508 {
1509     switch (v.type()) {
1510     case Value::Empty:
1511     case Value::Boolean:
1512     case Value::Integer:
1513     case Value::Float:
1514     case Value::Complex:
1515     case Value::Error:
1516         return v;
1517     case Value::String: {
1518         if (v.asString().isEmpty())
1519             return v;
1520         bool ok;
1521         converter->asNumeric(v, &ok);
1522         if (ok)
1523             return v;
1524     } break;
1525     case Value::Array:
1526     case Value::CellRange:
1527         return v;
1528     }
1529     return Value::errorVALUE();
1530 }
1531 
evalRecursive(CellIndirection cellIndirections,QHash<Cell,Value> & values) const1532 Value Formula::evalRecursive(CellIndirection cellIndirections, QHash<Cell, Value>& values) const
1533 {
1534     QStack<stackEntry> stack;
1535     stackEntry entry;
1536     int index;
1537     Value val1, val2;
1538     QString c;
1539     QVector<Value> args;
1540 
1541     const Map* map = d->sheet ? d->sheet->map() : new Map(0 /*document*/);
1542     const ValueConverter* converter = map->converter();
1543     ValueCalc* calc = map->calc();
1544 
1545     QSharedPointer<Function> function;
1546     FuncExtra fe;
1547     fe.mycol = fe.myrow = 0;
1548     if (!d->cell.isNull()) {
1549         fe.mycol = d->cell.column();
1550         fe.myrow = d->cell.row();
1551     }
1552 
1553     if (d->dirty) {
1554         Tokens tokens = scan(d->expression);
1555         d->valid = tokens.valid();
1556         if (tokens.valid())
1557             compile(tokens);
1558     }
1559 
1560     if (!d->valid)
1561         return Value::errorPARSE();
1562 
1563     for (int pc = 0; pc < d->codes.count(); pc++) {
1564         Value ret;   // for the function caller
1565         Opcode& opcode = d->codes[pc];
1566         index = opcode.index;
1567         switch (opcode.type) {
1568             // no operation
1569         case Opcode::Nop:
1570             break;
1571 
1572             // load a constant, push to stack
1573         case Opcode::Load:
1574             entry.reset();
1575             entry.val = d->constants[index];
1576             stack.push(entry);
1577             break;
1578 
1579             // unary operation
1580         case Opcode::Neg:
1581             entry.reset();
1582             entry.val = d->valueOrElement(fe, stack.pop());
1583             if (!entry.val.isError()) // do nothing if we got an error
1584                 entry.val = calc->mul(entry.val, -1);
1585             stack.push(entry);
1586             break;
1587 
1588             // binary operation: take two values from stack, do the operation,
1589             // push the result to stack
1590         case Opcode::Add:
1591             entry.reset();
1592             val2 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1593             val1 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1594             val2 = calc->add(val1, val2);
1595             entry.reset();
1596             entry.val = val2;
1597             stack.push(entry);
1598             break;
1599 
1600         case Opcode::Sub:
1601             val2 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1602             val1 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1603             val2 = calc->sub(val1, val2);
1604             entry.reset();
1605             entry.val = val2;
1606             stack.push(entry);
1607             break;
1608 
1609         case Opcode::Mul:
1610             val2 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1611             val1 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1612             val2 = calc->mul(val1, val2);
1613             entry.reset();
1614             entry.val = val2;
1615             stack.push(entry);
1616             break;
1617 
1618         case Opcode::Div:
1619             val2 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1620             val1 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1621             val2 = calc->div(val1, val2);
1622             entry.reset();
1623             entry.val = val2;
1624             stack.push(entry);
1625             break;
1626 
1627         case Opcode::Pow:
1628             val2 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1629             val1 = numericOrError(converter, d->valueOrElement(fe, stack.pop()));
1630             val2 = calc->pow(val1, val2);
1631             entry.reset();
1632             entry.val = val2;
1633             stack.push(entry);
1634             break;
1635 
1636             // string concatenation
1637         case Opcode::Concat:
1638             val1 = converter->asString(stack.pop().val);
1639             val2 = converter->asString(stack.pop().val);
1640             if (val1.isError() || val2.isError())
1641                 val1 = Value::errorVALUE();
1642             else
1643                 val1 = Value(val2.asString().append(val1.asString()));
1644             entry.reset();
1645             entry.val = val1;
1646             stack.push(entry);
1647             break;
1648 
1649            // array intersection
1650         case Opcode::Intersect: {
1651             val1 = stack.pop().val;
1652             val2 = stack.pop().val;
1653             Region r1(d->constants[index].asString(), map, d->sheet);
1654             Region r2(d->constants[index+1].asString(), map, d->sheet);
1655             if(!r1.isValid() || !r2.isValid()) {
1656                 val1 = Value::errorNULL();
1657             } else {
1658                 Region r = r1.intersected(r2);
1659                 QRect rect = r.boundingRect();
1660                 Cell cell;
1661                 if(rect.top() == rect.bottom())
1662                     cell = Cell(r.firstSheet(), fe.mycol, rect.top());
1663                 else if(rect.left() == rect.right())
1664                     cell = Cell(r.firstSheet(), rect.left(), fe.mycol);
1665                 if(cell.isNull())
1666                     val1 = Value::errorNULL();
1667                 else if(cell.isEmpty())
1668                     val1 = Value::errorNULL();
1669                 else
1670                     val1 = cell.value();
1671             }
1672             entry.reset();
1673             entry.val = val1;
1674             stack.push(entry);
1675         } break;
1676 
1677           // region union
1678         case Opcode::Union: {
1679             Region r = stack.pop().reg;
1680             Region r2 = stack.pop().reg;
1681             entry.reset();
1682             if (!r.isValid() || !r2.isValid()) {
1683                 val1 = Value::errorVALUE();
1684                 r = Region();
1685             } else {
1686                 r.add(r2);
1687                 r.firstSheet()->cellStorage()->valueRegion(r);
1688                 // store the reference, so we can use it within functions (not entirely correct)
1689                 entry.col1 = r.boundingRect().left();
1690                 entry.row1 = r.boundingRect().top();
1691                 entry.col2 = r.boundingRect().right();
1692                 entry.row2 = r.boundingRect().bottom();
1693             }
1694             entry.val = val1;
1695             entry.reg = r;
1696             stack.push(entry);
1697         } break;
1698 
1699             // logical not
1700         case Opcode::Not:
1701             val1 = converter->asBoolean(d->valueOrElement(fe, stack.pop()));
1702             if (val1.isError())
1703                 val1 = Value::errorVALUE();
1704             else
1705                 val1 = Value(!val1.asBoolean());
1706             entry.reset();
1707             entry.val = val1;
1708             stack.push(entry);
1709             break;
1710 
1711             // comparison
1712         case Opcode::Equal:
1713             val1 = d->valueOrElement(fe, stack.pop());
1714             val2 = d->valueOrElement(fe, stack.pop());
1715             if (val1.isError())
1716                 ;
1717             else if (val2.isError())
1718                 val1 = val2;
1719             else if (val2.compare(val1, calc->settings()->caseSensitiveComparisons()) == 0)
1720                 val1 = Value(true);
1721             else
1722                 val1 = Value(false);
1723             entry.reset();
1724             entry.val = val1;
1725             stack.push(entry);
1726             break;
1727 
1728             // less than
1729         case Opcode::Less:
1730             val1 = d->valueOrElement(fe, stack.pop());
1731             val2 = d->valueOrElement(fe, stack.pop());
1732             if (val1.isError())
1733                 ;
1734             else if (val2.isError())
1735                 val1 = val2;
1736             else if (val2.compare(val1, calc->settings()->caseSensitiveComparisons()) < 0)
1737                 val1 = Value(true);
1738             else
1739                 val1 = Value(false);
1740             entry.reset();
1741             entry.val = val1;
1742             stack.push(entry);
1743             break;
1744 
1745             // greater than
1746         case Opcode::Greater: {
1747             val1 = d->valueOrElement(fe, stack.pop());
1748             val2 = d->valueOrElement(fe, stack.pop());
1749             if (val1.isError())
1750                 ;
1751             else if (val2.isError())
1752                 val1 = val2;
1753             else if (val2.compare(val1, calc->settings()->caseSensitiveComparisons()) > 0)
1754                 val1 = Value(true);
1755             else
1756                 val1 = Value(false);
1757             entry.reset();
1758             entry.val = val1;
1759             stack.push(entry);
1760         }
1761         break;
1762 
1763         // cell in a sheet
1764         case Opcode::Cell: {
1765             c = d->constants[index].asString();
1766             val1 = Value::empty();
1767             entry.reset();
1768 
1769             const Region region(c, map, d->sheet);
1770             if (!region.isValid()) {
1771                 val1 = Value::errorREF();
1772             } else if (region.isSingular()) {
1773                 const QPoint position = region.firstRange().topLeft();
1774                 if (cellIndirections.isEmpty())
1775                     val1 = Cell(region.firstSheet(), position).value();
1776                 else {
1777                     Cell cell(region.firstSheet(), position);
1778                     cell = cellIndirections.value(cell, cell);
1779                     if (values.contains(cell))
1780                         val1 = values.value(cell);
1781                     else {
1782                         values[cell] = Value::errorCIRCLE();
1783                         if (cell.isFormula())
1784                             val1 = cell.formula().evalRecursive(cellIndirections, values);
1785                         else
1786                             val1 = cell.value();
1787                         values[cell] = val1;
1788                     }
1789                 }
1790                 // store the reference, so we can use it within functions
1791                 entry.col1 = entry.col2 = position.x();
1792                 entry.row1 = entry.row2 = position.y();
1793                 entry.reg = region;
1794                 entry.regIsNamedOrLabeled = map->namedAreaManager()->contains(c);
1795             } else {
1796                 warnSheets << "Unhandled non singular region in Opcode::Cell with rects=" << region.rects();
1797             }
1798             entry.val = val1;
1799             stack.push(entry);
1800         }
1801         break;
1802 
1803         // selected range in a sheet
1804         case Opcode::Range: {
1805             c = d->constants[index].asString();
1806             val1 = Value::empty();
1807             entry.reset();
1808 
1809             const Region region(c, map, d->sheet);
1810             if (region.isValid()) {
1811                 val1 = region.firstSheet()->cellStorage()->valueRegion(region);
1812                 // store the reference, so we can use it within functions
1813                 entry.col1 = region.firstRange().left();
1814                 entry.row1 = region.firstRange().top();
1815                 entry.col2 = region.firstRange().right();
1816                 entry.row2 = region.firstRange().bottom();
1817                 entry.reg = region;
1818                 entry.regIsNamedOrLabeled = map->namedAreaManager()->contains(c);
1819             }
1820 
1821             entry.val = val1; // any array is valid here
1822             stack.push(entry);
1823         }
1824         break;
1825 
1826         // reference
1827         case Opcode::Ref:
1828             val1 = d->constants[index];
1829             entry.reset();
1830             entry.val = val1;
1831             stack.push(entry);
1832             break;
1833 
1834             // calling function
1835         case Opcode::Function:
1836             // sanity check, this should not happen unless opcode is wrong
1837             // (i.e. there's a bug in the compile() function)
1838             if (stack.count() < index)
1839                 return Value::errorVALUE(); // not enough arguments
1840 
1841             args.clear();
1842             fe.ranges.clear();
1843             fe.ranges.resize(index);
1844             fe.regions.clear();
1845             fe.regions.resize(index);
1846             fe.sheet = d->sheet;
1847             for (; index; index--) {
1848                 stackEntry e = stack.pop();
1849                 args.insert(args.begin(), e.val);
1850                 // fill the FunctionExtra object
1851                 fe.ranges[index - 1].col1 = e.col1;
1852                 fe.ranges[index - 1].row1 = e.row1;
1853                 fe.ranges[index - 1].col2 = e.col2;
1854                 fe.ranges[index - 1].row2 = e.row2;
1855                 fe.regions[index - 1] = e.reg;
1856             }
1857 
1858             // function name as string value
1859             val1 = converter->asString(stack.pop().val);
1860             if (val1.isError())
1861                 return val1;
1862             function = FunctionRepository::self()->function(val1.asString());
1863             if (!function)
1864                 return Value::errorNAME(); // no such function
1865 
1866             ret = function->exec(args, calc, &fe);
1867             entry.reset();
1868             entry.val = ret;
1869             stack.push(entry);
1870 
1871             break;
1872 
1873 #ifdef CALLIGRA_SHEETS_INLINE_ARRAYS
1874             // creating an array
1875         case Opcode::Array: {
1876             const int cols = d->constants[index].asInteger();
1877             const int rows = d->constants[index+1].asInteger();
1878             // check if enough array elements are available
1879             if (stack.count() < cols * rows)
1880                 return Value::errorVALUE();
1881             Value array(Value::Array);
1882             for (int row = rows - 1; row >= 0; --row) {
1883                 for (int col = cols - 1; col >= 0; --col) {
1884                     array.setElement(col, row, stack.pop().val);
1885                 }
1886             }
1887             entry.reset();
1888             entry.val = array;
1889             stack.push(entry);
1890             break;
1891         }
1892 #endif
1893         default:
1894             break;
1895         }
1896     }
1897 
1898     if (!d->sheet)
1899         delete map;
1900 
1901     // more than one value in stack ? unsuccessful execution...
1902     if (stack.count() != 1)
1903         return Value::errorVALUE();
1904 
1905     return stack.pop().val;
1906 }
1907 
operator =(const Formula & other)1908 Formula& Formula::operator=(const Formula & other)
1909 {
1910     d = other.d;
1911     return *this;
1912 }
1913 
operator ==(const Formula & other) const1914 bool Formula::operator==(const Formula& other) const
1915 {
1916     return (d->expression == other.d->expression);
1917 }
1918 
1919 // Debugging aid
1920 
dump() const1921 QString Formula::dump() const
1922 {
1923     QString result;
1924 
1925     if (d->dirty) {
1926         Tokens tokens = scan(d->expression);
1927         compile(tokens);
1928     }
1929 
1930     result = QString("Expression: [%1]\n").arg(d->expression);
1931 #if 0
1932     Value value = eval();
1933     result.append(QString("Result: %1\n").arg(
1934                       converter->asString(value).asString()));
1935 #endif
1936 
1937     result.append("  Constants:\n");
1938     for (int c = 0; c < d->constants.count(); c++) {
1939         QString vtext;
1940         Value val = d->constants[c];
1941         if (val.isString()) vtext = QString("[%1]").arg(val.asString());
1942         else if (val.isNumber()) vtext = QString("%1").arg((double) numToDouble(val.asFloat()));
1943         else if (val.isBoolean()) vtext = QString("%1").arg(val.asBoolean() ? "True" : "False");
1944         else if (val.isError()) vtext = "error";
1945         else vtext = "???";
1946         result += QString("    #%1 = %2\n").arg(c).arg(vtext);
1947     }
1948 
1949     result.append("\n");
1950     result.append("  Code:\n");
1951     for (int i = 0; i < d->codes.count(); i++) {
1952         QString ctext;
1953         switch (d->codes[i].type) {
1954         case Opcode::Load:      ctext = QString("Load #%1").arg(d->codes[i].index); break;
1955         case Opcode::Ref:       ctext = QString("Ref #%1").arg(d->codes[i].index); break;
1956         case Opcode::Function:  ctext = QString("Function (%1)").arg(d->codes[i].index); break;
1957         case Opcode::Add:       ctext = "Add"; break;
1958         case Opcode::Sub:       ctext = "Sub"; break;
1959         case Opcode::Mul:       ctext = "Mul"; break;
1960         case Opcode::Div:       ctext = "Div"; break;
1961         case Opcode::Neg:       ctext = "Neg"; break;
1962         case Opcode::Concat:    ctext = "Concat"; break;
1963         case Opcode::Pow:       ctext = "Pow"; break;
1964         case Opcode::Intersect: ctext = "Intersect"; break;
1965         case Opcode::Equal:     ctext = "Equal"; break;
1966         case Opcode::Not:       ctext = "Not"; break;
1967         case Opcode::Less:      ctext = "Less"; break;
1968         case Opcode::Greater:   ctext = "Greater"; break;
1969         case Opcode::Array:     ctext = QString("Array (%1x%2)").arg(d->constants[d->codes[i].index].asInteger()).arg(d->constants[d->codes[i].index+1].asInteger()); break;
1970         case Opcode::Nop:       ctext = "Nop"; break;
1971         case Opcode::Cell:      ctext = "Cell"; break;
1972         case Opcode::Range:     ctext = "Range"; break;
1973         default: ctext = "Unknown"; break;
1974         }
1975         result.append("   ").append(ctext).append("\n");
1976     }
1977 
1978     return result;
1979 }
1980 
operator <<(QTextStream & ts,Formula formula)1981 QTextStream& operator<<(QTextStream& ts, Formula formula)
1982 {
1983     ts << formula.dump();
1984     return ts;
1985 }
1986