1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  */
10 
11 #include <document.hxx>
12 #include <reffact.hxx>
13 #include <TableFillingAndNavigationTools.hxx>
14 #include <RegressionDialog.hxx>
15 #include <scresid.hxx>
16 #include <strings.hrc>
17 
18 /*
19    Some regression basics
20    ----------------------
21 
22    1. Linear regression fits using data, a linear function between the dependent variable and the independent variable(s).
23       The basic form of this function is :-
24 
25       y = b + m_1*x_1 + m_2*x_2 + ... + m_k*x_k
26 
27       where y is the dependent variable
28             x_1, x_2, ..., x_k are the k independent variables
29             b is the intercept
30             m_1, m_2, ..., m_k are the slopes corresponding to the variables x_1, x_2, ..., x_k respectively.
31 
32 
33      This equation for n observations can be compactly written using matrices as :-
34 
35      y = X*A
36 
37      where y is the n dimensional column vector containing dependent variable observations.
38      where X is matrix of shape n*(k+1) where a row looks like [ 1  x_1  x_2 ...  x_k ]
39            A is the k+1 dimensional column vector              [ b  m_1  m_2 ...  m_k ]
40 
41      Calc formula LINEST(Y_array ; X_array) can be used to compute all entries in "A" along with many other statistics.
42 
43 
44   2. Logarithmic regression is basically used to find a linear function between the dependent variable and
45      the natural logarithm of the independent variable(s).
46      So the basic form of this functions is :-
47 
48      y = b + m_1*ln(x_1) + m_2*ln(x_2) + ... + m_k*ln(x_k)
49 
50      This can be again written in a compact matrix form for n observations.
51 
52      y = ln(X)*A
53 
54      where y is the n dimensional column vector containing dependent variable observations.
55      where X is matrix of shape n*(k+1) where a row looks like  [ e  x_1  x_2 ...  x_k ]
56            A is the k+1 dimensional column vector               [ b  m_1  m_2 ...  m_k ]
57 
58      To estimate A, we use the formula =LINEST(Y_array ; LN(X_array))
59 
60 
61   3. Power regression is used to fit the following model :-
62 
63      y = b * (x_1 ^ m_1) * (x_2 ^ m_2) * ... * (x_k ^ m_k)
64 
65      To reduce this to a linear function(so that we can still use LINEST()), we take natural logarithm on both sides
66 
67      ln(y) = c + m_1*ln(x_1) + m_2*ln(x_2) + ... + m_k*ln(x_k) ; where c = ln(b)
68 
69 
70      This again can be written compactly in matrix form as :-
71 
72      ln(y) = ln(X)*A
73 
74      where y is the n dimensional column vector containing dependent variable observations.
75      where X is matrix of shape n*(k+1) where a row looks like  [ e  x_1  x_2 ...  x_k ]
76            A is the k+1 dimensional column vector               [ c  m_1  m_2 ...  m_k ]
77 
78      To estimate A, we use the formula =LINEST(LN(Y_array) ; LN(X_array))
79 
80      Once we get A, to get back y from x's we use the formula :-
81 
82      y = exp( ln(X)*A )
83 
84 
85 
86      Some references for computing confidence interval for the regression coefficients :-
87 
88      [1] https://en.wikipedia.org/wiki/Student%27s_t-test#Slope_of_a_regression_line
89      [2] https://en.wikipedia.org/wiki/Simple_linear_regression#Normality_assumption
90      [3] https://onlinecourses.science.psu.edu/stat414/node/280
91 
92  */
93 
94 namespace
95 {
96     enum class ScRegType {
97         LINEAR,
98         LOGARITHMIC,
99         POWER
100     };
101 
102     const char* constRegressionModel[] =
103     {
104         STR_LABEL_LINEAR,
105         STR_LABEL_LOGARITHMIC,
106         STR_LABEL_POWER
107     };
108 
109     OUString constTemplateLINEST[] =
110     {
111         "=LINEST(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE% ; %CALC_INTERCEPT% ; TRUE)",
112         "=LINEST(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%) ; %CALC_INTERCEPT% ; TRUE)",
113         "=LINEST(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%) ; %CALC_INTERCEPT% ; TRUE)"
114     };
115 
116     OUString constRegressionFormula[] =
117     {
118         "=MMULT(%XDATAMATRIX_RANGE% ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%",
119         "=MMULT(LN(%XDATAMATRIX_RANGE%) ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%",
120         "=EXP(MMULT(LN(%XDATAMATRIX_RANGE%) ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%)"
121     };
122 
123 } // end anonymous namespace
124 
lcl_GetNumRowsColsInRange(const ScRange & rRange,bool bRows)125 static size_t lcl_GetNumRowsColsInRange(const ScRange& rRange, bool bRows)
126 {
127     if (bRows)
128         return rRange.aEnd.Row() - rRange.aStart.Row() + 1;
129 
130     return rRange.aEnd.Col() - rRange.aStart.Col() + 1;
131 }
132 
ScRegressionDialog(SfxBindings * pSfxBindings,SfxChildWindow * pChildWindow,weld::Window * pParent,ScViewData * pViewData)133 ScRegressionDialog::ScRegressionDialog(
134                     SfxBindings* pSfxBindings, SfxChildWindow* pChildWindow,
135                     weld::Window* pParent, ScViewData* pViewData )
136     : ScStatisticsTwoVariableDialog(
137             pSfxBindings, pChildWindow, pParent, pViewData,
138             "modules/scalc/ui/regressiondialog.ui", "RegressionDialog")
139     , mbUnivariate(true)
140     , mnNumIndependentVars(1)
141     , mnNumObservations(0)
142     , mbUse3DAddresses(false)
143     , mbCalcIntercept(true)
144     , mxWithLabelsCheckBox(m_xBuilder->weld_check_button("withlabels-check"))
145     , mxLinearRadioButton(m_xBuilder->weld_radio_button("linear-radio"))
146     , mxLogarithmicRadioButton(m_xBuilder->weld_radio_button("logarithmic-radio"))
147     , mxPowerRadioButton(m_xBuilder->weld_radio_button("power-radio"))
148     , mxErrorMessage(m_xBuilder->weld_label("error-message"))
149     , mxConfidenceLevelField(m_xBuilder->weld_spin_button("confidencelevel-spin"))
150     , mxCalcResidualsCheckBox(m_xBuilder->weld_check_button("calcresiduals-check"))
151     , mxNoInterceptCheckBox(m_xBuilder->weld_check_button("nointercept-check"))
152 {
153     mxWithLabelsCheckBox->connect_toggled(LINK(this, ScRegressionDialog, CheckBoxHdl));
154     mxConfidenceLevelField->connect_value_changed(LINK(this, ScRegressionDialog, NumericFieldHdl));
155 }
156 
~ScRegressionDialog()157 ScRegressionDialog::~ScRegressionDialog()
158 {
159 }
160 
Close()161 void ScRegressionDialog::Close()
162 {
163     DoClose(ScRegressionDialogWrapper::GetChildWindowId());
164 }
165 
GetUndoNameId()166 const char* ScRegressionDialog::GetUndoNameId()
167 {
168     return STR_REGRESSION_UNDO_NAME;
169 }
170 
ApplyOutput(ScDocShell * pDocShell)171 ScRange ScRegressionDialog::ApplyOutput(ScDocShell* pDocShell)
172 {
173     AddressWalkerWriter aOutput(mOutputAddress, pDocShell, mDocument,
174             formula::FormulaGrammar::mergeToGrammar( formula::FormulaGrammar::GRAM_ENGLISH, mAddressDetails.eConv));
175     FormulaTemplate aTemplate(mDocument);
176     aTemplate.autoReplaceUses3D(mbUse3DAddresses);
177     mbCalcIntercept = !mxNoInterceptCheckBox->get_active();
178 
179     // max col of our output should account for
180     // 1. constant term column,
181     // 2. mnNumIndependentVars columns
182     // 3. Actual Y column
183     // 4. Predicted Y column
184     // 5. Residual Column
185     SCCOL nOutputMaxCol = mOutputAddress.Col() + mnNumIndependentVars + 3;
186 
187     ScRange aXDataRange(GetDataRange(mVariable1Range));
188     ScRange aYDataRange(GetDataRange(mVariable2Range));
189 
190     aTemplate.autoReplaceRange("%VARIABLE1_RANGE%", aXDataRange);
191     aTemplate.autoReplaceRange("%VARIABLE2_RANGE%", aYDataRange);
192     size_t nRegressionIndex = GetRegressionTypeIndex();
193     ScRegType eRegType = static_cast<ScRegType>(nRegressionIndex);
194     bool bTakeLogX = eRegType == ScRegType::LOGARITHMIC || eRegType == ScRegType::POWER;
195 
196     WriteRawRegressionResults(aOutput, aTemplate, nRegressionIndex);
197     WriteRegressionStatistics(aOutput, aTemplate);
198     WriteRegressionANOVAResults(aOutput, aTemplate);
199     WriteRegressionEstimatesWithCI(aOutput, aTemplate, bTakeLogX);
200     if (mxCalcResidualsCheckBox->get_active())
201         WritePredictionsWithResiduals(aOutput, aTemplate, nRegressionIndex);
202 
203     ScAddress aMaxAddress(aOutput.mMaximumAddress);
204     aMaxAddress.SetCol(std::max(aMaxAddress.Col(), nOutputMaxCol));
205     return ScRange(aOutput.mMinimumAddress, aMaxAddress);
206 }
207 
InputRangesValid()208 bool ScRegressionDialog::InputRangesValid()
209 {
210     if (!mVariable1Range.IsValid())
211     {
212         mxErrorMessage->set_label(ScResId(STR_MESSAGE_XINVALID_RANGE));
213         return false;
214     }
215 
216     if (!mVariable2Range.IsValid())
217     {
218         mxErrorMessage->set_label(ScResId(STR_MESSAGE_YINVALID_RANGE));
219         return false;
220     }
221 
222     if (!mOutputAddress.IsValid())
223     {
224         mxErrorMessage->set_label(ScResId(STR_MESSAGE_INVALID_OUTPUT_ADDR));
225         return false;
226     }
227 
228     {
229         double fConfidenceLevel = mxConfidenceLevelField->get_value();
230         if ( fConfidenceLevel <= 0.0 || fConfidenceLevel >= 100.0 )
231         {
232             mxErrorMessage->set_label(ScResId(STR_MESSAGE_INVALID_CONFIDENCE_LEVEL));
233             return false;
234         }
235     }
236 
237     mVariable1Range.PutInOrder();
238     mVariable2Range.PutInOrder();
239 
240     bool bGroupedByColumn = mGroupedBy == BY_COLUMN;
241 
242     bool bYHasSingleDim = (
243         (bGroupedByColumn &&
244          mVariable2Range.aStart.Col() == mVariable2Range.aEnd.Col()) ||
245         (!bGroupedByColumn &&
246          mVariable2Range.aStart.Row() == mVariable2Range.aEnd.Row()));
247 
248     if (!bYHasSingleDim)
249     {
250         if (bGroupedByColumn)
251             mxErrorMessage->set_label(ScResId(STR_MESSAGE_YVARIABLE_MULTI_COLUMN));
252         else
253             mxErrorMessage->set_label(ScResId(STR_MESSAGE_YVARIABLE_MULTI_ROW));
254         return false;
255     }
256 
257     bool bWithLabels = mxWithLabelsCheckBox->get_active();
258 
259     size_t nYObs = lcl_GetNumRowsColsInRange(mVariable2Range, bGroupedByColumn);
260     size_t nNumXVars = lcl_GetNumRowsColsInRange(mVariable1Range, !bGroupedByColumn);
261     mbUnivariate = nNumXVars == 1;
262     // Observation count mismatch check
263     if (lcl_GetNumRowsColsInRange(mVariable1Range, bGroupedByColumn) != nYObs)
264     {
265         if (mbUnivariate)
266             mxErrorMessage->set_label(ScResId(STR_MESSAGE_UNIVARIATE_NUMOBS_MISMATCH));
267         else
268             mxErrorMessage->set_label(ScResId(STR_MESSAGE_MULTIVARIATE_NUMOBS_MISMATCH));
269         return false;
270     }
271 
272     mnNumIndependentVars = nNumXVars;
273     mnNumObservations = bWithLabels ? nYObs - 1 : nYObs;
274 
275     mbUse3DAddresses = mVariable1Range.aStart.Tab() != mOutputAddress.Tab() ||
276         mVariable2Range.aStart.Tab() != mOutputAddress.Tab();
277 
278     mxErrorMessage->set_label("");
279 
280     return true;
281 }
282 
GetRegressionTypeIndex() const283 size_t ScRegressionDialog::GetRegressionTypeIndex() const
284 {
285     if (mxLinearRadioButton->get_active())
286         return 0;
287     if (mxLogarithmicRadioButton->get_active())
288         return 1;
289     return 2;
290 }
291 
GetDataRange(const ScRange & rRange)292 ScRange ScRegressionDialog::GetDataRange(const ScRange& rRange)
293 {
294     if (!mxWithLabelsCheckBox->get_active())
295         return rRange;
296 
297     ScRange aDataRange(rRange);
298     if (mGroupedBy == BY_COLUMN)
299         aDataRange.aStart.IncRow(1);
300     else
301         aDataRange.aStart.IncCol(1);
302 
303     return aDataRange;
304 }
305 
GetVariableNameFormula(bool bXVar,size_t nIndex,bool bWithLog)306 OUString ScRegressionDialog::GetVariableNameFormula(bool bXVar, size_t nIndex, bool bWithLog)
307 {
308     if (bXVar && nIndex == 0)
309         return "=\"" + ScResId(STR_LABEL_INTERCEPT) + "\"";
310 
311     if (mxWithLabelsCheckBox->get_active())
312     {
313         ScAddress aAddr(bXVar ? mVariable1Range.aStart : mVariable2Range.aStart);
314         if (mGroupedBy == BY_COLUMN)
315             aAddr.IncCol(nIndex - 1);
316         else
317             aAddr.IncRow(nIndex - 1);
318 
319         ScRefFlags eAddrFlag = mbUse3DAddresses ? ScRefFlags::ADDR_ABS_3D : ScRefFlags::ADDR_ABS;
320         return bWithLog ? OUString("=CONCAT(\"LN(\";" +
321             aAddr.Format(eAddrFlag, mDocument, mDocument->GetAddressConvention()) + ";\")\")") :
322             OUString("=" + aAddr.Format(eAddrFlag, mDocument, mDocument->GetAddressConvention()));
323     }
324 
325     OUString aDefaultVarName;
326 
327     if (bXVar)
328         aDefaultVarName = "X" + OUString::number(nIndex);
329     else
330         aDefaultVarName = "Y";
331 
332     return bWithLog ? OUString("=\"LN(" + aDefaultVarName + ")\"") :
333         OUString("=\"" + aDefaultVarName + "\"");
334 }
335 
GetXVariableNameFormula(size_t nIndex,bool bWithLog)336 OUString ScRegressionDialog::GetXVariableNameFormula(size_t nIndex, bool bWithLog)
337 {
338     assert(nIndex <= mnNumIndependentVars);
339     return GetVariableNameFormula(true, nIndex, bWithLog);
340 }
341 
GetYVariableNameFormula(bool bWithLog)342 OUString ScRegressionDialog::GetYVariableNameFormula(bool bWithLog)
343 {
344     return GetVariableNameFormula(false, 1, bWithLog);
345 }
346 
WriteRawRegressionResults(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate,size_t nRegressionIndex)347 void ScRegressionDialog::WriteRawRegressionResults(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
348                                                    size_t nRegressionIndex)
349 {
350     rOutput.writeBoldString(ScResId(STR_REGRESSION));
351     rOutput.newLine();
352     // REGRESSION MODEL
353     rOutput.writeString(ScResId(STR_LABEL_REGRESSION_MODEL));
354     rOutput.nextColumn();
355     rOutput.writeString(ScResId(constRegressionModel[nRegressionIndex]));
356     rOutput.newLine();
357     rOutput.newLine();
358 
359     rOutput.writeString(ScResId(STR_LINEST_RAW_OUTPUT_TITLE));
360     rOutput.newLine();
361     rOutput.push();
362 
363     rTemplate.setTemplate(constTemplateLINEST[nRegressionIndex].
364                           replaceFirst("%CALC_INTERCEPT%",
365                                        mbCalcIntercept ? OUStringLiteral("TRUE") : OUStringLiteral("FALSE")));
366     rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1 + mnNumIndependentVars, 5);
367     // Add LINEST result components to template
368     // 1. Add ranges for coefficients and standard errors for indep. vars and the intercept.
369     // Note that these two are in the reverse order(m_n, m_n-1, ..., m_1, b) w.r.t what we expect.
370     rTemplate.autoReplaceRange("%COEFFICIENTS_REV_RANGE%", ScRange(rOutput.current(), rOutput.current(mnNumIndependentVars)));
371     rTemplate.autoReplaceRange("%SERRORSX_REV_RANGE%", ScRange(rOutput.current(0, 1), rOutput.current(mnNumIndependentVars, 1)));
372 
373     // 2. Add R-squared and standard error for y estimate.
374     rTemplate.autoReplaceAddress("%RSQUARED_ADDR%", rOutput.current(0, 2));
375     rTemplate.autoReplaceAddress("%SERRORY_ADDR%", rOutput.current(1, 2));
376 
377     // 3. Add F statistic and degrees of freedom
378     rTemplate.autoReplaceAddress("%FSTATISTIC_ADDR%", rOutput.current(0, 3));
379     rTemplate.autoReplaceAddress("%DoFRESID_ADDR%", rOutput.current(1, 3));
380 
381     // 4. Add regression sum of squares and residual sum of squares
382     rTemplate.autoReplaceAddress("%SSREG_ADDR%", rOutput.current(0, 4));
383     rTemplate.autoReplaceAddress("%SSRESID_ADDR%", rOutput.current(1, 4));
384 
385     rOutput.push(0, 4);
386     rOutput.newLine();
387 }
388 
WriteRegressionStatistics(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate)389 void ScRegressionDialog::WriteRegressionStatistics(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate)
390 {
391     rOutput.newLine();
392     rOutput.writeString(ScResId(STR_LABEL_REGRESSION_STATISTICS));
393     rOutput.newLine();
394 
395     const char* aMeasureNames[] =
396     {
397         STR_LABEL_RSQUARED,
398         STRID_CALC_STD_ERROR,
399         STR_LABEL_XVARIABLES_COUNT,
400         STR_OBSERVATIONS_LABEL,
401         STR_LABEL_ADJUSTED_RSQUARED
402     };
403 
404     OUString aMeasureFormulas[] =
405     {
406         "=%RSQUARED_ADDR%",
407         "=%SERRORY_ADDR%",
408         "=" + OUString::number(mnNumIndependentVars),
409         "=" + OUString::number(mnNumObservations),
410         "=1 - (1 - %RSQUARED_ADDR%)*(%NUMOBS_ADDR% - 1)/(%NUMOBS_ADDR% - %NUMXVARS_ADDR%" +
411             (mbCalcIntercept ? OUStringLiteral(" - 1)") : OUStringLiteral(")"))
412     };
413 
414     rTemplate.autoReplaceAddress("%NUMXVARS_ADDR%", rOutput.current(1, 2));
415     rTemplate.autoReplaceAddress("%NUMOBS_ADDR%", rOutput.current(1, 3));
416 
417     for (size_t nIdx = 0; nIdx < SAL_N_ELEMENTS(aMeasureNames); ++nIdx)
418     {
419         rOutput.writeString(ScResId(aMeasureNames[nIdx]));
420         rOutput.nextColumn();
421         rTemplate.setTemplate(aMeasureFormulas[nIdx]);
422         rOutput.writeFormula(rTemplate.getTemplate());
423         rOutput.newLine();
424     }
425 }
426 
WriteRegressionANOVAResults(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate)427 void ScRegressionDialog::WriteRegressionANOVAResults(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate)
428 {
429     rOutput.newLine();
430     rOutput.writeString(ScResId(STR_LABEL_ANOVA));
431     rOutput.newLine();
432 
433     const size_t nColsInTable = 6;
434     const size_t nRowsInTable = 4;
435     OUString aTable[nRowsInTable][nColsInTable] =
436     {
437         {
438             "",
439             ScResId(STR_ANOVA_LABEL_DF),
440             ScResId(STR_ANOVA_LABEL_SS),
441             ScResId(STR_ANOVA_LABEL_MS),
442             ScResId(STR_ANOVA_LABEL_F),
443             ScResId(STR_ANOVA_LABEL_SIGNIFICANCE_F)
444         },
445         {
446             ScResId(STR_REGRESSION),
447             "=%NUMXVARS_ADDR%",
448             "=%SSREG_ADDR%",
449             "=%SSREG_ADDR% / %DoFREG_ADDR%",
450             "=%FSTATISTIC_ADDR%",
451             "=FDIST(%FSTATISTIC_ADDR% ; %DoFREG_ADDR% ; %DoFRESID_ADDR%)"
452         },
453         {
454             ScResId(STR_LABEL_RESIDUAL),
455             "=%DoFRESID_ADDR%",
456             "=%SSRESID_ADDR%",
457             "=%SSRESID_ADDR% / %DoFRESID_ADDR%",
458             "",
459             ""
460         },
461         {
462             ScResId(STR_ANOVA_LABEL_TOTAL),
463             "=%DoFREG_ADDR% + %DoFRESID_ADDR%",
464             "=%SSREG_ADDR% + %SSRESID_ADDR%",
465             "",
466             "",
467             ""
468         }
469     };
470 
471     rTemplate.autoReplaceAddress("%DoFREG_ADDR%", rOutput.current(1, 1));
472 
473     // Cell getter lambda
474     std::function<CellValueGetter> aCellGetterFunc = [&aTable](size_t nRowIdx, size_t nColIdx) -> const OUString&
475     {
476         return aTable[nRowIdx][nColIdx];
477     };
478 
479     // Cell writer lambda
480     std::function<CellWriter> aCellWriterFunc = [&rOutput, &rTemplate]
481         (const OUString& rContent, size_t /*nRowIdx*/, size_t /*nColIdx*/)
482     {
483         if (!rContent.isEmpty())
484         {
485             if (rContent.startsWith("="))
486             {
487                 rTemplate.setTemplate(rContent);
488                 rOutput.writeFormula(rTemplate.getTemplate());
489             }
490             else
491                 rOutput.writeString(rContent);
492         }
493     };
494 
495     WriteTable(aCellGetterFunc, nRowsInTable, nColsInTable, rOutput, aCellWriterFunc);
496 
497     // User given confidence level
498     rOutput.newLine();
499     rOutput.writeString(ScResId(STR_LABEL_CONFIDENCE_LEVEL));
500     rOutput.nextColumn();
501     rOutput.writeValue(mxConfidenceLevelField->get_value() / 100.0);
502     rTemplate.autoReplaceAddress("%CONFIDENCE_LEVEL_ADDR%", rOutput.current());
503     rOutput.newLine();
504 }
505 
506 // Write slopes, intercept, their standard errors, t-statistics, p-value, confidence intervals
WriteRegressionEstimatesWithCI(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate,bool bTakeLogX)507 void ScRegressionDialog::WriteRegressionEstimatesWithCI(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
508                                                         bool bTakeLogX)
509 {
510     rOutput.newLine();
511     SCROW nLastRow = rOutput.current(0, 1 + mnNumIndependentVars).Row();
512 
513     // Coefficients & Std.Errors ranges (column vectors) in this table (yet to populate).
514     rTemplate.autoReplaceRange("%COEFFICIENTS_RANGE%",
515                                ScRange(rOutput.current(1, 1),
516                                        rOutput.current(1, 1 + mnNumIndependentVars)));
517     rTemplate.autoReplaceRange("%SLOPES_RANGE%",  // Excludes the intercept
518                                ScRange(rOutput.current(1, 2),
519                                        rOutput.current(1, 1 + mnNumIndependentVars)));
520     rTemplate.autoReplaceAddress("%INTERCEPT_ADDR%", rOutput.current(1, 1));
521     rTemplate.autoReplaceRange("%SERRORSX_RANGE%",
522                                ScRange(rOutput.current(2, 1),
523                                        rOutput.current(2, 1 + mnNumIndependentVars)));
524     // t-Statistics range in this table (yet to populate)
525     rTemplate.autoReplaceRange("%TSTAT_RANGE%",
526                                ScRange(rOutput.current(3, 1),
527                                        rOutput.current(3, 1 + mnNumIndependentVars)));
528 
529     const size_t nColsInTable = 7;
530     const size_t nRowsInTable = 2;
531     OUString aTable[nRowsInTable][nColsInTable] =
532     {
533         {
534             "",
535             ScResId(STR_LABEL_COEFFICIENTS),
536             ScResId(STRID_CALC_STD_ERROR),
537             ScResId(STR_LABEL_TSTATISTIC),
538             ScResId(STR_P_VALUE_LABEL),
539 
540             "=CONCAT(\"" + ScResId(STR_LABEL_LOWER) +
541             " \" ; INT(%CONFIDENCE_LEVEL_ADDR% * 100) ; \"%\")",
542 
543             "=CONCAT(\"" + ScResId(STR_LABEL_UPPER) +
544             " \" ; INT(%CONFIDENCE_LEVEL_ADDR% * 100) ; \"%\")",
545         },
546 
547         // Following are matrix formulas of size numcols = 1, numrows = (mnNumIndependentVars + 1)
548         {
549             "",
550             // This puts the coefficients in the reverse order compared to that in LINEST output.
551             "=INDEX(%COEFFICIENTS_REV_RANGE%; 1 ; " + OUString::number(nLastRow + 2) + " - ROW())",
552             // This puts the standard errors in the reverse order compared to that in LINEST output.
553             "=INDEX(%SERRORSX_REV_RANGE%; 1 ; " + OUString::number(nLastRow + 2) + " - ROW())",
554             // t-Statistic
555             "=%COEFFICIENTS_RANGE% / %SERRORSX_RANGE%",
556             // p-Value
557             "=TDIST(ABS(%TSTAT_RANGE%) ; %DoFRESID_ADDR% ; 2 )",
558             // Lower limit of confidence interval
559             "=%COEFFICIENTS_RANGE% - %SERRORSX_RANGE% * "
560             "TINV(1 - %CONFIDENCE_LEVEL_ADDR% ; %DoFRESID_ADDR%)",
561             // Upper limit of confidence interval
562             "=%COEFFICIENTS_RANGE% + %SERRORSX_RANGE% * "
563             "TINV(1 - %CONFIDENCE_LEVEL_ADDR% ; %DoFRESID_ADDR%)"
564         }
565     };
566 
567     // Cell getter lambda
568     std::function<CellValueGetter> aCellGetterFunc = [&aTable](size_t nRowIdx, size_t nColIdx) -> const OUString&
569     {
570         return aTable[nRowIdx][nColIdx];
571     };
572 
573     // Cell writer lambda
574     size_t nNumIndependentVars = mnNumIndependentVars;
575     std::function<CellWriter> aCellWriterFunc = [&rOutput, &rTemplate, nNumIndependentVars]
576         (const OUString& rContent, size_t nRowIdx, size_t /*nColIdx*/)
577     {
578         if (!rContent.isEmpty())
579         {
580             if (rContent.startsWith("="))
581             {
582                 rTemplate.setTemplate(rContent);
583                 if (nRowIdx == 0)
584                     rOutput.writeFormula(rTemplate.getTemplate());
585                 else
586                     rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, 1 + nNumIndependentVars);
587             }
588             else
589                 rOutput.writeString(rContent);
590         }
591     };
592 
593     WriteTable(aCellGetterFunc, nRowsInTable, nColsInTable, rOutput, aCellWriterFunc);
594 
595     // Go back to the second row and first column of the table to
596     // fill the names of variables + intercept
597     rOutput.push(0, -1);
598 
599     for (size_t nXvarIdx = 0; nXvarIdx <= mnNumIndependentVars; ++nXvarIdx)
600     {
601         rOutput.writeFormula(GetXVariableNameFormula(nXvarIdx, bTakeLogX));
602         rOutput.newLine();
603     }
604 
605 }
606 
607 // Re-write all observations in group-by column mode with predictions and residuals
WritePredictionsWithResiduals(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate,size_t nRegressionIndex)608 void ScRegressionDialog::WritePredictionsWithResiduals(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
609                                                        size_t nRegressionIndex)
610 {
611     bool bGroupedByColumn = mGroupedBy == BY_COLUMN;
612     rOutput.newLine();
613     rOutput.push();
614 
615     // Range of X variables with rows as observations and columns as variables.
616     ScRange aDataMatrixRange(rOutput.current(0, 1), rOutput.current(mnNumIndependentVars - 1, mnNumObservations));
617     rTemplate.autoReplaceRange("%XDATAMATRIX_RANGE%", aDataMatrixRange);
618 
619     // Write X variable names
620     for (size_t nXvarIdx = 1; nXvarIdx <= mnNumIndependentVars; ++nXvarIdx)
621     {
622         // Here we write the X variables without any transformation(LN)
623         rOutput.writeFormula(GetXVariableNameFormula(nXvarIdx, false));
624         rOutput.nextColumn();
625     }
626     rOutput.reset();
627 
628     // Write the X data matrix
629     rOutput.nextRow();
630     OUString aDataMatrixFormula = bGroupedByColumn ? OUString("=%VARIABLE1_RANGE%") : OUString("=TRANSPOSE(%VARIABLE1_RANGE%)");
631     rTemplate.setTemplate(aDataMatrixFormula);
632     rOutput.writeMatrixFormula(rTemplate.getTemplate(), mnNumIndependentVars, mnNumObservations);
633 
634     // Write predicted values
635     rOutput.push(mnNumIndependentVars, -1);
636     rOutput.writeString(ScResId(STR_LABEL_PREDICTEDY));
637     rOutput.nextRow();
638     rTemplate.setTemplate(constRegressionFormula[nRegressionIndex]);
639     rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
640     rTemplate.autoReplaceRange("%PREDICTEDY_RANGE%", ScRange(rOutput.current(), rOutput.current(0, mnNumObservations - 1)));
641 
642     // Write actual Y
643     rOutput.push(1, -1);
644     rOutput.writeFormula(GetYVariableNameFormula(false));
645     rOutput.nextRow();
646     OUString aYVectorFormula = bGroupedByColumn ? OUString("=%VARIABLE2_RANGE%") : OUString("=TRANSPOSE(%VARIABLE2_RANGE%)");
647     rTemplate.setTemplate(aYVectorFormula);
648     rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
649     rTemplate.autoReplaceRange("%ACTUALY_RANGE%", ScRange(rOutput.current(), rOutput.current(0, mnNumObservations - 1)));
650 
651     // Write residual
652     rOutput.push(1, -1);
653     rOutput.writeString(ScResId(STR_LABEL_RESIDUAL));
654     rOutput.nextRow();
655     rTemplate.setTemplate("=%ACTUALY_RANGE% - %PREDICTEDY_RANGE%");
656     rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
657 }
658 
659 // Generic table writer
WriteTable(const std::function<CellValueGetter> & rCellGetter,size_t nRowsInTable,size_t nColsInTable,AddressWalkerWriter & rOutput,const std::function<CellWriter> & rFunc)660 void ScRegressionDialog::WriteTable(const std::function<CellValueGetter>& rCellGetter,
661                                     size_t nRowsInTable, size_t nColsInTable,
662                                     AddressWalkerWriter& rOutput,
663                                     const std::function<CellWriter>& rFunc)
664 {
665     for (size_t nRowIdx = 0; nRowIdx < nRowsInTable; ++nRowIdx)
666     {
667         for (size_t nColIdx = 0; nColIdx < nColsInTable; ++nColIdx)
668         {
669             rFunc(rCellGetter(nRowIdx, nColIdx), nRowIdx, nColIdx);
670             rOutput.nextColumn();
671         }
672         rOutput.newLine();
673     }
674 }
675 
IMPL_LINK_NOARG(ScRegressionDialog,CheckBoxHdl,weld::ToggleButton &,void)676 IMPL_LINK_NOARG(ScRegressionDialog, CheckBoxHdl, weld::ToggleButton&, void)
677 {
678     ValidateDialogInput();
679 }
680 
IMPL_LINK_NOARG(ScRegressionDialog,NumericFieldHdl,weld::SpinButton &,void)681 IMPL_LINK_NOARG(ScRegressionDialog, NumericFieldHdl, weld::SpinButton&, void)
682 {
683     ValidateDialogInput();
684 }
685 
686 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
687