1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  */
10 
11 #include <sal/config.h>
12 
13 #include <string_view>
14 
15 #include <document.hxx>
16 #include <reffact.hxx>
17 #include <TableFillingAndNavigationTools.hxx>
18 #include <RegressionDialog.hxx>
19 #include <scresid.hxx>
20 #include <strings.hrc>
21 
22 /*
23    Some regression basics
24    ----------------------
25 
26    1. Linear regression fits using data, a linear function between the dependent variable and the independent variable(s).
27       The basic form of this function is :-
28 
29       y = b + m_1*x_1 + m_2*x_2 + ... + m_k*x_k
30 
31       where y is the dependent variable
32             x_1, x_2, ..., x_k are the k independent variables
33             b is the intercept
34             m_1, m_2, ..., m_k are the slopes corresponding to the variables x_1, x_2, ..., x_k respectively.
35 
36 
37      This equation for n observations can be compactly written using matrices as :-
38 
39      y = X*A
40 
41      where y is the n dimensional column vector containing dependent variable observations.
42      where X is matrix of shape n*(k+1) where a row looks like [ 1  x_1  x_2 ...  x_k ]
43            A is the k+1 dimensional column vector              [ b  m_1  m_2 ...  m_k ]
44 
45      Calc formula LINEST(Y_array ; X_array) can be used to compute all entries in "A" along with many other statistics.
46 
47 
48   2. Logarithmic regression is basically used to find a linear function between the dependent variable and
49      the natural logarithm of the independent variable(s).
50      So the basic form of this functions is :-
51 
52      y = b + m_1*ln(x_1) + m_2*ln(x_2) + ... + m_k*ln(x_k)
53 
54      This can be again written in a compact matrix form for n observations.
55 
56      y = ln(X)*A
57 
58      where y is the n dimensional column vector containing dependent variable observations.
59      where X is matrix of shape n*(k+1) where a row looks like  [ e  x_1  x_2 ...  x_k ]
60            A is the k+1 dimensional column vector               [ b  m_1  m_2 ...  m_k ]
61 
62      To estimate A, we use the formula =LINEST(Y_array ; LN(X_array))
63 
64 
65   3. Power regression is used to fit the following model :-
66 
67      y = b * (x_1 ^ m_1) * (x_2 ^ m_2) * ... * (x_k ^ m_k)
68 
69      To reduce this to a linear function(so that we can still use LINEST()), we take natural logarithm on both sides
70 
71      ln(y) = c + m_1*ln(x_1) + m_2*ln(x_2) + ... + m_k*ln(x_k) ; where c = ln(b)
72 
73 
74      This again can be written compactly in matrix form as :-
75 
76      ln(y) = ln(X)*A
77 
78      where y is the n dimensional column vector containing dependent variable observations.
79      where X is matrix of shape n*(k+1) where a row looks like  [ e  x_1  x_2 ...  x_k ]
80            A is the k+1 dimensional column vector               [ c  m_1  m_2 ...  m_k ]
81 
82      To estimate A, we use the formula =LINEST(LN(Y_array) ; LN(X_array))
83 
84      Once we get A, to get back y from x's we use the formula :-
85 
86      y = exp( ln(X)*A )
87 
88 
89 
90      Some references for computing confidence interval for the regression coefficients :-
91 
92      [1] https://en.wikipedia.org/wiki/Student%27s_t-test#Slope_of_a_regression_line
93      [2] https://en.wikipedia.org/wiki/Simple_linear_regression#Normality_assumption
94      [3] https://onlinecourses.science.psu.edu/stat414/node/280
95 
96  */
97 
98 namespace
99 {
100     enum class ScRegType {
101         LINEAR,
102         LOGARITHMIC,
103         POWER
104     };
105 
106     const char* constRegressionModel[] =
107     {
108         STR_LABEL_LINEAR,
109         STR_LABEL_LOGARITHMIC,
110         STR_LABEL_POWER
111     };
112 
113     OUString constTemplateLINEST[] =
114     {
115         "=LINEST(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE% ; %CALC_INTERCEPT% ; TRUE)",
116         "=LINEST(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%) ; %CALC_INTERCEPT% ; TRUE)",
117         "=LINEST(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%) ; %CALC_INTERCEPT% ; TRUE)"
118     };
119 
120     OUString constRegressionFormula[] =
121     {
122         "=MMULT(%XDATAMATRIX_RANGE% ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%",
123         "=MMULT(LN(%XDATAMATRIX_RANGE%) ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%",
124         "=EXP(MMULT(LN(%XDATAMATRIX_RANGE%) ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%)"
125     };
126 
127 } // end anonymous namespace
128 
lcl_GetNumRowsColsInRange(const ScRange & rRange,bool bRows)129 static size_t lcl_GetNumRowsColsInRange(const ScRange& rRange, bool bRows)
130 {
131     if (bRows)
132         return rRange.aEnd.Row() - rRange.aStart.Row() + 1;
133 
134     return rRange.aEnd.Col() - rRange.aStart.Col() + 1;
135 }
136 
ScRegressionDialog(SfxBindings * pSfxBindings,SfxChildWindow * pChildWindow,weld::Window * pParent,ScViewData & rViewData)137 ScRegressionDialog::ScRegressionDialog(
138                     SfxBindings* pSfxBindings, SfxChildWindow* pChildWindow,
139                     weld::Window* pParent, ScViewData& rViewData )
140     : ScStatisticsTwoVariableDialog(
141             pSfxBindings, pChildWindow, pParent, rViewData,
142             "modules/scalc/ui/regressiondialog.ui", "RegressionDialog")
143     , mbUnivariate(true)
144     , mnNumIndependentVars(1)
145     , mnNumObservations(0)
146     , mbUse3DAddresses(false)
147     , mbCalcIntercept(true)
148     , mxWithLabelsCheckBox(m_xBuilder->weld_check_button("withlabels-check"))
149     , mxLinearRadioButton(m_xBuilder->weld_radio_button("linear-radio"))
150     , mxLogarithmicRadioButton(m_xBuilder->weld_radio_button("logarithmic-radio"))
151     , mxPowerRadioButton(m_xBuilder->weld_radio_button("power-radio"))
152     , mxErrorMessage(m_xBuilder->weld_label("error-message"))
153     , mxConfidenceLevelField(m_xBuilder->weld_spin_button("confidencelevel-spin"))
154     , mxCalcResidualsCheckBox(m_xBuilder->weld_check_button("calcresiduals-check"))
155     , mxNoInterceptCheckBox(m_xBuilder->weld_check_button("nointercept-check"))
156 {
157     mxWithLabelsCheckBox->connect_toggled(LINK(this, ScRegressionDialog, CheckBoxHdl));
158     mxConfidenceLevelField->connect_value_changed(LINK(this, ScRegressionDialog, NumericFieldHdl));
159 }
160 
~ScRegressionDialog()161 ScRegressionDialog::~ScRegressionDialog()
162 {
163 }
164 
Close()165 void ScRegressionDialog::Close()
166 {
167     DoClose(ScRegressionDialogWrapper::GetChildWindowId());
168 }
169 
GetUndoNameId()170 const char* ScRegressionDialog::GetUndoNameId()
171 {
172     return STR_REGRESSION_UNDO_NAME;
173 }
174 
ApplyOutput(ScDocShell * pDocShell)175 ScRange ScRegressionDialog::ApplyOutput(ScDocShell* pDocShell)
176 {
177     AddressWalkerWriter aOutput(mOutputAddress, pDocShell, mDocument,
178             formula::FormulaGrammar::mergeToGrammar( formula::FormulaGrammar::GRAM_ENGLISH, mAddressDetails.eConv));
179     FormulaTemplate aTemplate(&mDocument);
180     aTemplate.autoReplaceUses3D(mbUse3DAddresses);
181     mbCalcIntercept = !mxNoInterceptCheckBox->get_active();
182 
183     // max col of our output should account for
184     // 1. constant term column,
185     // 2. mnNumIndependentVars columns
186     // 3. Actual Y column
187     // 4. Predicted Y column
188     // 5. Residual Column
189     SCCOL nOutputMaxCol = mOutputAddress.Col() + mnNumIndependentVars + 3;
190 
191     ScRange aXDataRange(GetDataRange(mVariable1Range));
192     ScRange aYDataRange(GetDataRange(mVariable2Range));
193 
194     aTemplate.autoReplaceRange("%VARIABLE1_RANGE%", aXDataRange);
195     aTemplate.autoReplaceRange("%VARIABLE2_RANGE%", aYDataRange);
196     size_t nRegressionIndex = GetRegressionTypeIndex();
197     ScRegType eRegType = static_cast<ScRegType>(nRegressionIndex);
198     bool bTakeLogX = eRegType == ScRegType::LOGARITHMIC || eRegType == ScRegType::POWER;
199 
200     WriteRawRegressionResults(aOutput, aTemplate, nRegressionIndex);
201     WriteRegressionStatistics(aOutput, aTemplate);
202     WriteRegressionANOVAResults(aOutput, aTemplate);
203     WriteRegressionEstimatesWithCI(aOutput, aTemplate, bTakeLogX);
204     if (mxCalcResidualsCheckBox->get_active())
205         WritePredictionsWithResiduals(aOutput, aTemplate, nRegressionIndex);
206 
207     ScAddress aMaxAddress(aOutput.mMaximumAddress);
208     aMaxAddress.SetCol(std::max(aMaxAddress.Col(), nOutputMaxCol));
209     return ScRange(aOutput.mMinimumAddress, aMaxAddress);
210 }
211 
InputRangesValid()212 bool ScRegressionDialog::InputRangesValid()
213 {
214     if (!mVariable1Range.IsValid())
215     {
216         mxErrorMessage->set_label(ScResId(STR_MESSAGE_XINVALID_RANGE));
217         return false;
218     }
219 
220     if (!mVariable2Range.IsValid())
221     {
222         mxErrorMessage->set_label(ScResId(STR_MESSAGE_YINVALID_RANGE));
223         return false;
224     }
225 
226     if (!mOutputAddress.IsValid())
227     {
228         mxErrorMessage->set_label(ScResId(STR_MESSAGE_INVALID_OUTPUT_ADDR));
229         return false;
230     }
231 
232     {
233         double fConfidenceLevel = mxConfidenceLevelField->get_value();
234         if ( fConfidenceLevel <= 0.0 || fConfidenceLevel >= 100.0 )
235         {
236             mxErrorMessage->set_label(ScResId(STR_MESSAGE_INVALID_CONFIDENCE_LEVEL));
237             return false;
238         }
239     }
240 
241     mVariable1Range.PutInOrder();
242     mVariable2Range.PutInOrder();
243 
244     bool bGroupedByColumn = mGroupedBy == BY_COLUMN;
245 
246     bool bYHasSingleDim = (
247         (bGroupedByColumn &&
248          mVariable2Range.aStart.Col() == mVariable2Range.aEnd.Col()) ||
249         (!bGroupedByColumn &&
250          mVariable2Range.aStart.Row() == mVariable2Range.aEnd.Row()));
251 
252     if (!bYHasSingleDim)
253     {
254         if (bGroupedByColumn)
255             mxErrorMessage->set_label(ScResId(STR_MESSAGE_YVARIABLE_MULTI_COLUMN));
256         else
257             mxErrorMessage->set_label(ScResId(STR_MESSAGE_YVARIABLE_MULTI_ROW));
258         return false;
259     }
260 
261     bool bWithLabels = mxWithLabelsCheckBox->get_active();
262 
263     size_t nYObs = lcl_GetNumRowsColsInRange(mVariable2Range, bGroupedByColumn);
264     size_t nNumXVars = lcl_GetNumRowsColsInRange(mVariable1Range, !bGroupedByColumn);
265     mbUnivariate = nNumXVars == 1;
266     // Observation count mismatch check
267     if (lcl_GetNumRowsColsInRange(mVariable1Range, bGroupedByColumn) != nYObs)
268     {
269         if (mbUnivariate)
270             mxErrorMessage->set_label(ScResId(STR_MESSAGE_UNIVARIATE_NUMOBS_MISMATCH));
271         else
272             mxErrorMessage->set_label(ScResId(STR_MESSAGE_MULTIVARIATE_NUMOBS_MISMATCH));
273         return false;
274     }
275 
276     mnNumIndependentVars = nNumXVars;
277     mnNumObservations = bWithLabels ? nYObs - 1 : nYObs;
278 
279     mbUse3DAddresses = mVariable1Range.aStart.Tab() != mOutputAddress.Tab() ||
280         mVariable2Range.aStart.Tab() != mOutputAddress.Tab();
281 
282     mxErrorMessage->set_label("");
283 
284     return true;
285 }
286 
GetRegressionTypeIndex() const287 size_t ScRegressionDialog::GetRegressionTypeIndex() const
288 {
289     if (mxLinearRadioButton->get_active())
290         return 0;
291     if (mxLogarithmicRadioButton->get_active())
292         return 1;
293     return 2;
294 }
295 
GetDataRange(const ScRange & rRange)296 ScRange ScRegressionDialog::GetDataRange(const ScRange& rRange)
297 {
298     if (!mxWithLabelsCheckBox->get_active())
299         return rRange;
300 
301     ScRange aDataRange(rRange);
302     if (mGroupedBy == BY_COLUMN)
303         aDataRange.aStart.IncRow(1);
304     else
305         aDataRange.aStart.IncCol(1);
306 
307     return aDataRange;
308 }
309 
GetVariableNameFormula(bool bXVar,size_t nIndex,bool bWithLog)310 OUString ScRegressionDialog::GetVariableNameFormula(bool bXVar, size_t nIndex, bool bWithLog)
311 {
312     if (bXVar && nIndex == 0)
313         return "=\"" + ScResId(STR_LABEL_INTERCEPT) + "\"";
314 
315     if (mxWithLabelsCheckBox->get_active())
316     {
317         ScAddress aAddr(bXVar ? mVariable1Range.aStart : mVariable2Range.aStart);
318         if (mGroupedBy == BY_COLUMN)
319             aAddr.IncCol(nIndex - 1);
320         else
321             aAddr.IncRow(nIndex - 1);
322 
323         ScRefFlags eAddrFlag = mbUse3DAddresses ? ScRefFlags::ADDR_ABS_3D : ScRefFlags::ADDR_ABS;
324         return bWithLog ? OUString("=CONCAT(\"LN(\";" +
325             aAddr.Format(eAddrFlag, &mDocument, mDocument.GetAddressConvention()) + ";\")\")") :
326             OUString("=" + aAddr.Format(eAddrFlag, &mDocument, mDocument.GetAddressConvention()));
327     }
328 
329     OUString aDefaultVarName;
330 
331     if (bXVar)
332         aDefaultVarName = "X" + OUString::number(nIndex);
333     else
334         aDefaultVarName = "Y";
335 
336     return bWithLog ? OUString("=\"LN(" + aDefaultVarName + ")\"") :
337         OUString("=\"" + aDefaultVarName + "\"");
338 }
339 
GetXVariableNameFormula(size_t nIndex,bool bWithLog)340 OUString ScRegressionDialog::GetXVariableNameFormula(size_t nIndex, bool bWithLog)
341 {
342     assert(nIndex <= mnNumIndependentVars);
343     return GetVariableNameFormula(true, nIndex, bWithLog);
344 }
345 
GetYVariableNameFormula(bool bWithLog)346 OUString ScRegressionDialog::GetYVariableNameFormula(bool bWithLog)
347 {
348     return GetVariableNameFormula(false, 1, bWithLog);
349 }
350 
WriteRawRegressionResults(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate,size_t nRegressionIndex)351 void ScRegressionDialog::WriteRawRegressionResults(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
352                                                    size_t nRegressionIndex)
353 {
354     rOutput.writeBoldString(ScResId(STR_REGRESSION));
355     rOutput.newLine();
356     // REGRESSION MODEL
357     rOutput.writeString(ScResId(STR_LABEL_REGRESSION_MODEL));
358     rOutput.nextColumn();
359     rOutput.writeString(ScResId(constRegressionModel[nRegressionIndex]));
360     rOutput.newLine();
361     rOutput.newLine();
362 
363     rOutput.writeString(ScResId(STR_LINEST_RAW_OUTPUT_TITLE));
364     rOutput.newLine();
365     rOutput.push();
366 
367     rTemplate.setTemplate(constTemplateLINEST[nRegressionIndex].
368                           replaceFirst("%CALC_INTERCEPT%",
369                                        mbCalcIntercept ? std::u16string_view(u"TRUE") : std::u16string_view(u"FALSE")));
370     rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1 + mnNumIndependentVars, 5);
371     // Add LINEST result components to template
372     // 1. Add ranges for coefficients and standard errors for indep. vars and the intercept.
373     // Note that these two are in the reverse order(m_n, m_n-1, ..., m_1, b) w.r.t what we expect.
374     rTemplate.autoReplaceRange("%COEFFICIENTS_REV_RANGE%", ScRange(rOutput.current(), rOutput.current(mnNumIndependentVars)));
375     rTemplate.autoReplaceRange("%SERRORSX_REV_RANGE%", ScRange(rOutput.current(0, 1), rOutput.current(mnNumIndependentVars, 1)));
376 
377     // 2. Add R-squared and standard error for y estimate.
378     rTemplate.autoReplaceAddress("%RSQUARED_ADDR%", rOutput.current(0, 2));
379     rTemplate.autoReplaceAddress("%SERRORY_ADDR%", rOutput.current(1, 2));
380 
381     // 3. Add F statistic and degrees of freedom
382     rTemplate.autoReplaceAddress("%FSTATISTIC_ADDR%", rOutput.current(0, 3));
383     rTemplate.autoReplaceAddress("%DoFRESID_ADDR%", rOutput.current(1, 3));
384 
385     // 4. Add regression sum of squares and residual sum of squares
386     rTemplate.autoReplaceAddress("%SSREG_ADDR%", rOutput.current(0, 4));
387     rTemplate.autoReplaceAddress("%SSRESID_ADDR%", rOutput.current(1, 4));
388 
389     rOutput.push(0, 4);
390     rOutput.newLine();
391 }
392 
WriteRegressionStatistics(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate)393 void ScRegressionDialog::WriteRegressionStatistics(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate)
394 {
395     rOutput.newLine();
396     rOutput.writeString(ScResId(STR_LABEL_REGRESSION_STATISTICS));
397     rOutput.newLine();
398 
399     const char* aMeasureNames[] =
400     {
401         STR_LABEL_RSQUARED,
402         STRID_CALC_STD_ERROR,
403         STR_LABEL_XVARIABLES_COUNT,
404         STR_OBSERVATIONS_LABEL,
405         STR_LABEL_ADJUSTED_RSQUARED
406     };
407 
408     OUString aMeasureFormulas[] =
409     {
410         "=%RSQUARED_ADDR%",
411         "=%SERRORY_ADDR%",
412         "=" + OUString::number(mnNumIndependentVars),
413         "=" + OUString::number(mnNumObservations),
414         OUString::Concat(
415                 "=1 - (1 - %RSQUARED_ADDR%)*(%NUMOBS_ADDR% - 1)/(%NUMOBS_ADDR% - %NUMXVARS_ADDR%") +
416             (mbCalcIntercept ? std::u16string_view(u" - 1)") : std::u16string_view(u")"))
417     };
418 
419     rTemplate.autoReplaceAddress("%NUMXVARS_ADDR%", rOutput.current(1, 2));
420     rTemplate.autoReplaceAddress("%NUMOBS_ADDR%", rOutput.current(1, 3));
421 
422     for (size_t nIdx = 0; nIdx < SAL_N_ELEMENTS(aMeasureNames); ++nIdx)
423     {
424         rOutput.writeString(ScResId(aMeasureNames[nIdx]));
425         rOutput.nextColumn();
426         rTemplate.setTemplate(aMeasureFormulas[nIdx]);
427         rOutput.writeFormula(rTemplate.getTemplate());
428         rOutput.newLine();
429     }
430 }
431 
WriteRegressionANOVAResults(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate)432 void ScRegressionDialog::WriteRegressionANOVAResults(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate)
433 {
434     rOutput.newLine();
435     rOutput.writeString(ScResId(STR_LABEL_ANOVA));
436     rOutput.newLine();
437 
438     const size_t nColsInTable = 6;
439     const size_t nRowsInTable = 4;
440     OUString aTable[nRowsInTable][nColsInTable] =
441     {
442         {
443             "",
444             ScResId(STR_ANOVA_LABEL_DF),
445             ScResId(STR_ANOVA_LABEL_SS),
446             ScResId(STR_ANOVA_LABEL_MS),
447             ScResId(STR_ANOVA_LABEL_F),
448             ScResId(STR_ANOVA_LABEL_SIGNIFICANCE_F)
449         },
450         {
451             ScResId(STR_REGRESSION),
452             "=%NUMXVARS_ADDR%",
453             "=%SSREG_ADDR%",
454             "=%SSREG_ADDR% / %DoFREG_ADDR%",
455             "=%FSTATISTIC_ADDR%",
456             "=FDIST(%FSTATISTIC_ADDR% ; %DoFREG_ADDR% ; %DoFRESID_ADDR%)"
457         },
458         {
459             ScResId(STR_LABEL_RESIDUAL),
460             "=%DoFRESID_ADDR%",
461             "=%SSRESID_ADDR%",
462             "=%SSRESID_ADDR% / %DoFRESID_ADDR%",
463             "",
464             ""
465         },
466         {
467             ScResId(STR_ANOVA_LABEL_TOTAL),
468             "=%DoFREG_ADDR% + %DoFRESID_ADDR%",
469             "=%SSREG_ADDR% + %SSRESID_ADDR%",
470             "",
471             "",
472             ""
473         }
474     };
475 
476     rTemplate.autoReplaceAddress("%DoFREG_ADDR%", rOutput.current(1, 1));
477 
478     // Cell getter lambda
479     std::function<CellValueGetter> aCellGetterFunc = [&aTable](size_t nRowIdx, size_t nColIdx) -> const OUString&
480     {
481         return aTable[nRowIdx][nColIdx];
482     };
483 
484     // Cell writer lambda
485     std::function<CellWriter> aCellWriterFunc = [&rOutput, &rTemplate]
486         (const OUString& rContent, size_t /*nRowIdx*/, size_t /*nColIdx*/)
487     {
488         if (!rContent.isEmpty())
489         {
490             if (rContent.startsWith("="))
491             {
492                 rTemplate.setTemplate(rContent);
493                 rOutput.writeFormula(rTemplate.getTemplate());
494             }
495             else
496                 rOutput.writeString(rContent);
497         }
498     };
499 
500     WriteTable(aCellGetterFunc, nRowsInTable, nColsInTable, rOutput, aCellWriterFunc);
501 
502     // User given confidence level
503     rOutput.newLine();
504     rOutput.writeString(ScResId(STR_LABEL_CONFIDENCE_LEVEL));
505     rOutput.nextColumn();
506     rOutput.writeValue(mxConfidenceLevelField->get_value() / 100.0);
507     rTemplate.autoReplaceAddress("%CONFIDENCE_LEVEL_ADDR%", rOutput.current());
508     rOutput.newLine();
509 }
510 
511 // Write slopes, intercept, their standard errors, t-statistics, p-value, confidence intervals
WriteRegressionEstimatesWithCI(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate,bool bTakeLogX)512 void ScRegressionDialog::WriteRegressionEstimatesWithCI(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
513                                                         bool bTakeLogX)
514 {
515     rOutput.newLine();
516     ScAddress aEnd( rOutput.current(0, 1 + mnNumIndependentVars));
517     ScRefFlags eAddrFlag = mbUse3DAddresses ? ScRefFlags::ADDR_ABS_3D : ScRefFlags::ADDR_ABS;
518     aEnd.IncCol();
519     const OUString aCoeffAddr( aEnd.Format( eAddrFlag, &mDocument, mDocument.GetAddressConvention()));
520     aEnd.IncCol();
521     const OUString aStErrAddr( aEnd.Format( eAddrFlag, &mDocument, mDocument.GetAddressConvention()));
522 
523     // Coefficients & Std.Errors ranges (column vectors) in this table (yet to populate).
524     rTemplate.autoReplaceRange("%COEFFICIENTS_RANGE%",
525                                ScRange(rOutput.current(1, 1),
526                                        rOutput.current(1, 1 + mnNumIndependentVars)));
527     rTemplate.autoReplaceRange("%SLOPES_RANGE%",  // Excludes the intercept
528                                ScRange(rOutput.current(1, 2),
529                                        rOutput.current(1, 1 + mnNumIndependentVars)));
530     rTemplate.autoReplaceAddress("%INTERCEPT_ADDR%", rOutput.current(1, 1));
531     rTemplate.autoReplaceRange("%SERRORSX_RANGE%",
532                                ScRange(rOutput.current(2, 1),
533                                        rOutput.current(2, 1 + mnNumIndependentVars)));
534     // t-Statistics range in this table (yet to populate)
535     rTemplate.autoReplaceRange("%TSTAT_RANGE%",
536                                ScRange(rOutput.current(3, 1),
537                                        rOutput.current(3, 1 + mnNumIndependentVars)));
538 
539     const size_t nColsInTable = 7;
540     const size_t nRowsInTable = 2;
541     OUString aTable[nRowsInTable][nColsInTable] =
542     {
543         {
544             "",
545             ScResId(STR_LABEL_COEFFICIENTS),
546             ScResId(STRID_CALC_STD_ERROR),
547             ScResId(STR_LABEL_TSTATISTIC),
548             ScResId(STR_P_VALUE_LABEL),
549 
550             "=CONCAT(\"" + ScResId(STR_LABEL_LOWER) +
551             " \" ; INT(%CONFIDENCE_LEVEL_ADDR% * 100) ; \"%\")",
552 
553             "=CONCAT(\"" + ScResId(STR_LABEL_UPPER) +
554             " \" ; INT(%CONFIDENCE_LEVEL_ADDR% * 100) ; \"%\")",
555         },
556 
557         // Following are matrix formulas of size numcols = 1, numrows = (mnNumIndependentVars + 1)
558         {
559             "",
560             // This puts the coefficients in the reverse order compared to that in LINEST output.
561             "=INDEX(%COEFFICIENTS_REV_RANGE%; 1 ; ROW(" + aCoeffAddr + ")+1 - ROW())",
562             // This puts the standard errors in the reverse order compared to that in LINEST output.
563             "=INDEX(%SERRORSX_REV_RANGE%; 1 ; ROW(" + aStErrAddr + ")+1 - ROW())",
564             // t-Statistic
565             "=%COEFFICIENTS_RANGE% / %SERRORSX_RANGE%",
566             // p-Value
567             "=TDIST(ABS(%TSTAT_RANGE%) ; %DoFRESID_ADDR% ; 2 )",
568             // Lower limit of confidence interval
569             "=%COEFFICIENTS_RANGE% - %SERRORSX_RANGE% * "
570             "TINV(1 - %CONFIDENCE_LEVEL_ADDR% ; %DoFRESID_ADDR%)",
571             // Upper limit of confidence interval
572             "=%COEFFICIENTS_RANGE% + %SERRORSX_RANGE% * "
573             "TINV(1 - %CONFIDENCE_LEVEL_ADDR% ; %DoFRESID_ADDR%)"
574         }
575     };
576 
577     // Cell getter lambda
578     std::function<CellValueGetter> aCellGetterFunc = [&aTable](size_t nRowIdx, size_t nColIdx) -> const OUString&
579     {
580         return aTable[nRowIdx][nColIdx];
581     };
582 
583     // Cell writer lambda
584     size_t nNumIndependentVars = mnNumIndependentVars;
585     std::function<CellWriter> aCellWriterFunc = [&rOutput, &rTemplate, nNumIndependentVars]
586         (const OUString& rContent, size_t nRowIdx, size_t /*nColIdx*/)
587     {
588         if (!rContent.isEmpty())
589         {
590             if (rContent.startsWith("="))
591             {
592                 rTemplate.setTemplate(rContent);
593                 if (nRowIdx == 0)
594                     rOutput.writeFormula(rTemplate.getTemplate());
595                 else
596                     rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, 1 + nNumIndependentVars);
597             }
598             else
599                 rOutput.writeString(rContent);
600         }
601     };
602 
603     WriteTable(aCellGetterFunc, nRowsInTable, nColsInTable, rOutput, aCellWriterFunc);
604 
605     // Go back to the second row and first column of the table to
606     // fill the names of variables + intercept
607     rOutput.push(0, -1);
608 
609     for (size_t nXvarIdx = 0; nXvarIdx <= mnNumIndependentVars; ++nXvarIdx)
610     {
611         rOutput.writeFormula(GetXVariableNameFormula(nXvarIdx, bTakeLogX));
612         rOutput.newLine();
613     }
614 
615 }
616 
617 // Re-write all observations in group-by column mode with predictions and residuals
WritePredictionsWithResiduals(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate,size_t nRegressionIndex)618 void ScRegressionDialog::WritePredictionsWithResiduals(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
619                                                        size_t nRegressionIndex)
620 {
621     bool bGroupedByColumn = mGroupedBy == BY_COLUMN;
622     rOutput.newLine();
623     rOutput.push();
624 
625     // Range of X variables with rows as observations and columns as variables.
626     ScRange aDataMatrixRange(rOutput.current(0, 1), rOutput.current(mnNumIndependentVars - 1, mnNumObservations));
627     rTemplate.autoReplaceRange("%XDATAMATRIX_RANGE%", aDataMatrixRange);
628 
629     // Write X variable names
630     for (size_t nXvarIdx = 1; nXvarIdx <= mnNumIndependentVars; ++nXvarIdx)
631     {
632         // Here we write the X variables without any transformation(LN)
633         rOutput.writeFormula(GetXVariableNameFormula(nXvarIdx, false));
634         rOutput.nextColumn();
635     }
636     rOutput.reset();
637 
638     // Write the X data matrix
639     rOutput.nextRow();
640     OUString aDataMatrixFormula = bGroupedByColumn ? OUString("=%VARIABLE1_RANGE%") : OUString("=TRANSPOSE(%VARIABLE1_RANGE%)");
641     rTemplate.setTemplate(aDataMatrixFormula);
642     rOutput.writeMatrixFormula(rTemplate.getTemplate(), mnNumIndependentVars, mnNumObservations);
643 
644     // Write predicted values
645     rOutput.push(mnNumIndependentVars, -1);
646     rOutput.writeString(ScResId(STR_LABEL_PREDICTEDY));
647     rOutput.nextRow();
648     rTemplate.setTemplate(constRegressionFormula[nRegressionIndex]);
649     rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
650     rTemplate.autoReplaceRange("%PREDICTEDY_RANGE%", ScRange(rOutput.current(), rOutput.current(0, mnNumObservations - 1)));
651 
652     // Write actual Y
653     rOutput.push(1, -1);
654     rOutput.writeFormula(GetYVariableNameFormula(false));
655     rOutput.nextRow();
656     OUString aYVectorFormula = bGroupedByColumn ? OUString("=%VARIABLE2_RANGE%") : OUString("=TRANSPOSE(%VARIABLE2_RANGE%)");
657     rTemplate.setTemplate(aYVectorFormula);
658     rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
659     rTemplate.autoReplaceRange("%ACTUALY_RANGE%", ScRange(rOutput.current(), rOutput.current(0, mnNumObservations - 1)));
660 
661     // Write residual
662     rOutput.push(1, -1);
663     rOutput.writeString(ScResId(STR_LABEL_RESIDUAL));
664     rOutput.nextRow();
665     rTemplate.setTemplate("=%ACTUALY_RANGE% - %PREDICTEDY_RANGE%");
666     rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
667 }
668 
669 // Generic table writer
WriteTable(const std::function<CellValueGetter> & rCellGetter,size_t nRowsInTable,size_t nColsInTable,AddressWalkerWriter & rOutput,const std::function<CellWriter> & rFunc)670 void ScRegressionDialog::WriteTable(const std::function<CellValueGetter>& rCellGetter,
671                                     size_t nRowsInTable, size_t nColsInTable,
672                                     AddressWalkerWriter& rOutput,
673                                     const std::function<CellWriter>& rFunc)
674 {
675     for (size_t nRowIdx = 0; nRowIdx < nRowsInTable; ++nRowIdx)
676     {
677         for (size_t nColIdx = 0; nColIdx < nColsInTable; ++nColIdx)
678         {
679             rFunc(rCellGetter(nRowIdx, nColIdx), nRowIdx, nColIdx);
680             rOutput.nextColumn();
681         }
682         rOutput.newLine();
683     }
684 }
685 
IMPL_LINK_NOARG(ScRegressionDialog,CheckBoxHdl,weld::Toggleable &,void)686 IMPL_LINK_NOARG(ScRegressionDialog, CheckBoxHdl, weld::Toggleable&, void)
687 {
688     ValidateDialogInput();
689 }
690 
IMPL_LINK_NOARG(ScRegressionDialog,NumericFieldHdl,weld::SpinButton &,void)691 IMPL_LINK_NOARG(ScRegressionDialog, NumericFieldHdl, weld::SpinButton&, void)
692 {
693     ValidateDialogInput();
694 }
695 
696 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
697