1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 */
10
11 #include <document.hxx>
12 #include <reffact.hxx>
13 #include <TableFillingAndNavigationTools.hxx>
14 #include <RegressionDialog.hxx>
15 #include <scresid.hxx>
16 #include <strings.hrc>
17
18 /*
19 Some regression basics
20 ----------------------
21
22 1. Linear regression fits using data, a linear function between the dependent variable and the independent variable(s).
23 The basic form of this function is :-
24
25 y = b + m_1*x_1 + m_2*x_2 + ... + m_k*x_k
26
27 where y is the dependent variable
28 x_1, x_2, ..., x_k are the k independent variables
29 b is the intercept
30 m_1, m_2, ..., m_k are the slopes corresponding to the variables x_1, x_2, ..., x_k respectively.
31
32
33 This equation for n observations can be compactly written using matrices as :-
34
35 y = X*A
36
37 where y is the n dimensional column vector containing dependent variable observations.
38 where X is matrix of shape n*(k+1) where a row looks like [ 1 x_1 x_2 ... x_k ]
39 A is the k+1 dimensional column vector [ b m_1 m_2 ... m_k ]
40
41 Calc formula LINEST(Y_array ; X_array) can be used to compute all entries in "A" along with many other statistics.
42
43
44 2. Logarithmic regression is basically used to find a linear function between the dependent variable and
45 the natural logarithm of the independent variable(s).
46 So the basic form of this functions is :-
47
48 y = b + m_1*ln(x_1) + m_2*ln(x_2) + ... + m_k*ln(x_k)
49
50 This can be again written in a compact matrix form for n observations.
51
52 y = ln(X)*A
53
54 where y is the n dimensional column vector containing dependent variable observations.
55 where X is matrix of shape n*(k+1) where a row looks like [ e x_1 x_2 ... x_k ]
56 A is the k+1 dimensional column vector [ b m_1 m_2 ... m_k ]
57
58 To estimate A, we use the formula =LINEST(Y_array ; LN(X_array))
59
60
61 3. Power regression is used to fit the following model :-
62
63 y = b * (x_1 ^ m_1) * (x_2 ^ m_2) * ... * (x_k ^ m_k)
64
65 To reduce this to a linear function(so that we can still use LINEST()), we take natural logarithm on both sides
66
67 ln(y) = c + m_1*ln(x_1) + m_2*ln(x_2) + ... + m_k*ln(x_k) ; where c = ln(b)
68
69
70 This again can be written compactly in matrix form as :-
71
72 ln(y) = ln(X)*A
73
74 where y is the n dimensional column vector containing dependent variable observations.
75 where X is matrix of shape n*(k+1) where a row looks like [ e x_1 x_2 ... x_k ]
76 A is the k+1 dimensional column vector [ c m_1 m_2 ... m_k ]
77
78 To estimate A, we use the formula =LINEST(LN(Y_array) ; LN(X_array))
79
80 Once we get A, to get back y from x's we use the formula :-
81
82 y = exp( ln(X)*A )
83
84
85
86 Some references for computing confidence interval for the regression coefficients :-
87
88 [1] https://en.wikipedia.org/wiki/Student%27s_t-test#Slope_of_a_regression_line
89 [2] https://en.wikipedia.org/wiki/Simple_linear_regression#Normality_assumption
90 [3] https://onlinecourses.science.psu.edu/stat414/node/280
91
92 */
93
94 namespace
95 {
96 enum class ScRegType {
97 LINEAR,
98 LOGARITHMIC,
99 POWER
100 };
101
102 const char* constRegressionModel[] =
103 {
104 STR_LABEL_LINEAR,
105 STR_LABEL_LOGARITHMIC,
106 STR_LABEL_POWER
107 };
108
109 OUString constTemplateLINEST[] =
110 {
111 "=LINEST(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE% ; %CALC_INTERCEPT% ; TRUE)",
112 "=LINEST(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%) ; %CALC_INTERCEPT% ; TRUE)",
113 "=LINEST(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%) ; %CALC_INTERCEPT% ; TRUE)"
114 };
115
116 OUString constRegressionFormula[] =
117 {
118 "=MMULT(%XDATAMATRIX_RANGE% ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%",
119 "=MMULT(LN(%XDATAMATRIX_RANGE%) ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%",
120 "=EXP(MMULT(LN(%XDATAMATRIX_RANGE%) ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%)"
121 };
122
123 } // end anonymous namespace
124
lcl_GetNumRowsColsInRange(const ScRange & rRange,bool bRows)125 static size_t lcl_GetNumRowsColsInRange(const ScRange& rRange, bool bRows)
126 {
127 if (bRows)
128 return rRange.aEnd.Row() - rRange.aStart.Row() + 1;
129
130 return rRange.aEnd.Col() - rRange.aStart.Col() + 1;
131 }
132
ScRegressionDialog(SfxBindings * pSfxBindings,SfxChildWindow * pChildWindow,weld::Window * pParent,ScViewData * pViewData)133 ScRegressionDialog::ScRegressionDialog(
134 SfxBindings* pSfxBindings, SfxChildWindow* pChildWindow,
135 weld::Window* pParent, ScViewData* pViewData )
136 : ScStatisticsTwoVariableDialog(
137 pSfxBindings, pChildWindow, pParent, pViewData,
138 "modules/scalc/ui/regressiondialog.ui", "RegressionDialog")
139 , mbUnivariate(true)
140 , mnNumIndependentVars(1)
141 , mnNumObservations(0)
142 , mbUse3DAddresses(false)
143 , mbCalcIntercept(true)
144 , mxWithLabelsCheckBox(m_xBuilder->weld_check_button("withlabels-check"))
145 , mxLinearRadioButton(m_xBuilder->weld_radio_button("linear-radio"))
146 , mxLogarithmicRadioButton(m_xBuilder->weld_radio_button("logarithmic-radio"))
147 , mxPowerRadioButton(m_xBuilder->weld_radio_button("power-radio"))
148 , mxErrorMessage(m_xBuilder->weld_label("error-message"))
149 , mxConfidenceLevelField(m_xBuilder->weld_spin_button("confidencelevel-spin"))
150 , mxCalcResidualsCheckBox(m_xBuilder->weld_check_button("calcresiduals-check"))
151 , mxNoInterceptCheckBox(m_xBuilder->weld_check_button("nointercept-check"))
152 {
153 mxWithLabelsCheckBox->connect_toggled(LINK(this, ScRegressionDialog, CheckBoxHdl));
154 mxConfidenceLevelField->connect_value_changed(LINK(this, ScRegressionDialog, NumericFieldHdl));
155 }
156
~ScRegressionDialog()157 ScRegressionDialog::~ScRegressionDialog()
158 {
159 }
160
Close()161 void ScRegressionDialog::Close()
162 {
163 DoClose(ScRegressionDialogWrapper::GetChildWindowId());
164 }
165
GetUndoNameId()166 const char* ScRegressionDialog::GetUndoNameId()
167 {
168 return STR_REGRESSION_UNDO_NAME;
169 }
170
ApplyOutput(ScDocShell * pDocShell)171 ScRange ScRegressionDialog::ApplyOutput(ScDocShell* pDocShell)
172 {
173 AddressWalkerWriter aOutput(mOutputAddress, pDocShell, mDocument,
174 formula::FormulaGrammar::mergeToGrammar( formula::FormulaGrammar::GRAM_ENGLISH, mAddressDetails.eConv));
175 FormulaTemplate aTemplate(mDocument);
176 aTemplate.autoReplaceUses3D(mbUse3DAddresses);
177 mbCalcIntercept = !mxNoInterceptCheckBox->get_active();
178
179 // max col of our output should account for
180 // 1. constant term column,
181 // 2. mnNumIndependentVars columns
182 // 3. Actual Y column
183 // 4. Predicted Y column
184 // 5. Residual Column
185 SCCOL nOutputMaxCol = mOutputAddress.Col() + mnNumIndependentVars + 3;
186
187 ScRange aXDataRange(GetDataRange(mVariable1Range));
188 ScRange aYDataRange(GetDataRange(mVariable2Range));
189
190 aTemplate.autoReplaceRange("%VARIABLE1_RANGE%", aXDataRange);
191 aTemplate.autoReplaceRange("%VARIABLE2_RANGE%", aYDataRange);
192 size_t nRegressionIndex = GetRegressionTypeIndex();
193 ScRegType eRegType = static_cast<ScRegType>(nRegressionIndex);
194 bool bTakeLogX = eRegType == ScRegType::LOGARITHMIC || eRegType == ScRegType::POWER;
195
196 WriteRawRegressionResults(aOutput, aTemplate, nRegressionIndex);
197 WriteRegressionStatistics(aOutput, aTemplate);
198 WriteRegressionANOVAResults(aOutput, aTemplate);
199 WriteRegressionEstimatesWithCI(aOutput, aTemplate, bTakeLogX);
200 if (mxCalcResidualsCheckBox->get_active())
201 WritePredictionsWithResiduals(aOutput, aTemplate, nRegressionIndex);
202
203 ScAddress aMaxAddress(aOutput.mMaximumAddress);
204 aMaxAddress.SetCol(std::max(aMaxAddress.Col(), nOutputMaxCol));
205 return ScRange(aOutput.mMinimumAddress, aMaxAddress);
206 }
207
InputRangesValid()208 bool ScRegressionDialog::InputRangesValid()
209 {
210 if (!mVariable1Range.IsValid())
211 {
212 mxErrorMessage->set_label(ScResId(STR_MESSAGE_XINVALID_RANGE));
213 return false;
214 }
215
216 if (!mVariable2Range.IsValid())
217 {
218 mxErrorMessage->set_label(ScResId(STR_MESSAGE_YINVALID_RANGE));
219 return false;
220 }
221
222 if (!mOutputAddress.IsValid())
223 {
224 mxErrorMessage->set_label(ScResId(STR_MESSAGE_INVALID_OUTPUT_ADDR));
225 return false;
226 }
227
228 {
229 double fConfidenceLevel = mxConfidenceLevelField->get_value();
230 if ( fConfidenceLevel <= 0.0 || fConfidenceLevel >= 100.0 )
231 {
232 mxErrorMessage->set_label(ScResId(STR_MESSAGE_INVALID_CONFIDENCE_LEVEL));
233 return false;
234 }
235 }
236
237 mVariable1Range.PutInOrder();
238 mVariable2Range.PutInOrder();
239
240 bool bGroupedByColumn = mGroupedBy == BY_COLUMN;
241
242 bool bYHasSingleDim = (
243 (bGroupedByColumn &&
244 mVariable2Range.aStart.Col() == mVariable2Range.aEnd.Col()) ||
245 (!bGroupedByColumn &&
246 mVariable2Range.aStart.Row() == mVariable2Range.aEnd.Row()));
247
248 if (!bYHasSingleDim)
249 {
250 if (bGroupedByColumn)
251 mxErrorMessage->set_label(ScResId(STR_MESSAGE_YVARIABLE_MULTI_COLUMN));
252 else
253 mxErrorMessage->set_label(ScResId(STR_MESSAGE_YVARIABLE_MULTI_ROW));
254 return false;
255 }
256
257 bool bWithLabels = mxWithLabelsCheckBox->get_active();
258
259 size_t nYObs = lcl_GetNumRowsColsInRange(mVariable2Range, bGroupedByColumn);
260 size_t nNumXVars = lcl_GetNumRowsColsInRange(mVariable1Range, !bGroupedByColumn);
261 mbUnivariate = nNumXVars == 1;
262 // Observation count mismatch check
263 if (lcl_GetNumRowsColsInRange(mVariable1Range, bGroupedByColumn) != nYObs)
264 {
265 if (mbUnivariate)
266 mxErrorMessage->set_label(ScResId(STR_MESSAGE_UNIVARIATE_NUMOBS_MISMATCH));
267 else
268 mxErrorMessage->set_label(ScResId(STR_MESSAGE_MULTIVARIATE_NUMOBS_MISMATCH));
269 return false;
270 }
271
272 mnNumIndependentVars = nNumXVars;
273 mnNumObservations = bWithLabels ? nYObs - 1 : nYObs;
274
275 mbUse3DAddresses = mVariable1Range.aStart.Tab() != mOutputAddress.Tab() ||
276 mVariable2Range.aStart.Tab() != mOutputAddress.Tab();
277
278 mxErrorMessage->set_label("");
279
280 return true;
281 }
282
GetRegressionTypeIndex() const283 size_t ScRegressionDialog::GetRegressionTypeIndex() const
284 {
285 if (mxLinearRadioButton->get_active())
286 return 0;
287 if (mxLogarithmicRadioButton->get_active())
288 return 1;
289 return 2;
290 }
291
GetDataRange(const ScRange & rRange)292 ScRange ScRegressionDialog::GetDataRange(const ScRange& rRange)
293 {
294 if (!mxWithLabelsCheckBox->get_active())
295 return rRange;
296
297 ScRange aDataRange(rRange);
298 if (mGroupedBy == BY_COLUMN)
299 aDataRange.aStart.IncRow(1);
300 else
301 aDataRange.aStart.IncCol(1);
302
303 return aDataRange;
304 }
305
GetVariableNameFormula(bool bXVar,size_t nIndex,bool bWithLog)306 OUString ScRegressionDialog::GetVariableNameFormula(bool bXVar, size_t nIndex, bool bWithLog)
307 {
308 if (bXVar && nIndex == 0)
309 return "=\"" + ScResId(STR_LABEL_INTERCEPT) + "\"";
310
311 if (mxWithLabelsCheckBox->get_active())
312 {
313 ScAddress aAddr(bXVar ? mVariable1Range.aStart : mVariable2Range.aStart);
314 if (mGroupedBy == BY_COLUMN)
315 aAddr.IncCol(nIndex - 1);
316 else
317 aAddr.IncRow(nIndex - 1);
318
319 ScRefFlags eAddrFlag = mbUse3DAddresses ? ScRefFlags::ADDR_ABS_3D : ScRefFlags::ADDR_ABS;
320 return bWithLog ? OUString("=CONCAT(\"LN(\";" +
321 aAddr.Format(eAddrFlag, mDocument, mDocument->GetAddressConvention()) + ";\")\")") :
322 OUString("=" + aAddr.Format(eAddrFlag, mDocument, mDocument->GetAddressConvention()));
323 }
324
325 OUString aDefaultVarName;
326
327 if (bXVar)
328 aDefaultVarName = "X" + OUString::number(nIndex);
329 else
330 aDefaultVarName = "Y";
331
332 return bWithLog ? OUString("=\"LN(" + aDefaultVarName + ")\"") :
333 OUString("=\"" + aDefaultVarName + "\"");
334 }
335
GetXVariableNameFormula(size_t nIndex,bool bWithLog)336 OUString ScRegressionDialog::GetXVariableNameFormula(size_t nIndex, bool bWithLog)
337 {
338 assert(nIndex <= mnNumIndependentVars);
339 return GetVariableNameFormula(true, nIndex, bWithLog);
340 }
341
GetYVariableNameFormula(bool bWithLog)342 OUString ScRegressionDialog::GetYVariableNameFormula(bool bWithLog)
343 {
344 return GetVariableNameFormula(false, 1, bWithLog);
345 }
346
WriteRawRegressionResults(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate,size_t nRegressionIndex)347 void ScRegressionDialog::WriteRawRegressionResults(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
348 size_t nRegressionIndex)
349 {
350 rOutput.writeBoldString(ScResId(STR_REGRESSION));
351 rOutput.newLine();
352 // REGRESSION MODEL
353 rOutput.writeString(ScResId(STR_LABEL_REGRESSION_MODEL));
354 rOutput.nextColumn();
355 rOutput.writeString(ScResId(constRegressionModel[nRegressionIndex]));
356 rOutput.newLine();
357 rOutput.newLine();
358
359 rOutput.writeString(ScResId(STR_LINEST_RAW_OUTPUT_TITLE));
360 rOutput.newLine();
361 rOutput.push();
362
363 rTemplate.setTemplate(constTemplateLINEST[nRegressionIndex].
364 replaceFirst("%CALC_INTERCEPT%",
365 mbCalcIntercept ? OUStringLiteral("TRUE") : OUStringLiteral("FALSE")));
366 rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1 + mnNumIndependentVars, 5);
367 // Add LINEST result components to template
368 // 1. Add ranges for coefficients and standard errors for indep. vars and the intercept.
369 // Note that these two are in the reverse order(m_n, m_n-1, ..., m_1, b) w.r.t what we expect.
370 rTemplate.autoReplaceRange("%COEFFICIENTS_REV_RANGE%", ScRange(rOutput.current(), rOutput.current(mnNumIndependentVars)));
371 rTemplate.autoReplaceRange("%SERRORSX_REV_RANGE%", ScRange(rOutput.current(0, 1), rOutput.current(mnNumIndependentVars, 1)));
372
373 // 2. Add R-squared and standard error for y estimate.
374 rTemplate.autoReplaceAddress("%RSQUARED_ADDR%", rOutput.current(0, 2));
375 rTemplate.autoReplaceAddress("%SERRORY_ADDR%", rOutput.current(1, 2));
376
377 // 3. Add F statistic and degrees of freedom
378 rTemplate.autoReplaceAddress("%FSTATISTIC_ADDR%", rOutput.current(0, 3));
379 rTemplate.autoReplaceAddress("%DoFRESID_ADDR%", rOutput.current(1, 3));
380
381 // 4. Add regression sum of squares and residual sum of squares
382 rTemplate.autoReplaceAddress("%SSREG_ADDR%", rOutput.current(0, 4));
383 rTemplate.autoReplaceAddress("%SSRESID_ADDR%", rOutput.current(1, 4));
384
385 rOutput.push(0, 4);
386 rOutput.newLine();
387 }
388
WriteRegressionStatistics(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate)389 void ScRegressionDialog::WriteRegressionStatistics(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate)
390 {
391 rOutput.newLine();
392 rOutput.writeString(ScResId(STR_LABEL_REGRESSION_STATISTICS));
393 rOutput.newLine();
394
395 const char* aMeasureNames[] =
396 {
397 STR_LABEL_RSQUARED,
398 STRID_CALC_STD_ERROR,
399 STR_LABEL_XVARIABLES_COUNT,
400 STR_OBSERVATIONS_LABEL,
401 STR_LABEL_ADJUSTED_RSQUARED
402 };
403
404 OUString aMeasureFormulas[] =
405 {
406 "=%RSQUARED_ADDR%",
407 "=%SERRORY_ADDR%",
408 "=" + OUString::number(mnNumIndependentVars),
409 "=" + OUString::number(mnNumObservations),
410 "=1 - (1 - %RSQUARED_ADDR%)*(%NUMOBS_ADDR% - 1)/(%NUMOBS_ADDR% - %NUMXVARS_ADDR%" +
411 (mbCalcIntercept ? OUStringLiteral(" - 1)") : OUStringLiteral(")"))
412 };
413
414 rTemplate.autoReplaceAddress("%NUMXVARS_ADDR%", rOutput.current(1, 2));
415 rTemplate.autoReplaceAddress("%NUMOBS_ADDR%", rOutput.current(1, 3));
416
417 for (size_t nIdx = 0; nIdx < SAL_N_ELEMENTS(aMeasureNames); ++nIdx)
418 {
419 rOutput.writeString(ScResId(aMeasureNames[nIdx]));
420 rOutput.nextColumn();
421 rTemplate.setTemplate(aMeasureFormulas[nIdx]);
422 rOutput.writeFormula(rTemplate.getTemplate());
423 rOutput.newLine();
424 }
425 }
426
WriteRegressionANOVAResults(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate)427 void ScRegressionDialog::WriteRegressionANOVAResults(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate)
428 {
429 rOutput.newLine();
430 rOutput.writeString(ScResId(STR_LABEL_ANOVA));
431 rOutput.newLine();
432
433 const size_t nColsInTable = 6;
434 const size_t nRowsInTable = 4;
435 OUString aTable[nRowsInTable][nColsInTable] =
436 {
437 {
438 "",
439 ScResId(STR_ANOVA_LABEL_DF),
440 ScResId(STR_ANOVA_LABEL_SS),
441 ScResId(STR_ANOVA_LABEL_MS),
442 ScResId(STR_ANOVA_LABEL_F),
443 ScResId(STR_ANOVA_LABEL_SIGNIFICANCE_F)
444 },
445 {
446 ScResId(STR_REGRESSION),
447 "=%NUMXVARS_ADDR%",
448 "=%SSREG_ADDR%",
449 "=%SSREG_ADDR% / %DoFREG_ADDR%",
450 "=%FSTATISTIC_ADDR%",
451 "=FDIST(%FSTATISTIC_ADDR% ; %DoFREG_ADDR% ; %DoFRESID_ADDR%)"
452 },
453 {
454 ScResId(STR_LABEL_RESIDUAL),
455 "=%DoFRESID_ADDR%",
456 "=%SSRESID_ADDR%",
457 "=%SSRESID_ADDR% / %DoFRESID_ADDR%",
458 "",
459 ""
460 },
461 {
462 ScResId(STR_ANOVA_LABEL_TOTAL),
463 "=%DoFREG_ADDR% + %DoFRESID_ADDR%",
464 "=%SSREG_ADDR% + %SSRESID_ADDR%",
465 "",
466 "",
467 ""
468 }
469 };
470
471 rTemplate.autoReplaceAddress("%DoFREG_ADDR%", rOutput.current(1, 1));
472
473 // Cell getter lambda
474 std::function<CellValueGetter> aCellGetterFunc = [&aTable](size_t nRowIdx, size_t nColIdx) -> const OUString&
475 {
476 return aTable[nRowIdx][nColIdx];
477 };
478
479 // Cell writer lambda
480 std::function<CellWriter> aCellWriterFunc = [&rOutput, &rTemplate]
481 (const OUString& rContent, size_t /*nRowIdx*/, size_t /*nColIdx*/)
482 {
483 if (!rContent.isEmpty())
484 {
485 if (rContent.startsWith("="))
486 {
487 rTemplate.setTemplate(rContent);
488 rOutput.writeFormula(rTemplate.getTemplate());
489 }
490 else
491 rOutput.writeString(rContent);
492 }
493 };
494
495 WriteTable(aCellGetterFunc, nRowsInTable, nColsInTable, rOutput, aCellWriterFunc);
496
497 // User given confidence level
498 rOutput.newLine();
499 rOutput.writeString(ScResId(STR_LABEL_CONFIDENCE_LEVEL));
500 rOutput.nextColumn();
501 rOutput.writeValue(mxConfidenceLevelField->get_value() / 100.0);
502 rTemplate.autoReplaceAddress("%CONFIDENCE_LEVEL_ADDR%", rOutput.current());
503 rOutput.newLine();
504 }
505
506 // Write slopes, intercept, their standard errors, t-statistics, p-value, confidence intervals
WriteRegressionEstimatesWithCI(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate,bool bTakeLogX)507 void ScRegressionDialog::WriteRegressionEstimatesWithCI(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
508 bool bTakeLogX)
509 {
510 rOutput.newLine();
511 SCROW nLastRow = rOutput.current(0, 1 + mnNumIndependentVars).Row();
512
513 // Coefficients & Std.Errors ranges (column vectors) in this table (yet to populate).
514 rTemplate.autoReplaceRange("%COEFFICIENTS_RANGE%",
515 ScRange(rOutput.current(1, 1),
516 rOutput.current(1, 1 + mnNumIndependentVars)));
517 rTemplate.autoReplaceRange("%SLOPES_RANGE%", // Excludes the intercept
518 ScRange(rOutput.current(1, 2),
519 rOutput.current(1, 1 + mnNumIndependentVars)));
520 rTemplate.autoReplaceAddress("%INTERCEPT_ADDR%", rOutput.current(1, 1));
521 rTemplate.autoReplaceRange("%SERRORSX_RANGE%",
522 ScRange(rOutput.current(2, 1),
523 rOutput.current(2, 1 + mnNumIndependentVars)));
524 // t-Statistics range in this table (yet to populate)
525 rTemplate.autoReplaceRange("%TSTAT_RANGE%",
526 ScRange(rOutput.current(3, 1),
527 rOutput.current(3, 1 + mnNumIndependentVars)));
528
529 const size_t nColsInTable = 7;
530 const size_t nRowsInTable = 2;
531 OUString aTable[nRowsInTable][nColsInTable] =
532 {
533 {
534 "",
535 ScResId(STR_LABEL_COEFFICIENTS),
536 ScResId(STRID_CALC_STD_ERROR),
537 ScResId(STR_LABEL_TSTATISTIC),
538 ScResId(STR_P_VALUE_LABEL),
539
540 "=CONCAT(\"" + ScResId(STR_LABEL_LOWER) +
541 " \" ; INT(%CONFIDENCE_LEVEL_ADDR% * 100) ; \"%\")",
542
543 "=CONCAT(\"" + ScResId(STR_LABEL_UPPER) +
544 " \" ; INT(%CONFIDENCE_LEVEL_ADDR% * 100) ; \"%\")",
545 },
546
547 // Following are matrix formulas of size numcols = 1, numrows = (mnNumIndependentVars + 1)
548 {
549 "",
550 // This puts the coefficients in the reverse order compared to that in LINEST output.
551 "=INDEX(%COEFFICIENTS_REV_RANGE%; 1 ; " + OUString::number(nLastRow + 2) + " - ROW())",
552 // This puts the standard errors in the reverse order compared to that in LINEST output.
553 "=INDEX(%SERRORSX_REV_RANGE%; 1 ; " + OUString::number(nLastRow + 2) + " - ROW())",
554 // t-Statistic
555 "=%COEFFICIENTS_RANGE% / %SERRORSX_RANGE%",
556 // p-Value
557 "=TDIST(ABS(%TSTAT_RANGE%) ; %DoFRESID_ADDR% ; 2 )",
558 // Lower limit of confidence interval
559 "=%COEFFICIENTS_RANGE% - %SERRORSX_RANGE% * "
560 "TINV(1 - %CONFIDENCE_LEVEL_ADDR% ; %DoFRESID_ADDR%)",
561 // Upper limit of confidence interval
562 "=%COEFFICIENTS_RANGE% + %SERRORSX_RANGE% * "
563 "TINV(1 - %CONFIDENCE_LEVEL_ADDR% ; %DoFRESID_ADDR%)"
564 }
565 };
566
567 // Cell getter lambda
568 std::function<CellValueGetter> aCellGetterFunc = [&aTable](size_t nRowIdx, size_t nColIdx) -> const OUString&
569 {
570 return aTable[nRowIdx][nColIdx];
571 };
572
573 // Cell writer lambda
574 size_t nNumIndependentVars = mnNumIndependentVars;
575 std::function<CellWriter> aCellWriterFunc = [&rOutput, &rTemplate, nNumIndependentVars]
576 (const OUString& rContent, size_t nRowIdx, size_t /*nColIdx*/)
577 {
578 if (!rContent.isEmpty())
579 {
580 if (rContent.startsWith("="))
581 {
582 rTemplate.setTemplate(rContent);
583 if (nRowIdx == 0)
584 rOutput.writeFormula(rTemplate.getTemplate());
585 else
586 rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, 1 + nNumIndependentVars);
587 }
588 else
589 rOutput.writeString(rContent);
590 }
591 };
592
593 WriteTable(aCellGetterFunc, nRowsInTable, nColsInTable, rOutput, aCellWriterFunc);
594
595 // Go back to the second row and first column of the table to
596 // fill the names of variables + intercept
597 rOutput.push(0, -1);
598
599 for (size_t nXvarIdx = 0; nXvarIdx <= mnNumIndependentVars; ++nXvarIdx)
600 {
601 rOutput.writeFormula(GetXVariableNameFormula(nXvarIdx, bTakeLogX));
602 rOutput.newLine();
603 }
604
605 }
606
607 // Re-write all observations in group-by column mode with predictions and residuals
WritePredictionsWithResiduals(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate,size_t nRegressionIndex)608 void ScRegressionDialog::WritePredictionsWithResiduals(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
609 size_t nRegressionIndex)
610 {
611 bool bGroupedByColumn = mGroupedBy == BY_COLUMN;
612 rOutput.newLine();
613 rOutput.push();
614
615 // Range of X variables with rows as observations and columns as variables.
616 ScRange aDataMatrixRange(rOutput.current(0, 1), rOutput.current(mnNumIndependentVars - 1, mnNumObservations));
617 rTemplate.autoReplaceRange("%XDATAMATRIX_RANGE%", aDataMatrixRange);
618
619 // Write X variable names
620 for (size_t nXvarIdx = 1; nXvarIdx <= mnNumIndependentVars; ++nXvarIdx)
621 {
622 // Here we write the X variables without any transformation(LN)
623 rOutput.writeFormula(GetXVariableNameFormula(nXvarIdx, false));
624 rOutput.nextColumn();
625 }
626 rOutput.reset();
627
628 // Write the X data matrix
629 rOutput.nextRow();
630 OUString aDataMatrixFormula = bGroupedByColumn ? OUString("=%VARIABLE1_RANGE%") : OUString("=TRANSPOSE(%VARIABLE1_RANGE%)");
631 rTemplate.setTemplate(aDataMatrixFormula);
632 rOutput.writeMatrixFormula(rTemplate.getTemplate(), mnNumIndependentVars, mnNumObservations);
633
634 // Write predicted values
635 rOutput.push(mnNumIndependentVars, -1);
636 rOutput.writeString(ScResId(STR_LABEL_PREDICTEDY));
637 rOutput.nextRow();
638 rTemplate.setTemplate(constRegressionFormula[nRegressionIndex]);
639 rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
640 rTemplate.autoReplaceRange("%PREDICTEDY_RANGE%", ScRange(rOutput.current(), rOutput.current(0, mnNumObservations - 1)));
641
642 // Write actual Y
643 rOutput.push(1, -1);
644 rOutput.writeFormula(GetYVariableNameFormula(false));
645 rOutput.nextRow();
646 OUString aYVectorFormula = bGroupedByColumn ? OUString("=%VARIABLE2_RANGE%") : OUString("=TRANSPOSE(%VARIABLE2_RANGE%)");
647 rTemplate.setTemplate(aYVectorFormula);
648 rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
649 rTemplate.autoReplaceRange("%ACTUALY_RANGE%", ScRange(rOutput.current(), rOutput.current(0, mnNumObservations - 1)));
650
651 // Write residual
652 rOutput.push(1, -1);
653 rOutput.writeString(ScResId(STR_LABEL_RESIDUAL));
654 rOutput.nextRow();
655 rTemplate.setTemplate("=%ACTUALY_RANGE% - %PREDICTEDY_RANGE%");
656 rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
657 }
658
659 // Generic table writer
WriteTable(const std::function<CellValueGetter> & rCellGetter,size_t nRowsInTable,size_t nColsInTable,AddressWalkerWriter & rOutput,const std::function<CellWriter> & rFunc)660 void ScRegressionDialog::WriteTable(const std::function<CellValueGetter>& rCellGetter,
661 size_t nRowsInTable, size_t nColsInTable,
662 AddressWalkerWriter& rOutput,
663 const std::function<CellWriter>& rFunc)
664 {
665 for (size_t nRowIdx = 0; nRowIdx < nRowsInTable; ++nRowIdx)
666 {
667 for (size_t nColIdx = 0; nColIdx < nColsInTable; ++nColIdx)
668 {
669 rFunc(rCellGetter(nRowIdx, nColIdx), nRowIdx, nColIdx);
670 rOutput.nextColumn();
671 }
672 rOutput.newLine();
673 }
674 }
675
IMPL_LINK_NOARG(ScRegressionDialog,CheckBoxHdl,weld::ToggleButton &,void)676 IMPL_LINK_NOARG(ScRegressionDialog, CheckBoxHdl, weld::ToggleButton&, void)
677 {
678 ValidateDialogInput();
679 }
680
IMPL_LINK_NOARG(ScRegressionDialog,NumericFieldHdl,weld::SpinButton &,void)681 IMPL_LINK_NOARG(ScRegressionDialog, NumericFieldHdl, weld::SpinButton&, void)
682 {
683 ValidateDialogInput();
684 }
685
686 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
687