1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 */
10
11 #include <sal/config.h>
12
13 #include <string_view>
14
15 #include <document.hxx>
16 #include <reffact.hxx>
17 #include <TableFillingAndNavigationTools.hxx>
18 #include <RegressionDialog.hxx>
19 #include <scresid.hxx>
20 #include <strings.hrc>
21
22 /*
23 Some regression basics
24 ----------------------
25
26 1. Linear regression fits using data, a linear function between the dependent variable and the independent variable(s).
27 The basic form of this function is :-
28
29 y = b + m_1*x_1 + m_2*x_2 + ... + m_k*x_k
30
31 where y is the dependent variable
32 x_1, x_2, ..., x_k are the k independent variables
33 b is the intercept
34 m_1, m_2, ..., m_k are the slopes corresponding to the variables x_1, x_2, ..., x_k respectively.
35
36
37 This equation for n observations can be compactly written using matrices as :-
38
39 y = X*A
40
41 where y is the n dimensional column vector containing dependent variable observations.
42 where X is matrix of shape n*(k+1) where a row looks like [ 1 x_1 x_2 ... x_k ]
43 A is the k+1 dimensional column vector [ b m_1 m_2 ... m_k ]
44
45 Calc formula LINEST(Y_array ; X_array) can be used to compute all entries in "A" along with many other statistics.
46
47
48 2. Logarithmic regression is basically used to find a linear function between the dependent variable and
49 the natural logarithm of the independent variable(s).
50 So the basic form of this functions is :-
51
52 y = b + m_1*ln(x_1) + m_2*ln(x_2) + ... + m_k*ln(x_k)
53
54 This can be again written in a compact matrix form for n observations.
55
56 y = ln(X)*A
57
58 where y is the n dimensional column vector containing dependent variable observations.
59 where X is matrix of shape n*(k+1) where a row looks like [ e x_1 x_2 ... x_k ]
60 A is the k+1 dimensional column vector [ b m_1 m_2 ... m_k ]
61
62 To estimate A, we use the formula =LINEST(Y_array ; LN(X_array))
63
64
65 3. Power regression is used to fit the following model :-
66
67 y = b * (x_1 ^ m_1) * (x_2 ^ m_2) * ... * (x_k ^ m_k)
68
69 To reduce this to a linear function(so that we can still use LINEST()), we take natural logarithm on both sides
70
71 ln(y) = c + m_1*ln(x_1) + m_2*ln(x_2) + ... + m_k*ln(x_k) ; where c = ln(b)
72
73
74 This again can be written compactly in matrix form as :-
75
76 ln(y) = ln(X)*A
77
78 where y is the n dimensional column vector containing dependent variable observations.
79 where X is matrix of shape n*(k+1) where a row looks like [ e x_1 x_2 ... x_k ]
80 A is the k+1 dimensional column vector [ c m_1 m_2 ... m_k ]
81
82 To estimate A, we use the formula =LINEST(LN(Y_array) ; LN(X_array))
83
84 Once we get A, to get back y from x's we use the formula :-
85
86 y = exp( ln(X)*A )
87
88
89
90 Some references for computing confidence interval for the regression coefficients :-
91
92 [1] https://en.wikipedia.org/wiki/Student%27s_t-test#Slope_of_a_regression_line
93 [2] https://en.wikipedia.org/wiki/Simple_linear_regression#Normality_assumption
94 [3] https://onlinecourses.science.psu.edu/stat414/node/280
95
96 */
97
98 namespace
99 {
100 enum class ScRegType {
101 LINEAR,
102 LOGARITHMIC,
103 POWER
104 };
105
106 const char* constRegressionModel[] =
107 {
108 STR_LABEL_LINEAR,
109 STR_LABEL_LOGARITHMIC,
110 STR_LABEL_POWER
111 };
112
113 OUString constTemplateLINEST[] =
114 {
115 "=LINEST(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE% ; %CALC_INTERCEPT% ; TRUE)",
116 "=LINEST(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%) ; %CALC_INTERCEPT% ; TRUE)",
117 "=LINEST(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%) ; %CALC_INTERCEPT% ; TRUE)"
118 };
119
120 OUString constRegressionFormula[] =
121 {
122 "=MMULT(%XDATAMATRIX_RANGE% ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%",
123 "=MMULT(LN(%XDATAMATRIX_RANGE%) ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%",
124 "=EXP(MMULT(LN(%XDATAMATRIX_RANGE%) ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%)"
125 };
126
127 } // end anonymous namespace
128
lcl_GetNumRowsColsInRange(const ScRange & rRange,bool bRows)129 static size_t lcl_GetNumRowsColsInRange(const ScRange& rRange, bool bRows)
130 {
131 if (bRows)
132 return rRange.aEnd.Row() - rRange.aStart.Row() + 1;
133
134 return rRange.aEnd.Col() - rRange.aStart.Col() + 1;
135 }
136
ScRegressionDialog(SfxBindings * pSfxBindings,SfxChildWindow * pChildWindow,weld::Window * pParent,ScViewData & rViewData)137 ScRegressionDialog::ScRegressionDialog(
138 SfxBindings* pSfxBindings, SfxChildWindow* pChildWindow,
139 weld::Window* pParent, ScViewData& rViewData )
140 : ScStatisticsTwoVariableDialog(
141 pSfxBindings, pChildWindow, pParent, rViewData,
142 "modules/scalc/ui/regressiondialog.ui", "RegressionDialog")
143 , mbUnivariate(true)
144 , mnNumIndependentVars(1)
145 , mnNumObservations(0)
146 , mbUse3DAddresses(false)
147 , mbCalcIntercept(true)
148 , mxWithLabelsCheckBox(m_xBuilder->weld_check_button("withlabels-check"))
149 , mxLinearRadioButton(m_xBuilder->weld_radio_button("linear-radio"))
150 , mxLogarithmicRadioButton(m_xBuilder->weld_radio_button("logarithmic-radio"))
151 , mxPowerRadioButton(m_xBuilder->weld_radio_button("power-radio"))
152 , mxErrorMessage(m_xBuilder->weld_label("error-message"))
153 , mxConfidenceLevelField(m_xBuilder->weld_spin_button("confidencelevel-spin"))
154 , mxCalcResidualsCheckBox(m_xBuilder->weld_check_button("calcresiduals-check"))
155 , mxNoInterceptCheckBox(m_xBuilder->weld_check_button("nointercept-check"))
156 {
157 mxWithLabelsCheckBox->connect_toggled(LINK(this, ScRegressionDialog, CheckBoxHdl));
158 mxConfidenceLevelField->connect_value_changed(LINK(this, ScRegressionDialog, NumericFieldHdl));
159 }
160
~ScRegressionDialog()161 ScRegressionDialog::~ScRegressionDialog()
162 {
163 }
164
Close()165 void ScRegressionDialog::Close()
166 {
167 DoClose(ScRegressionDialogWrapper::GetChildWindowId());
168 }
169
GetUndoNameId()170 const char* ScRegressionDialog::GetUndoNameId()
171 {
172 return STR_REGRESSION_UNDO_NAME;
173 }
174
ApplyOutput(ScDocShell * pDocShell)175 ScRange ScRegressionDialog::ApplyOutput(ScDocShell* pDocShell)
176 {
177 AddressWalkerWriter aOutput(mOutputAddress, pDocShell, mDocument,
178 formula::FormulaGrammar::mergeToGrammar( formula::FormulaGrammar::GRAM_ENGLISH, mAddressDetails.eConv));
179 FormulaTemplate aTemplate(&mDocument);
180 aTemplate.autoReplaceUses3D(mbUse3DAddresses);
181 mbCalcIntercept = !mxNoInterceptCheckBox->get_active();
182
183 // max col of our output should account for
184 // 1. constant term column,
185 // 2. mnNumIndependentVars columns
186 // 3. Actual Y column
187 // 4. Predicted Y column
188 // 5. Residual Column
189 SCCOL nOutputMaxCol = mOutputAddress.Col() + mnNumIndependentVars + 3;
190
191 ScRange aXDataRange(GetDataRange(mVariable1Range));
192 ScRange aYDataRange(GetDataRange(mVariable2Range));
193
194 aTemplate.autoReplaceRange("%VARIABLE1_RANGE%", aXDataRange);
195 aTemplate.autoReplaceRange("%VARIABLE2_RANGE%", aYDataRange);
196 size_t nRegressionIndex = GetRegressionTypeIndex();
197 ScRegType eRegType = static_cast<ScRegType>(nRegressionIndex);
198 bool bTakeLogX = eRegType == ScRegType::LOGARITHMIC || eRegType == ScRegType::POWER;
199
200 WriteRawRegressionResults(aOutput, aTemplate, nRegressionIndex);
201 WriteRegressionStatistics(aOutput, aTemplate);
202 WriteRegressionANOVAResults(aOutput, aTemplate);
203 WriteRegressionEstimatesWithCI(aOutput, aTemplate, bTakeLogX);
204 if (mxCalcResidualsCheckBox->get_active())
205 WritePredictionsWithResiduals(aOutput, aTemplate, nRegressionIndex);
206
207 ScAddress aMaxAddress(aOutput.mMaximumAddress);
208 aMaxAddress.SetCol(std::max(aMaxAddress.Col(), nOutputMaxCol));
209 return ScRange(aOutput.mMinimumAddress, aMaxAddress);
210 }
211
InputRangesValid()212 bool ScRegressionDialog::InputRangesValid()
213 {
214 if (!mVariable1Range.IsValid())
215 {
216 mxErrorMessage->set_label(ScResId(STR_MESSAGE_XINVALID_RANGE));
217 return false;
218 }
219
220 if (!mVariable2Range.IsValid())
221 {
222 mxErrorMessage->set_label(ScResId(STR_MESSAGE_YINVALID_RANGE));
223 return false;
224 }
225
226 if (!mOutputAddress.IsValid())
227 {
228 mxErrorMessage->set_label(ScResId(STR_MESSAGE_INVALID_OUTPUT_ADDR));
229 return false;
230 }
231
232 {
233 double fConfidenceLevel = mxConfidenceLevelField->get_value();
234 if ( fConfidenceLevel <= 0.0 || fConfidenceLevel >= 100.0 )
235 {
236 mxErrorMessage->set_label(ScResId(STR_MESSAGE_INVALID_CONFIDENCE_LEVEL));
237 return false;
238 }
239 }
240
241 mVariable1Range.PutInOrder();
242 mVariable2Range.PutInOrder();
243
244 bool bGroupedByColumn = mGroupedBy == BY_COLUMN;
245
246 bool bYHasSingleDim = (
247 (bGroupedByColumn &&
248 mVariable2Range.aStart.Col() == mVariable2Range.aEnd.Col()) ||
249 (!bGroupedByColumn &&
250 mVariable2Range.aStart.Row() == mVariable2Range.aEnd.Row()));
251
252 if (!bYHasSingleDim)
253 {
254 if (bGroupedByColumn)
255 mxErrorMessage->set_label(ScResId(STR_MESSAGE_YVARIABLE_MULTI_COLUMN));
256 else
257 mxErrorMessage->set_label(ScResId(STR_MESSAGE_YVARIABLE_MULTI_ROW));
258 return false;
259 }
260
261 bool bWithLabels = mxWithLabelsCheckBox->get_active();
262
263 size_t nYObs = lcl_GetNumRowsColsInRange(mVariable2Range, bGroupedByColumn);
264 size_t nNumXVars = lcl_GetNumRowsColsInRange(mVariable1Range, !bGroupedByColumn);
265 mbUnivariate = nNumXVars == 1;
266 // Observation count mismatch check
267 if (lcl_GetNumRowsColsInRange(mVariable1Range, bGroupedByColumn) != nYObs)
268 {
269 if (mbUnivariate)
270 mxErrorMessage->set_label(ScResId(STR_MESSAGE_UNIVARIATE_NUMOBS_MISMATCH));
271 else
272 mxErrorMessage->set_label(ScResId(STR_MESSAGE_MULTIVARIATE_NUMOBS_MISMATCH));
273 return false;
274 }
275
276 mnNumIndependentVars = nNumXVars;
277 mnNumObservations = bWithLabels ? nYObs - 1 : nYObs;
278
279 mbUse3DAddresses = mVariable1Range.aStart.Tab() != mOutputAddress.Tab() ||
280 mVariable2Range.aStart.Tab() != mOutputAddress.Tab();
281
282 mxErrorMessage->set_label("");
283
284 return true;
285 }
286
GetRegressionTypeIndex() const287 size_t ScRegressionDialog::GetRegressionTypeIndex() const
288 {
289 if (mxLinearRadioButton->get_active())
290 return 0;
291 if (mxLogarithmicRadioButton->get_active())
292 return 1;
293 return 2;
294 }
295
GetDataRange(const ScRange & rRange)296 ScRange ScRegressionDialog::GetDataRange(const ScRange& rRange)
297 {
298 if (!mxWithLabelsCheckBox->get_active())
299 return rRange;
300
301 ScRange aDataRange(rRange);
302 if (mGroupedBy == BY_COLUMN)
303 aDataRange.aStart.IncRow(1);
304 else
305 aDataRange.aStart.IncCol(1);
306
307 return aDataRange;
308 }
309
GetVariableNameFormula(bool bXVar,size_t nIndex,bool bWithLog)310 OUString ScRegressionDialog::GetVariableNameFormula(bool bXVar, size_t nIndex, bool bWithLog)
311 {
312 if (bXVar && nIndex == 0)
313 return "=\"" + ScResId(STR_LABEL_INTERCEPT) + "\"";
314
315 if (mxWithLabelsCheckBox->get_active())
316 {
317 ScAddress aAddr(bXVar ? mVariable1Range.aStart : mVariable2Range.aStart);
318 if (mGroupedBy == BY_COLUMN)
319 aAddr.IncCol(nIndex - 1);
320 else
321 aAddr.IncRow(nIndex - 1);
322
323 ScRefFlags eAddrFlag = mbUse3DAddresses ? ScRefFlags::ADDR_ABS_3D : ScRefFlags::ADDR_ABS;
324 return bWithLog ? OUString("=CONCAT(\"LN(\";" +
325 aAddr.Format(eAddrFlag, &mDocument, mDocument.GetAddressConvention()) + ";\")\")") :
326 OUString("=" + aAddr.Format(eAddrFlag, &mDocument, mDocument.GetAddressConvention()));
327 }
328
329 OUString aDefaultVarName;
330
331 if (bXVar)
332 aDefaultVarName = "X" + OUString::number(nIndex);
333 else
334 aDefaultVarName = "Y";
335
336 return bWithLog ? OUString("=\"LN(" + aDefaultVarName + ")\"") :
337 OUString("=\"" + aDefaultVarName + "\"");
338 }
339
GetXVariableNameFormula(size_t nIndex,bool bWithLog)340 OUString ScRegressionDialog::GetXVariableNameFormula(size_t nIndex, bool bWithLog)
341 {
342 assert(nIndex <= mnNumIndependentVars);
343 return GetVariableNameFormula(true, nIndex, bWithLog);
344 }
345
GetYVariableNameFormula(bool bWithLog)346 OUString ScRegressionDialog::GetYVariableNameFormula(bool bWithLog)
347 {
348 return GetVariableNameFormula(false, 1, bWithLog);
349 }
350
WriteRawRegressionResults(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate,size_t nRegressionIndex)351 void ScRegressionDialog::WriteRawRegressionResults(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
352 size_t nRegressionIndex)
353 {
354 rOutput.writeBoldString(ScResId(STR_REGRESSION));
355 rOutput.newLine();
356 // REGRESSION MODEL
357 rOutput.writeString(ScResId(STR_LABEL_REGRESSION_MODEL));
358 rOutput.nextColumn();
359 rOutput.writeString(ScResId(constRegressionModel[nRegressionIndex]));
360 rOutput.newLine();
361 rOutput.newLine();
362
363 rOutput.writeString(ScResId(STR_LINEST_RAW_OUTPUT_TITLE));
364 rOutput.newLine();
365 rOutput.push();
366
367 rTemplate.setTemplate(constTemplateLINEST[nRegressionIndex].
368 replaceFirst("%CALC_INTERCEPT%",
369 mbCalcIntercept ? std::u16string_view(u"TRUE") : std::u16string_view(u"FALSE")));
370 rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1 + mnNumIndependentVars, 5);
371 // Add LINEST result components to template
372 // 1. Add ranges for coefficients and standard errors for indep. vars and the intercept.
373 // Note that these two are in the reverse order(m_n, m_n-1, ..., m_1, b) w.r.t what we expect.
374 rTemplate.autoReplaceRange("%COEFFICIENTS_REV_RANGE%", ScRange(rOutput.current(), rOutput.current(mnNumIndependentVars)));
375 rTemplate.autoReplaceRange("%SERRORSX_REV_RANGE%", ScRange(rOutput.current(0, 1), rOutput.current(mnNumIndependentVars, 1)));
376
377 // 2. Add R-squared and standard error for y estimate.
378 rTemplate.autoReplaceAddress("%RSQUARED_ADDR%", rOutput.current(0, 2));
379 rTemplate.autoReplaceAddress("%SERRORY_ADDR%", rOutput.current(1, 2));
380
381 // 3. Add F statistic and degrees of freedom
382 rTemplate.autoReplaceAddress("%FSTATISTIC_ADDR%", rOutput.current(0, 3));
383 rTemplate.autoReplaceAddress("%DoFRESID_ADDR%", rOutput.current(1, 3));
384
385 // 4. Add regression sum of squares and residual sum of squares
386 rTemplate.autoReplaceAddress("%SSREG_ADDR%", rOutput.current(0, 4));
387 rTemplate.autoReplaceAddress("%SSRESID_ADDR%", rOutput.current(1, 4));
388
389 rOutput.push(0, 4);
390 rOutput.newLine();
391 }
392
WriteRegressionStatistics(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate)393 void ScRegressionDialog::WriteRegressionStatistics(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate)
394 {
395 rOutput.newLine();
396 rOutput.writeString(ScResId(STR_LABEL_REGRESSION_STATISTICS));
397 rOutput.newLine();
398
399 const char* aMeasureNames[] =
400 {
401 STR_LABEL_RSQUARED,
402 STRID_CALC_STD_ERROR,
403 STR_LABEL_XVARIABLES_COUNT,
404 STR_OBSERVATIONS_LABEL,
405 STR_LABEL_ADJUSTED_RSQUARED
406 };
407
408 OUString aMeasureFormulas[] =
409 {
410 "=%RSQUARED_ADDR%",
411 "=%SERRORY_ADDR%",
412 "=" + OUString::number(mnNumIndependentVars),
413 "=" + OUString::number(mnNumObservations),
414 OUString::Concat(
415 "=1 - (1 - %RSQUARED_ADDR%)*(%NUMOBS_ADDR% - 1)/(%NUMOBS_ADDR% - %NUMXVARS_ADDR%") +
416 (mbCalcIntercept ? std::u16string_view(u" - 1)") : std::u16string_view(u")"))
417 };
418
419 rTemplate.autoReplaceAddress("%NUMXVARS_ADDR%", rOutput.current(1, 2));
420 rTemplate.autoReplaceAddress("%NUMOBS_ADDR%", rOutput.current(1, 3));
421
422 for (size_t nIdx = 0; nIdx < SAL_N_ELEMENTS(aMeasureNames); ++nIdx)
423 {
424 rOutput.writeString(ScResId(aMeasureNames[nIdx]));
425 rOutput.nextColumn();
426 rTemplate.setTemplate(aMeasureFormulas[nIdx]);
427 rOutput.writeFormula(rTemplate.getTemplate());
428 rOutput.newLine();
429 }
430 }
431
WriteRegressionANOVAResults(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate)432 void ScRegressionDialog::WriteRegressionANOVAResults(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate)
433 {
434 rOutput.newLine();
435 rOutput.writeString(ScResId(STR_LABEL_ANOVA));
436 rOutput.newLine();
437
438 const size_t nColsInTable = 6;
439 const size_t nRowsInTable = 4;
440 OUString aTable[nRowsInTable][nColsInTable] =
441 {
442 {
443 "",
444 ScResId(STR_ANOVA_LABEL_DF),
445 ScResId(STR_ANOVA_LABEL_SS),
446 ScResId(STR_ANOVA_LABEL_MS),
447 ScResId(STR_ANOVA_LABEL_F),
448 ScResId(STR_ANOVA_LABEL_SIGNIFICANCE_F)
449 },
450 {
451 ScResId(STR_REGRESSION),
452 "=%NUMXVARS_ADDR%",
453 "=%SSREG_ADDR%",
454 "=%SSREG_ADDR% / %DoFREG_ADDR%",
455 "=%FSTATISTIC_ADDR%",
456 "=FDIST(%FSTATISTIC_ADDR% ; %DoFREG_ADDR% ; %DoFRESID_ADDR%)"
457 },
458 {
459 ScResId(STR_LABEL_RESIDUAL),
460 "=%DoFRESID_ADDR%",
461 "=%SSRESID_ADDR%",
462 "=%SSRESID_ADDR% / %DoFRESID_ADDR%",
463 "",
464 ""
465 },
466 {
467 ScResId(STR_ANOVA_LABEL_TOTAL),
468 "=%DoFREG_ADDR% + %DoFRESID_ADDR%",
469 "=%SSREG_ADDR% + %SSRESID_ADDR%",
470 "",
471 "",
472 ""
473 }
474 };
475
476 rTemplate.autoReplaceAddress("%DoFREG_ADDR%", rOutput.current(1, 1));
477
478 // Cell getter lambda
479 std::function<CellValueGetter> aCellGetterFunc = [&aTable](size_t nRowIdx, size_t nColIdx) -> const OUString&
480 {
481 return aTable[nRowIdx][nColIdx];
482 };
483
484 // Cell writer lambda
485 std::function<CellWriter> aCellWriterFunc = [&rOutput, &rTemplate]
486 (const OUString& rContent, size_t /*nRowIdx*/, size_t /*nColIdx*/)
487 {
488 if (!rContent.isEmpty())
489 {
490 if (rContent.startsWith("="))
491 {
492 rTemplate.setTemplate(rContent);
493 rOutput.writeFormula(rTemplate.getTemplate());
494 }
495 else
496 rOutput.writeString(rContent);
497 }
498 };
499
500 WriteTable(aCellGetterFunc, nRowsInTable, nColsInTable, rOutput, aCellWriterFunc);
501
502 // User given confidence level
503 rOutput.newLine();
504 rOutput.writeString(ScResId(STR_LABEL_CONFIDENCE_LEVEL));
505 rOutput.nextColumn();
506 rOutput.writeValue(mxConfidenceLevelField->get_value() / 100.0);
507 rTemplate.autoReplaceAddress("%CONFIDENCE_LEVEL_ADDR%", rOutput.current());
508 rOutput.newLine();
509 }
510
511 // Write slopes, intercept, their standard errors, t-statistics, p-value, confidence intervals
WriteRegressionEstimatesWithCI(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate,bool bTakeLogX)512 void ScRegressionDialog::WriteRegressionEstimatesWithCI(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
513 bool bTakeLogX)
514 {
515 rOutput.newLine();
516 ScAddress aEnd( rOutput.current(0, 1 + mnNumIndependentVars));
517 ScRefFlags eAddrFlag = mbUse3DAddresses ? ScRefFlags::ADDR_ABS_3D : ScRefFlags::ADDR_ABS;
518 aEnd.IncCol();
519 const OUString aCoeffAddr( aEnd.Format( eAddrFlag, &mDocument, mDocument.GetAddressConvention()));
520 aEnd.IncCol();
521 const OUString aStErrAddr( aEnd.Format( eAddrFlag, &mDocument, mDocument.GetAddressConvention()));
522
523 // Coefficients & Std.Errors ranges (column vectors) in this table (yet to populate).
524 rTemplate.autoReplaceRange("%COEFFICIENTS_RANGE%",
525 ScRange(rOutput.current(1, 1),
526 rOutput.current(1, 1 + mnNumIndependentVars)));
527 rTemplate.autoReplaceRange("%SLOPES_RANGE%", // Excludes the intercept
528 ScRange(rOutput.current(1, 2),
529 rOutput.current(1, 1 + mnNumIndependentVars)));
530 rTemplate.autoReplaceAddress("%INTERCEPT_ADDR%", rOutput.current(1, 1));
531 rTemplate.autoReplaceRange("%SERRORSX_RANGE%",
532 ScRange(rOutput.current(2, 1),
533 rOutput.current(2, 1 + mnNumIndependentVars)));
534 // t-Statistics range in this table (yet to populate)
535 rTemplate.autoReplaceRange("%TSTAT_RANGE%",
536 ScRange(rOutput.current(3, 1),
537 rOutput.current(3, 1 + mnNumIndependentVars)));
538
539 const size_t nColsInTable = 7;
540 const size_t nRowsInTable = 2;
541 OUString aTable[nRowsInTable][nColsInTable] =
542 {
543 {
544 "",
545 ScResId(STR_LABEL_COEFFICIENTS),
546 ScResId(STRID_CALC_STD_ERROR),
547 ScResId(STR_LABEL_TSTATISTIC),
548 ScResId(STR_P_VALUE_LABEL),
549
550 "=CONCAT(\"" + ScResId(STR_LABEL_LOWER) +
551 " \" ; INT(%CONFIDENCE_LEVEL_ADDR% * 100) ; \"%\")",
552
553 "=CONCAT(\"" + ScResId(STR_LABEL_UPPER) +
554 " \" ; INT(%CONFIDENCE_LEVEL_ADDR% * 100) ; \"%\")",
555 },
556
557 // Following are matrix formulas of size numcols = 1, numrows = (mnNumIndependentVars + 1)
558 {
559 "",
560 // This puts the coefficients in the reverse order compared to that in LINEST output.
561 "=INDEX(%COEFFICIENTS_REV_RANGE%; 1 ; ROW(" + aCoeffAddr + ")+1 - ROW())",
562 // This puts the standard errors in the reverse order compared to that in LINEST output.
563 "=INDEX(%SERRORSX_REV_RANGE%; 1 ; ROW(" + aStErrAddr + ")+1 - ROW())",
564 // t-Statistic
565 "=%COEFFICIENTS_RANGE% / %SERRORSX_RANGE%",
566 // p-Value
567 "=TDIST(ABS(%TSTAT_RANGE%) ; %DoFRESID_ADDR% ; 2 )",
568 // Lower limit of confidence interval
569 "=%COEFFICIENTS_RANGE% - %SERRORSX_RANGE% * "
570 "TINV(1 - %CONFIDENCE_LEVEL_ADDR% ; %DoFRESID_ADDR%)",
571 // Upper limit of confidence interval
572 "=%COEFFICIENTS_RANGE% + %SERRORSX_RANGE% * "
573 "TINV(1 - %CONFIDENCE_LEVEL_ADDR% ; %DoFRESID_ADDR%)"
574 }
575 };
576
577 // Cell getter lambda
578 std::function<CellValueGetter> aCellGetterFunc = [&aTable](size_t nRowIdx, size_t nColIdx) -> const OUString&
579 {
580 return aTable[nRowIdx][nColIdx];
581 };
582
583 // Cell writer lambda
584 size_t nNumIndependentVars = mnNumIndependentVars;
585 std::function<CellWriter> aCellWriterFunc = [&rOutput, &rTemplate, nNumIndependentVars]
586 (const OUString& rContent, size_t nRowIdx, size_t /*nColIdx*/)
587 {
588 if (!rContent.isEmpty())
589 {
590 if (rContent.startsWith("="))
591 {
592 rTemplate.setTemplate(rContent);
593 if (nRowIdx == 0)
594 rOutput.writeFormula(rTemplate.getTemplate());
595 else
596 rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, 1 + nNumIndependentVars);
597 }
598 else
599 rOutput.writeString(rContent);
600 }
601 };
602
603 WriteTable(aCellGetterFunc, nRowsInTable, nColsInTable, rOutput, aCellWriterFunc);
604
605 // Go back to the second row and first column of the table to
606 // fill the names of variables + intercept
607 rOutput.push(0, -1);
608
609 for (size_t nXvarIdx = 0; nXvarIdx <= mnNumIndependentVars; ++nXvarIdx)
610 {
611 rOutput.writeFormula(GetXVariableNameFormula(nXvarIdx, bTakeLogX));
612 rOutput.newLine();
613 }
614
615 }
616
617 // Re-write all observations in group-by column mode with predictions and residuals
WritePredictionsWithResiduals(AddressWalkerWriter & rOutput,FormulaTemplate & rTemplate,size_t nRegressionIndex)618 void ScRegressionDialog::WritePredictionsWithResiduals(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
619 size_t nRegressionIndex)
620 {
621 bool bGroupedByColumn = mGroupedBy == BY_COLUMN;
622 rOutput.newLine();
623 rOutput.push();
624
625 // Range of X variables with rows as observations and columns as variables.
626 ScRange aDataMatrixRange(rOutput.current(0, 1), rOutput.current(mnNumIndependentVars - 1, mnNumObservations));
627 rTemplate.autoReplaceRange("%XDATAMATRIX_RANGE%", aDataMatrixRange);
628
629 // Write X variable names
630 for (size_t nXvarIdx = 1; nXvarIdx <= mnNumIndependentVars; ++nXvarIdx)
631 {
632 // Here we write the X variables without any transformation(LN)
633 rOutput.writeFormula(GetXVariableNameFormula(nXvarIdx, false));
634 rOutput.nextColumn();
635 }
636 rOutput.reset();
637
638 // Write the X data matrix
639 rOutput.nextRow();
640 OUString aDataMatrixFormula = bGroupedByColumn ? OUString("=%VARIABLE1_RANGE%") : OUString("=TRANSPOSE(%VARIABLE1_RANGE%)");
641 rTemplate.setTemplate(aDataMatrixFormula);
642 rOutput.writeMatrixFormula(rTemplate.getTemplate(), mnNumIndependentVars, mnNumObservations);
643
644 // Write predicted values
645 rOutput.push(mnNumIndependentVars, -1);
646 rOutput.writeString(ScResId(STR_LABEL_PREDICTEDY));
647 rOutput.nextRow();
648 rTemplate.setTemplate(constRegressionFormula[nRegressionIndex]);
649 rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
650 rTemplate.autoReplaceRange("%PREDICTEDY_RANGE%", ScRange(rOutput.current(), rOutput.current(0, mnNumObservations - 1)));
651
652 // Write actual Y
653 rOutput.push(1, -1);
654 rOutput.writeFormula(GetYVariableNameFormula(false));
655 rOutput.nextRow();
656 OUString aYVectorFormula = bGroupedByColumn ? OUString("=%VARIABLE2_RANGE%") : OUString("=TRANSPOSE(%VARIABLE2_RANGE%)");
657 rTemplate.setTemplate(aYVectorFormula);
658 rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
659 rTemplate.autoReplaceRange("%ACTUALY_RANGE%", ScRange(rOutput.current(), rOutput.current(0, mnNumObservations - 1)));
660
661 // Write residual
662 rOutput.push(1, -1);
663 rOutput.writeString(ScResId(STR_LABEL_RESIDUAL));
664 rOutput.nextRow();
665 rTemplate.setTemplate("=%ACTUALY_RANGE% - %PREDICTEDY_RANGE%");
666 rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
667 }
668
669 // Generic table writer
WriteTable(const std::function<CellValueGetter> & rCellGetter,size_t nRowsInTable,size_t nColsInTable,AddressWalkerWriter & rOutput,const std::function<CellWriter> & rFunc)670 void ScRegressionDialog::WriteTable(const std::function<CellValueGetter>& rCellGetter,
671 size_t nRowsInTable, size_t nColsInTable,
672 AddressWalkerWriter& rOutput,
673 const std::function<CellWriter>& rFunc)
674 {
675 for (size_t nRowIdx = 0; nRowIdx < nRowsInTable; ++nRowIdx)
676 {
677 for (size_t nColIdx = 0; nColIdx < nColsInTable; ++nColIdx)
678 {
679 rFunc(rCellGetter(nRowIdx, nColIdx), nRowIdx, nColIdx);
680 rOutput.nextColumn();
681 }
682 rOutput.newLine();
683 }
684 }
685
IMPL_LINK_NOARG(ScRegressionDialog,CheckBoxHdl,weld::Toggleable &,void)686 IMPL_LINK_NOARG(ScRegressionDialog, CheckBoxHdl, weld::Toggleable&, void)
687 {
688 ValidateDialogInput();
689 }
690
IMPL_LINK_NOARG(ScRegressionDialog,NumericFieldHdl,weld::SpinButton &,void)691 IMPL_LINK_NOARG(ScRegressionDialog, NumericFieldHdl, weld::SpinButton&, void)
692 {
693 ValidateDialogInput();
694 }
695
696 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
697