1 /* Discriminant.cpp
2  *
3  * Copyright (C) 1993-2019 David Weenink
4  *
5  * This code is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or (at
8  * your option) any later version.
9  *
10  * This code is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this work. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 /*
20  djmw 20011016 removed some causes for compiler warnings
21  djmw 20020313 removed obsolete TableOfReal_sortByLabels method
22  djmw 20020314 +Discriminant_extractWithinGroupSSCP,
23  	+Discriminant_extractGroupLabels, +Discriminant_setGroupLabels.
24  djmw 20020327 modified Discriminant_TableOfReal_to_Configuration
25  djmw 20020418 Removed some causes for compiler warnings
26  djmw 20020502 modified call Eigen_TableOfReal_project_into
27  djmw 20030801 Discriminant_drawConcentrationEllipses extra argument
28  djmw 20050405 Modified column label: eigenvector->Eigenvector
29  djmw 20061212 Changed info to Melder_writeLine<x> format.
30  djmw 20071009 wchar
31  djmw 20071012 Added: oo_CAN_WRITE_AS_ENCODING.h
32  djmw 20071201 Melder_warning<n>
33  djmw 20081119 Check in TableOfReal_to_Discriminant if TableOfReal_areAllCellsDefined
34  djmw 20100107 +Discriminant_TableOfReal_mahalanobis
35  djmw 20110304 Thing_new
36 */
37 
38 #include "Discriminant.h"
39 #include "SSCP.h"
40 #include "Eigen_and_SSCP.h"
41 #include "NUM2.h"
42 #include "TableOfReal_extensions.h"
43 
44 #include "oo_DESTROY.h"
45 #include "Discriminant_def.h"
46 #include "oo_COPY.h"
47 #include "Discriminant_def.h"
48 #include "oo_EQUAL.h"
49 #include "Discriminant_def.h"
50 #include "oo_CAN_WRITE_AS_ENCODING.h"
51 #include "Discriminant_def.h"
52 #include "oo_WRITE_TEXT.h"
53 #include "Discriminant_def.h"
54 #include "oo_READ_TEXT.h"
55 #include "Discriminant_def.h"
56 #include "oo_WRITE_BINARY.h"
57 #include "Discriminant_def.h"
58 #include "oo_READ_BINARY.h"
59 #include "Discriminant_def.h"
60 #include "oo_DESCRIPTION.h"
61 #include "Discriminant_def.h"
62 
63 Thing_implement (Discriminant, Daata, 1);
64 
v_info()65 void structDiscriminant :: v_info () {
66 	structDaata :: v_info ();
67 	MelderInfo_writeLine (U"Number of groups: ", numberOfGroups);
68 	MelderInfo_writeLine (U"Number of eigenvalues: ", eigen -> numberOfEigenvalues);
69 	MelderInfo_writeLine (U"Dimension of eigenvector: ", eigen -> dimension);
70 	MelderInfo_writeLine (U"Number of discriminant functions: ", Discriminant_getNumberOfFunctions (this));
71 	MelderInfo_writeLine (U"Number of observations (total): ", Discriminant_getNumberOfObservations (this, 0));
72 }
73 
Discriminant_create(integer numberOfGroups,integer numberOfEigenvalues,integer dimension)74 autoDiscriminant Discriminant_create (integer numberOfGroups, integer numberOfEigenvalues, integer dimension) {
75 	try {
76 		autoDiscriminant me = Thing_new (Discriminant);
77 		my eigen = Eigen_create (numberOfEigenvalues, dimension);
78 		my numberOfGroups = numberOfGroups;
79 		my groups = SSCPList_create ();
80 		my total = SSCP_create (dimension);
81 		my aprioriProbabilities = raw_VEC (numberOfGroups);
82 		my costs = raw_MAT (numberOfGroups, numberOfGroups);
83 		return me;
84 	} catch (MelderError) {
85 		Melder_throw (U"Discriminant not created.");
86 	}
87 }
88 
Discriminant_groupLabelToIndex(Discriminant me,conststring32 label)89 integer Discriminant_groupLabelToIndex (Discriminant me, conststring32 label) {
90 	for (integer i = 1; i <= my numberOfGroups; i ++) {
91 		const conststring32 name = Thing_getName (my groups -> at [i]);
92 		if (name && str32equ (name, label))
93 			return i;
94 	}
95 	return 0;
96 }
97 
Discriminant_getNumberOfGroups(Discriminant me)98 integer Discriminant_getNumberOfGroups (Discriminant me) {
99 	return my numberOfGroups;
100 }
101 
Discriminant_getNumberOfObservations(Discriminant me,integer group)102 integer Discriminant_getNumberOfObservations (Discriminant me, integer group) {
103 	if (group == 0) {
104 		return Melder_ifloor (my total -> numberOfObservations);
105 	} else if (group >= 1 && group <= my numberOfGroups) {
106 		SSCP sscp = my groups->at [group];
107 		return Melder_ifloor (sscp -> numberOfObservations);
108 	} else {
109 		return -1;
110 	}
111 }
112 
Discriminant_setAprioriProbability(Discriminant me,integer group,double p)113 void Discriminant_setAprioriProbability (Discriminant me, integer group, double p) {
114 	Melder_require (group > 0 && group <= my numberOfGroups,
115 		U"The group number (", group, U") should be in the interval [1, ", my numberOfGroups, U"]; the supplied value (", group, U") falls outside it.");
116 	Melder_require (p >= 0.0 && p <= 1.0,
117 		U"The probability should be in the interval [0, 1]");
118 
119 	my aprioriProbabilities [group] = p;
120 }
121 
Discriminant_getNumberOfFunctions(Discriminant me)122 integer Discriminant_getNumberOfFunctions (Discriminant me) {
123 	integer numberOfFunctions = std::min (my numberOfGroups - 1, my eigen -> dimension);
124 	numberOfFunctions = std::min (numberOfFunctions, my eigen ->  numberOfEigenvalues);
125 	return numberOfFunctions;
126 }
127 
Discriminant_setGroupLabels(Discriminant me,Strings thee)128 void Discriminant_setGroupLabels (Discriminant me, Strings thee) {
129 	Melder_require (my numberOfGroups == thy numberOfStrings,
130 		U"The number of strings should equal the number of groups.");
131 	for (integer i = 1; i <= my numberOfGroups; i ++) {
132 		const conststring32 name = thy strings [i].get();
133 		Thing_setName ( my groups->at [i], name ? name : U"" );
134 	}
135 }
136 
Discriminant_extractGroupLabels(Discriminant me)137 autoStrings Discriminant_extractGroupLabels (Discriminant me) {
138 	try {
139 		autoStrings thee = Thing_new (Strings);
140 		thy strings = autoSTRVEC (my numberOfGroups);
141 		thy numberOfStrings = my numberOfGroups;
142 		for (integer i = 1; i <= my numberOfGroups; i ++) {
143 			const conststring32 name = Thing_getName (my groups->at [i]);
144 			thy strings [i] = Melder_dup (name);
145 		}
146 		return thee;
147 	} catch (MelderError) {
148 		Melder_throw (me, U": group labels not extracted.");
149 	}
150 }
151 
Discriminant_extractGroupCentroids(Discriminant me)152 autoTableOfReal Discriminant_extractGroupCentroids (Discriminant me) {
153 	try {
154 		autoTableOfReal thee = TableOfReal_create (my groups -> size, my eigen -> dimension);
155 
156 		for (integer i = 1; i <= my groups -> size; i ++) {
157 			const SSCP sscp = my groups->at [i];
158 			TableOfReal_setRowLabel (thee.get(), i, Thing_getName (sscp));
159 			thy data.row (i)  <<=  sscp -> centroid.all();
160 		}
161 		thy columnLabels.all()  <<=  my groups->at [my groups -> size] -> columnLabels.part (1, my eigen -> dimension);
162 		// The elements in my groups always have my eigen -> dimension columns
163 		return thee;
164 	} catch (MelderError) {
165 		Melder_throw (me, U": group centroids not extracted.");
166 	}
167 }
168 
Discriminant_extractGroupStandardDeviations(Discriminant me)169 autoTableOfReal Discriminant_extractGroupStandardDeviations (Discriminant me) {
170 	try {
171 		autoTableOfReal thee = TableOfReal_create (my groups->size, my eigen -> dimension);
172 
173 		for (integer i = 1; i <= my groups->size; i ++) {
174 			const SSCP sscp = my groups->at [i];
175 			TableOfReal_setRowLabel (thee.get(), i, Thing_getName (sscp));
176 			const integer numberOfObservationsm1 = Melder_ifloor (sscp -> numberOfObservations) - 1;
177 			for (integer j = 1; j <= my eigen -> dimension; j ++) {
178 				thy data [i] [j] = ( numberOfObservationsm1 > 0.0 ? sqrt (sscp -> data [j] [j] / numberOfObservationsm1) : undefined );
179 			}
180 		}
181 		thy columnLabels.all()  <<=  my groups->at [my groups->size] -> columnLabels.part (1, my eigen -> dimension);
182 		// The elements in my groups always have my eigen -> dimension columns
183 
184 		return thee;
185 	} catch (MelderError) {
186 		Melder_throw (me, U": group standard deviations not extracted.");
187 	}
188 }
189 
Discriminant_getWilksLambda(Discriminant me,integer from)190 double Discriminant_getWilksLambda (Discriminant me, integer from) {
191 	integer numberOfFunctions = Discriminant_getNumberOfFunctions (me);
192 	if (from >= numberOfFunctions)
193 		return 1;
194 	if (from < 1)
195 		from = 1;
196 	return NUMwilksLambda (my eigen -> eigenvalues.get(), 1 + from, numberOfFunctions);
197 }
198 
199 /*
200 	raw r [j]: eigenvec [i] [j]
201 	unstandardized u [j]: sqrt(N-g) * r [j]
202 	standardized s [j]: u [j] sqrt (w [i] [i] / (N-g))
203 */
Discriminant_extractCoefficients(Discriminant me,integer choice)204 autoTableOfReal Discriminant_extractCoefficients (Discriminant me, integer choice) {
205 	try {
206 		bool raw = choice == 0, standardized = choice == 2;
207 		const integer nx = my eigen -> dimension, ny = my eigen -> numberOfEigenvalues;
208 
209 		const SSCP total = my total.get();
210 		autoTableOfReal thee = TableOfReal_create (ny, nx + 1);
211 		thy columnLabels.part (1, nx)  <<=  my total -> columnLabels.part (1, nx);
212 		// The elements in my groups always have my eigen -> dimension columns
213 
214 		autoSSCP within;
215 		if (standardized)
216 			within = Discriminant_extractPooledWithinGroupsSSCP (me);
217 
218 		TableOfReal_setColumnLabel (thee.get(), nx + 1, U"constant");
219 		TableOfReal_setSequentialRowLabels (thee.get(), 1, ny, U"function_", 1, 1);
220 
221 		double scale = sqrt (total -> numberOfObservations - my numberOfGroups);
222 		//double *centroid = my total -> centroid.at;
223 		for (integer i = 1; i <= ny; i ++) {
224 			longdouble u0 = 0.0;
225 			for (integer j = 1; j <= nx; j ++) {
226 				if (standardized)
227 					scale = sqrt (within -> data [j] [j]);
228 				const double ui = scale * my eigen -> eigenvectors [i] [j];
229 				thy data [i] [j] = ui;
230 				u0 += ui * my total -> centroid [j];
231 			}
232 			thy data [i] [nx + 1] = ( raw ? 0.0 : - (double) u0 );
233 		}
234 		return thee;
235 	} catch (MelderError) {
236 		Melder_throw (me, U": coefficients not extracted.");
237 	}
238 }
239 
Discriminant_getDegreesOfFreedom(Discriminant me)240 static double Discriminant_getDegreesOfFreedom (Discriminant me) {
241 	double ndf = 0.0;
242 	for (integer i = 1; i <= my groups->size; i ++) {
243 		ndf += SSCP_getDegreesOfFreedom (my groups->at [i]);
244 	}
245 	return ndf;
246 }
247 
Discriminant_getPartialDiscriminationProbability(Discriminant me,integer numberOfWantedDimensions,double * out_prob,double * out_chisq,double * out_df)248 void Discriminant_getPartialDiscriminationProbability (Discriminant me, integer numberOfWantedDimensions, double *out_prob, double *out_chisq, double *out_df)
249 {
250 	const integer eigendimension = my eigen -> dimension, numberOfGroups = my numberOfGroups;
251 	const integer numberOfFunctions = Discriminant_getNumberOfFunctions (me);
252 	const double degreesOfFreedom = Discriminant_getDegreesOfFreedom (me);
253 
254 	double prob = undefined, chisq = undefined, df = undefined;
255 
256 	if (numberOfWantedDimensions < numberOfFunctions) {
257 		const double lambda = NUMwilksLambda (my eigen -> eigenvalues.get(), numberOfWantedDimensions + 1, numberOfFunctions);
258 		if (lambda != 1.0) {
259 			chisq = - (degreesOfFreedom + (numberOfGroups - eigendimension) / 2.0 - 1.0) * log (lambda);
260 			df = (eigendimension - numberOfWantedDimensions) * (numberOfGroups - numberOfWantedDimensions - 1);
261 			if (out_prob)
262 				prob =  NUMchiSquareQ (chisq, df);
263 		}
264 	}
265 	if (out_prob)
266 		*out_prob = prob;
267 	if (out_chisq)
268 		*out_chisq = chisq;
269 	if (out_df)
270 		*out_df = df;
271 }
272 
Discriminant_getConcentrationEllipseArea(Discriminant me,integer groupNumber,double scale,bool confidence,bool discriminantDirections,integer d1,integer d2)273 double Discriminant_getConcentrationEllipseArea (Discriminant me, integer groupNumber, double scale, bool confidence, bool discriminantDirections, integer d1, integer d2) {
274 	double area = undefined;
275 
276 	if (groupNumber < 1 || groupNumber > my numberOfGroups)
277 		return area;
278 
279 	if (discriminantDirections) {
280 		autoSSCP thee = Eigen_SSCP_project (my eigen.get(), my groups->at [groupNumber]);
281 		area = SSCP_getConcentrationEllipseArea (thee.get(), scale, confidence, d1, d2);
282 	} else {
283 		area = SSCP_getConcentrationEllipseArea (my groups->at [groupNumber], scale, confidence, d1, d2);
284 	}
285 	return area;
286 }
287 
Discriminant_getLnDeterminant_group(Discriminant me,integer groupNumber)288 double Discriminant_getLnDeterminant_group (Discriminant me, integer groupNumber) {
289 	if (groupNumber < 1 || groupNumber > my numberOfGroups)
290 		return undefined;
291 
292 	autoCovariance c = SSCP_to_Covariance (my groups->at [groupNumber], 1);
293 	const double ln_d = SSCP_getLnDeterminant (c.get());
294 	return ln_d;
295 }
296 
Discriminant_getLnDeterminant_total(Discriminant me)297 double Discriminant_getLnDeterminant_total (Discriminant me) {
298 	autoCovariance c = SSCP_to_Covariance (my total.get(), 1);
299 	const double ln_d = SSCP_getLnDeterminant (c.get());
300 	return ln_d;
301 }
302 
Discriminant_extractPooledWithinGroupsSSCP(Discriminant me)303 autoSSCP Discriminant_extractPooledWithinGroupsSSCP (Discriminant me) {
304 	return SSCPList_to_SSCP_pool (my groups.get());
305 }
306 
Discriminant_extractWithinGroupSSCP(Discriminant me,integer groupNumber)307 autoSSCP Discriminant_extractWithinGroupSSCP (Discriminant me, integer groupNumber) {
308 	try {
309 		Melder_require (groupNumber >= 1 && groupNumber <= my numberOfGroups,
310 			U"The group number should be between 1 and ", my numberOfGroups, U".");
311 		autoSSCP thee = Data_copy (my groups->at [groupNumber]);
312 		return thee;
313 	} catch (MelderError) {
314 		Melder_throw (me, U": within-group SSCP not created.");
315 	}
316 }
317 
Discriminant_extractBetweenGroupsSSCP(Discriminant me)318 autoSSCP Discriminant_extractBetweenGroupsSSCP (Discriminant me) {
319 	try {
320 		autoSSCP between = Data_copy (my total.get());   // for the moment, `between` contains the total sums of squares
321 		autoSSCP within = SSCPList_to_SSCP_pool (my groups.get());
322 		between -> data.get()  -=  within -> data.get();   // now, `between` does contain the between-groups sums of squares
323 		return between;
324 	} catch (MelderError) {
325 		Melder_throw (me, U": between-groups SSCP not created.");
326 	}
327 }
328 
Discriminant_drawTerritorialMap(Discriminant me,Graphics g,bool discriminantDirections,integer d1,integer d2,double xmin,double xmax,double ymin,double ymax,double fontSize,bool poolCovarianceMatrices,bool garnish)329 void Discriminant_drawTerritorialMap (Discriminant me, Graphics g, bool discriminantDirections,
330 	integer d1, integer d2, double xmin, double xmax, double ymin, double ymax, double fontSize, bool poolCovarianceMatrices, bool garnish)
331 {
332 	(void) me; (void) g; (void) discriminantDirections; (void) d1; (void) d2;
333 	(void) xmin; (void) xmax; (void) ymin;
334 	(void) ymax; (void) fontSize; (void) poolCovarianceMatrices; (void) garnish;
335 
336 }
337 
Discriminant_drawConcentrationEllipses(Discriminant me,Graphics g,double scale,bool confidence,conststring32 label,bool discriminantDirections,integer d1,integer d2,double xmin,double xmax,double ymin,double ymax,double fontSize,bool garnish)338 void Discriminant_drawConcentrationEllipses (Discriminant me, Graphics g, double scale, bool confidence,
339 	conststring32 label, bool discriminantDirections,
340 	integer d1, integer d2, double xmin, double xmax, double ymin, double ymax, double fontSize, bool garnish) {
341 	const integer numberOfFunctions = Discriminant_getNumberOfFunctions (me);
342 
343 	if (! discriminantDirections) {
344 		SSCPList_drawConcentrationEllipses (my groups.get(), g, scale, confidence, label, d1, d2, xmin, xmax, ymin, ymax, fontSize, garnish);
345 		return;
346 	}
347 
348 	Melder_require (numberOfFunctions > 1,
349 		U"Nothing drawn because there is only one dimension in the discriminant space.");
350 
351 	// Project SSCPs on eigenvectors.
352 
353 	if (d1 == 0 && d2 == 0) {
354 		d1 = 1;
355 		d2 = std::min (numberOfFunctions, d1 + 1);
356 	} else if (d1 < 0 || d2 > numberOfFunctions) {
357 		return;
358 	}
359 
360 	autoSSCPList thee = SSCPList_toTwoDimensions (my groups.get(), my eigen -> eigenvectors.row (d1), my eigen -> eigenvectors.row (d2));
361 
362 	SSCPList_drawConcentrationEllipses (thee.get(), g, scale, confidence, label, 1, 2, xmin, xmax, ymin, ymax, fontSize, 0);
363 
364 	if (garnish) {
365 		char32 llabel [40];
366 		Graphics_drawInnerBox (g);
367 		Graphics_marksLeft (g, 2, true, true, false);
368 		Melder_sprint (llabel,40, U"function ", d2);
369 		Graphics_textLeft (g, true, llabel);
370 		Graphics_marksBottom (g, 2, true, true, false);
371 		Melder_sprint (llabel,40, U"function ", d1);
372 		Graphics_textBottom (g, true, llabel);
373 	}
374 }
375 
376 /* End of file Discriminant.cpp */
377