1
2 /******************************************************************************
3 *
4 * This file is part of meryl-utility, a collection of miscellaneous code
5 * used by Meryl, Canu and others.
6 *
7 * This software is based on:
8 * 'Canu' v2.0 (https://github.com/marbl/canu)
9 * which is based on:
10 * 'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
11 * the 'kmer package' r1994 (http://kmer.sourceforge.net)
12 *
13 * Except as indicated otherwise, this is a 'United States Government Work',
14 * and is released in the public domain.
15 *
16 * File 'README.licenses' in the root directory of this distribution
17 * contains full conditions and disclaimers.
18 */
19
20 #include "stddev.H"
21 #include "mt19937ar.H"
22
23 // g++ -Wall -o stddevTest -I. -I.. stddevTest.C
24
25 void
testInsert(void)26 testInsert(void) {
27 stdDev<uint32> sdu;
28 stdDev<int32> sdi;
29 stdDev<double> sdd;
30
31 sdu.insert((uint32)2);
32 sdu.insert((uint32)4);
33 sdu.insert((uint32)4);
34 sdu.insert((uint32)4);
35 sdu.insert((uint32)5);
36 sdu.insert((uint32)5);
37 sdu.insert((uint32)7);
38 sdu.insert((uint32)9);
39
40 sdi.insert((uint32)2);
41 sdi.insert((uint32)4);
42 sdi.insert((uint32)4);
43 sdi.insert((uint32)4);
44 sdi.insert((uint32)5);
45 sdi.insert((uint32)5);
46 sdi.insert((uint32)7);
47 sdi.insert((uint32)9);
48
49 sdd.insert((uint32)2);
50 sdd.insert((uint32)4);
51 sdd.insert((uint32)4);
52 sdd.insert((uint32)4);
53 sdd.insert((uint32)5);
54 sdd.insert((uint32)5);
55 sdd.insert((uint32)7);
56 sdd.insert((uint32)9);
57
58 fprintf(stderr, "Expect mean=5, variance=%f, stddev=%f\n", 32.0 / 7.0, sqrt(32.0 / 7.0));
59
60 fprintf(stderr, " uint32 size %u mean %f variance %f stddev %f\n",
61 sdu.size(), sdu.mean(), sdu.variance(), sdu.stddev());
62 fprintf(stderr, " int32 size %u mean %f variance %f stddev %f\n",
63 sdi.size(), sdi.mean(), sdi.variance(), sdi.stddev());
64 fprintf(stderr, " double size %u mean %f variance %f stddev %f\n",
65 sdd.size(), sdd.mean(), sdd.variance(), sdd.stddev());
66
67 assert(sdu.variance() == 32.0 / 7.0);
68 assert(sdi.variance() == 32.0 / 7.0);
69 assert(sdd.variance() == 32.0 / 7.0);
70
71 fprintf(stderr, "\n\n");
72 }
73
74
75
76 void
testRemove(void)77 testRemove(void) {
78 double values[10] = { 1, 2, 3, 4, 9, 8, 7, 6, 20, 30 };
79
80 stdDev<double> sd;
81
82 fprintf(stderr, "Expect final to be zero, and insert() == remove().\n");
83
84 for (int ii=0; ii<10; ii++) {
85 sd.insert(values[ii]);
86 fprintf(stderr, "insert[%2d] mean %8.4f stddev %8.4f\n", ii+1, sd.mean(), sd.stddev());
87 }
88
89 assert(sd.mean() == 9.0);
90
91 fprintf(stderr, "\n");
92
93 for (int ii=9; ii>=0; ii--) {
94 sd.remove(values[ii]);
95 fprintf(stderr, "remove[%2d] mean %8.4f stddev %8.4f\n", ii, sd.mean(), sd.stddev());
96 }
97
98 assert(sd.mean() == 0.0);
99 assert(sd.stddev() == 0.0);
100 }
101
102
103
104 void
testBig(uint32 nSamples)105 testBig(uint32 nSamples) {
106 histogramStatistics hist;
107 mtRandom mt(10 + nSamples);
108
109 fprintf(stderr, "\n");
110 fprintf(stderr, "testBig for nSamples %u\n", nSamples);
111
112 for (uint32 ii=0; ii<nSamples; ii++) {
113 uint32 val = (mt.mtRandom32() % 10);
114 uint32 num = (mt.mtRandom32() % 1000);
115
116 //fprintf(stderr, "INSERT val %u num %u\n", val, num);
117
118 hist.add(val, num);
119 }
120
121 hist.finalizeData();
122
123 fprintf(stderr, " size: %lu\n", hist.numberOfObjects());
124 fprintf(stderr, " mean: %f +- %f\n", hist.mean(), hist.stddev());
125 fprintf(stderr, " median: %lu +- %lu\n", hist.median(), hist.mad());
126 }
127
128
129
130 // This is testing for an odd bit of apparent numerical instability where
131 // the second to last remove() resulted in a negative variance. d=0.0019
132 // was the original case, but many others failed too.
133 void
testStability(void)134 testStability(void) {
135 double sum = 0.0;
136
137 fprintf(stderr, "\n");
138 fprintf(stderr, "testStability (shouldn't crash)\n");
139
140 for (double d = 0.0000; d < 0.5000; d += 0.0001) {
141 stdDev<double> sd;
142
143 sd.insert(0.000000);
144 sd.insert(d);
145 sd.insert(0.000000);
146
147 sd.remove(0.000000);
148 sd.remove(0.000000); // Fails here; the two if's in remove() resolve.
149
150 sum += sd.mean(); // Add d.
151
152 assert(d - 0.00001 <= sd.mean());
153 assert(sd.mean() <= d + 0.00001);
154 assert(sd.variance() == 0.0);
155
156 sd.remove(d);
157
158 sum += sd.mean(); // Add zero.
159
160 assert(sd.mean() == 0.0);
161 assert(sd.variance() == 0.0);
162 }
163
164 fprintf(stderr, " %18.16f\n", sum);
165 }
166
167
168
169 // Same idea, but this one fails before we hit the
170 // reset for one item. Grrrr!
171 void
testStability2(uint32 n)172 testStability2(uint32 n) {
173 double sum = 0.0;
174 stdDev<double> sd;
175
176 if (n == 1) {
177 fprintf(stderr, "\n");
178 fprintf(stderr, "testStability2 (values should be positive zero)\n");
179 }
180
181 for (uint32 ii=0; ii<n; ii++)
182 sd.insert(0.000000);
183
184 sd.insert(0.000190);
185 sd.remove(0.000190);
186 fprintf(stderr, "%2u %26.24f\n", n, sd.variance());
187 assert(sd.variance() >= 0.0);
188
189 sd.insert(0.000220);
190 sd.remove(0.000220);
191 fprintf(stderr, "%2u %26.24f\n", n, sd.variance());
192 assert(sd.variance() >= 0.0);
193
194 for (uint32 ii=0; ii<n; ii++)
195 sd.remove(0.000000);
196 }
197
198
199
200 int
main(int argc,char ** argv)201 main(int argc, char **argv) {
202
203 testInsert();
204 testRemove();
205
206 testBig(1);
207 testBig(2);
208 testBig(3);
209 testBig(4);
210 testBig(5);
211 testBig(10);
212 testBig(100);
213 testBig(1000);
214
215 testStability();
216
217 testStability2(1);
218 testStability2(2);
219 testStability2(3);
220 testStability2(4);
221
222 fprintf(stderr, "\n");
223 fprintf(stderr, "Success!\n");
224
225 exit(0);
226 }
227