1 // -*- C++ -*-
2 /**
3 * @brief Class for a product-kernel multidimensional mixture.
4 *
5 * Copyright 2005-2021 Airbus-EDF-IMACS-ONERA-Phimeca
6 *
7 * This library is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this library. If not, see <http://www.gnu.org/licenses/>.
19 *
20 */
21 #include <cmath>
22 #include "openturns/KernelMixture.hxx"
23 #include "openturns/RandomGenerator.hxx"
24 #include "openturns/Exception.hxx"
25 #include "openturns/PersistentObjectFactory.hxx"
26 #include "openturns/Brent.hxx"
27 #include "openturns/Os.hxx"
28
29 BEGIN_NAMESPACE_OPENTURNS
30
31 CLASSNAMEINIT(KernelMixture)
32
33 static const Factory<KernelMixture> Factory_KernelMixture;
34
35 /* Default constructor */
KernelMixture()36 KernelMixture::KernelMixture()
37 : ContinuousDistribution()
38 , p_kernel_(Distribution().getImplementation())
39 , bandwidth_(0)
40 , bandwidthInverse_(0)
41 , normalizationFactor_(0.0)
42 , sample_(1, 1)
43 , pdfApproximationCDF_()
44 , cdfApproximation_()
45 , pdfApproximationCCDF_()
46 , ccdfApproximation_()
47 , useApproximatePDFCDF_(false)
48 {
49 setName("KernelMixture");
50 setBandwidth(Point(1, 1.0));
51 }
52
53 /* Parameters constructor */
KernelMixture(const Distribution & kernel,const Point & bandwidth,const Sample & sample)54 KernelMixture::KernelMixture(const Distribution & kernel,
55 const Point & bandwidth,
56 const Sample & sample)
57 : ContinuousDistribution()
58 , p_kernel_(kernel.getImplementation())
59 , bandwidth_(0)
60 , bandwidthInverse_(0)
61 , normalizationFactor_(0.0)
62 , sample_(sample)
63 , pdfApproximationCDF_()
64 , cdfApproximation_()
65 , pdfApproximationCCDF_()
66 , ccdfApproximation_()
67 , useApproximatePDFCDF_(false)
68 {
69 setName("KernelMixture");
70 // We check if the given kernel is 1-D (product kernel)
71 if (kernel.getDimension() != 1) throw InvalidArgumentException(HERE) << "Error: only 1D kernel is allowed for multidimensional product kernels";
72 if (sample.getSize() == 0) throw InvalidArgumentException(HERE) << "Error: cannot build a KernelMixture based on an empty sample.";
73 setDimension(sample.getDimension());
74 // This call set also the range.
75 setBandwidth(bandwidth);
76 if ((getDimension() == 1) && (sample.getSize() >= ResourceMap::GetAsUnsignedInteger("KernelMixture-SmallSize")) && (sample.getSize() < ResourceMap::GetAsUnsignedInteger("KernelMixture-LargeSize")))
77 {
78 // Here we use the implementation provided by the DistributionImplementation class instead of the ContinuousDistribution class in order to use both the PDF and the CDF
79 Collection<PiecewiseHermiteEvaluation> coll(DistributionImplementation::interpolatePDFCDF(ResourceMap::GetAsUnsignedInteger("KernelMixture-PDFCDFDiscretization")));
80 pdfApproximationCDF_ = coll[0];
81 cdfApproximation_ = coll[1];
82 pdfApproximationCCDF_ = coll[2];
83 ccdfApproximation_ = coll[3];
84 useApproximatePDFCDF_ = true;
85 }
86 setParallel(p_kernel_->isParallel());
87 }
88
89 /* Comparison operator */
operator ==(const KernelMixture & other) const90 Bool KernelMixture::operator ==(const KernelMixture & other) const
91 {
92 if (this == &other) return true;
93 if (useApproximatePDFCDF_) return (bandwidth_ == other.bandwidth_) && (*p_kernel_ == *other.p_kernel_) && (pdfApproximationCDF_ == other.pdfApproximationCDF_) && (cdfApproximation_ == other.cdfApproximation_) && (pdfApproximationCCDF_ == other.pdfApproximationCCDF_) && (ccdfApproximation_ == other.ccdfApproximation_);
94 return (bandwidth_ == other.bandwidth_) && (*p_kernel_ == *other.p_kernel_) && (sample_ == other.sample_);
95 }
96
equals(const DistributionImplementation & other) const97 Bool KernelMixture::equals(const DistributionImplementation & other) const
98 {
99 const KernelMixture* p_other = dynamic_cast<const KernelMixture*>(&other);
100 return p_other && (*this == *p_other);
101 }
102
103 /* String converter */
__repr__() const104 String KernelMixture::__repr__() const
105 {
106 OSS oss;
107 oss << "class=" << KernelMixture::GetClassName()
108 << " name=" << getName()
109 << " kernel=" << p_kernel_->__repr__()
110 << " bandwidth=" << bandwidth_
111 << " sample=" << sample_;
112 return oss;
113 }
114
__str__(const String & offset) const115 String KernelMixture::__str__(const String & offset) const
116 {
117 OSS oss;
118 oss << getClassName() << "(kernel = " << p_kernel_->__str__() << ", bandwidth = " << bandwidth_.__str__() << ", sample = " << Os::GetEndOfLine() << offset << sample_.__str__(offset);
119 return oss;
120 }
121
122
123 /* Compute the numerical range of the distribution given the parameters values */
computeRange()124 void KernelMixture::computeRange()
125 {
126 const Interval kernelRange(p_kernel_->getRange());
127 const UnsignedInteger dimension = getDimension();
128 const Point lowerBound(sample_.getMin() + kernelRange.getLowerBound()[0] * bandwidth_);
129 const Point upperBound(sample_.getMax() + kernelRange.getUpperBound()[0] * bandwidth_);
130 const Interval::BoolCollection finiteLowerBound(dimension, kernelRange.getFiniteLowerBound()[0]);
131 const Interval::BoolCollection finiteUpperBound(dimension, kernelRange.getFiniteUpperBound()[0]);
132 setRange(Interval(lowerBound, upperBound, finiteLowerBound, finiteUpperBound));
133 }
134
135 /* Kernel accessor */
setKernel(const Distribution & kernel)136 void KernelMixture::setKernel(const Distribution & kernel)
137 {
138 // We check if the kernel is 1D
139 if (kernel.getDimension() != 1) throw InvalidArgumentException(HERE) << "Error: the kernel must be 1D for product kernel mixture";
140 p_kernel_ = kernel.getImplementation();
141 isAlreadyComputedMean_ = false;
142 isAlreadyComputedCovariance_ = false;
143 computeRange();
144 }
145
getKernel() const146 Distribution KernelMixture::getKernel() const
147 {
148 return *p_kernel_;
149 }
150
151 /* Sample accessor */
setInternalSample(const Sample & sample)152 void KernelMixture::setInternalSample(const Sample & sample)
153 {
154 if (sample.getSize() == 0) throw InvalidArgumentException(HERE) << "Error: cannot build a KernelMixture based on an empty sample.";
155 if (sample.getDimension() != getDimension()) throw InvalidArgumentException(HERE) << "Error: the given sample has dimension=" << sample.getDimension() << ", expected dimension=" << getDimension() << ".";
156 sample_ = sample;
157 computeRange();
158 if ((getDimension() == 1) && (sample.getSize() >= ResourceMap::GetAsUnsignedInteger("KernelMixture-SmallSize")) && (sample.getSize() < ResourceMap::GetAsUnsignedInteger("KernelMixture-LargeSize")))
159 {
160 // Here we use the implementation provided by the DistributionImplementation class instead of the ContinuousDistribution class in order to use both the PDF and the CDF
161 Collection<PiecewiseHermiteEvaluation> coll(DistributionImplementation::interpolatePDFCDF(ResourceMap::GetAsUnsignedInteger("KernelMixture-PDFCDFDiscretization")));
162 pdfApproximationCDF_ = coll[0];
163 cdfApproximation_ = coll[1];
164 pdfApproximationCCDF_ = coll[2];
165 ccdfApproximation_ = coll[3];
166 useApproximatePDFCDF_ = true;
167 }
168 isAlreadyComputedMean_ = false;
169 isAlreadyComputedCovariance_ = false;
170 }
171
getInternalSample() const172 Sample KernelMixture::getInternalSample() const
173 {
174 return sample_;
175 }
176
177
178 /* Bandwidth accessor */
setBandwidth(const Point & bandwidth)179 void KernelMixture::setBandwidth(const Point & bandwidth)
180 {
181 const UnsignedInteger dimension = getDimension();
182 normalizationFactor_ = sample_.getSize();
183 if (bandwidth.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the dimensions of the bandwidth and the sample must be equal";
184 bandwidthInverse_ = Point(dimension);
185 for (UnsignedInteger i = 0; i < dimension; ++i)
186 {
187 const Scalar hi = bandwidth[i];
188 if (!(hi > 0.0)) throw InvalidArgumentException(HERE) << "Error: the bandwidth components must be > 0, here bandwidth=" << bandwidth;
189 bandwidthInverse_[i] = 1.0 / hi;
190 normalizationFactor_ *= hi;
191 }
192 bandwidth_ = bandwidth;
193 normalizationFactor_ = 1.0 / normalizationFactor_;
194 isAlreadyComputedMean_ = false;
195 isAlreadyComputedCovariance_ = false;
196 computeRange();
197 }
198
199 /* Distribution collection accessor */
getBandwidth() const200 Point KernelMixture::getBandwidth() const
201 {
202 return bandwidth_;
203 }
204
205 /* Virtual constructor */
clone() const206 KernelMixture * KernelMixture::clone() const
207 {
208 return new KernelMixture(*this);
209 }
210
211 /* Get one realization of the KernelMixture */
getRealization() const212 Point KernelMixture::getRealization() const
213 {
214 // Select the atom uniformly amongst the possible points
215 Point result(sample_[RandomGenerator::IntegerGenerate(sample_.getSize())]);
216 // Then add a random noise according to the product kernel
217 const UnsignedInteger dimension = getDimension();
218 const Sample kernelSample(p_kernel_->getSample(dimension));
219 for (UnsignedInteger i = 0; i < dimension; ++i) result[i] += bandwidth_[i] * kernelSample(i, 0);
220 return result;
221 }
222
223 /* Get the DDF of the KernelMixture */
computeDDF(const Point & point) const224 Point KernelMixture::computeDDF(const Point & point) const
225 {
226 const UnsignedInteger dimension = getDimension();
227 if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
228
229 Point ddfValue(dimension, 0.0);
230 // Quick rejection test
231 if (!getRange().numericallyContains(point)) return ddfValue;
232 const UnsignedInteger size = sample_.getSize();
233 for(UnsignedInteger i = 0; i < size; ++i)
234 {
235 Point atom(dimension, 0.0);
236 Point kernelPdfAtom(dimension, 0.0);
237 Scalar pdfAtom = 1.0;
238 for (UnsignedInteger j = 0; j < dimension; ++j)
239 {
240 atom[j] = (point[j] - sample_(i, j)) * bandwidthInverse_[j];
241 kernelPdfAtom[j] = p_kernel_->computePDF(Point(1, atom[j]));
242 pdfAtom *= kernelPdfAtom[j];
243 }
244 for (UnsignedInteger j = 0; j < dimension; ++j)
245 {
246 // Only aggregate the values associated with kernelPdfAtom>0
247 if (kernelPdfAtom[j] > 0.0)
248 ddfValue[j] += pdfAtom / kernelPdfAtom[j] * p_kernel_->computeDDF(Point(1, atom[j]))[0] * bandwidthInverse_[j];
249 }
250 } /* end for */
251 return normalizationFactor_ * ddfValue;
252 }
253
254 /* Get the PDF of the KernelMixture */
computePDF(const Point & point) const255 Scalar KernelMixture::computePDF(const Point & point) const
256 {
257 const UnsignedInteger dimension = getDimension();
258 if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
259 if (useApproximatePDFCDF_)
260 {
261 if (point[0] < getMean()[0]) return pdfApproximationCDF_(point)[0];
262 else return pdfApproximationCCDF_(point)[0];
263 }
264 Scalar pdfValue = 0.0;
265 const UnsignedInteger size = sample_.getSize();
266 if (dimension == 1)
267 {
268 const Scalar x = point[0];
269 const Scalar h = bandwidth_[0];
270 for(UnsignedInteger i = 0; i < size; ++i) pdfValue += p_kernel_->computePDF((x - sample_(i, 0)) / h);
271 return pdfValue / (h * size);
272 }
273 // Quick rejection test
274 //if (!getRange().numericallyContains(point)) return pdfValue;
275 //const Scalar pdfEpsilon = p_kernel_->getPDFEpsilon();
276 for(UnsignedInteger i = 0; i < size; ++i)
277 {
278 Scalar pdfAtom = p_kernel_->computePDF((point[0] - sample_(i, 0)) * bandwidthInverse_[0]);
279 for (UnsignedInteger j = 1; j < dimension; ++j)
280 {
281 //if (pdfAtom < pdfEpsilon) break;
282 pdfAtom *= p_kernel_->computePDF((point[j] - sample_(i, j)) * bandwidthInverse_[j]);
283 }
284 pdfValue += pdfAtom;
285 } /* end for */
286 return normalizationFactor_ * pdfValue;
287 }
288
289 /* Get the CDF of the KernelMixture */
computeCDF(const Point & point) const290 Scalar KernelMixture::computeCDF(const Point & point) const
291 {
292 const UnsignedInteger dimension = getDimension();
293 if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
294 if (useApproximatePDFCDF_)
295 {
296 if (point[0] < getMean()[0]) return cdfApproximation_(point)[0];
297 else return 1.0 - ccdfApproximation_(point)[0];
298 }
299 Scalar cdfValue = 0.0;
300 const UnsignedInteger size = sample_.getSize();
301 if (dimension == 1)
302 {
303 const Scalar x = point[0];
304 const Scalar h = bandwidth_[0];
305 for(UnsignedInteger i = 0; i < size; ++i)
306 cdfValue += p_kernel_->computeCDF((x - sample_(i, 0)) / h);
307 return cdfValue / size;
308 }
309 // Check against the range of the distribution
310 Bool allTooLarge = true;
311 Bool oneTooSmall = false;
312 const Point lower(getRange().getLowerBound());
313 const Point upper(getRange().getUpperBound());
314 for (UnsignedInteger i = 0; i < dimension; ++i)
315 {
316 allTooLarge = allTooLarge && (point[i] >= upper[i]);
317 oneTooSmall = oneTooSmall || (point[i] <= lower[i]);
318 }
319 if (allTooLarge) return 1.0;
320 if (oneTooSmall) return 0.0;
321 const Scalar cdfEpsilon = p_kernel_->getCDFEpsilon();
322 for (UnsignedInteger i = 0; i < size; ++i)
323 {
324 Scalar cdfAtom = p_kernel_->computeCDF((point[0] - sample_(i, 0)) * bandwidthInverse_[0]);
325 for (UnsignedInteger j = 1; j < dimension; ++j)
326 {
327 if (cdfAtom < cdfEpsilon) break;
328 cdfAtom *= p_kernel_->computeCDF((point[j] - sample_(i, j)) * bandwidthInverse_[j]);
329 }
330 cdfValue += cdfAtom;
331 } /* end for */
332 return cdfValue / size;
333 }
334
335 /* Get the complementary CDF of the distribution */
computeComplementaryCDF(const Point & point) const336 Scalar KernelMixture::computeComplementaryCDF(const Point & point) const
337 {
338 const UnsignedInteger dimension = getDimension();
339 if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
340 if (useApproximatePDFCDF_)
341 {
342 if (point[0] < getMean()[0]) return 1.0 - cdfApproximation_(point)[0];
343 else return ccdfApproximation_(point)[0];
344 }
345 // More accurate computation for 1D case...
346 if (dimension == 1) return computeSurvivalFunction(point);
347 // ... than in the general case
348 return DistributionImplementation::computeComplementaryCDF(point);
349 }
350
351 /* Get the survival function of the KernelMixture */
computeSurvivalFunction(const Point & point) const352 Scalar KernelMixture::computeSurvivalFunction(const Point & point) const
353 {
354 const UnsignedInteger dimension = getDimension();
355 if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
356 if (useApproximatePDFCDF_)
357 {
358 if (point[0] < getMean()[0]) return 1.0 - cdfApproximation_(point)[0];
359 else return ccdfApproximation_(point)[0];
360 }
361 // Check against the range of the distribution
362 Bool oneTooLarge = true;
363 Bool allTooSmall = false;
364 const Point lower(getRange().getLowerBound());
365 const Point upper(getRange().getUpperBound());
366 for (UnsignedInteger i = 0; i < dimension; ++i)
367 {
368 oneTooLarge = oneTooLarge && (point[i] >= upper[i]);
369 allTooSmall = allTooSmall || (point[i] <= lower[i]);
370 }
371 if (oneTooLarge) return 0.0;
372 if (allTooSmall) return 1.0;
373 const Scalar cdfEpsilon = p_kernel_->getCDFEpsilon();
374 Scalar survivalValue = 0.0;
375 const UnsignedInteger size = sample_.getSize();
376 for (UnsignedInteger i = 0; i < size; ++i)
377 {
378 Scalar cdfAtom = p_kernel_->computeSurvivalFunction((point[0] - sample_(i, 0)) * bandwidthInverse_[0]);
379 for (UnsignedInteger j = 1; j < dimension; ++j)
380 {
381 if (cdfAtom < cdfEpsilon) break;
382 cdfAtom *= p_kernel_->computeSurvivalFunction((point[j] - sample_(i, j)) * bandwidthInverse_[j]);
383 }
384 survivalValue += cdfAtom;
385 } /* end for */
386 return survivalValue / size;
387 }
388
389 /* Get the probability content of an interval */
computeProbability(const Interval & interval) const390 Scalar KernelMixture::computeProbability(const Interval & interval) const
391 {
392 const UnsignedInteger dimension = getDimension();
393 if (interval.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given interval must have dimension=" << dimension << ", here dimension=" << interval.getDimension();
394 const Interval reducedInterval(interval.intersect(getRange()));
395 if (reducedInterval == getRange()) return 1.0;
396 if (reducedInterval.isEmpty()) return 0.0;
397 const Point lowerBound(reducedInterval.getLowerBound());
398 const Point upperBound(reducedInterval.getUpperBound());
399 if (useApproximatePDFCDF_)
400 {
401 const Scalar mean = getMean()[0];
402 if (lowerBound[0] > mean) return ccdfApproximation_(lowerBound)[0] - ccdfApproximation_(upperBound)[0];
403 else return cdfApproximation_(upperBound)[0] - cdfApproximation_(lowerBound)[0];
404 }
405 Scalar probability = 0.0;
406 const UnsignedInteger size = sample_.getSize();
407 if (dimension == 1)
408 {
409 const Scalar hInverse = bandwidthInverse_[0];
410 for(UnsignedInteger i = 0; i < size; ++i)
411 probability += p_kernel_->computeProbability(Interval((lowerBound[0] - sample_(i, 0)) * hInverse, (upperBound[0] - sample_(i, 0)) * hInverse));
412 return probability / size;
413 }
414 const Scalar probabilityEpsilon = p_kernel_->getCDFEpsilon();
415 for (UnsignedInteger i = 0; i < size; ++i)
416 {
417 Scalar probabilityAtom = p_kernel_->computeProbability(Interval((lowerBound[0] - sample_(i, 0)) * bandwidthInverse_[0], (upperBound[0] - sample_(i, 0)) * bandwidthInverse_[0]));
418 for (UnsignedInteger j = 1; j < dimension; ++j)
419 {
420 if (probabilityAtom < probabilityEpsilon) break;
421 probabilityAtom *= p_kernel_->computeProbability(Interval((lowerBound[j] - sample_(i, j)) * bandwidthInverse_[j], (upperBound[j] - sample_(i, j)) * bandwidthInverse_[j]));
422 }
423 probability += probabilityAtom;
424 } /* end for */
425 return probability / size;
426 }
427
428 /* Compute the quantile function of the distribution */
computeScalarQuantile(const Scalar prob,const Bool tail) const429 Scalar KernelMixture::computeScalarQuantile(const Scalar prob,
430 const Bool tail) const
431 {
432 if (dimension_ != 1) throw InvalidDimensionException(HERE) << "Error: the method computeScalarQuantile is only defined for 1D distributions";
433 if (!useApproximatePDFCDF_) return DistributionImplementation::computeScalarQuantile(prob, tail);
434 const Scalar a = getRange().getLowerBound()[0];
435 const Scalar b = getRange().getUpperBound()[0];
436 if (prob <= 0.0) return (tail ? b : a);
437 if (prob >= 1.0) return (tail ? a : b);
438 const UnsignedInteger n = cdfApproximation_.getLocations().getSize();
439 if (tail)
440 {
441 // Here we have to solve ComplementaryCDF(x) = prob which is mathematically
442 // equivalent to CDF(x) = 1 - prob, but numerically different with an
443 // accuracy that depends on prob.
444 // The cut-off is around the mean value
445 if (prob <= ccdfApproximation_.getValues()(0, 0)) return Brent(quantileEpsilon_, cdfEpsilon_, cdfEpsilon_, quantileIterations_).solve(ccdfApproximation_, prob, ccdfApproximation_.getLocations()[0], ccdfApproximation_.getLocations()[n - 1], ccdfApproximation_.getValues()(0, 0), ccdfApproximation_.getValues()(n - 1, 0));
446 return Brent(quantileEpsilon_, cdfEpsilon_, cdfEpsilon_, quantileIterations_).solve(cdfApproximation_, 1.0 - prob, cdfApproximation_.getLocations()[0], cdfApproximation_.getLocations()[n - 1], cdfApproximation_.getValues()(0, 0), cdfApproximation_.getValues()(n - 1, 0));
447 }
448 // Here we have to solve CDF(x) = prob which is mathematically
449 // equivalent to ComplementaryCDF(x) = 1 - prob, but numerically
450 // different with an accuracy that depends on prob.
451 // The cut-off is around the mean value
452 if (prob <= cdfApproximation_.getValues()(n - 1, 0)) return Brent(quantileEpsilon_, cdfEpsilon_, cdfEpsilon_, quantileIterations_).solve(cdfApproximation_, prob, cdfApproximation_.getLocations()[0], cdfApproximation_.getLocations()[n - 1], cdfApproximation_.getValues()(0, 0), cdfApproximation_.getValues()(n - 1, 0));
453 return Brent(quantileEpsilon_, cdfEpsilon_, cdfEpsilon_, quantileIterations_).solve(ccdfApproximation_, 1.0 - prob, ccdfApproximation_.getLocations()[0], ccdfApproximation_.getLocations()[n - 1], ccdfApproximation_.getValues()(0, 0), ccdfApproximation_.getValues()(n - 1, 0));
454 }
455
456 /* Get the characteristic function of the distribution, i.e. phi(u) = E(exp(I*u*X)) */
computeCharacteristicFunction(const Scalar x) const457 Complex KernelMixture::computeCharacteristicFunction(const Scalar x) const
458 {
459 if (x == 0.0) return 1.0;
460 Complex cfValue(0.0);
461 const UnsignedInteger size = sample_.getSize();
462 for(UnsignedInteger i = 0; i < size; ++i)
463 {
464 cfValue += p_kernel_->computeCharacteristicFunction(x * bandwidth_[0]) * std::exp(Complex(0.0, sample_(i, 0) * x));
465 } /* end for */
466 return cfValue * (1.0 / size);
467 }
468
469 /* Get the PDF gradient of the distribution */
computePDFGradient(const Point & point) const470 Point KernelMixture::computePDFGradient(const Point & point) const
471 {
472 const UnsignedInteger dimension = getDimension();
473 if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
474
475 throw NotYetImplementedException(HERE) << "In KernelMixture::computePDFGradient(const Point & point) const";
476 }
477
478 /* Get the CDF gradient of the distribution */
computeCDFGradient(const Point & point) const479 Point KernelMixture::computeCDFGradient(const Point & point) const
480 {
481 const UnsignedInteger dimension = getDimension();
482 if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
483
484 throw NotYetImplementedException(HERE) << "In KernelMixture::computeCDFGradient(const Point & point) const";
485 }
486
487 /* Compute the PDF of Xi | X1, ..., Xi-1. x = Xi, y = (X1,...,Xi-1)
488 */
computeConditionalPDF(const Scalar x,const Point & y) const489 Scalar KernelMixture::computeConditionalPDF(const Scalar x,
490 const Point & y) const
491 {
492 const UnsignedInteger conditioningDimension = y.getDimension();
493 if (conditioningDimension >= getDimension()) throw InvalidArgumentException(HERE) << "Error: cannot compute a conditional PDF with a conditioning point of dimension greater or equal to the distribution dimension.";
494 // Special case for no conditioning or independent copula
495 if ((conditioningDimension == 0) || (hasIndependentCopula())) return getMarginal(conditioningDimension).computePDF(x);
496 // Build the conditional mixture weights
497 const UnsignedInteger size = sample_.getSize();
498 Scalar jointPDF = 0.0;
499 Scalar marginalPDF = 0.0;
500 for (UnsignedInteger i = 0; i < size; ++i)
501 {
502 Scalar marginalAtomPDF = 1.0;
503 for (UnsignedInteger j = 0; j < conditioningDimension; ++j)
504 marginalAtomPDF *= p_kernel_->computePDF((y[j] - sample_(i, j)) / bandwidth_[j]);
505 marginalPDF += marginalAtomPDF;
506 jointPDF += marginalAtomPDF * p_kernel_->computePDF((x - sample_(i, conditioningDimension)) / bandwidth_[conditioningDimension]);
507 }
508 if (marginalPDF <= 0.0) return 0.0;
509 // No need to normalize by 1/h as it simplifies
510 return jointPDF / marginalPDF;
511 }
512
computeSequentialConditionalPDF(const Point & x) const513 Point KernelMixture::computeSequentialConditionalPDF(const Point & x) const
514 {
515 Point result(dimension_);
516 const UnsignedInteger size = sample_.getSize();
517 Point atomsValues(size);
518 Scalar pdfConditioning = 0.0;
519 Scalar currentX = x[0];
520 Scalar currentH = bandwidth_[0];
521 for (UnsignedInteger i = 0; i < size; ++i)
522 {
523 atomsValues[i] = p_kernel_->computePDF((currentX - sample_(i, 0)) / currentH);
524 pdfConditioning += atomsValues[i];
525 }
526 result[0] = pdfConditioning / (size * currentH);
527 for (UnsignedInteger conditioningDimension = 1; conditioningDimension < dimension_; ++conditioningDimension)
528 {
529 // Return the result as soon as a conditional pdf is zero
530 if (pdfConditioning == 0) return result;
531 currentX = x[conditioningDimension];
532 currentH = bandwidth_[conditioningDimension];
533 Scalar pdfConditioned = 0.0;
534 for (UnsignedInteger i = 0; i < size; ++i)
535 {
536 atomsValues[i] *= p_kernel_->computePDF((currentX - sample_(i, conditioningDimension)) / currentH);
537 pdfConditioned += atomsValues[i];
538 }
539 result[conditioningDimension] = pdfConditioned / pdfConditioning;
540 pdfConditioning = pdfConditioned;
541 } // conditioningDimension
542 return result;
543 }
544
545 /* Compute the CDF of Xi | X1, ..., Xi-1. x = Xi, y = (X1,...,Xi-1) */
computeConditionalCDF(const Scalar x,const Point & y) const546 Scalar KernelMixture::computeConditionalCDF(const Scalar x,
547 const Point & y) const
548 {
549 const UnsignedInteger conditioningDimension = y.getDimension();
550 if (conditioningDimension >= getDimension()) throw InvalidArgumentException(HERE) << "Error: cannot compute a conditional CDF with a conditioning point of dimension greater or equal to the distribution dimension.";
551 // Special case for no conditioning or independent copula
552 if ((conditioningDimension == 0) || (hasIndependentCopula())) return getMarginal(conditioningDimension).computeCDF(x);
553 // Build the conditional mixture weights
554 const UnsignedInteger size = sample_.getSize();
555 Scalar jointCDF = 0.0;
556 Scalar marginalPDF = 0.0;
557 Scalar h = bandwidth_[conditioningDimension];
558 for (UnsignedInteger i = 0; i < size; ++i)
559 {
560 Scalar marginalAtomPDF = p_kernel_->computePDF((y[0] - sample_(i, 0)) / bandwidth_[0]);
561 for (UnsignedInteger j = 1; j < conditioningDimension; ++j)
562 marginalAtomPDF *= p_kernel_->computePDF((y[j] - sample_(i, j)) / bandwidth_[j]);
563 marginalPDF += marginalAtomPDF;
564 jointCDF += marginalAtomPDF * p_kernel_->computeCDF((x - sample_(i, conditioningDimension)) / h);
565 }
566 if (marginalPDF <= 0.0) return 0.0;
567 // No need to normalize by 1/h as it simplifies
568 return std::min(1.0, jointCDF / marginalPDF);
569 }
570
computeSequentialConditionalCDF(const Point & x) const571 Point KernelMixture::computeSequentialConditionalCDF(const Point & x) const
572 {
573 /* pdf(x_n|x_0,...,x_{n-1})=pdf(x_1,...,x_n)/pdf(x_1,...,x_{n-1})
574 cdf(x_n|x_0,...,x_{n-1})=\int_{-\infty}^{x_n}pdf(x_1,...,t)/pdf(x_1,...,x_{n-1})dt
575 \int_{-\infty}^{x_n}pdf(x_1,...,t)dt=1/N\sum_{i=1}^N\int_{-\infty}^{x_n}[\prod_{j=1}^{n-1} k((x_j-X^i_j)/h_j)/h_j]k((t-X^i_n)/h_n)/h_n dt
576 =1/N\sum_{i=1}^N P_{n-1}^i K((x_n-X^i_n)/h_n)dt
577 and
578 pdf(x_1,...,x_{n-1})=1/N\sum_{i=1}^N\prod_{j=1}^{n-1}k((x_j-X^i_j)/h_j)/h_j=1/N\sum_{i=1}^N P_{n-1}^i
579 */
580 Point result(dimension_);
581 const UnsignedInteger size = sample_.getSize();
582 Point atomsValues(size);
583 Scalar currentX = x[0];
584 Scalar currentH = bandwidth_[0];
585 Scalar pdfConditioning = 0.0;
586 Scalar pdfConditioned = 0.0;
587 Scalar cdfConditioned = 0.0;
588 for (UnsignedInteger i = 0; i < size; ++i)
589 {
590 const Scalar kI = p_kernel_->computePDF((currentX - sample_(i, 0)) / currentH) / currentH;
591 cdfConditioned += p_kernel_->computeCDF((currentX - sample_(i, 0)) / currentH);
592 atomsValues[i] = kI;
593 pdfConditioning += kI;
594 }
595 result[0] = cdfConditioned / size;
596 for (UnsignedInteger conditioningDimension = 1; conditioningDimension < dimension_; ++conditioningDimension)
597 {
598 // Return the result as soon as a conditional pdf is zero
599 if (pdfConditioning == 0) return result;
600 currentX = x[conditioningDimension];
601 currentH = bandwidth_[conditioningDimension];
602 pdfConditioned = 0.0;
603 cdfConditioned = 0.0;
604 for (UnsignedInteger i = 0; i < size; ++i)
605 {
606 cdfConditioned += atomsValues[i] * p_kernel_->computeCDF((currentX - sample_(i, conditioningDimension)) / currentH);
607 atomsValues[i] *= p_kernel_->computePDF((currentX - sample_(i, conditioningDimension)) / currentH) / currentH;
608 pdfConditioned += atomsValues[i];
609 }
610 result[conditioningDimension] = cdfConditioned / pdfConditioning;
611 pdfConditioning = pdfConditioned;
612 } // conditioningDimension
613 return result;
614 }
615
616 /* Get the i-th marginal distribution */
getMarginal(const UnsignedInteger i) const617 Distribution KernelMixture::getMarginal(const UnsignedInteger i) const
618 {
619 const UnsignedInteger dimension = getDimension();
620 if (i >= dimension) throw InvalidArgumentException(HERE) << "The index of a marginal distribution must be in the range [0, dim-1]";
621 // Special case for dimension 1
622 if (dimension == 1) return clone();
623 // General case
624 KernelMixture::Implementation marginal(new KernelMixture(*p_kernel_, Point(1, bandwidth_[i]), sample_.getMarginal(i)));
625 marginal->setDescription(Description(1, getDescription()[i]));
626 return marginal;
627 }
628
629 /* Get the distribution of the marginal distribution corresponding to indices dimensions */
getMarginal(const Indices & indices) const630 Distribution KernelMixture::getMarginal(const Indices & indices) const
631 {
632 const UnsignedInteger dimension = getDimension();
633 if (!indices.check(dimension)) throw InvalidArgumentException(HERE) << "The indices of a marginal distribution must be in the range [0, dim-1] and must be different";
634 // Special case for dimension 1
635 if (dimension == 1) return clone();
636 // General case
637 const UnsignedInteger marginalDimension = indices.getSize();
638 Description description(getDescription());
639 Description marginalDescription(marginalDimension);
640 Point marginalBandwidth(marginalDimension);
641 for (UnsignedInteger i = 0; i < marginalDimension; ++i)
642 {
643 const UnsignedInteger index_i = indices[i];
644 marginalBandwidth[i] = bandwidth_[index_i];
645 marginalDescription[i] = description[index_i];
646 }
647 KernelMixture::Implementation marginal(new KernelMixture(*p_kernel_, marginalBandwidth, sample_.getMarginal(indices)));
648 marginal->setDescription(marginalDescription);
649 return marginal;
650 }
651
652 /* Compute the mean of the KernelMixture
653 * PDF(x) = C\sum_{i=1}^N\prod_{j=1}^n K((X^i_j-x_j)/h_j),
654 * where:
655 * C = \frac{1}{N\prod_{k=1}^n h_k}
656 * mu_j = \int_{-\infty}^{\infty} x_j\frac{1}{Nh_j}\sum_{i=1}^N K((x_j - X^i_j) / h_j) dx_j
657 = \int_{-\infty}^{\infty} \frac{1}{Nh_j}\sum_{i=1}^N(h_jt + X^i_j)K(t) h_j dt
658 = \frac{1}{Nh_j}\sum_{i=1}^NX^i_j + \int_{-\infty}^{\infty} \frac{1}{N}\sum_{i=1}^Nh_jtK(t) dt
659 = mu_sample_j + h_j mu_K
660 */
computeMean() const661 void KernelMixture::computeMean() const
662 {
663 // We know that the kernel is 1D, so its mean value is actually a scalar
664 const Scalar meanKernel = p_kernel_->getMean()[0];
665 mean_ = sample_.computeMean();
666 isAlreadyComputedMean_ = true;
667 // Special case for symmetric kernel
668 if (meanKernel == 0.0) return;
669 // General case
670 mean_ += meanKernel * bandwidth_;
671 }
672
673 /* Compute the covariance of the KernelMixture
674 Covariance(KernelMixture) = (1-1/N) Covariance(sample) + Covariance(kernel) * diag(bandwidth[i]^2)
675 */
computeCovariance() const676 void KernelMixture::computeCovariance() const
677 {
678 const UnsignedInteger dimension = getDimension();
679 // We know that the kernel is 1D, so its standard deviation is actually a scalar
680 const Scalar sigmaKernel = p_kernel_->getStandardDeviation()[0];
681 // Covariance(sample) term, with the proper scaling
682 covariance_ = CovarianceMatrix(dimension, Collection<Scalar>(sample_.computeCovariance().getImplementation()->operator*(1.0 - 1.0 / sample_.getSize())));
683 // Add the diagonal kernel covariance contribution
684 for (UnsignedInteger i = 0; i < dimension; ++i)
685 covariance_(i, i) += std::pow(bandwidth_[i] * sigmaKernel, 2);
686 isAlreadyComputedCovariance_ = true;
687 }
688
689 /* Get the standard deviation of the distribution. We don't use the square root of the covariance since it involves a O(dim^2) computation where only a O(dim) computation is required.
690 std = [var_sample + h^2 var_K]^(1/2)
691 */
getStandardDeviation() const692 Point KernelMixture::getStandardDeviation() const
693 {
694 const UnsignedInteger dimension = getDimension();
695 // We know that the kernel is 1D, so its standard deviation is actually a scalar
696 const Scalar sigmaKernel = p_kernel_->getStandardDeviation()[0];
697 Point result(sample_.computeCenteredMoment(2));
698 for (UnsignedInteger i = 0; i < dimension; ++i)
699 result[i] = std::sqrt(result[i] + std::pow(bandwidth_[i] * sigmaKernel, 2));
700 return result;
701 }
702
703 /* Get the skewness of the distribution:
704 skew = [skew_sample * std_sample^3 + h^3 * skew_K * std_K^3] / std^3
705 */
getSkewness() const706 Point KernelMixture::getSkewness() const
707 {
708 const UnsignedInteger dimension = getDimension();
709 // We know that the kernel is 1D, so its standard deviation is actually a scalar
710 const Scalar sigmaKernel = p_kernel_->getStandardDeviation()[0];
711 // We know that the kernel is 1D, so its skewness is actually a scalar
712 const Scalar skewnessKernel = p_kernel_->getSkewness()[0];
713 // Standard deviation of the KernelMixture
714 const Point sigma(getStandardDeviation());
715 Point result(sample_.computeCenteredMoment(3));
716 for (UnsignedInteger i = 0; i < dimension; ++i)
717 result[i] = (result[i] + std::pow(bandwidth_[i] * sigmaKernel, 3) * skewnessKernel) / std::pow(sigma[i], 3);
718 return result;
719 }
720
721 /* Get the kurtosis of the distribution:
722 kurt = [kurt_sample * std_sample^4 + h^4 * kurt_K * std_K^4 + 6 * h^2 * var_sample * var_K] / std^4
723 */
getKurtosis() const724 Point KernelMixture::getKurtosis() const
725 {
726 const UnsignedInteger dimension = getDimension();
727 // We know that the kernel is 1D, so its standard deviation is actually a scalar
728 const Scalar sigmaKernel = p_kernel_->getStandardDeviation()[0];
729 // We know that the kernel is 1D, so its skewness is actually a scalar
730 const Scalar kurtosisKernel = p_kernel_->getKurtosis()[0];
731 // Standard deviation of the sample
732 const Point varSample(sample_.computeCenteredMoment(2));
733 // Standard deviation of the KernelMixture
734 const Point sigma(getStandardDeviation());
735 Point result(sample_.computeCenteredMoment(4));
736 for (UnsignedInteger i = 0; i < dimension; ++i)
737 result[i] = (result[i] + std::pow(bandwidth_[i] * sigmaKernel, 4) * kurtosisKernel + 6.0 * varSample[i] * std::pow(bandwidth_[i] * sigmaKernel, 2)) / std::pow(sigma[i], 4);
738 return result;
739 }
740
741 /* Parameters value and description accessor */
getParametersCollection() const742 KernelMixture::PointWithDescriptionCollection KernelMixture::getParametersCollection() const
743 {
744 const UnsignedInteger dimension = getDimension();
745 const UnsignedInteger size = sample_.getSize();
746 PointWithDescriptionCollection parameters(dimension + (dimension > 1 ? 1 : 0));
747 // The marginal parameters : the sample and the bandwidth
748 for (UnsignedInteger i = 0; i < dimension; ++i)
749 {
750 PointWithDescription marginalParameters(size + 1);
751 Description description(marginalParameters.getDimension());
752 for (UnsignedInteger j = 0; j < size; ++j)
753 {
754 marginalParameters[j] = sample_(j, i);
755 if (dimension > 1) description[j] = (OSS() << "x_" << j << "^" << i);
756 else description[j] = (OSS() << "x_" << j);
757 }
758 marginalParameters[size] = bandwidth_[i];
759 if (dimension > 1) description[size] = (OSS() << "h_" << i);
760 else description[size] = "h";
761 marginalParameters.setDescription(description);
762 parameters[i] = marginalParameters;
763 }
764 // The dependence parameters is the union of all the parameters as they all contribute to the copula, presented in a different way
765 if (dimension > 1)
766 {
767 PointWithDescription dependence(dimension * (size + 1));
768 Description description(dependence.getDimension());
769 UnsignedInteger index = 0;
770 for (UnsignedInteger i = 0; i < size; ++i)
771 for (UnsignedInteger j = 0; j < dimension; ++j)
772 {
773 dependence[index] = sample_(i, j);
774 description[index] = (OSS() << "x_" << i << "^" << j);
775 ++index;
776 }
777 for (UnsignedInteger i = 0; i < dimension; ++i)
778 {
779 dependence[index] = bandwidth_[i];
780 description[index] = (OSS() << "h_" << i);
781 ++index;
782 }
783 dependence.setDescription(description);
784 parameters[dimension] = dependence;
785 }
786 return parameters;
787 } // getParametersCollection
788
789
getParameter() const790 Point KernelMixture::getParameter() const
791 {
792 const UnsignedInteger size = sample_.getSize();
793 Point parameter;
794 for (UnsignedInteger i = 0; i < size; ++ i)
795 {
796 parameter.add(sample_[i]);
797 }
798 parameter.add(bandwidth_);
799 return parameter;
800 }
801
getParameterDescription() const802 Description KernelMixture::getParameterDescription() const
803 {
804 const UnsignedInteger dimension = getDimension();
805 const UnsignedInteger size = sample_.getSize();
806 Description description;
807 for (UnsignedInteger i = 0; i < size; ++ i)
808 {
809 if (dimension > 1)
810 for (UnsignedInteger j = 0; j < dimension; ++ j)
811 description.add(OSS() << "x_" << i << "^" << j);
812 else
813 description.add(OSS() << "x_" << i);
814 }
815
816 if (dimension > 1)
817 for (UnsignedInteger j = 0; j < dimension; ++ j)
818 description.add(OSS() << "h_" << j);
819 else
820 description.add("h");
821 return description;
822 }
823
824
setParameter(const Point & parameter)825 void KernelMixture::setParameter(const Point & parameter)
826 {
827 const UnsignedInteger dimension = getDimension();
828 const UnsignedInteger size = sample_.getSize();
829 if (parameter.getDimension() != (dimension * (size + 1))) throw InvalidArgumentException(HERE) << "Expected " << (dimension * (size + 1)) << " parameters";
830 UnsignedInteger index = 0;
831 for (UnsignedInteger i = 0; i < size; ++ i)
832 {
833 for (UnsignedInteger j = 0; j < dimension; ++ j)
834 {
835 sample_(i, j) = parameter[index];
836 ++ index;
837 }
838 }
839 for (UnsignedInteger j = 0; j < dimension; ++ j)
840 {
841 bandwidth_[j] = parameter[index];
842 ++ index;
843 }
844 // Use the constructor in order to recompute the PDF/CDF approximation if needed
845 const Scalar w = getWeight();
846 *this = KernelMixture(*p_kernel_, bandwidth_, sample_);
847 setWeight(w);
848 }
849
850
851 /* Check if the distribution is elliptical */
isElliptical() const852 Bool KernelMixture::isElliptical() const
853 {
854 // No chance to have something symmetrical if sample size > 2
855 if (sample_.getSize() > 2) return false;
856 // In dimension 1, elliptical == symmetric
857 if (getDimension() == 1) return p_kernel_->isElliptical();
858 // In dimension > 1, only samples with 1 point and Normal kernels lead to an elliptical distribution
859 return (sample_.getSize() == 1) && (p_kernel_->getClassName() == "Normal");
860 }
861
862 /* Check if the distribution is continuos */
isContinuous() const863 Bool KernelMixture::isContinuous() const
864 {
865 return p_kernel_->isContinuous();
866 }
867
868 /* Tell if the distribution has elliptical copula */
hasEllipticalCopula() const869 Bool KernelMixture::hasEllipticalCopula() const
870 {
871 // In 1D, all the distributions have an elliptical copula
872 if (getDimension() == 1) return true;
873 return false;
874 }
875
876 /* Tell if the distribution has independent copula */
hasIndependentCopula() const877 Bool KernelMixture::hasIndependentCopula() const
878 {
879 // In 1D, all the distributions have an independent copula
880 return (getDimension() == 1);
881 }
882
883 /* Method save() stores the object through the StorageManager */
save(Advocate & adv) const884 void KernelMixture::save(Advocate & adv) const
885 {
886 ContinuousDistribution::save(adv);
887 adv.saveAttribute( "kernel_", Distribution(p_kernel_) );
888 adv.saveAttribute( "bandwidth_", bandwidth_ );
889 adv.saveAttribute( "bandwidthInverse_", bandwidthInverse_ );
890 adv.saveAttribute( "normalizationFactor_", normalizationFactor_ );
891 adv.saveAttribute( "sample_", sample_ );
892 adv.saveAttribute( "pdfApproximationCDF_", pdfApproximationCDF_ );
893 adv.saveAttribute( "cdfApproximation_", cdfApproximation_ );
894 adv.saveAttribute( "pdfApproximationCCDF_", pdfApproximationCCDF_ );
895 adv.saveAttribute( "ccdfApproximation_", ccdfApproximation_ );
896 adv.saveAttribute( "useApproximatePDFCDF_", useApproximatePDFCDF_ );
897 }
898
899 /* Method load() reloads the object from the StorageManager */
load(Advocate & adv)900 void KernelMixture::load(Advocate & adv)
901 {
902 ContinuousDistribution::load(adv);
903 Distribution kernel;
904 adv.loadAttribute( "kernel_", kernel );
905 p_kernel_ = kernel.getImplementation();
906 adv.loadAttribute( "bandwidth_", bandwidth_ );
907 adv.loadAttribute( "bandwidthInverse_", bandwidthInverse_ );
908 adv.loadAttribute( "normalizationFactor_", normalizationFactor_ );
909 adv.loadAttribute( "sample_", sample_ );
910 adv.loadAttribute( "pdfApproximationCDF_", pdfApproximationCDF_ );
911 adv.loadAttribute( "cdfApproximation_", cdfApproximation_ );
912 adv.loadAttribute( "pdfApproximationCCDF_", pdfApproximationCCDF_ );
913 adv.loadAttribute( "ccdfApproximation_", ccdfApproximation_ );
914 adv.loadAttribute( "useApproximatePDFCDF_", useApproximatePDFCDF_ );
915 computeRange();
916 }
917
918 END_NAMESPACE_OPENTURNS
919