1 //                                               -*- C++ -*-
2 /**
3  *  @brief Class for a product-kernel multidimensional mixture.
4  *
5  *  Copyright 2005-2021 Airbus-EDF-IMACS-ONERA-Phimeca
6  *
7  *  This library is free software: you can redistribute it and/or modify
8  *  it under the terms of the GNU Lesser General Public License as published by
9  *  the Free Software Foundation, either version 3 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This library is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU Lesser General Public License for more details.
16  *
17  *  You should have received a copy of the GNU Lesser General Public License
18  *  along with this library.  If not, see <http://www.gnu.org/licenses/>.
19  *
20  */
21 #include <cmath>
22 #include "openturns/KernelMixture.hxx"
23 #include "openturns/RandomGenerator.hxx"
24 #include "openturns/Exception.hxx"
25 #include "openturns/PersistentObjectFactory.hxx"
26 #include "openturns/Brent.hxx"
27 #include "openturns/Os.hxx"
28 
29 BEGIN_NAMESPACE_OPENTURNS
30 
31 CLASSNAMEINIT(KernelMixture)
32 
33 static const Factory<KernelMixture> Factory_KernelMixture;
34 
35 /* Default constructor */
KernelMixture()36 KernelMixture::KernelMixture()
37   : ContinuousDistribution()
38   , p_kernel_(Distribution().getImplementation())
39   , bandwidth_(0)
40   , bandwidthInverse_(0)
41   , normalizationFactor_(0.0)
42   , sample_(1, 1)
43   , pdfApproximationCDF_()
44   , cdfApproximation_()
45   , pdfApproximationCCDF_()
46   , ccdfApproximation_()
47   , useApproximatePDFCDF_(false)
48 {
49   setName("KernelMixture");
50   setBandwidth(Point(1, 1.0));
51 }
52 
53 /* Parameters constructor */
KernelMixture(const Distribution & kernel,const Point & bandwidth,const Sample & sample)54 KernelMixture::KernelMixture(const Distribution & kernel,
55                              const Point & bandwidth,
56                              const Sample & sample)
57   : ContinuousDistribution()
58   , p_kernel_(kernel.getImplementation())
59   , bandwidth_(0)
60   , bandwidthInverse_(0)
61   , normalizationFactor_(0.0)
62   , sample_(sample)
63   , pdfApproximationCDF_()
64   , cdfApproximation_()
65   , pdfApproximationCCDF_()
66   , ccdfApproximation_()
67   , useApproximatePDFCDF_(false)
68 {
69   setName("KernelMixture");
70   // We check if the given kernel is 1-D (product kernel)
71   if (kernel.getDimension() != 1) throw InvalidArgumentException(HERE) << "Error: only 1D kernel is allowed for multidimensional product kernels";
72   if (sample.getSize() == 0) throw InvalidArgumentException(HERE) << "Error: cannot build a KernelMixture based on an empty sample.";
73   setDimension(sample.getDimension());
74   // This call set also the range.
75   setBandwidth(bandwidth);
76   if ((getDimension() == 1) && (sample.getSize() >= ResourceMap::GetAsUnsignedInteger("KernelMixture-SmallSize")) && (sample.getSize() < ResourceMap::GetAsUnsignedInteger("KernelMixture-LargeSize")))
77   {
78     // Here we use the implementation provided by the DistributionImplementation class instead of the ContinuousDistribution class in order to use both the PDF and the CDF
79     Collection<PiecewiseHermiteEvaluation> coll(DistributionImplementation::interpolatePDFCDF(ResourceMap::GetAsUnsignedInteger("KernelMixture-PDFCDFDiscretization")));
80     pdfApproximationCDF_ = coll[0];
81     cdfApproximation_ = coll[1];
82     pdfApproximationCCDF_ = coll[2];
83     ccdfApproximation_ = coll[3];
84     useApproximatePDFCDF_ = true;
85   }
86   setParallel(p_kernel_->isParallel());
87 }
88 
89 /* Comparison operator */
operator ==(const KernelMixture & other) const90 Bool KernelMixture::operator ==(const KernelMixture & other) const
91 {
92   if (this == &other) return true;
93   if (useApproximatePDFCDF_) return (bandwidth_ == other.bandwidth_) && (*p_kernel_ == *other.p_kernel_) && (pdfApproximationCDF_ == other.pdfApproximationCDF_) && (cdfApproximation_ == other.cdfApproximation_) && (pdfApproximationCCDF_ == other.pdfApproximationCCDF_) && (ccdfApproximation_ == other.ccdfApproximation_);
94   return (bandwidth_ == other.bandwidth_) && (*p_kernel_ == *other.p_kernel_) && (sample_ == other.sample_);
95 }
96 
equals(const DistributionImplementation & other) const97 Bool KernelMixture::equals(const DistributionImplementation & other) const
98 {
99   const KernelMixture* p_other = dynamic_cast<const KernelMixture*>(&other);
100   return p_other && (*this == *p_other);
101 }
102 
103 /* String converter */
__repr__() const104 String KernelMixture::__repr__() const
105 {
106   OSS oss;
107   oss << "class=" << KernelMixture::GetClassName()
108       << " name=" << getName()
109       << " kernel=" << p_kernel_->__repr__()
110       << " bandwidth=" << bandwidth_
111       << " sample=" << sample_;
112   return oss;
113 }
114 
__str__(const String & offset) const115 String KernelMixture::__str__(const String & offset) const
116 {
117   OSS oss;
118   oss << getClassName() << "(kernel = " << p_kernel_->__str__() << ", bandwidth = " << bandwidth_.__str__() << ", sample = " << Os::GetEndOfLine() << offset << sample_.__str__(offset);
119   return oss;
120 }
121 
122 
123 /* Compute the numerical range of the distribution given the parameters values */
computeRange()124 void KernelMixture::computeRange()
125 {
126   const Interval kernelRange(p_kernel_->getRange());
127   const UnsignedInteger dimension = getDimension();
128   const Point lowerBound(sample_.getMin() + kernelRange.getLowerBound()[0] * bandwidth_);
129   const Point upperBound(sample_.getMax() + kernelRange.getUpperBound()[0] * bandwidth_);
130   const Interval::BoolCollection finiteLowerBound(dimension, kernelRange.getFiniteLowerBound()[0]);
131   const Interval::BoolCollection finiteUpperBound(dimension, kernelRange.getFiniteUpperBound()[0]);
132   setRange(Interval(lowerBound, upperBound, finiteLowerBound, finiteUpperBound));
133 }
134 
135 /* Kernel accessor */
setKernel(const Distribution & kernel)136 void KernelMixture::setKernel(const Distribution & kernel)
137 {
138   // We check if the kernel is 1D
139   if (kernel.getDimension() != 1) throw InvalidArgumentException(HERE) << "Error: the kernel must be 1D for product kernel mixture";
140   p_kernel_ = kernel.getImplementation();
141   isAlreadyComputedMean_ = false;
142   isAlreadyComputedCovariance_ = false;
143   computeRange();
144 }
145 
getKernel() const146 Distribution KernelMixture::getKernel() const
147 {
148   return *p_kernel_;
149 }
150 
151 /* Sample accessor */
setInternalSample(const Sample & sample)152 void KernelMixture::setInternalSample(const Sample & sample)
153 {
154   if (sample.getSize() == 0) throw InvalidArgumentException(HERE) << "Error: cannot build a KernelMixture based on an empty sample.";
155   if (sample.getDimension() != getDimension()) throw InvalidArgumentException(HERE) << "Error: the given sample has dimension=" << sample.getDimension() << ", expected dimension=" << getDimension() << ".";
156   sample_ = sample;
157   computeRange();
158   if ((getDimension() == 1) && (sample.getSize() >= ResourceMap::GetAsUnsignedInteger("KernelMixture-SmallSize")) && (sample.getSize() < ResourceMap::GetAsUnsignedInteger("KernelMixture-LargeSize")))
159   {
160     // Here we use the implementation provided by the DistributionImplementation class instead of the ContinuousDistribution class in order to use both the PDF and the CDF
161     Collection<PiecewiseHermiteEvaluation> coll(DistributionImplementation::interpolatePDFCDF(ResourceMap::GetAsUnsignedInteger("KernelMixture-PDFCDFDiscretization")));
162     pdfApproximationCDF_ = coll[0];
163     cdfApproximation_ = coll[1];
164     pdfApproximationCCDF_ = coll[2];
165     ccdfApproximation_ = coll[3];
166     useApproximatePDFCDF_ = true;
167   }
168   isAlreadyComputedMean_ = false;
169   isAlreadyComputedCovariance_ = false;
170 }
171 
getInternalSample() const172 Sample KernelMixture::getInternalSample() const
173 {
174   return sample_;
175 }
176 
177 
178 /* Bandwidth accessor */
setBandwidth(const Point & bandwidth)179 void KernelMixture::setBandwidth(const Point & bandwidth)
180 {
181   const UnsignedInteger dimension = getDimension();
182   normalizationFactor_ = sample_.getSize();
183   if (bandwidth.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the dimensions of the bandwidth and the sample must be equal";
184   bandwidthInverse_ = Point(dimension);
185   for (UnsignedInteger i = 0; i < dimension; ++i)
186   {
187     const Scalar hi = bandwidth[i];
188     if (!(hi > 0.0)) throw InvalidArgumentException(HERE) << "Error: the bandwidth components must be > 0, here bandwidth=" << bandwidth;
189     bandwidthInverse_[i] = 1.0 / hi;
190     normalizationFactor_ *= hi;
191   }
192   bandwidth_ = bandwidth;
193   normalizationFactor_ = 1.0 / normalizationFactor_;
194   isAlreadyComputedMean_ = false;
195   isAlreadyComputedCovariance_ = false;
196   computeRange();
197 }
198 
199 /* Distribution collection accessor */
getBandwidth() const200 Point KernelMixture::getBandwidth() const
201 {
202   return bandwidth_;
203 }
204 
205 /* Virtual constructor */
clone() const206 KernelMixture * KernelMixture::clone() const
207 {
208   return new KernelMixture(*this);
209 }
210 
211 /* Get one realization of the KernelMixture */
getRealization() const212 Point KernelMixture::getRealization() const
213 {
214   // Select the atom uniformly amongst the possible points
215   Point result(sample_[RandomGenerator::IntegerGenerate(sample_.getSize())]);
216   // Then add a random noise according to the product kernel
217   const UnsignedInteger dimension = getDimension();
218   const Sample kernelSample(p_kernel_->getSample(dimension));
219   for (UnsignedInteger i = 0; i < dimension; ++i) result[i] += bandwidth_[i] * kernelSample(i, 0);
220   return result;
221 }
222 
223 /* Get the DDF of the KernelMixture */
computeDDF(const Point & point) const224 Point KernelMixture::computeDDF(const Point & point) const
225 {
226   const UnsignedInteger dimension = getDimension();
227   if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
228 
229   Point ddfValue(dimension, 0.0);
230   // Quick rejection test
231   if (!getRange().numericallyContains(point)) return ddfValue;
232   const UnsignedInteger size = sample_.getSize();
233   for(UnsignedInteger i = 0; i < size; ++i)
234   {
235     Point atom(dimension, 0.0);
236     Point kernelPdfAtom(dimension, 0.0);
237     Scalar pdfAtom = 1.0;
238     for (UnsignedInteger j = 0; j < dimension; ++j)
239     {
240       atom[j] = (point[j] - sample_(i, j)) * bandwidthInverse_[j];
241       kernelPdfAtom[j] = p_kernel_->computePDF(Point(1, atom[j]));
242       pdfAtom *= kernelPdfAtom[j];
243     }
244     for (UnsignedInteger j = 0; j < dimension; ++j)
245     {
246       // Only aggregate the values associated with kernelPdfAtom>0
247       if (kernelPdfAtom[j] > 0.0)
248         ddfValue[j] += pdfAtom / kernelPdfAtom[j] * p_kernel_->computeDDF(Point(1, atom[j]))[0] * bandwidthInverse_[j];
249     }
250   } /* end for */
251   return normalizationFactor_ * ddfValue;
252 }
253 
254 /* Get the PDF of the KernelMixture */
computePDF(const Point & point) const255 Scalar KernelMixture::computePDF(const Point & point) const
256 {
257   const UnsignedInteger dimension = getDimension();
258   if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
259   if (useApproximatePDFCDF_)
260   {
261     if (point[0] < getMean()[0]) return pdfApproximationCDF_(point)[0];
262     else return pdfApproximationCCDF_(point)[0];
263   }
264   Scalar pdfValue = 0.0;
265   const UnsignedInteger size = sample_.getSize();
266   if (dimension == 1)
267   {
268     const Scalar x = point[0];
269     const Scalar h = bandwidth_[0];
270     for(UnsignedInteger i = 0; i < size; ++i) pdfValue += p_kernel_->computePDF((x - sample_(i, 0)) / h);
271     return pdfValue / (h * size);
272   }
273   // Quick rejection test
274   //if (!getRange().numericallyContains(point)) return pdfValue;
275   //const Scalar pdfEpsilon = p_kernel_->getPDFEpsilon();
276   for(UnsignedInteger i = 0; i < size; ++i)
277   {
278     Scalar pdfAtom = p_kernel_->computePDF((point[0] - sample_(i, 0)) * bandwidthInverse_[0]);
279     for (UnsignedInteger j = 1; j < dimension; ++j)
280     {
281       //if (pdfAtom < pdfEpsilon) break;
282       pdfAtom *= p_kernel_->computePDF((point[j] - sample_(i, j)) * bandwidthInverse_[j]);
283     }
284     pdfValue += pdfAtom;
285   } /* end for */
286   return normalizationFactor_ * pdfValue;
287 }
288 
289 /* Get the CDF of the KernelMixture */
computeCDF(const Point & point) const290 Scalar KernelMixture::computeCDF(const Point & point) const
291 {
292   const UnsignedInteger dimension = getDimension();
293   if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
294   if (useApproximatePDFCDF_)
295   {
296     if (point[0] < getMean()[0]) return cdfApproximation_(point)[0];
297     else return 1.0 - ccdfApproximation_(point)[0];
298   }
299   Scalar cdfValue = 0.0;
300   const UnsignedInteger size = sample_.getSize();
301   if (dimension == 1)
302   {
303     const Scalar x = point[0];
304     const Scalar h = bandwidth_[0];
305     for(UnsignedInteger i = 0; i < size; ++i)
306       cdfValue += p_kernel_->computeCDF((x - sample_(i, 0)) / h);
307     return cdfValue / size;
308   }
309   // Check against the range of the distribution
310   Bool allTooLarge = true;
311   Bool oneTooSmall = false;
312   const Point lower(getRange().getLowerBound());
313   const Point upper(getRange().getUpperBound());
314   for (UnsignedInteger i = 0; i < dimension; ++i)
315   {
316     allTooLarge = allTooLarge && (point[i] >= upper[i]);
317     oneTooSmall = oneTooSmall || (point[i] <= lower[i]);
318   }
319   if (allTooLarge) return 1.0;
320   if (oneTooSmall) return 0.0;
321   const Scalar cdfEpsilon = p_kernel_->getCDFEpsilon();
322   for (UnsignedInteger i = 0; i < size; ++i)
323   {
324     Scalar cdfAtom = p_kernel_->computeCDF((point[0] - sample_(i, 0)) * bandwidthInverse_[0]);
325     for (UnsignedInteger j = 1; j < dimension; ++j)
326     {
327       if (cdfAtom < cdfEpsilon) break;
328       cdfAtom *= p_kernel_->computeCDF((point[j] - sample_(i, j)) * bandwidthInverse_[j]);
329     }
330     cdfValue += cdfAtom;
331   } /* end for */
332   return cdfValue / size;
333 }
334 
335 /* Get the complementary CDF of the distribution */
computeComplementaryCDF(const Point & point) const336 Scalar KernelMixture::computeComplementaryCDF(const Point & point) const
337 {
338   const UnsignedInteger dimension = getDimension();
339   if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
340   if (useApproximatePDFCDF_)
341   {
342     if (point[0] < getMean()[0]) return 1.0 - cdfApproximation_(point)[0];
343     else return ccdfApproximation_(point)[0];
344   }
345   // More accurate computation for 1D case...
346   if (dimension == 1) return computeSurvivalFunction(point);
347   // ... than in the general case
348   return DistributionImplementation::computeComplementaryCDF(point);
349 }
350 
351 /* Get the survival function of the KernelMixture */
computeSurvivalFunction(const Point & point) const352 Scalar KernelMixture::computeSurvivalFunction(const Point & point) const
353 {
354   const UnsignedInteger dimension = getDimension();
355   if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
356   if (useApproximatePDFCDF_)
357   {
358     if (point[0] < getMean()[0]) return 1.0 - cdfApproximation_(point)[0];
359     else return ccdfApproximation_(point)[0];
360   }
361   // Check against the range of the distribution
362   Bool oneTooLarge = true;
363   Bool allTooSmall = false;
364   const Point lower(getRange().getLowerBound());
365   const Point upper(getRange().getUpperBound());
366   for (UnsignedInteger i = 0; i < dimension; ++i)
367   {
368     oneTooLarge = oneTooLarge && (point[i] >= upper[i]);
369     allTooSmall = allTooSmall || (point[i] <= lower[i]);
370   }
371   if (oneTooLarge) return 0.0;
372   if (allTooSmall) return 1.0;
373   const Scalar cdfEpsilon = p_kernel_->getCDFEpsilon();
374   Scalar survivalValue = 0.0;
375   const UnsignedInteger size = sample_.getSize();
376   for (UnsignedInteger i = 0; i < size; ++i)
377   {
378     Scalar cdfAtom = p_kernel_->computeSurvivalFunction((point[0] - sample_(i, 0)) * bandwidthInverse_[0]);
379     for (UnsignedInteger j = 1; j < dimension; ++j)
380     {
381       if (cdfAtom < cdfEpsilon) break;
382       cdfAtom *= p_kernel_->computeSurvivalFunction((point[j] - sample_(i, j)) * bandwidthInverse_[j]);
383     }
384     survivalValue += cdfAtom;
385   } /* end for */
386   return survivalValue / size;
387 }
388 
389 /* Get the probability content of an interval */
computeProbability(const Interval & interval) const390 Scalar KernelMixture::computeProbability(const Interval & interval) const
391 {
392   const UnsignedInteger dimension = getDimension();
393   if (interval.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given interval must have dimension=" << dimension << ", here dimension=" << interval.getDimension();
394   const Interval reducedInterval(interval.intersect(getRange()));
395   if (reducedInterval == getRange()) return 1.0;
396   if (reducedInterval.isEmpty()) return 0.0;
397   const Point lowerBound(reducedInterval.getLowerBound());
398   const Point upperBound(reducedInterval.getUpperBound());
399   if (useApproximatePDFCDF_)
400   {
401     const Scalar mean = getMean()[0];
402     if (lowerBound[0] > mean) return ccdfApproximation_(lowerBound)[0] - ccdfApproximation_(upperBound)[0];
403     else return cdfApproximation_(upperBound)[0] - cdfApproximation_(lowerBound)[0];
404   }
405   Scalar probability = 0.0;
406   const UnsignedInteger size = sample_.getSize();
407   if (dimension == 1)
408   {
409     const Scalar hInverse = bandwidthInverse_[0];
410     for(UnsignedInteger i = 0; i < size; ++i)
411       probability += p_kernel_->computeProbability(Interval((lowerBound[0] - sample_(i, 0)) * hInverse, (upperBound[0] - sample_(i, 0)) * hInverse));
412     return probability / size;
413   }
414   const Scalar probabilityEpsilon = p_kernel_->getCDFEpsilon();
415   for (UnsignedInteger i = 0; i < size; ++i)
416   {
417     Scalar probabilityAtom = p_kernel_->computeProbability(Interval((lowerBound[0] - sample_(i, 0)) * bandwidthInverse_[0], (upperBound[0] - sample_(i, 0)) * bandwidthInverse_[0]));
418     for (UnsignedInteger j = 1; j < dimension; ++j)
419     {
420       if (probabilityAtom < probabilityEpsilon) break;
421       probabilityAtom *= p_kernel_->computeProbability(Interval((lowerBound[j] - sample_(i, j)) * bandwidthInverse_[j], (upperBound[j] - sample_(i, j)) * bandwidthInverse_[j]));
422     }
423     probability += probabilityAtom;
424   } /* end for */
425   return probability / size;
426 }
427 
428 /* Compute the quantile function of the distribution */
computeScalarQuantile(const Scalar prob,const Bool tail) const429 Scalar KernelMixture::computeScalarQuantile(const Scalar prob,
430     const Bool tail) const
431 {
432   if (dimension_ != 1) throw InvalidDimensionException(HERE) << "Error: the method computeScalarQuantile is only defined for 1D distributions";
433   if (!useApproximatePDFCDF_) return DistributionImplementation::computeScalarQuantile(prob, tail);
434   const Scalar a = getRange().getLowerBound()[0];
435   const Scalar b = getRange().getUpperBound()[0];
436   if (prob <= 0.0) return (tail ? b : a);
437   if (prob >= 1.0) return (tail ? a : b);
438   const UnsignedInteger n = cdfApproximation_.getLocations().getSize();
439   if (tail)
440   {
441     // Here we have to solve ComplementaryCDF(x) = prob which is mathematically
442     // equivalent to CDF(x) = 1 - prob, but numerically different with an
443     // accuracy that depends on prob.
444     // The cut-off is around the mean value
445     if (prob <= ccdfApproximation_.getValues()(0, 0)) return Brent(quantileEpsilon_, cdfEpsilon_, cdfEpsilon_, quantileIterations_).solve(ccdfApproximation_, prob, ccdfApproximation_.getLocations()[0], ccdfApproximation_.getLocations()[n - 1], ccdfApproximation_.getValues()(0, 0), ccdfApproximation_.getValues()(n - 1, 0));
446     return Brent(quantileEpsilon_, cdfEpsilon_, cdfEpsilon_, quantileIterations_).solve(cdfApproximation_, 1.0 - prob, cdfApproximation_.getLocations()[0], cdfApproximation_.getLocations()[n - 1], cdfApproximation_.getValues()(0, 0), cdfApproximation_.getValues()(n - 1, 0));
447   }
448   // Here we have to solve CDF(x) = prob which is mathematically
449   // equivalent to ComplementaryCDF(x) = 1 - prob, but numerically
450   // different with an accuracy that depends on prob.
451   // The cut-off is around the mean value
452   if (prob <= cdfApproximation_.getValues()(n - 1, 0)) return Brent(quantileEpsilon_, cdfEpsilon_, cdfEpsilon_, quantileIterations_).solve(cdfApproximation_, prob, cdfApproximation_.getLocations()[0], cdfApproximation_.getLocations()[n - 1], cdfApproximation_.getValues()(0, 0), cdfApproximation_.getValues()(n - 1, 0));
453   return Brent(quantileEpsilon_, cdfEpsilon_, cdfEpsilon_, quantileIterations_).solve(ccdfApproximation_, 1.0 - prob, ccdfApproximation_.getLocations()[0], ccdfApproximation_.getLocations()[n - 1], ccdfApproximation_.getValues()(0, 0), ccdfApproximation_.getValues()(n - 1, 0));
454 }
455 
456 /* Get the characteristic function of the distribution, i.e. phi(u) = E(exp(I*u*X)) */
computeCharacteristicFunction(const Scalar x) const457 Complex KernelMixture::computeCharacteristicFunction(const Scalar x) const
458 {
459   if (x == 0.0) return 1.0;
460   Complex cfValue(0.0);
461   const UnsignedInteger size = sample_.getSize();
462   for(UnsignedInteger i = 0; i < size; ++i)
463   {
464     cfValue += p_kernel_->computeCharacteristicFunction(x * bandwidth_[0]) * std::exp(Complex(0.0, sample_(i, 0) * x));
465   } /* end for */
466   return cfValue * (1.0 / size);
467 }
468 
469 /* Get the PDF gradient of the distribution */
computePDFGradient(const Point & point) const470 Point KernelMixture::computePDFGradient(const Point & point) const
471 {
472   const UnsignedInteger dimension = getDimension();
473   if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
474 
475   throw NotYetImplementedException(HERE) << "In KernelMixture::computePDFGradient(const Point & point) const";
476 }
477 
478 /* Get the CDF gradient of the distribution */
computeCDFGradient(const Point & point) const479 Point KernelMixture::computeCDFGradient(const Point & point) const
480 {
481   const UnsignedInteger dimension = getDimension();
482   if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
483 
484   throw NotYetImplementedException(HERE) << "In KernelMixture::computeCDFGradient(const Point & point) const";
485 }
486 
487 /* Compute the PDF of Xi | X1, ..., Xi-1. x = Xi, y = (X1,...,Xi-1)
488 */
computeConditionalPDF(const Scalar x,const Point & y) const489 Scalar KernelMixture::computeConditionalPDF(const Scalar x,
490     const Point & y) const
491 {
492   const UnsignedInteger conditioningDimension = y.getDimension();
493   if (conditioningDimension >= getDimension()) throw InvalidArgumentException(HERE) << "Error: cannot compute a conditional PDF with a conditioning point of dimension greater or equal to the distribution dimension.";
494   // Special case for no conditioning or independent copula
495   if ((conditioningDimension == 0) || (hasIndependentCopula())) return getMarginal(conditioningDimension).computePDF(x);
496   // Build the conditional mixture weights
497   const UnsignedInteger size = sample_.getSize();
498   Scalar jointPDF = 0.0;
499   Scalar marginalPDF = 0.0;
500   for (UnsignedInteger i = 0; i < size; ++i)
501   {
502     Scalar marginalAtomPDF = 1.0;
503     for (UnsignedInteger j = 0; j < conditioningDimension; ++j)
504       marginalAtomPDF *= p_kernel_->computePDF((y[j] - sample_(i, j)) / bandwidth_[j]);
505     marginalPDF += marginalAtomPDF;
506     jointPDF += marginalAtomPDF * p_kernel_->computePDF((x - sample_(i, conditioningDimension)) / bandwidth_[conditioningDimension]);
507   }
508   if (marginalPDF <= 0.0) return 0.0;
509   // No need to normalize by 1/h as it simplifies
510   return jointPDF / marginalPDF;
511 }
512 
computeSequentialConditionalPDF(const Point & x) const513 Point KernelMixture::computeSequentialConditionalPDF(const Point & x) const
514 {
515   Point result(dimension_);
516   const UnsignedInteger size = sample_.getSize();
517   Point atomsValues(size);
518   Scalar pdfConditioning = 0.0;
519   Scalar currentX = x[0];
520   Scalar currentH = bandwidth_[0];
521   for (UnsignedInteger i = 0; i < size; ++i)
522   {
523     atomsValues[i] = p_kernel_->computePDF((currentX - sample_(i, 0)) / currentH);
524     pdfConditioning += atomsValues[i];
525   }
526   result[0] = pdfConditioning / (size * currentH);
527   for (UnsignedInteger conditioningDimension = 1; conditioningDimension < dimension_; ++conditioningDimension)
528   {
529     // Return the result as soon as a conditional pdf is zero
530     if (pdfConditioning == 0) return result;
531     currentX = x[conditioningDimension];
532     currentH = bandwidth_[conditioningDimension];
533     Scalar pdfConditioned = 0.0;
534     for (UnsignedInteger i = 0; i < size; ++i)
535     {
536       atomsValues[i] *= p_kernel_->computePDF((currentX - sample_(i, conditioningDimension)) / currentH);
537       pdfConditioned += atomsValues[i];
538     }
539     result[conditioningDimension] = pdfConditioned / pdfConditioning;
540     pdfConditioning = pdfConditioned;
541   } // conditioningDimension
542   return result;
543 }
544 
545 /* Compute the CDF of Xi | X1, ..., Xi-1. x = Xi, y = (X1,...,Xi-1) */
computeConditionalCDF(const Scalar x,const Point & y) const546 Scalar KernelMixture::computeConditionalCDF(const Scalar x,
547     const Point & y) const
548 {
549   const UnsignedInteger conditioningDimension = y.getDimension();
550   if (conditioningDimension >= getDimension()) throw InvalidArgumentException(HERE) << "Error: cannot compute a conditional CDF with a conditioning point of dimension greater or equal to the distribution dimension.";
551   // Special case for no conditioning or independent copula
552   if ((conditioningDimension == 0) || (hasIndependentCopula())) return getMarginal(conditioningDimension).computeCDF(x);
553   // Build the conditional mixture weights
554   const UnsignedInteger size = sample_.getSize();
555   Scalar jointCDF = 0.0;
556   Scalar marginalPDF = 0.0;
557   Scalar h = bandwidth_[conditioningDimension];
558   for (UnsignedInteger i = 0; i < size; ++i)
559   {
560     Scalar marginalAtomPDF = p_kernel_->computePDF((y[0] - sample_(i, 0)) / bandwidth_[0]);
561     for (UnsignedInteger j = 1; j < conditioningDimension; ++j)
562       marginalAtomPDF *= p_kernel_->computePDF((y[j] - sample_(i, j)) / bandwidth_[j]);
563     marginalPDF += marginalAtomPDF;
564     jointCDF += marginalAtomPDF * p_kernel_->computeCDF((x - sample_(i, conditioningDimension)) / h);
565   }
566   if (marginalPDF <= 0.0) return 0.0;
567   // No need to normalize by 1/h as it simplifies
568   return std::min(1.0, jointCDF / marginalPDF);
569 }
570 
computeSequentialConditionalCDF(const Point & x) const571 Point KernelMixture::computeSequentialConditionalCDF(const Point & x) const
572 {
573   /* pdf(x_n|x_0,...,x_{n-1})=pdf(x_1,...,x_n)/pdf(x_1,...,x_{n-1})
574      cdf(x_n|x_0,...,x_{n-1})=\int_{-\infty}^{x_n}pdf(x_1,...,t)/pdf(x_1,...,x_{n-1})dt
575      \int_{-\infty}^{x_n}pdf(x_1,...,t)dt=1/N\sum_{i=1}^N\int_{-\infty}^{x_n}[\prod_{j=1}^{n-1} k((x_j-X^i_j)/h_j)/h_j]k((t-X^i_n)/h_n)/h_n dt
576      =1/N\sum_{i=1}^N P_{n-1}^i K((x_n-X^i_n)/h_n)dt
577      and
578      pdf(x_1,...,x_{n-1})=1/N\sum_{i=1}^N\prod_{j=1}^{n-1}k((x_j-X^i_j)/h_j)/h_j=1/N\sum_{i=1}^N P_{n-1}^i
579   */
580   Point result(dimension_);
581   const UnsignedInteger size = sample_.getSize();
582   Point atomsValues(size);
583   Scalar currentX = x[0];
584   Scalar currentH = bandwidth_[0];
585   Scalar pdfConditioning = 0.0;
586   Scalar pdfConditioned = 0.0;
587   Scalar cdfConditioned = 0.0;
588   for (UnsignedInteger i = 0; i < size; ++i)
589   {
590     const Scalar kI = p_kernel_->computePDF((currentX - sample_(i, 0)) / currentH) / currentH;
591     cdfConditioned += p_kernel_->computeCDF((currentX - sample_(i, 0)) / currentH);
592     atomsValues[i] = kI;
593     pdfConditioning += kI;
594   }
595   result[0] = cdfConditioned / size;
596   for (UnsignedInteger conditioningDimension = 1; conditioningDimension < dimension_; ++conditioningDimension)
597   {
598     // Return the result as soon as a conditional pdf is zero
599     if (pdfConditioning == 0) return result;
600     currentX = x[conditioningDimension];
601     currentH = bandwidth_[conditioningDimension];
602     pdfConditioned = 0.0;
603     cdfConditioned = 0.0;
604     for (UnsignedInteger i = 0; i < size; ++i)
605     {
606       cdfConditioned += atomsValues[i] * p_kernel_->computeCDF((currentX - sample_(i, conditioningDimension)) / currentH);
607       atomsValues[i] *= p_kernel_->computePDF((currentX - sample_(i, conditioningDimension)) / currentH) / currentH;
608       pdfConditioned += atomsValues[i];
609     }
610     result[conditioningDimension] = cdfConditioned / pdfConditioning;
611     pdfConditioning = pdfConditioned;
612   } // conditioningDimension
613   return result;
614 }
615 
616 /* Get the i-th marginal distribution */
getMarginal(const UnsignedInteger i) const617 Distribution KernelMixture::getMarginal(const UnsignedInteger i) const
618 {
619   const UnsignedInteger dimension = getDimension();
620   if (i >= dimension) throw InvalidArgumentException(HERE) << "The index of a marginal distribution must be in the range [0, dim-1]";
621   // Special case for dimension 1
622   if (dimension == 1) return clone();
623   // General case
624   KernelMixture::Implementation marginal(new KernelMixture(*p_kernel_, Point(1, bandwidth_[i]), sample_.getMarginal(i)));
625   marginal->setDescription(Description(1, getDescription()[i]));
626   return marginal;
627 }
628 
629 /* Get the distribution of the marginal distribution corresponding to indices dimensions */
getMarginal(const Indices & indices) const630 Distribution KernelMixture::getMarginal(const Indices & indices) const
631 {
632   const UnsignedInteger dimension = getDimension();
633   if (!indices.check(dimension)) throw InvalidArgumentException(HERE) << "The indices of a marginal distribution must be in the range [0, dim-1] and must be different";
634   // Special case for dimension 1
635   if (dimension == 1) return clone();
636   // General case
637   const UnsignedInteger marginalDimension = indices.getSize();
638   Description description(getDescription());
639   Description marginalDescription(marginalDimension);
640   Point marginalBandwidth(marginalDimension);
641   for (UnsignedInteger i = 0; i < marginalDimension; ++i)
642   {
643     const UnsignedInteger index_i = indices[i];
644     marginalBandwidth[i] = bandwidth_[index_i];
645     marginalDescription[i] = description[index_i];
646   }
647   KernelMixture::Implementation marginal(new KernelMixture(*p_kernel_, marginalBandwidth, sample_.getMarginal(indices)));
648   marginal->setDescription(marginalDescription);
649   return marginal;
650 }
651 
652 /* Compute the mean of the KernelMixture
653  * PDF(x) = C\sum_{i=1}^N\prod_{j=1}^n K((X^i_j-x_j)/h_j),
654  * where:
655  * C = \frac{1}{N\prod_{k=1}^n h_k}
656  * mu_j = \int_{-\infty}^{\infty} x_j\frac{1}{Nh_j}\sum_{i=1}^N K((x_j - X^i_j) / h_j) dx_j
657  = \int_{-\infty}^{\infty} \frac{1}{Nh_j}\sum_{i=1}^N(h_jt + X^i_j)K(t) h_j dt
658  = \frac{1}{Nh_j}\sum_{i=1}^NX^i_j + \int_{-\infty}^{\infty} \frac{1}{N}\sum_{i=1}^Nh_jtK(t) dt
659  = mu_sample_j + h_j mu_K
660 */
computeMean() const661 void KernelMixture::computeMean() const
662 {
663   // We know that the kernel is 1D, so its mean value is actually a scalar
664   const Scalar meanKernel = p_kernel_->getMean()[0];
665   mean_ = sample_.computeMean();
666   isAlreadyComputedMean_ = true;
667   // Special case for symmetric kernel
668   if (meanKernel == 0.0) return;
669   // General case
670   mean_ += meanKernel * bandwidth_;
671 }
672 
673 /* Compute the covariance of the KernelMixture
674    Covariance(KernelMixture) = (1-1/N) Covariance(sample) + Covariance(kernel) * diag(bandwidth[i]^2)
675 */
computeCovariance() const676 void KernelMixture::computeCovariance() const
677 {
678   const UnsignedInteger dimension = getDimension();
679   // We know that the kernel is 1D, so its standard deviation is actually a scalar
680   const Scalar sigmaKernel = p_kernel_->getStandardDeviation()[0];
681   // Covariance(sample) term, with the proper scaling
682   covariance_ = CovarianceMatrix(dimension, Collection<Scalar>(sample_.computeCovariance().getImplementation()->operator*(1.0 - 1.0 / sample_.getSize())));
683   // Add the diagonal kernel covariance contribution
684   for (UnsignedInteger i = 0; i < dimension; ++i)
685     covariance_(i, i) += std::pow(bandwidth_[i] * sigmaKernel, 2);
686   isAlreadyComputedCovariance_ = true;
687 }
688 
689 /* Get the standard deviation of the distribution. We don't use the square root of the covariance since it involves a O(dim^2) computation where only a O(dim) computation is required.
690    std = [var_sample + h^2 var_K]^(1/2)
691 */
getStandardDeviation() const692 Point KernelMixture::getStandardDeviation() const
693 {
694   const UnsignedInteger dimension = getDimension();
695   // We know that the kernel is 1D, so its standard deviation is actually a scalar
696   const Scalar sigmaKernel = p_kernel_->getStandardDeviation()[0];
697   Point result(sample_.computeCenteredMoment(2));
698   for (UnsignedInteger i = 0; i < dimension; ++i)
699     result[i] = std::sqrt(result[i] + std::pow(bandwidth_[i] * sigmaKernel, 2));
700   return result;
701 }
702 
703 /* Get the skewness of the distribution:
704    skew = [skew_sample * std_sample^3 + h^3 * skew_K * std_K^3] / std^3
705 */
getSkewness() const706 Point KernelMixture::getSkewness() const
707 {
708   const UnsignedInteger dimension = getDimension();
709   // We know that the kernel is 1D, so its standard deviation is actually a scalar
710   const Scalar sigmaKernel = p_kernel_->getStandardDeviation()[0];
711   // We know that the kernel is 1D, so its skewness is actually a scalar
712   const Scalar skewnessKernel = p_kernel_->getSkewness()[0];
713   // Standard deviation of the KernelMixture
714   const Point sigma(getStandardDeviation());
715   Point result(sample_.computeCenteredMoment(3));
716   for (UnsignedInteger i = 0; i < dimension; ++i)
717     result[i] = (result[i] + std::pow(bandwidth_[i] * sigmaKernel, 3) * skewnessKernel) / std::pow(sigma[i], 3);
718   return result;
719 }
720 
721 /* Get the kurtosis of the distribution:
722    kurt = [kurt_sample * std_sample^4 + h^4 * kurt_K * std_K^4 + 6 * h^2 * var_sample * var_K] / std^4
723 */
getKurtosis() const724 Point KernelMixture::getKurtosis() const
725 {
726   const UnsignedInteger dimension = getDimension();
727   // We know that the kernel is 1D, so its standard deviation is actually a scalar
728   const Scalar sigmaKernel = p_kernel_->getStandardDeviation()[0];
729   // We know that the kernel is 1D, so its skewness is actually a scalar
730   const Scalar kurtosisKernel = p_kernel_->getKurtosis()[0];
731   // Standard deviation of the sample
732   const Point varSample(sample_.computeCenteredMoment(2));
733   // Standard deviation of the KernelMixture
734   const Point sigma(getStandardDeviation());
735   Point result(sample_.computeCenteredMoment(4));
736   for (UnsignedInteger i = 0; i < dimension; ++i)
737     result[i] = (result[i] + std::pow(bandwidth_[i] * sigmaKernel, 4) * kurtosisKernel + 6.0 * varSample[i] * std::pow(bandwidth_[i] * sigmaKernel, 2)) / std::pow(sigma[i], 4);
738   return result;
739 }
740 
741 /* Parameters value and description accessor */
getParametersCollection() const742 KernelMixture::PointWithDescriptionCollection KernelMixture::getParametersCollection() const
743 {
744   const UnsignedInteger dimension = getDimension();
745   const UnsignedInteger size = sample_.getSize();
746   PointWithDescriptionCollection parameters(dimension + (dimension > 1 ? 1 : 0));
747   // The marginal parameters : the sample and the bandwidth
748   for (UnsignedInteger i = 0; i < dimension; ++i)
749   {
750     PointWithDescription marginalParameters(size + 1);
751     Description description(marginalParameters.getDimension());
752     for (UnsignedInteger j = 0; j < size; ++j)
753     {
754       marginalParameters[j] = sample_(j, i);
755       if (dimension > 1) description[j] = (OSS() << "x_" << j << "^" << i);
756       else description[j] = (OSS() << "x_" << j);
757     }
758     marginalParameters[size] = bandwidth_[i];
759     if (dimension > 1) description[size] = (OSS() << "h_" << i);
760     else description[size] = "h";
761     marginalParameters.setDescription(description);
762     parameters[i] = marginalParameters;
763   }
764   // The dependence parameters is the union of all the parameters as they all contribute to the copula, presented in a different way
765   if (dimension > 1)
766   {
767     PointWithDescription dependence(dimension * (size + 1));
768     Description description(dependence.getDimension());
769     UnsignedInteger index = 0;
770     for (UnsignedInteger i = 0; i < size; ++i)
771       for (UnsignedInteger j = 0; j < dimension; ++j)
772       {
773         dependence[index] = sample_(i, j);
774         description[index] = (OSS() << "x_" << i << "^" << j);
775         ++index;
776       }
777     for (UnsignedInteger i = 0; i < dimension; ++i)
778     {
779       dependence[index] = bandwidth_[i];
780       description[index] = (OSS() << "h_" << i);
781       ++index;
782     }
783     dependence.setDescription(description);
784     parameters[dimension] = dependence;
785   }
786   return parameters;
787 } // getParametersCollection
788 
789 
getParameter() const790 Point KernelMixture::getParameter() const
791 {
792   const UnsignedInteger size = sample_.getSize();
793   Point parameter;
794   for (UnsignedInteger i = 0; i < size; ++ i)
795   {
796     parameter.add(sample_[i]);
797   }
798   parameter.add(bandwidth_);
799   return parameter;
800 }
801 
getParameterDescription() const802 Description KernelMixture::getParameterDescription() const
803 {
804   const UnsignedInteger dimension = getDimension();
805   const UnsignedInteger size = sample_.getSize();
806   Description description;
807   for (UnsignedInteger i = 0; i < size; ++ i)
808   {
809     if (dimension > 1)
810       for (UnsignedInteger j = 0; j < dimension; ++ j)
811         description.add(OSS() << "x_" << i << "^" << j);
812     else
813       description.add(OSS() << "x_" << i);
814   }
815 
816   if (dimension > 1)
817     for (UnsignedInteger j = 0; j < dimension; ++ j)
818       description.add(OSS() << "h_" << j);
819   else
820     description.add("h");
821   return description;
822 }
823 
824 
setParameter(const Point & parameter)825 void KernelMixture::setParameter(const Point & parameter)
826 {
827   const UnsignedInteger dimension = getDimension();
828   const UnsignedInteger size = sample_.getSize();
829   if (parameter.getDimension() != (dimension * (size + 1))) throw InvalidArgumentException(HERE) << "Expected " << (dimension * (size + 1)) << " parameters";
830   UnsignedInteger index = 0;
831   for (UnsignedInteger i = 0; i < size; ++ i)
832   {
833     for (UnsignedInteger j = 0; j < dimension; ++ j)
834     {
835       sample_(i, j) = parameter[index];
836       ++ index;
837     }
838   }
839   for (UnsignedInteger j = 0; j < dimension; ++ j)
840   {
841     bandwidth_[j] = parameter[index];
842     ++ index;
843   }
844   // Use the constructor in order to recompute the PDF/CDF approximation if needed
845   const Scalar w = getWeight();
846   *this = KernelMixture(*p_kernel_, bandwidth_, sample_);
847   setWeight(w);
848 }
849 
850 
851 /* Check if the distribution is elliptical */
isElliptical() const852 Bool KernelMixture::isElliptical() const
853 {
854   // No chance to have something symmetrical if sample size > 2
855   if (sample_.getSize() > 2) return false;
856   // In dimension 1, elliptical == symmetric
857   if (getDimension() == 1) return p_kernel_->isElliptical();
858   // In dimension > 1, only samples with 1 point and Normal kernels lead to an elliptical distribution
859   return (sample_.getSize() == 1) && (p_kernel_->getClassName() == "Normal");
860 }
861 
862 /* Check if the distribution is continuos */
isContinuous() const863 Bool KernelMixture::isContinuous() const
864 {
865   return p_kernel_->isContinuous();
866 }
867 
868 /* Tell if the distribution has elliptical copula */
hasEllipticalCopula() const869 Bool KernelMixture::hasEllipticalCopula() const
870 {
871   // In 1D, all the distributions have an elliptical copula
872   if (getDimension() == 1) return true;
873   return false;
874 }
875 
876 /* Tell if the distribution has independent copula */
hasIndependentCopula() const877 Bool KernelMixture::hasIndependentCopula() const
878 {
879   // In 1D, all the distributions have an independent copula
880   return (getDimension() == 1);
881 }
882 
883 /* Method save() stores the object through the StorageManager */
save(Advocate & adv) const884 void KernelMixture::save(Advocate & adv) const
885 {
886   ContinuousDistribution::save(adv);
887   adv.saveAttribute( "kernel_", Distribution(p_kernel_) );
888   adv.saveAttribute( "bandwidth_", bandwidth_ );
889   adv.saveAttribute( "bandwidthInverse_", bandwidthInverse_ );
890   adv.saveAttribute( "normalizationFactor_", normalizationFactor_ );
891   adv.saveAttribute( "sample_", sample_ );
892   adv.saveAttribute( "pdfApproximationCDF_", pdfApproximationCDF_ );
893   adv.saveAttribute( "cdfApproximation_", cdfApproximation_ );
894   adv.saveAttribute( "pdfApproximationCCDF_", pdfApproximationCCDF_ );
895   adv.saveAttribute( "ccdfApproximation_", ccdfApproximation_ );
896   adv.saveAttribute( "useApproximatePDFCDF_", useApproximatePDFCDF_ );
897 }
898 
899 /* Method load() reloads the object from the StorageManager */
load(Advocate & adv)900 void KernelMixture::load(Advocate & adv)
901 {
902   ContinuousDistribution::load(adv);
903   Distribution kernel;
904   adv.loadAttribute( "kernel_", kernel );
905   p_kernel_ = kernel.getImplementation();
906   adv.loadAttribute( "bandwidth_", bandwidth_ );
907   adv.loadAttribute( "bandwidthInverse_", bandwidthInverse_ );
908   adv.loadAttribute( "normalizationFactor_", normalizationFactor_ );
909   adv.loadAttribute( "sample_", sample_ );
910   adv.loadAttribute( "pdfApproximationCDF_", pdfApproximationCDF_ );
911   adv.loadAttribute( "cdfApproximation_", cdfApproximation_ );
912   adv.loadAttribute( "pdfApproximationCCDF_", pdfApproximationCCDF_ );
913   adv.loadAttribute( "ccdfApproximation_", ccdfApproximation_ );
914   adv.loadAttribute( "useApproximatePDFCDF_", useApproximatePDFCDF_ );
915   computeRange();
916 }
917 
918 END_NAMESPACE_OPENTURNS
919