1 //                                               -*- C++ -*-
2 /**
3  *  @brief The UserDefined distribution
4  *
5  *  Copyright 2005-2021 Airbus-EDF-IMACS-ONERA-Phimeca
6  *
7  *  This library is free software: you can redistribute it and/or modify
8  *  it under the terms of the GNU Lesser General Public License as published by
9  *  the Free Software Foundation, either version 3 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This library is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU Lesser General Public License for more details.
16  *
17  *  You should have received a copy of the GNU Lesser General Public License
18  *  along with this library.  If not, see <http://www.gnu.org/licenses/>.
19  *
20  */
21 #include <cstdlib>
22 #include <cmath>
23 #include "openturns/UserDefined.hxx"
24 #include "openturns/RandomGenerator.hxx"
25 #include "openturns/SpecFunc.hxx"
26 #include "openturns/DistFunc.hxx"
27 #include "openturns/PersistentObjectFactory.hxx"
28 
29 BEGIN_NAMESPACE_OPENTURNS
30 
31 CLASSNAMEINIT(UserDefined)
32 static const Factory<UserDefined> Factory_UserDefined;
33 
34 
35 /* Default constructor */
UserDefined()36 UserDefined::UserDefined()
37   : DiscreteDistribution()
38   , points_(1, 1)
39   , probabilities_(1, 1.0)
40   , cumulativeProbabilities_(1, 1.0)
41   , hasUniformWeights_(true)
42   , base_(0)
43   , alias_(0)
44 {
45   setName("UserDefined");
46   setData(Sample(1, 1), Point(1, 1.0));
47 }
48 
49 /* Constructor from a sample */
UserDefined(const Sample & sample)50 UserDefined::UserDefined(const Sample & sample)
51   : DiscreteDistribution()
52   , points_(0, 0)
53   , probabilities_(0)
54   , cumulativeProbabilities_(0)
55   , hasUniformWeights_(true)
56   , base_(0)
57   , alias_(0)
58 {
59   setName("UserDefined");
60   const UnsignedInteger size = sample.getSize();
61   // We set the dimension of the UserDefined distribution
62   // This call set also the range
63   setData(sample, Point(size, 1.0 / size));
64   if ((getDimension() == 1) || (sample.getSize() <= ResourceMap::GetAsUnsignedInteger("UserDefined-SmallSize"))) compactSupport();
65   if(!sample.getDescription().isBlank()) setDescription(sample.getDescription());
66 }
67 
68 /* Constructor from a sample and the associated weights */
UserDefined(const Sample & sample,const Point & weights)69 UserDefined::UserDefined(const Sample & sample,
70                          const Point & weights)
71   : DiscreteDistribution()
72   , points_(0, 0)
73   , probabilities_(0)
74   , cumulativeProbabilities_(0)
75   , hasUniformWeights_(false)
76   , base_(0)
77   , alias_(0)
78 {
79   setName("UserDefined");
80   // We set the dimension of the UserDefined distribution
81   // This call set also the range
82   setData(sample, weights);
83   if ((getDimension() == 1) || (sample.getSize() <= ResourceMap::GetAsUnsignedInteger("UserDefined-SmallSize"))) compactSupport();
84   if(!sample.getDescription().isBlank()) setDescription(sample.getDescription());
85 }
86 
87 /* Comparison operator */
operator ==(const UserDefined & other) const88 Bool UserDefined::operator ==(const UserDefined & other) const
89 {
90   if (this == &other) return true;
91   return (points_ == other.points_) && (probabilities_ == other.probabilities_);
92 }
93 
equals(const DistributionImplementation & other) const94 Bool UserDefined::equals(const DistributionImplementation & other) const
95 {
96   const UserDefined* p_other = dynamic_cast<const UserDefined*>(&other);
97   return p_other && (*this == *p_other);
98 }
99 
100 /* String converter */
__repr__() const101 String UserDefined::__repr__() const
102 {
103   OSS oss;
104   oss << "class=" << UserDefined::GetClassName()
105       << " name=" << getName()
106       << " dimension=" << getDimension()
107       << " points=" << points_
108       << " probabilities=" << probabilities_;
109   return oss;
110 }
111 
__str__(const String &) const112 String UserDefined::__str__(const String & ) const
113 {
114   OSS oss;
115   oss << getClassName() << "(";
116   String separator("");
117   for (UnsignedInteger i = 0; i < points_.getSize(); ++i)
118   {
119     oss << separator << "{x = " << Point(points_[i]).__str__() << ", p = " << probabilities_[i] << "}";
120     separator = ", ";
121   }
122   oss << ")";
123   return oss;
124 }
125 
126 /* Virtual constructor */
clone() const127 UserDefined * UserDefined::clone() const
128 {
129   return new UserDefined(*this);
130 }
131 
132 /* Get one realization of the distribution */
getRealization() const133 Point UserDefined::getRealization() const
134 {
135   const UnsignedInteger size = points_.getSize();
136   UnsignedInteger index = 0;
137   // Efficient algorithm for uniform weights
138   if (hasUniformWeights_) index = RandomGenerator::IntegerGenerate(size);
139   // Alias method for nonuniform weights
140   else
141   {
142     index = base_.getSize() ? DistFunc::rDiscrete(base_, alias_) : DistFunc::rDiscrete(probabilities_, base_, alias_);
143   }
144   return points_[index];
145 }
146 
147 /* Get a sample of the distribution */
getSample(const UnsignedInteger size) const148 Sample UserDefined::getSample(const UnsignedInteger size) const
149 {
150   const UnsignedInteger supportSize = points_.getSize();
151   Indices indices;
152   // Efficient algorithm for uniform weights
153   if (hasUniformWeights_)
154   {
155     Collection<UnsignedInteger> values(RandomGenerator::IntegerGenerate(size, supportSize));
156     indices = Indices(values.begin(), values.end());
157   }
158   // Alias method for nonuniform weights
159   else
160   {
161     if (!base_.getSize())
162       (void) DistFunc::rDiscrete(probabilities_, base_, alias_);
163     indices = DistFunc::rDiscrete(base_, alias_, size);
164   }
165   return points_.select(indices);
166 }
167 
168 /* Get the PDF of the distribution */
computePDF(const Point & point) const169 Scalar UserDefined::computePDF(const Point & point) const
170 {
171   const UnsignedInteger dimension = getDimension();
172   if (point.getDimension() != dimension) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << dimension << ", here dimension=" << point.getDimension();
173 
174   const UnsignedInteger size = points_.getSize();
175   Scalar pdf = 0.0;
176   // Quick search for 1D case
177   const Scalar x = point[0];
178   UnsignedInteger upper = size - 1;
179   Scalar xUpper = points_(upper, 0);
180   if (x > xUpper + supportEpsilon_) return 0.0;
181   UnsignedInteger lower = 0;
182   Scalar xLower = points_(lower, 0);
183   if (x < xLower - supportEpsilon_) return 0.0;
184   // Use bisection search of the correct index
185   while (upper - lower > 1)
186   {
187     // The integer arithmetic ensure that middle will be strictly between lower and upper as far as upper - lower > 1
188     const UnsignedInteger middle = (upper + lower) / 2;
189     const Scalar xMiddle = points_(middle, 0);
190     if (xMiddle > x + supportEpsilon_)
191     {
192       upper = middle;
193       xUpper = xMiddle;
194     }
195     else
196     {
197       lower = middle;
198       xLower = xMiddle;
199     }
200   } // while
201   // At this point we have upper == lower or upper == lower + 1, with lower - epsilon <= x < upper + epsilon
202   SignedInteger index = upper;
203   while ((index < static_cast<SignedInteger>(size)) && (std::abs(x - points_(index, 0)) <= supportEpsilon_))
204   {
205     if ((point - points_[index]).norm() <= supportEpsilon_) pdf += probabilities_[index];
206     ++ index;
207   }
208   index = upper;
209   --index;
210   while ((index >= 0) && (std::abs(x - points_(index, 0)) <= supportEpsilon_))
211   {
212     if ((point - points_[index]).norm() <= supportEpsilon_) pdf += probabilities_[index];
213     --index;
214   }
215   return pdf;
216 }
217 
218 /* Get the CDF of the distribution */
computeCDF(const Point & point) const219 Scalar UserDefined::computeCDF(const Point & point) const
220 {
221   if (point.getDimension() != getDimension()) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << getDimension() << ", here dimension=" << point.getDimension();
222 
223   Scalar cdf = 0.0;
224   const UnsignedInteger size = points_.getSize();
225   const UnsignedInteger dimension = getDimension();
226   // Quick search for 1D case
227   if (dimension == 1)
228   {
229     const Scalar x = point[0];
230     UnsignedInteger upper = size - 1;
231     Scalar xUpper = points_(upper, 0);
232     if (x > xUpper - supportEpsilon_) return 1.0;
233     UnsignedInteger lower = 0;
234     Scalar xLower = points_(lower, 0);
235     if (x <= xLower - supportEpsilon_) return 0.0;
236     // Use dichotomic search of the correct index
237     while (upper - lower > 1)
238     {
239       // The integer arithmetic insure that middle will be strictly between lower and upper as far as upper - lower > 1
240       const UnsignedInteger middle = (upper + lower) / 2;
241       const Scalar xMiddle = points_(middle, 0);
242       if (xMiddle > x + supportEpsilon_)
243       {
244         upper = middle;
245         xUpper = xMiddle;
246       }
247       else
248       {
249         lower = middle;
250         xLower = xMiddle;
251       }
252     } // while
253     // At this point we have upper == lower or upper == lower + 1, with lower - epsilon <= x < upper + epsilon
254     // If xLower < x < xUpper, the contribution of lower must be taken into account, else it
255     // must be discarded
256     if (x <= xUpper - supportEpsilon_) return cumulativeProbabilities_[lower];
257     return cumulativeProbabilities_[upper];
258   }
259   // Dimension > 1
260   for (UnsignedInteger i = 0; i < size; ++i)
261   {
262     const Point x(points_[i]);
263     UnsignedInteger j = 0;
264     while ((j < dimension) && (x[j] <= point[j] + supportEpsilon_)) ++j;
265     if (j == dimension) cdf += probabilities_[i];
266   }
267   return cdf;
268 }
269 
270 /* Get the PDF gradient of the distribution */
computePDFGradient(const Point & point) const271 Point UserDefined::computePDFGradient(const Point & point) const
272 {
273   if (point.getDimension() != getDimension()) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << getDimension() << ", here dimension=" << point.getDimension();
274 
275   const UnsignedInteger size = points_.getSize();
276   const UnsignedInteger dimension = getDimension();
277   Point pdfGradient((dimension + 1) * size, 0.0);
278   for (UnsignedInteger i = 0; i < size; ++i)
279   {
280     if ((point - points_[i]).norm() < supportEpsilon_)
281     {
282       pdfGradient[i] = 1.0;
283       break;
284     }
285   }
286   return pdfGradient;
287 }
288 
289 
290 /* Get the CDF gradient of the distribution */
computeCDFGradient(const Point & point) const291 Point UserDefined::computeCDFGradient(const Point & point) const
292 {
293   if (point.getDimension() != getDimension()) throw InvalidArgumentException(HERE) << "Error: the given point must have dimension=" << getDimension() << ", here dimension=" << point.getDimension();
294 
295   const UnsignedInteger size = points_.getSize();
296   const UnsignedInteger dimension = getDimension();
297   Point cdfGradient((dimension + 1) * size, 0.0);
298   for (UnsignedInteger i = 0; i < size; ++i)
299   {
300     const Point x(points_[i]);
301     UnsignedInteger j = 0;
302     while ((j < dimension) && (point[j] <= x[j])) ++j;
303     if (j == dimension) cdfGradient[i] = 1.0;
304   }
305   return cdfGradient;
306 }
307 
308 /* Compute the numerical range of the distribution given the parameters values */
computeRange()309 void UserDefined::computeRange()
310 {
311   const UnsignedInteger size = points_.getSize();
312   const UnsignedInteger dimension = getDimension();
313   // Return an empty interval for the empty collection case
314   if (size == 0)
315   {
316     setRange(Interval(Point(dimension, 1.0), Point(dimension, 0.0)));
317     return;
318   }
319   // The number of points is finite, so are the bounds
320   const Interval::BoolCollection finiteLowerBound(dimension, true);
321   const Interval::BoolCollection finiteUpperBound(dimension, true);
322   Point lowerBound(points_[0]);
323   Point upperBound(lowerBound);
324   for (UnsignedInteger i = 1; i < size; ++i)
325   {
326     const Point pt(points_[i]);
327     for (UnsignedInteger j = 0; j < dimension; ++j)
328     {
329       const Scalar x = pt[j];
330       if (x < lowerBound[j]) lowerBound[j] = x;
331       if (x > upperBound[j]) upperBound[j] = x;
332     }
333   }
334   setRange(Interval(lowerBound, upperBound, finiteLowerBound, finiteUpperBound));
335 }
336 
337 /* Get the support of a discrete distribution that intersect a given interval */
getSupport(const Interval & interval) const338 Sample UserDefined::getSupport(const Interval & interval) const
339 {
340   if (interval.getDimension() != getDimension()) throw InvalidArgumentException(HERE) << "Error: the given interval has a dimension that does not match the distribution dimension.";
341   Sample result(0, getDimension());
342   const UnsignedInteger size = points_.getSize();
343   for (UnsignedInteger i = 0; i < size; ++i)
344   {
345     const Point x(points_[i]);
346     if (interval.contains(x)) result.add(x);
347   }
348   return result;
349 }
350 
351 /* Tell if the distribution is integer valued */
isIntegral() const352 Bool UserDefined::isIntegral() const
353 {
354   if (getDimension() != 1) return false;
355   const UnsignedInteger size = points_.getSize();
356   for (UnsignedInteger i = 0; i < size; ++i)
357   {
358     const Scalar x = points_(i, 0);
359     if (std::abs(x - round(x)) >= supportEpsilon_) return false;
360   }
361   return true;
362 }
363 
364 /* Compute the mean of the distribution */
computeMean() const365 void UserDefined::computeMean() const
366 {
367   const UnsignedInteger size = points_.getSize();
368   Point mean(getDimension());
369   for (UnsignedInteger i = 0; i < size; ++i) mean += probabilities_[i] * points_[i];
370   mean_ = mean;
371   isAlreadyComputedMean_ = true;
372 }
373 
374 /* Compute the covariance of the distribution */
computeCovariance() const375 void UserDefined::computeCovariance() const
376 {
377   const UnsignedInteger size = points_.getSize();
378   const UnsignedInteger dimension = getDimension();
379   covariance_ = CovarianceMatrix(dimension);
380   for (UnsignedInteger i = 0; i < dimension; ++i) covariance_(i, i) = 0.0;
381   const Point mean(getMean());
382   for (UnsignedInteger k = 0; k < size; ++k)
383   {
384     const Point xK(points_[k] - mean);
385     const Scalar pK = probabilities_[k];
386     for (UnsignedInteger i = 0; i < dimension; ++i)
387       for (UnsignedInteger j = 0; j <= i; ++j)
388         covariance_(i, j) += pK * xK[i] * xK[j];
389   }
390   isAlreadyComputedCovariance_ = true;
391 }
392 
393 /* Parameters value and description accessor */
getParametersCollection() const394 UserDefined::PointWithDescriptionCollection UserDefined::getParametersCollection() const
395 {
396   const UnsignedInteger dimension = getDimension();
397   PointWithDescriptionCollection parameters(dimension + 1);
398   const UnsignedInteger size = points_.getSize();
399   // Loop over the dimension to extract the marginal coordinates of the support
400   for (UnsignedInteger i = 0; i < dimension; ++i)
401   {
402     PointWithDescription point(size);
403     Description description(size);
404     for (UnsignedInteger j = 0; j < size; ++j)
405     {
406       point[j] = points_(j, i);
407       OSS oss;
408       oss << "X^" << i << "_" << j;
409       description[j] = oss;
410     }
411     point.setDescription(description);
412     parameters[i] = point;
413   }
414   // Loop over the size to extract the probabilities, seen as the dependence parameters
415   PointWithDescription point(size);
416   Description description(size);
417   for (UnsignedInteger i = 0; i < size; ++i)
418   {
419     point[i] = probabilities_[i];
420     OSS oss;
421     oss << "probabilities_" << i;
422     description[i] = oss;
423   }
424   point.setDescription(description);
425   point.setName(getDescription()[0]);
426   parameters[dimension] = point;
427   return parameters;
428 }
429 
430 /* Parameters value accessor */
getParameter() const431 Point UserDefined::getParameter() const
432 {
433   const UnsignedInteger dimension = getDimension();
434   const UnsignedInteger size = points_.getSize();
435   Point point((dimension + 1) * size);
436   for (UnsignedInteger i = 0; i < dimension; ++ i)
437   {
438     for (UnsignedInteger j = 0; j < size; ++ j)
439     {
440       point[i * size + j] = points_(j, i);
441     }
442   }
443   for (UnsignedInteger i = 0; i < size; ++ i)
444   {
445     point[dimension * size + i] = probabilities_[i];
446   }
447   return point;
448 }
449 
450 /* Parameters description accessor */
getParameterDescription() const451 Description UserDefined::getParameterDescription() const
452 {
453   const UnsignedInteger dimension = getDimension();
454   const UnsignedInteger size = points_.getSize();
455   Description description((dimension + 1) * size);
456   for (UnsignedInteger i = 0; i < dimension; ++ i)
457   {
458     for (UnsignedInteger j = 0; j < size; ++ j)
459     {
460       description[i * size + j] = (OSS() << "X^" << i << "_" << j);
461     }
462   }
463   for (UnsignedInteger i = 0; i < size; ++ i)
464   {
465     description[dimension * size + i] = (OSS() << "probabilities_" << i);
466   }
467   return description;
468 }
469 
setParameter(const Point & parameter)470 void UserDefined::setParameter(const Point & parameter)
471 {
472   const UnsignedInteger dimension = getDimension();
473   const UnsignedInteger size = points_.getSize();
474   if (parameter.getSize() != (dimension + 1) * size)
475     throw InvalidArgumentException(HERE) << "Expected " << (dimension + 1) * size << " parameters";
476   for (UnsignedInteger i = 0; i < dimension; ++ i)
477   {
478     for (UnsignedInteger j = 0; j < size; ++ j)
479     {
480       points_(j, i) = parameter[i * size + j];
481     }
482   }
483   for (UnsignedInteger i = 0; i < size; ++ i)
484   {
485     probabilities_[i] = parameter[dimension * size + i];
486   }
487   setData(points_, probabilities_);
488 }
489 
490 /* Get the i-th marginal distribution */
getMarginal(const UnsignedInteger i) const491 Distribution UserDefined::getMarginal(const UnsignedInteger i) const
492 {
493   const UnsignedInteger dimension = getDimension();
494   if (i >= dimension) throw InvalidArgumentException(HERE) << "The index of a marginal distribution must be in the range [0, dim-1]";
495   // Special case for dimension 1
496   if (dimension == 1) return clone();
497   // General case
498   UserDefined::Implementation marginal(new UserDefined(points_.getMarginal(i), probabilities_));
499   marginal->setDescription(Description(1, getDescription()[i]));
500   return marginal;
501 }
502 
503 /* Get the distribution of the marginal distribution corresponding to indices dimensions */
getMarginal(const Indices & indices) const504 Distribution UserDefined::getMarginal(const Indices & indices) const
505 {
506   const UnsignedInteger dimension = getDimension();
507   if (!indices.check(dimension)) throw InvalidArgumentException(HERE) << "The indices of a marginal distribution must be in the range [0, dim-1] and must be different";
508   // Special case for dimension 1
509   if (dimension == 1) return clone();
510   // General case
511   const UnsignedInteger outputDimension = indices.getSize();
512   Description description(getDescription());
513   Description marginalDescription(outputDimension);
514   for (UnsignedInteger i = 0; i < outputDimension; ++i)
515   {
516     const UnsignedInteger index_i = indices[i];
517     marginalDescription[i] = description[index_i];
518   }
519   UserDefined::Implementation marginal(new UserDefined(points_.getMarginal(indices), probabilities_));
520   marginal->setDescription(marginalDescription);
521   return marginal;
522 } // getMarginal(Indices)
523 
524 /* Interface specific to UserDefined */
525 
setData(const Sample & sample,const Point & weights)526 void UserDefined::setData(const Sample & sample,
527                           const Point & weights)
528 {
529   const UnsignedInteger size = sample.getSize();
530   if (size == 0) throw InvalidArgumentException(HERE) << "Error: the collection is empty";
531   if (weights.getDimension() != size) throw InvalidArgumentException(HERE) << "Error: cannot build a UserDefined distribution if the weights don't have the same dimension as the sample size.";
532   hasUniformWeights_ = true;
533   const UnsignedInteger dimension = sample[0].getDimension();
534   if (dimension == 0) throw InvalidArgumentException(HERE) << "Error: the points in the collection must have a dimension > 0";
535   // Check if all the given probabilities are >= 0
536   // Check if all the points have the same dimension
537   for (UnsignedInteger i = 1; i < size; ++i) if (sample[i].getDimension() != dimension) throw InvalidArgumentException(HERE) << "UserDefined distribution must have all its point with the same dimension";
538   setDimension(dimension);
539   // First, sort the collection such that the sample made with the first component is in ascending order
540   Sample weightedData(size, dimension + 1);
541   for (UnsignedInteger i = 0; i < size; ++i)
542   {
543     const Point x(sample[i]);
544     for (UnsignedInteger j = 0; j < dimension; ++j) weightedData(i, j) = x[j];
545     weightedData(i, dimension) = weights[i];
546   }
547   // Sort the pairs
548   weightedData = weightedData.sortAccordingToAComponent(0);
549   // Check the probabilities and normalize them
550   const Scalar firstProbability = weightedData(0, dimension);
551   Scalar sum = 0.0;
552   cumulativeProbabilities_ = Point(size);
553   for (UnsignedInteger i = 0; i < size; ++i)
554   {
555     const Scalar p = weightedData(i, dimension);
556     if (!(p >= 0.0)) throw InvalidArgumentException(HERE) << "UserDefined distribution must have positive probabilities";
557     sum += p;
558     cumulativeProbabilities_[i] = sum;
559     hasUniformWeights_ = hasUniformWeights_ && (std::abs(p - firstProbability) < pdfEpsilon_);
560   }
561   if (sum <= 0.0) throw InvalidArgumentException(HERE) << "Error: the sum of probabilities is zero.";
562   // Normalize the probabilities
563   for (UnsignedInteger i = 0; i < size; ++i)
564   {
565     weightedData(i, dimension) /= sum;
566     cumulativeProbabilities_[i] /= sum;
567   }
568   points_ = Sample(size, dimension);
569   probabilities_ = Point(size);
570   for (UnsignedInteger i = 0; i < size; ++i)
571   {
572     Point x(dimension);
573     for (UnsignedInteger j = 0; j < dimension; ++j) x[j] = weightedData(i, j);
574     points_[i] = x;
575     probabilities_[i] = std::max(0.0, std::min(1.0, weightedData(i, dimension)));
576   }
577   // We augment slightly the last cumulative probability, which should be equal to 1.0 but we enforce a value > 1.0.
578   cumulativeProbabilities_[size - 1] = 1.0 + 2.0 * supportEpsilon_;
579   isAlreadyComputedMean_ = false;
580   isAlreadyComputedCovariance_ = false;
581   isAlreadyCreatedGeneratingFunction_ = false;
582   computeRange();
583   // trigger reset of alias method
584   base_.clear();
585   alias_.clear();
586 }
587 
588 
getX() const589 Sample UserDefined::getX() const
590 {
591   return points_;
592 }
593 
594 
getP() const595 Point UserDefined::getP() const
596 {
597   return probabilities_;
598 }
599 
600 
601 /* Quantile computation for dimension=1 */
computeScalarQuantile(const Scalar prob,const Bool tail) const602 Scalar UserDefined::computeScalarQuantile(const Scalar prob,
603     const Bool tail) const
604 {
605   if (dimension_ != 1) throw InvalidDimensionException(HERE) << "Error: the method computeScalarQuantile is only defined for 1D distributions";
606   UnsignedInteger index = 0;
607   const Scalar p = tail ? 1 - prob : prob;
608   while (cumulativeProbabilities_[index] < p) ++index;
609   return points_(index, 0);
610 }
611 
612 /* Merge the identical points of the support */
compactSupport(const Scalar epsilon)613 void UserDefined::compactSupport(const Scalar epsilon)
614 {
615   // No compaction if epsilon is negative
616   if (epsilon < 0.0) return;
617   const UnsignedInteger size = points_.getSize();
618   if (size == 0) return;
619   const UnsignedInteger dimension = getDimension();
620   Sample compactX(0, dimension);
621   Point compactP(0);
622   if (dimension > 1)
623   {
624     // Build a hash table with rounded components
625     const UnsignedInteger hashSize = 511;
626     Collection<Indices> indices(hashSize);
627     Collection<Indices> residuals(hashSize);
628     union
629     {
630       Scalar *s;
631       Unsigned64BitsInteger *u64;
632     } typepun;
633     for (UnsignedInteger i = 0; i < size; ++i)
634     {
635       const Point x(points_[i]);
636       Scalar roundedComponent = x[0];
637       if (epsilon > 0.0) roundedComponent = epsilon * round(roundedComponent / epsilon);
638       typepun.s = &roundedComponent;
639       UnsignedInteger index = *typepun.u64;
640       for (UnsignedInteger j = 1; j < dimension; ++j)
641       {
642         roundedComponent = x[j];
643         if (epsilon > 0.0) roundedComponent = epsilon * round(roundedComponent / epsilon);
644         // XOR based hash function on the binary representation of the floating point coordinates
645         typepun.s = &roundedComponent;
646         index ^= *typepun.u64;
647       }
648       const UnsignedInteger hash = index % hashSize;
649       const UnsignedInteger quotient = index / hashSize;
650       indices[hash].add(i);
651       residuals[hash].add(quotient);
652     }
653     // Now, we just have to inspect the points with the same hash
654     for (UnsignedInteger i = 0; i < hashSize; ++i)
655     {
656       const Indices bucket(indices[i]);
657       const Indices keys(residuals[i]);
658       const UnsignedInteger bucketSize = bucket.getSize();
659       if (bucketSize == 0) continue;
660       if (bucketSize == 1)
661       {
662         compactX.add(points_[bucket[0]]);
663         compactP.add(probabilities_[bucket[0]]);
664       }
665       else
666       {
667         // New weights for the points in the bucket
668         Point weights(bucketSize);
669         // Here we have to detect and merge all the duplicates
670         // The ith atom is removed if flagToRemove[i] > 0
671         Indices flagToRemove(bucketSize, 0);
672         for (UnsignedInteger j = 0; j < bucketSize; ++j)
673         {
674           const UnsignedInteger currentIndex = bucket[j];
675           const UnsignedInteger currentKey = keys[j];
676           weights[j] = probabilities_[currentIndex];
677           const Point current(points_[currentIndex]);
678           if (flagToRemove[j] == 0)
679           {
680             for (UnsignedInteger k = j + 1; k < bucketSize; ++k)
681             {
682               const UnsignedInteger candidateIndex = bucket[k];
683               const UnsignedInteger candidateKey = keys[k];
684               const Point candidate(points_[candidateIndex]);
685               if ((currentKey == candidateKey) && (current - candidate).norm() <= epsilon)
686               {
687                 flagToRemove[k] = 1;
688                 weights[j] += probabilities_[candidateIndex];
689               }
690             } // k
691           } // flagToRemove
692         } // j
693         // We keep all the points that must not be removed
694         for (UnsignedInteger j = 0; j < bucketSize; ++j)
695         {
696           if (flagToRemove[j] == 0)
697           {
698             compactX.add(points_[bucket[j]]);
699             compactP.add(weights[j]);
700           }
701         }
702       } // bucketSize > 1
703     } // Loop over the hash table
704     setData(compactX, compactP);
705     return;
706   }
707   Scalar lastLocation = points_(0, 0);
708   Scalar lastWeight = probabilities_[0];
709   for (UnsignedInteger i = 1; i < size; ++i)
710   {
711     const Scalar currentLocation = points_(i, 0);
712     const Scalar currentWeight = probabilities_[i];
713     // The current point must be merged
714     if (std::abs(currentLocation - lastLocation) <= epsilon) lastWeight += probabilities_[i];
715     else
716     {
717       compactX.add(Point(1, lastLocation));
718       compactP.add(std::max(0.0, std::min(1.0, lastWeight)));
719       lastLocation = currentLocation;
720       lastWeight = currentWeight;
721     }
722   }
723   compactX.add(Point(1, lastLocation));
724   compactP.add(std::max(0.0, std::min(1.0, lastWeight)));
725   setData(compactX, compactP);
726 }
727 
728 /* Tell if the distribution has an elliptical copula */
hasEllipticalCopula() const729 Bool UserDefined::hasEllipticalCopula() const
730 {
731   return points_.getSize() == 1;
732 }
733 
734 
735 /* Tell if the distribution has independent copula */
hasIndependentCopula() const736 Bool UserDefined::hasIndependentCopula() const
737 {
738   return (dimension_ == 1) || (points_.getSize() == 1);
739 }
740 
741 
742 /* Method save() stores the object through the StorageManager */
save(Advocate & adv) const743 void UserDefined::save(Advocate & adv) const
744 {
745   DiscreteDistribution::save(adv);
746   adv.saveAttribute( "points_", points_ );
747   adv.saveAttribute( "probabilities_", probabilities_ );
748   adv.saveAttribute( "cumulativeProbabilities_", cumulativeProbabilities_ );
749   adv.saveAttribute( "hasUniformWeights_", hasUniformWeights_ );
750 }
751 
752 /* Method load() reloads the object from the StorageManager */
load(Advocate & adv)753 void UserDefined::load(Advocate & adv)
754 {
755   DiscreteDistribution::load(adv);
756   adv.loadAttribute( "points_", points_ );
757   adv.loadAttribute( "probabilities_", probabilities_ );
758   adv.loadAttribute( "cumulativeProbabilities_", cumulativeProbabilities_ );
759   adv.loadAttribute( "hasUniformWeights_", hasUniformWeights_ );
760   computeRange();
761 }
762 
763 END_NAMESPACE_OPENTURNS
764