1 // negative_binomial_example1.cpp
2 
3 // Copyright Paul A. Bristow 2007, 2010.
4 
5 // Use, modification and distribution are subject to the
6 // Boost Software License, Version 1.0.
7 // (See accompanying file LICENSE_1_0.txt
8 // or copy at http://www.boost.org/LICENSE_1_0.txt)
9 
10 // Example 1 of using negative_binomial distribution.
11 
12 //[negative_binomial_eg1_1
13 
14 /*`
15 Based on [@http://en.wikipedia.org/wiki/Negative_binomial_distribution
16 a problem by Dr. Diane Evans,
17 Professor of Mathematics at Rose-Hulman Institute of Technology].
18 
19 Pat is required to sell candy bars to raise money for the 6th grade field trip.
20 There are thirty houses in the neighborhood,
21 and Pat is not supposed to return home until five candy bars have been sold.
22 So the child goes door to door, selling candy bars.
23 At each house, there is a 0.4 probability (40%) of selling one candy bar
24 and a 0.6 probability (60%) of selling nothing.
25 
26 What is the probability mass (density) function (pdf) for selling the last (fifth)
27 candy bar at the nth house?
28 
29 The Negative Binomial(r, p) distribution describes the probability of k failures
30 and r successes in k+r Bernoulli(p) trials with success on the last trial.
31 (A [@http://en.wikipedia.org/wiki/Bernoulli_distribution Bernoulli trial]
32 is one with only two possible outcomes, success of failure,
33 and p is the probability of success).
34 See also [@ http://en.wikipedia.org/wiki/Bernoulli_distribution Bernoulli distribution]
35 and [@http://www.math.uah.edu/stat/bernoulli/Introduction.xhtml Bernoulli applications].
36 
37 In this example, we will deliberately produce a variety of calculations
38 and outputs to demonstrate the ways that the negative binomial distribution
39 can be implemented with this library: it is also deliberately over-commented.
40 
41 First we need to #define macros to control the error and discrete handling policies.
42 For this simple example, we want to avoid throwing
43 an exception (the default policy) and just return infinity.
44 We want to treat the distribution as if it was continuous,
45 so we choose a discrete_quantile policy of real,
46 rather than the default policy integer_round_outwards.
47 */
48 #define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
49 #define BOOST_MATH_DISCRETE_QUANTILE_POLICY real
50 /*`
51 After that we need some includes to provide easy access to the negative binomial distribution,
52 [caution It is vital to #include distributions etc *after* the above #defines]
53 and we need some std library iostream, of course.
54 */
55 #include <boost/math/distributions/negative_binomial.hpp>
56   // for negative_binomial_distribution
57   using boost::math::negative_binomial; // typedef provides default type is double.
58   using  ::boost::math::pdf; // Probability mass function.
59   using  ::boost::math::cdf; // Cumulative density function.
60   using  ::boost::math::quantile;
61 
62 #include <iostream>
63   using std::cout; using std::endl;
64   using std::noshowpoint; using std::fixed; using std::right; using std::left;
65 #include <iomanip>
66   using std::setprecision; using std::setw;
67 
68 #include <limits>
69   using std::numeric_limits;
70 //] [negative_binomial_eg1_1]
71 
main()72 int main()
73 {
74   cout <<"Selling candy bars - using the negative binomial distribution."
75     << "\nby Dr. Diane Evans,"
76     "\nProfessor of Mathematics at Rose-Hulman Institute of Technology,"
77     << "\nsee http://en.wikipedia.org/wiki/Negative_binomial_distribution\n"
78     << endl;
79   cout << endl;
80   cout.precision(5);
81   // None of the values calculated have a useful accuracy as great this, but
82   // INF shows wrongly with < 5 !
83   // https://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=240227
84 //[negative_binomial_eg1_2
85 /*`
86 It is always sensible to use try and catch blocks because defaults policies are to
87 throw an exception if anything goes wrong.
88 
89 A simple catch block (see below) will ensure that you get a
90 helpful error message instead of an abrupt program abort.
91 */
92   try
93   {
94 /*`
95 Selling five candy bars means getting five successes, so successes r = 5.
96 The total number of trials (n, in this case, houses visited) this takes is therefore
97   = sucesses + failures or k + r = k + 5.
98 */
99     double sales_quota = 5; // Pat's sales quota - successes (r).
100 /*`
101 At each house, there is a 0.4 probability (40%) of selling one candy bar
102 and a 0.6 probability (60%) of selling nothing.
103 */
104     double success_fraction = 0.4; // success_fraction (p) - so failure_fraction is 0.6.
105 /*`
106 The Negative Binomial(r, p) distribution describes the probability of k failures
107 and r successes in k+r Bernoulli(p) trials with success on the last trial.
108 (A [@http://en.wikipedia.org/wiki/Bernoulli_distribution Bernoulli trial]
109 is one with only two possible outcomes, success of failure,
110 and p is the probability of success).
111 
112 We therefore start by constructing a negative binomial distribution
113 with parameters sales_quota (required successes) and probability of success.
114 */
115     negative_binomial nb(sales_quota, success_fraction); // type double by default.
116 /*`
117 To confirm, display the success_fraction & successes parameters of the distribution.
118 */
119     cout << "Pat has a sales per house success rate of " << success_fraction
120       << ".\nTherefore he would, on average, sell " << nb.success_fraction() * 100
121       << " bars after trying 100 houses." << endl;
122 
123     int all_houses = 30; // The number of houses on the estate.
124 
125     cout << "With a success rate of " << nb.success_fraction()
126       << ", he might expect, on average,\n"
127         "to need to visit about " << success_fraction * all_houses
128         << " houses in order to sell all " << nb.successes() << " bars. " << endl;
129 /*`
130 [pre
131 Pat has a sales per house success rate of 0.4.
132 Therefore he would, on average, sell 40 bars after trying 100 houses.
133 With a success rate of 0.4, he might expect, on average,
134 to need to visit about 12 houses in order to sell all 5 bars.
135 ]
136 
137 The random variable of interest is the number of houses
138 that must be visited to sell five candy bars,
139 so we substitute k = n - 5 into a negative_binomial(5, 0.4)
140 and obtain the __pdf of the distribution of houses visited.
141 Obviously, the best possible case is that Pat makes sales on all the first five houses.
142 
143 We calculate this using the pdf function:
144 */
145     cout << "Probability that Pat finishes on the " << sales_quota << "th house is "
146       << pdf(nb, 5 - sales_quota) << endl; // == pdf(nb, 0)
147 /*`
148 Of course, he could not finish on fewer than 5 houses because he must sell 5 candy bars.
149 So the 5th house is the first that he could possibly finish on.
150 
151 To finish on or before the 8th house, Pat must finish at the 5th, 6th, 7th or 8th house.
152 The probability that he will finish on *exactly* ( == ) on any house
153 is the Probability Density Function (pdf).
154 */
155     cout << "Probability that Pat finishes on the 6th house is "
156       << pdf(nb, 6 - sales_quota) << endl;
157     cout << "Probability that Pat finishes on the 7th house is "
158       << pdf(nb, 7 - sales_quota) << endl;
159     cout << "Probability that Pat finishes on the 8th house is "
160       << pdf(nb, 8 - sales_quota) << endl;
161 /*`
162 [pre
163 Probability that Pat finishes on the 6th house is 0.03072
164 Probability that Pat finishes on the 7th house is 0.055296
165 Probability that Pat finishes on the 8th house is 0.077414
166 ]
167 
168 The sum of the probabilities for these houses is the Cumulative Distribution Function (cdf).
169 We can calculate it by adding the individual probabilities.
170 */
171     cout << "Probability that Pat finishes on or before the 8th house is sum "
172       "\n" << "pdf(sales_quota) + pdf(6) + pdf(7) + pdf(8) = "
173       // Sum each of the mass/density probabilities for houses sales_quota = 5, 6, 7, & 8.
174       << pdf(nb, 5 - sales_quota) // 0 failures.
175         + pdf(nb, 6 - sales_quota) // 1 failure.
176         + pdf(nb, 7 - sales_quota) // 2 failures.
177         + pdf(nb, 8 - sales_quota) // 3 failures.
178       << endl;
179 /*`[pre
180 pdf(sales_quota) + pdf(6) + pdf(7) + pdf(8) = 0.17367
181 ]
182 
183 Or, usually better, by using the negative binomial *cumulative* distribution function.
184 */
185     cout << "\nProbability of selling his quota of " << sales_quota
186       << " bars\non or before the " << 8 << "th house is "
187       << cdf(nb, 8 - sales_quota) << endl;
188 /*`[pre
189 Probability of selling his quota of 5 bars on or before the 8th house is 0.17367
190 ]*/
191     cout << "\nProbability that Pat finishes exactly on the 10th house is "
192       << pdf(nb, 10 - sales_quota) << endl;
193     cout << "\nProbability of selling his quota of " << sales_quota
194       << " bars\non or before the " << 10 << "th house is "
195       << cdf(nb, 10 - sales_quota) << endl;
196 /*`
197 [pre
198 Probability that Pat finishes exactly on the 10th house is 0.10033
199 Probability of selling his quota of 5 bars on or before the 10th house is 0.3669
200 ]*/
201     cout << "Probability that Pat finishes exactly on the 11th house is "
202       << pdf(nb, 11 - sales_quota) << endl;
203     cout << "\nProbability of selling his quota of " << sales_quota
204       << " bars\non or before the " << 11 << "th house is "
205       << cdf(nb, 11 - sales_quota) << endl;
206 /*`[pre
207 Probability that Pat finishes on the 11th house is 0.10033
208 Probability of selling his quota of 5 candy bars
209 on or before the 11th house is 0.46723
210 ]*/
211     cout << "Probability that Pat finishes exactly on the 12th house is "
212       << pdf(nb, 12 - sales_quota) << endl;
213 
214     cout << "\nProbability of selling his quota of " << sales_quota
215       << " bars\non or before the " << 12 << "th house is "
216       << cdf(nb, 12 - sales_quota) << endl;
217 /*`[pre
218 Probability that Pat finishes on the 12th house is 0.094596
219 Probability of selling his quota of 5 candy bars
220 on or before the 12th house is 0.56182
221 ]
222 Finally consider the risk of Pat not selling his quota of 5 bars
223 even after visiting all the houses.
224 Calculate the probability that he /will/ sell on
225 or before the last house:
226 Calculate the probability that he would sell all his quota on the very last house.
227 */
228     cout << "Probability that Pat finishes on the " << all_houses
229       << " house is " << pdf(nb, all_houses - sales_quota) << endl;
230 /*`
231 Probability of selling his quota of 5 bars on the 30th house is
232 [pre
233 Probability that Pat finishes on the 30 house is 0.00069145
234 ]
235 when he'd be very unlucky indeed!
236 
237 What is the probability that Pat exhausts all 30 houses in the neighborhood,
238 and *still* doesn't sell the required 5 candy bars?
239 */
240     cout << "\nProbability of selling his quota of " << sales_quota
241       << " bars\non or before the " << all_houses << "th house is "
242       << cdf(nb, all_houses - sales_quota) << endl;
243 /*`
244 [pre
245 Probability of selling his quota of 5 bars
246 on or before the 30th house is 0.99849
247 ]
248 
249 So the risk of failing even after visiting all the houses is 1 - this probability,
250   ``1 - cdf(nb, all_houses - sales_quota``
251 But using this expression may cause serious inaccuracy,
252 so it would be much better to use the complement of the cdf:
253 So the risk of failing even at, or after, the 31th (non-existent) houses is 1 - this probability,
254   ``1 - cdf(nb, all_houses - sales_quota)``
255 But using this expression may cause serious inaccuracy.
256 So it would be much better to use the __complement of the cdf (see __why_complements).
257 */
258     cout << "\nProbability of failing to sell his quota of " << sales_quota
259       << " bars\neven after visiting all " << all_houses << " houses is "
260       << cdf(complement(nb, all_houses - sales_quota)) << endl;
261 /*`
262 [pre
263 Probability of failing to sell his quota of 5 bars
264 even after visiting all 30 houses is 0.0015101
265 ]
266 We can also use the quantile (percentile), the inverse of the cdf, to
267 predict which house Pat will finish on.  So for the 8th house:
268 */
269  double p = cdf(nb, (8 - sales_quota));
270  cout << "Probability of meeting sales quota on or before 8th house is "<< p << endl;
271 /*`
272 [pre
273 Probability of meeting sales quota on or before 8th house is 0.174
274 ]
275 */
276     cout << "If the confidence of meeting sales quota is " << p
277         << ", then the finishing house is " << quantile(nb, p) + sales_quota << endl;
278 
279     cout<< " quantile(nb, p) = " << quantile(nb, p) << endl;
280 /*`
281 [pre
282 If the confidence of meeting sales quota is 0.17367, then the finishing house is 8
283 ]
284 Demanding absolute certainty that all 5 will be sold,
285 implies an infinite number of trials.
286 (Of course, there are only 30 houses on the estate,
287 so he can't ever be *certain* of selling his quota).
288 */
289     cout << "If the confidence of meeting sales quota is " << 1.
290         << ", then the finishing house is " << quantile(nb, 1) + sales_quota << endl;
291     //  1.#INF == infinity.
292 /*`[pre
293 If the confidence of meeting sales quota is 1, then the finishing house is 1.#INF
294 ]
295 And similarly for a few other probabilities:
296 */
297     cout << "If the confidence of meeting sales quota is " << 0.
298         << ", then the finishing house is " << quantile(nb, 0.) + sales_quota << endl;
299 
300     cout << "If the confidence of meeting sales quota is " << 0.5
301         << ", then the finishing house is " << quantile(nb, 0.5) + sales_quota << endl;
302 
303     cout << "If the confidence of meeting sales quota is " << 1 - 0.00151 // 30 th
304         << ", then the finishing house is " << quantile(nb, 1 - 0.00151) + sales_quota << endl;
305 /*`
306 [pre
307 If the confidence of meeting sales quota is 0, then the finishing house is 5
308 If the confidence of meeting sales quota is 0.5, then the finishing house is 11.337
309 If the confidence of meeting sales quota is 0.99849, then the finishing house is 30
310 ]
311 
312 Notice that because we chose a discrete quantile policy of real,
313 the result can be an 'unreal' fractional house.
314 
315 If the opposite is true, we don't want to assume any confidence, then this is tantamount
316 to assuming that all the first sales_quota trials will be successful sales.
317 */
318     cout << "If confidence of meeting quota is zero\n(we assume all houses are successful sales)"
319       ", then finishing house is " << sales_quota << endl;
320 /*`
321 [pre
322 If confidence of meeting quota is zero (we assume all houses are successful sales), then finishing house is 5
323 If confidence of meeting quota is 0, then finishing house is 5
324 ]
325 We can list quantiles for a few probabilities:
326 */
327 
328     double ps[] = {0., 0.001, 0.01, 0.05, 0.1, 0.5, 0.9, 0.95, 0.99, 0.999, 1.};
329     // Confidence as fraction = 1-alpha, as percent =  100 * (1-alpha[i]) %
330     cout.precision(3);
331     for (unsigned i = 0; i < sizeof(ps)/sizeof(ps[0]); i++)
332     {
333       cout << "If confidence of meeting quota is " << ps[i]
334         << ", then finishing house is " << quantile(nb, ps[i]) + sales_quota
335         << endl;
336    }
337 
338 /*`
339 [pre
340 If confidence of meeting quota is 0, then finishing house is 5
341 If confidence of meeting quota is 0.001, then finishing house is 5
342 If confidence of meeting quota is 0.01, then finishing house is 5
343 If confidence of meeting quota is 0.05, then finishing house is 6.2
344 If confidence of meeting quota is 0.1, then finishing house is 7.06
345 If confidence of meeting quota is 0.5, then finishing house is 11.3
346 If confidence of meeting quota is 0.9, then finishing house is 17.8
347 If confidence of meeting quota is 0.95, then finishing house is 20.1
348 If confidence of meeting quota is 0.99, then finishing house is 24.8
349 If confidence of meeting quota is 0.999, then finishing house is 31.1
350 If confidence of meeting quota is 1, then finishing house is 1.#INF
351 ]
352 
353 We could have applied a ceil function to obtain a 'worst case' integer value for house.
354 ``ceil(quantile(nb, ps[i]))``
355 
356 Or, if we had used the default discrete quantile policy, integer_outside, by omitting
357 ``#define BOOST_MATH_DISCRETE_QUANTILE_POLICY real``
358 we would have achieved the same effect.
359 
360 The real result gives some suggestion which house is most likely.
361 For example, compare the real and integer_outside for 95% confidence.
362 
363 [pre
364 If confidence of meeting quota is 0.95, then finishing house is 20.1
365 If confidence of meeting quota is 0.95, then finishing house is 21
366 ]
367 The real value 20.1 is much closer to 20 than 21, so integer_outside is pessimistic.
368 We could also use integer_round_nearest policy to suggest that 20 is more likely.
369 
370 Finally, we can tabulate the probability for the last sale being exactly on each house.
371 */
372    cout << "\nHouse for " << sales_quota << "th (last) sale.  Probability (%)" << endl;
373    cout.precision(5);
374    for (int i = (int)sales_quota; i < all_houses+1; i++)
375    {
376      cout << left << setw(3) << i << "                             " << setw(8) << cdf(nb, i - sales_quota)  << endl;
377    }
378    cout << endl;
379 /*`
380 [pre
381 House for 5 th (last) sale.  Probability (%)
382 5                               0.01024
383 6                               0.04096
384 7                               0.096256
385 8                               0.17367
386 9                               0.26657
387 10                              0.3669
388 11                              0.46723
389 12                              0.56182
390 13                              0.64696
391 14                              0.72074
392 15                              0.78272
393 16                              0.83343
394 17                              0.874
395 18                              0.90583
396 19                              0.93039
397 20                              0.94905
398 21                              0.96304
399 22                              0.97342
400 23                              0.98103
401 24                              0.98655
402 25                              0.99053
403 26                              0.99337
404 27                              0.99539
405 28                              0.99681
406 29                              0.9978
407 30                              0.99849
408 ]
409 
410 As noted above, using a catch block is always a good idea, even if you do not expect to use it.
411 */
412   }
413   catch(const std::exception& e)
414   { // Since we have set an overflow policy of ignore_error,
415     // an overflow exception should never be thrown.
416      std::cout << "\nMessage from thrown exception was:\n " << e.what() << std::endl;
417 /*`
418 For example, without a ignore domain error policy, if we asked for ``pdf(nb, -1)`` for example, we would get:
419 [pre
420 Message from thrown exception was:
421  Error in function boost::math::pdf(const negative_binomial_distribution<double>&, double):
422  Number of failures argument is -1, but must be >= 0 !
423 ]
424 */
425 //] [/ negative_binomial_eg1_2]
426   }
427    return 0;
428 }  // int main()
429 
430 
431 /*
432 
433 Output is:
434 
435 Selling candy bars - using the negative binomial distribution.
436 by Dr. Diane Evans,
437 Professor of Mathematics at Rose-Hulman Institute of Technology,
438 see http://en.wikipedia.org/wiki/Negative_binomial_distribution
439 Pat has a sales per house success rate of 0.4.
440 Therefore he would, on average, sell 40 bars after trying 100 houses.
441 With a success rate of 0.4, he might expect, on average,
442 to need to visit about 12 houses in order to sell all 5 bars.
443 Probability that Pat finishes on the 5th house is 0.01024
444 Probability that Pat finishes on the 6th house is 0.03072
445 Probability that Pat finishes on the 7th house is 0.055296
446 Probability that Pat finishes on the 8th house is 0.077414
447 Probability that Pat finishes on or before the 8th house is sum
448 pdf(sales_quota) + pdf(6) + pdf(7) + pdf(8) = 0.17367
449 Probability of selling his quota of 5 bars
450 on or before the 8th house is 0.17367
451 Probability that Pat finishes exactly on the 10th house is 0.10033
452 Probability of selling his quota of 5 bars
453 on or before the 10th house is 0.3669
454 Probability that Pat finishes exactly on the 11th house is 0.10033
455 Probability of selling his quota of 5 bars
456 on or before the 11th house is 0.46723
457 Probability that Pat finishes exactly on the 12th house is 0.094596
458 Probability of selling his quota of 5 bars
459 on or before the 12th house is 0.56182
460 Probability that Pat finishes on the 30 house is 0.00069145
461 Probability of selling his quota of 5 bars
462 on or before the 30th house is 0.99849
463 Probability of failing to sell his quota of 5 bars
464 even after visiting all 30 houses is 0.0015101
465 Probability of meeting sales quota on or before 8th house is 0.17367
466 If the confidence of meeting sales quota is 0.17367, then the finishing house is 8
467  quantile(nb, p) = 3
468 If the confidence of meeting sales quota is 1, then the finishing house is 1.#INF
469 If the confidence of meeting sales quota is 0, then the finishing house is 5
470 If the confidence of meeting sales quota is 0.5, then the finishing house is 11.337
471 If the confidence of meeting sales quota is 0.99849, then the finishing house is 30
472 If confidence of meeting quota is zero
473 (we assume all houses are successful sales), then finishing house is 5
474 If confidence of meeting quota is 0, then finishing house is 5
475 If confidence of meeting quota is 0.001, then finishing house is 5
476 If confidence of meeting quota is 0.01, then finishing house is 5
477 If confidence of meeting quota is 0.05, then finishing house is 6.2
478 If confidence of meeting quota is 0.1, then finishing house is 7.06
479 If confidence of meeting quota is 0.5, then finishing house is 11.3
480 If confidence of meeting quota is 0.9, then finishing house is 17.8
481 If confidence of meeting quota is 0.95, then finishing house is 20.1
482 If confidence of meeting quota is 0.99, then finishing house is 24.8
483 If confidence of meeting quota is 0.999, then finishing house is 31.1
484 If confidence of meeting quota is 1, then finishing house is 1.#J
485 House for 5th (last) sale.  Probability (%)
486 5                               0.01024
487 6                               0.04096
488 7                               0.096256
489 8                               0.17367
490 9                               0.26657
491 10                              0.3669
492 11                              0.46723
493 12                              0.56182
494 13                              0.64696
495 14                              0.72074
496 15                              0.78272
497 16                              0.83343
498 17                              0.874
499 18                              0.90583
500 19                              0.93039
501 20                              0.94905
502 21                              0.96304
503 22                              0.97342
504 23                              0.98103
505 24                              0.98655
506 25                              0.99053
507 26                              0.99337
508 27                              0.99539
509 28                              0.99681
510 29                              0.9978
511 30                              0.99849
512 
513 */
514