1 /*
2  * $Id: random.i,v 1.1 2005-09-18 22:06:05 dhmunro Exp $
3  * Random numbers with various distributions.
4  */
5 /* Copyright (c) 2005, The Regents of the University of California.
6  * All rights reserved.
7  * This file is part of yorick (http://yorick.sourceforge.net).
8  * Read the accompanying LICENSE file for details.
9  */
10 
11 /* Contents:
12 
13    random_x   - avoids the 2.e9 bins of the random function at a
14                 cost of calling random twice, can be used as a
15                 drop-in replacement for random
16    random_u   - convenience routine to give uniform deviate on an
17                 interval other than (0,1)
18    random_n   - return gaussian deviate
19    random_ipq - arbitrary piecewise linear deviate, with optional
20                 power law or exponential tails
21    random_rej - generic implementation of rejection method, can be
22                 used either in conjunction with a piecewise linear
23                 bounding function, or an arbitrary bounding function
24                 (in the latter case, the inverse of the integral of
25                 the bounding function must be supplied as well)
26    poisson    - return poisson deviate
27 
28    In all cases, these routines accept a dimlist or arguments to
29    determine the dimensions of the returned random deviates.
30    Furthermore, in all cases you will get back the same sequence
31    of deviates no matter what the dimensionality of the calls --
32    for example, if at some point the call to random_n(5) returns
33    [.3,-.1,-.8,1.1,-.9], then if instead random_n(3) followed by
34    random_n(2) had been called, the return values would have been
35    [.3,-.1,-.8] and [1.1,-.9].
36  */
37 
38 /* ------------------------------------------------------------------------ */
39 
40 func build_dimlist(&dimlist, arg)
41 /* DOCUMENT build_dimlist, dimlist, next_argument
42      build a DIMLIST, as used in the array function.  Use like this:
43 
44      func your_function(arg1, arg2, etc, dimlist, ..)
45      {
46        while (more_args()) build_dimlist, dimlist, next_arg();
47        ...
48      }
49 
50      After this, DIMLIST will be an array of the form
51      [#dims, dim1, dim2, ...], compounded from the multiple arguments
52      in the same way as the array function.  If no DIMLIST arguments
53      given, DIMLIST will be [] instead of [0], which will act the
54      same in most situations.  If that possibility is unacceptible,
55      you may add
56        if (is_void(dimlist)) dimlist= [0];
57      after the while loop.
58  */
59 {
60   if (is_void(dimlist)) dimlist= [0];
61   else if (!dimsof(dimlist)(1)) dimlist= [1,dimlist];
62   if (is_void(arg)) return;
63   if (!dimsof(arg)(1)) {
64     grow, dimlist, arg;
65     dimlist(1)+= 1;
66   } else {
67     n= arg(1);
68     grow, dimlist, arg(2:1+n);
69     dimlist(1)+= n;
70   }
71 }
72 
73 /* ------------------------------------------------------------------------ */
74 
random_x(dimlist,..)75 func random_x(dimlist, ..)
76 /* DOCUMENT random_x(dimlist)
77 
78      same as random(DIMLIST), except that random_x calls random
79      twice at each point, to avoid the defect that random only
80      can produce about 2.e9 numbers on the interval (0.,1.) (see
81      random for an explanation of these bins).
82 
83      You may set random=random_x to get these "better" random
84      numbers in every call to random.
85 
86      Unlike random, there is a chance in 1.e15 or so that random_x
87      may return exactly 1.0 or 0.0 (the latter may not be possible
88      with IEEE standard arithmetic, while the former apparently is).
89      Since cosmic rays are far more likely, you may as well not
90      worry about this.  Also, because of rounding errors, some bit
91      patterns may still be more likely than others, but the 0.5e-9
92      wide bins of random will be absent.
93 
94    SEE ALSO: random
95  */
96 {
97   while (more_args()) build_dimlist, dimlist, next_arg();
98   if (is_void(dimlist)) {
99     dimlist= [1,2];
100   } else if (!dimsof(dimlist)(1)) {
101     dimlist= [2,2,dimlist];
102   } else {
103     dimlist= grow([dimlist(1)+1],dimlist);
104     dimlist(2)= 2;
105   }
106   r= random_0(dimlist);
107   return r(1,..) + (r(2,..)-0.5)/2147483562.
108 }
109 
110 if (is_void(random_0)) random_0= random;
111 
112 /* ------------------------------------------------------------------------ */
113 
114 func random_u(a, b, dimlist, ..)
115 /* DOCUMENT random_u(a, b, dimlist)
116 
117      return uniformly distributed random numbers between A and B.
118      (Will never exactly equal A or B.)  The DIMLIST is as for the
119      array function.  Same as (b-a)*random(dimlist)+a.  If A==0,
120      you are better off just writing B*random(dimlist).
121 
122    SEE ALSO: random, random_x, random_n, random_ipq, random_rej
123 */
124 {
125   while (more_args()) build_dimlist, dimlist, next_arg();
126   return (b-a)*random(dimlist)+a;
127 }
128 
129 /* ------------------------------------------------------------------------ */
130 
131 func random_n(dimlist, ..)
132 /* DOCUMENT random_n(dimlist)
133 
134      returns an array of normally distributed random double values with
135      the given DIMLIST (see array function, nil for a scalar result).
136      The mean is 0.0 and the standard deviation is 1.0.
137      The algorithm follows the Box-Muller method (see Numerical Recipes
138      by Press et al.).
139 
140    SEE ALSO: random, random_x, random_u, random_ipq, random_rej, poisson
141 */
142 {
143   while (more_args()) build_dimlist, dimlist, next_arg();
144   a= array(0.0, dimlist);
145   scalar= !dimsof(a)(1);
146   if (scalar) a= [a];  /* work around long-standing Yorick bug */
147 
148   /* crucial feature of this algorithm is that the same sequence
149    * of random numbers is returned independent of the number requested
150    * each time, just like random itself */
151   na= numberof(a);
152   np= numberof(_random_n_prev);
153   n= (na-np+1)/2;
154   nr= 2*n;
155   if (n) {
156     x= random(2,n);
157     r= sqrt(-2.0*log(x(1,)));
158     theta= 2.0*pi*x(2,);
159     x(1,)= r*cos(theta);
160     x(2,)= r*sin(theta);
161     a(np+1:na)= x(1:na-np);
162   }
163   if (np) {
164     a(1)= _random_n_prev;
165     _random_n_prev= [];
166   }
167   if (na-np<nr) {
168     _random_n_prev= x(nr);
169   }
170 
171   if (scalar) a= a(1);  /* work around long-standing Yorick bug */
172   return a;
173 }
174 
175 /* storage for odd random_n values */
176 _random_n_prev= [];
177 
178 /* ------------------------------------------------------------------------ */
179 
180 func random_ipq(model, dimlist, ..)
181 /* DOCUMENT random_ipq(ipq_model, dimlist)
182 
183      returns an array of double values with the given DIMLIST (see array
184      function, nil for a scalar result).  The numbers are distributed
185      according to a piecewise linear function (possibly with power law
186      or exponential tails) specified by the IPQ_MODEL.  The "IPQ" stands
187      for "inverse piecewise quadratic", which the type of function
188      required to transform a uniform random deviate into the piecewise
189      linear distribution.  Use the ipq_setup function to compute
190      IPQ_MODEL.
191 
192    SEE ALSO: random, random_x, random_u, random_n, random_rej, ipq_setup
193  */
194 {
195   while (more_args()) build_dimlist, dimlist, next_arg();
196   ymax= (*model(4))(1);
197   return ipq_compute(model, ymax*random(dimlist));
198 }
199 
200 func random_rej(target, bound, dimlist, ..)
201 /* DOCUMENT random_rej(target_dist, ipq_model, dimlist)
202          or random_rej(target_dist, bounding_dist, bounding_rand, dimlist)
203 
204      returns an array of double values with the given DIMLIST (see array
205      function, nil for a scalar result).  The numbers are distributed
206      according to the TARGET_DIST function:
207         func target_dist(x)
208      returning u(x)>=0 of same number and dimensionality as x, normalized
209      so that the integral of target_dist(x) from -infinity to +infinity
210      is 1.0.  The BOUNDING_DIST function must have the same calling
211      sequence as TARGET_DIST:
212         func bounding_dist(x)
213      returning b(x)>=u(x) everywhere.  Since u(x) is normalized, the
214      integral of b(x) must be >=1.0.  Finally, BOUNDING_RAND is a
215      function which converts an array of uniformly distributed random
216      numbers on (0,1) -- as returned by random -- into an array
217      distributed according to BOUNDING_DIST:
218         func bounding_rand(uniform_x_01)
219      Mathematically, BOUNDING_RAND is the inverse of the integral of
220      BOUNDING_DIST from -infinity to x, with its input scaled to (0,1).
221 
222      If BOUNDING_DIST is not a function, then it must be an IPQ_MODEL
223      returned by the ipq_setup function.  In this case BOUNDING_RAND is
224      omitted -- ipq_compute will be used automatically.
225 
226    SEE ALSO: random, random_x, random_u, random_n, random_ipq, ipq_setup
227  */
228 {
229   if (is_func(bound)) {
230     brand= dimlist;
231     dimlist= more_args()? next_arg() : [];
232   }
233   while (more_args()) build_dimlist, dimlist, next_arg();
234   if (!is_func(target) || (!is_void(brand) && !is_func(brand)) ||
235       (!is_func(bound) && structof(bound)!=pointer))
236     error, "improper calling sequence, try help,random_rej";
237   if (!is_func(bound)) ymax= (*bound(4))(1);
238 
239   /* build result to requested shape */
240   x= array(0.0, dimlist);
241   nreq= nx= numberof(x);
242   ix= 1;
243 
244   do {
245     /* get 25% more pairs of random numbers than nreq in order
246      * to allow for some to be rejected -- should actually go for
247      * integral(bounding_dist) times nreq, but don't know what
248      * that is -- could refine the estimate as each pass gets a
249      * better notion of the fraction rejected, but don't bother */
250     r= random(2, max(nreq+nreq/4,10));
251 
252     /* first get xx distributed according to bounding_rand,
253      * then accept according to the second random number
254      * continue until at least one is accepted */
255     for (xx=[] ; !numberof(xx) ; xx=xx(list)) {
256       if (is_func(bound)) {
257         xx= brand(r(1,..));
258         list= where(bound(xx)*r(2,..) <= target(xx));
259       } else {
260         xx= ipq_compute(bound, ymax*r(1,..));
261         list= where(ipq_function(bound,xx)*r(2,..) <= target(xx));
262       }
263     }
264     nxx= numberof(xx);
265     if (nxx>nreq) {
266       xx= xx(1:nreq);
267       nxx= nreq;
268     }
269 
270     nreq-= nxx;
271     x(ix:nx-nreq)= xx;
272     ix+= nxx;
273   } while (nreq);
274 
275   return x;
276 }
277 
278 func ipq_setup(x,u,power=,slope=)
279 /* DOCUMENT model= ipq_setup(x, u)
280          or model= ipq_setup(x, u, power=[pleft,prght])
281          or model= ipq_setup(x, u, power=[pleft,prght], slope=[sleft,srght])
282 
283      compute a model for the ipq_compute function, which computes the
284      inverse of a piecewise quadratic function.  This function occurs
285      when computing random numbers distributed according to a piecewise
286      linear function.  The piecewise linear function is u(x), determined
287      by the discrete points X and U input to ipq_setup.  None of the
288      values of U may be negative, and X must be strictly increasing,
289      X(i)<X(i+1).
290 
291      If U(1) or U(0) is not zero, you may want to model the "tail" of
292      the distribution, which is not well modeled by a piecewise linear
293      function.  You can specify a power law tail using the power=
294      keyword; the left tail has initial slope (U(2)-U(1))/(X(2)-X(1)),
295      and decays to the left as 1/X^PLEFT.  Similarly for the right tail.
296      If U(1)==0, PLEFT is ignored, as is PRGHT when U(0)==0.  Use the
297      slope= keyword to specify an alternative value for the slope.
298      Note that PLEFT and PRGHT must each be greater than 1.0, and that
299      SLEFT>0 while SRGHT<0.  If either power is greater than or equal to
300      100, an exponential tail will be used.  As a convenience, you may
301      also specify PLEFT or PRGHT of 0 to get an exponential tail.
302 
303      Note: ipq_function(model, xp) returns the function values u(xp) at
304      the points xp, including the tails (if any).  ipq_compute(model, yp)
305      returns the xp for which (integral from -infinity to xp) of u(x)
306      equals yp; i.e.- the inverse of the piecewise quadratic.
307 
308    SEE ALSO: random_ipq, random_rej
309  */
310 {
311   x= double(x);
312   u= double(u);
313   if (dimsof(x)(1)!=1 || numberof(x)<2 || dimsof(u)(1)!=1 ||
314       numberof(u)!=numberof(x) || anyof(u<0.)) error, "bad U or X arrays";
315 
316   /* compute the integral of u(x), starting from x(1),
317    * both at the given points x and at the midpoints of the intervals
318    * integ(u,x,xx) is the basic piecewise quadratic function */
319   bins= (u(zcen)*x(dif))(cum);
320   cens= x(pcen);  /* right shape, wrong values */
321   yc= integ(u,x, x(zcen));
322   dy= bins(dif);
323   /* note that these cens are constrained to lie between -1 and +1 */
324   cens(2:-1)= 4.*(yc-bins(1:-1))/(dy+!dy) - 2.;
325 
326   ymax= bins(0);
327 
328   if (!is_void(power)) {
329     if (dimsof(power)(1)!=1 || numberof(power)!=2 ||
330         anyof(power<=1.&power!=0.)) error, "illegal power= keyword";
331     if (!power(1) || !u(1)) power(1)= 100.;
332     if (!power(0) || !u(0)) power(0)= 100.;
333 
334     if (is_void(slope))
335       slope= [(u(2)-u(1))/(x(2)-x(1)), (u(0)-u(-1))/(x(0)-x(-1))];
336     if (dimsof(slope)(1)!=1 || numberof(slope)!=2 ||
337         slope(1)<0. || slope(0)>0.)
338       error, "illegal slope= keyword, or upward slope at endpoint";
339 
340     cens(1)= u(1)? slope(1)/u(1) : 1000./(x(2)-x(1));
341     cens(0)= u(0)? -slope(0)/u(1) : 1000./(x(0)-x(-1));
342     yi= u(1)/cens(1);
343     if (power(1)<100.) yi*= power(1)/(power(1)-1.);
344     ymax+= yi;
345     bins+= yi;
346     yi= u(0)/cens(0);
347     if (power(0)<100.) yi*= power(0)/(power(0)-1.);
348     ymax+= yi;
349 
350   } else {
351     power= [100.,100.];
352     cens(1)= 1000./(x(2)-x(1));
353     cens(0)= 1000./(x(0)-x(-1));
354   }
355 
356   parm= [ymax, power(1), power(0)];
357 
358   return [&bins, &x, &cens, &parm, &u];
359 }
360 
361 /* ------------------------------------------------------------------------ */
362 
363 func ipq_compute(model, y)
364 {
365   /*
366    * model= [&bins, &vals, &cens, &parm] where:
367    * bins   values of y, a piecewise quadratic function of x
368    * vals   values of x that go with bins
369    * cens   4*(yc-y0)/(y1-y0) - 2 where yc is value of y at
370    *             x=vals(pcen), except for first and last points
371    *             which are du/dx / u0 for the extrapolation model
372    * parm   [ymax, left_power, right_power]
373    *             maximum possible value of y, and
374    *             [left,right] powers (>1.0) of x for extrap. model
375    */
376   local bins, vals, cens, parm;
377   eq_nocopy, bins, *model(1);
378   eq_nocopy, vals, *model(2);
379   eq_nocopy, cens, *model(3);
380   eq_nocopy, parm, *model(4);
381 
382   i= digitize(y, bins);
383 
384   mask0= (i>1);
385   list= where(mask0);
386   if (numberof(list)) {
387     yy= y(list);
388     ii= i(list);
389     mask= (ii<=numberof(bins));
390     list= where(mask);
391     if (numberof(list)) {
392       /* handle piecewise quadratic part */
393       j= ii(list);
394       yb= bins(j);
395       xb= vals(j);
396       aa= cens(j);   /* 4*(yc-y0)/(y1-y0) - 2 */
397       j-= 1;
398       ya= bins(j);
399       xa= vals(j);
400       bb= 0.5*(1.+aa);
401       yq= (yy(list)-ya)/(yb-ya);
402       xq= bb+sqrt(bb*bb-aa*yq);
403       xq= xa + (xb-xa)*( yq/(xq+!xq) );
404     }
405 
406     list= where(!mask);
407     if (numberof(list)) {
408       /* handle right tail */
409       ymax= parm(1);
410       if (ymax > bins(0)) {
411         yy= (ymax - yy(list))/(ymax - bins(0));
412         xa= vals(0);
413         aa= cens(0);    /* du/dx / u0 */
414         pp= parm(2);    /* power */
415         yy0= yy<=0.0;
416         yy= max(yy,0.0)+yy0;
417         if (pp>=100.) xt= -log(yy)/aa;
418         else xt= (pp/aa) * (yy^(1./(1.-pp)) - 1.0);
419         xt+= xa + 1.e9*yy0*(xa-vals(1));
420       } else {
421         xt= array(vals(0), numberof(list));
422       }
423     }
424 
425     xq= merge(xq, xt, mask);
426   }
427 
428   list= where(!mask0);
429   if (numberof(list)) {
430     /* handle left tail */
431     if (bins(1)) {
432       yy= y(list)/bins(1);
433       xa= vals(1);
434       aa= cens(1);    /* du/dx / u0 */
435       pp= parm(3);    /* power */
436       yy0= yy<=0.0;
437       yy= max(yy,0.0)+yy0;
438       if (pp>=100.) xt= log(yy)/aa;
439       else xt= (pp/aa) * (1.0 - yy^(1./(1.-pp)));
440       xt+= xa - 1.e9*yy0*(vals(0)-xa);
441     } else {
442       xt= array(vals(1), numberof(list));
443     }
444   } else {
445     xt= [];
446   }
447 
448   return merge(xq, xt, mask0);
449 }
450 
451 func ipq_function(model, x)
452 {
453   /*
454    * model= [&bins, &vals, &cens, &parm, &valu] where:
455    * bins   values of y, a piecewise quadratic function of x
456    * vals   values of x that go with bins
457    * cens   4*(yc-y0)/(y1-y0) - 2 where yc is value of y at
458    *             x=vals(pcen), except for first and last points
459    *             which are du/dx / u0 for the extrapolation model
460    * parm   [ymax, left_power, right_power]
461    *             maximum possible value of y, and
462    *             [left,right] powers (>1.0) of x for extrap. model
463    * valu   values of u that go with x
464    */
465   local vals, cens, parm, valu;
466   eq_nocopy, vals, *model(2);
467   eq_nocopy, cens, *model(3);
468   eq_nocopy, parm, *model(4);
469   eq_nocopy, valu, *model(5);
470 
471   i= digitize(x, vals);
472 
473   mask0= (i>1);
474   list= where(mask0);
475   if (numberof(list)) {
476     xx= x(list);
477     ii= i(list);
478     mask= (ii<=numberof(vals));
479     list= where(mask);
480     if (numberof(list)) {
481       /* handle piecewise linear part */
482       uq= interp(valu, vals, xx(list));
483     }
484 
485     list= where(!mask);
486     if (numberof(list)) {
487       /* handle right tail */
488       if (valu(0)) {
489         xx= xx(list) - vals(0);
490         aa= cens(0);    /* du/dx / u0 */
491         pp= parm(2);    /* power */
492         if (pp>=100.) ut= valu(0)*exp(-aa*xx);
493         else ut= valu(0) / (1. + (aa/pp)*xx)^pp;
494       } else {
495         ut= array(0.0, numberof(list));
496       }
497     }
498 
499     uq= merge(uq, ut, mask);
500   }
501 
502   list= where(!mask0);
503   if (numberof(list)) {
504     /* handle left tail */
505     if (valu(1)) {
506       xx= vals(1) - x(list);
507       aa= cens(1);    /* du/dx / u0 */
508       pp= parm(3);    /* power */
509       if (pp>=100.) ut= valu(1)*exp(-aa*xx);
510       else ut= valu(1) / (1. + (aa/pp)*xx)^pp;
511     } else {
512       ut= array(0.0, numberof(list));
513     }
514   } else {
515     ut= [];
516   }
517 
518   return merge(uq, ut, mask0);
519 }
520 
521 /* ------------------------------------------------------------------------ */
522 
523 _poiprev = 0;
524 
525 func poisson(navg)
526 /* DOCUMENT poisson(navg)
527 
528      returns a Poisson distributed random value with mean NAVG.
529      (This is the integer number of events which actually occur
530       in some time interval, when the probability per unit time of
531       an event is constant, and the average number of events during
532       the interval is NAVG.)
533      The return value has the same dimensions as the input NAVG.
534      The return value is an integer, but its type is double.
535      The algorithm is taken from Numerical Recipes by Press, et. al.
536 
537    SEE ALSO: random, random_n
538  */
539 {
540   if (!_poiprev) require, "gamma.i";
541   navg = double(navg);
542   is_scalar = !dimsof(navg)(1);
543   if (is_scalar) navg = [navg];
544 
545   mask = navg < 12;
546 
547   list = where(mask);
548   if (numberof(list)) {
549     n = exp(-navg(list));
550     rlo = 0.*n;
551     master = indgen(numberof(n));
552     t = random(numberof(n));
553     for (;;) {
554       list = where(t > n);
555       if (!numberof(list)) break;
556       t = t(list) * random(numberof(list));
557       n = n(list);
558       master = master(list);
559       rlo(master) += 1.;
560     }
561   }
562 
563   list = where(!mask);
564   if (numberof(list)) {
565     r = navg = double(navg(list));
566     master = indgen(numberof(r));
567     sq = sqrt(2.*navg);
568     alxm = log(navg);
569     g = navg*alxm - lngamma(navg+1.);
570     n = navg;
571     for (;;) {
572       nn = n;
573       rr = y = array(0., numberof(master));
574       for (sub=indgen(numberof(rr)) ;; sub=sub(list)) {
575         y(sub) = yy = tan(pi*random(numberof(sub)));
576         rr(sub) = rs  =sq(sub)*yy + nn;
577         list = where(rs < 0.);
578         if (!numberof(list)) break;
579         nn = nn(list);
580       }
581       r(master) = rr = floor(rr);
582       t = 0.9*(1.+y*y)*exp(rr*alxm-lngamma(rr+1.)-g);
583       list = where(random(numberof(t)) > t);
584       if (!numberof(list)) break;
585       master = master(list);
586       n = n(list);
587       sq =sq(list);
588       alxm = alxm(list);
589       g = g(list);
590     }
591   }
592 
593   r = merge(rlo, r, mask);
594   if (is_scalar) r = r(1);
595   return r;
596 }
597 
598 /* ------------------------------------------------------------------------ */
599