1import glob 2import os 3import textwrap 4 5import yaml 6 7# ========================================= 8# Write out file for special functions 9# ========================================= 10 11 12def _initiate_specials(): 13 ''' 14 Initiate python file for special functions which are present in 15 the Rmath.h file -- used mainly for characteristic functions 16 ''' 17 18 pre_code = """\ 19 \""" 20 Special functions used mainly to evaluate characteristic 21 functions of various distributions. 22 23 @authors : Daniel Csaba <daniel.csaba@nyu.edu> 24 Spencer Lyon <spencer.lyon@stern.nyu.edu> 25 @date : 2016-07-26 26 \""" 27 28 from . import _rmath_ffi 29 from numba import vectorize, jit 30 from numba.core.typing import cffi_utils as cffi_support 31 32 cffi_support.register_module(_rmath_ffi) 33 34 # --------------- 35 # gamma function 36 # --------------- 37 38 gammafn = _rmath_ffi.lib.gammafn 39 40 @vectorize(nopython=True) 41 def gamma(x): 42 return gammafn(x) 43 44 # --------------------- 45 # log of gamma function 46 # --------------------- 47 48 lgammafn = _rmath_ffi.lib.lgammafn 49 50 @vectorize(nopython=True) 51 def lgamma(x): 52 return lgammafn(x) 53 54 # ---------------- 55 # digamma function 56 # ---------------- 57 58 digammafn = _rmath_ffi.lib.digamma 59 60 @vectorize(nopython=True) 61 def digamma(x): 62 return digammafn(x) 63 64 # ------------- 65 # beta funciton 66 # ------------- 67 68 betafn = _rmath_ffi.lib.beta 69 70 @vectorize(nopython=True) 71 def beta(x, y): 72 return betafn(x, y) 73 74 # ------------------------------------------- 75 # modified Bessel function of the second kind 76 # ------------------------------------------- 77 78 bessel_k_fn = _rmath_ffi.lib.bessel_k 79 80 @vectorize(nopython=True) 81 def bessel_k(nu, x): 82 return bessel_k_fn(x, nu, 1) 83 84 # ---------------------------------- 85 # seed setting for the random number 86 # generator of the Rmath library 87 # ---------------------------------- 88 89 set_seed_rmath = _rmath_ffi.lib.set_seed 90 91 def set_seed(x, y): 92 return set_seed_rmath(x, y) 93 """ 94 with open(os.path.join("rvlib", "specials.py"), "w") as f: 95 f.write(textwrap.dedent(pre_code)) 96 97 98# ========================================= 99# write out preamble for univariate classes 100# ========================================= 101 102 103def _initiate_univariate(): 104 ''' 105 Initiate python file which collects all the 106 classes for different univariate distributions. 107 ''' 108 109 # Define code which appears for all classes 110 pre_code = """\ 111 \""" 112 Univariate distributions. 113 114 @authors : Daniel Csaba <daniel.csaba@nyu.edu> 115 Spencer Lyon <spencer.lyon@stern.nyu.edu> 116 @date : 2016-07-26 117 \""" 118 119 from os.path import join, dirname, abspath 120 from numba import vectorize, jit 121 from numba.experimental import jitclass 122 from numba import int32, float32 123 124 import numpy as np 125 from .specials import gamma, lgamma, digamma, beta, bessel_k, set_seed 126 127 from . import _rmath_ffi 128 from numba.core.typing import cffi_utils as cffi_support 129 130 cffi_support.register_module(_rmath_ffi) 131 132 # shut down divide by zero warnings for now 133 import warnings 134 warnings.filterwarnings("ignore") 135 136 import yaml 137 fn = join(dirname(abspath(__file__)), "metadata.yaml") 138 with open(fn, 'r') as ymlfile: 139 mtdt = yaml.safe_load(ymlfile) 140 141 # -------------------------------------------------- 142 # docstring following Spencer Lyon's distcan package 143 # https://github.com/spencerlyon2/distcan.git 144 # -------------------------------------------------- 145 146 univariate_class_docstr = r\""" 147 Construct a distribution representing {name_doc} random variables. The 148 probability density function of the distribution is given by 149 150 .. math:: 151 152 {pdf_tex} 153 154 Parameters 155 ---------- 156 {param_list} 157 158 Attributes 159 ---------- 160 {param_attributes} 161 location: scalar(float) 162 location of the distribution 163 scale: scalar(float) 164 scale of the distribution 165 shape: scalar(float) 166 shape of the distribution 167 mean : scalar(float) 168 mean of the distribution 169 median: scalar(float) 170 median of the distribution 171 mode : scalar(float) 172 mode of the distribution 173 var : scalar(float) 174 variance of the distribution 175 std : scalar(float) 176 standard deviation of the distribution 177 skewness : scalar(float) 178 skewness of the distribution 179 kurtosis : scalar(float) 180 kurtosis of the distribution 181 isplatykurtic : Boolean 182 boolean indicating if kurtosis > 0 183 isleptokurtic : bool 184 boolean indicating if kurtosis < 0 185 ismesokurtic : bool 186 boolean indicating if kurtosis == 0 187 entropy : scalar(float) 188 entropy value of the distribution 189 \""" 190 191 param_str = "{name_doc} : {kind}\\n {descr}" 192 193 194 def _create_param_list_str(names, descrs, kinds="scalar(float)"): 195 196 names = (names, ) if isinstance(names, str) else names 197 names = (names, ) if isinstance(names, str) else names 198 199 if isinstance(kinds, (list, tuple)): 200 if len(names) != len(kinds): 201 raise ValueError("Must have same number of names and kinds") 202 203 if isinstance(kinds, str): 204 kinds = [kinds for i in range(len(names))] 205 206 if len(descrs) != len(names): 207 raise ValueError("Must have same number of names and descrs") 208 209 210 params = [] 211 for i in range(len(names)): 212 n, k, d = names[i], kinds[i], descrs[i] 213 params.append(param_str.format(name_doc=n, kind=k, descr=d)) 214 215 return str.join("\\n", params) 216 217 218 def _create_class_docstr(name_doc, param_names, param_descrs, 219 param_kinds="scalar(float)", 220 pdf_tex=r"\\text{not given}", **kwargs): 221 param_list = _create_param_list_str(param_names, param_descrs, 222 param_kinds) 223 224 param_attributes = str.join(", ", param_names) + " : See Parameters" 225 226 return univariate_class_docstr.format(**locals()) 227 """ 228 with open(os.path.join("rvlib", "univariate.py"), "w") as f: 229 f.write(textwrap.dedent(pre_code)) 230 231 232# globals are called from the textwrapper 233# with distribution specific content 234 235def _import_rmath(rname, pyname, *pargs): 236 """ 237 Map the `_rmath.ffi.lib.*` function names to match the Julia API. 238 """ 239 240 # extract Rmath.h function names 241 dfun = "d{}".format(rname) 242 pfun = "p{}".format(rname) 243 qfun = "q{}".format(rname) 244 rfun = "r{}".format(rname) 245 246 # construct python function names 247 pdf = "{}_pdf".format(pyname) 248 cdf = "{}_cdf".format(pyname) 249 ccdf = "{}_ccdf".format(pyname) 250 251 logpdf = "{}_logpdf".format(pyname) 252 logcdf = "{}_logcdf".format(pyname) 253 logccdf = "{}_logccdf".format(pyname) 254 255 invcdf = "{}_invcdf".format(pyname) 256 invccdf = "{}_invccdf".format(pyname) 257 invlogcdf = "{}_invlogcdf".format(pyname) 258 invlogccdf = "{}_invlogccdf".format(pyname) 259 260 rand = "{}_rand".format(pyname) 261 262 # make sure all names are available 263 has_rand = True 264 if rname == "nbeta" or rname == "nf" or rname == "nt": 265 has_rand = False 266 267 p_args = ", ".join(pargs) 268 269 code = """\ 270 271 # ============================= NEW DISTRIBUTION ================================= 272 {dfun} = _rmath_ffi.lib.{dfun} 273 {pfun} = _rmath_ffi.lib.{pfun} 274 {qfun} = _rmath_ffi.lib.{qfun} 275 276 @vectorize(nopython=True) 277 def {pdf}({p_args}, x): 278 return {dfun}(x, {p_args}, 0) 279 280 281 @vectorize(nopython=True) 282 def {logpdf}({p_args}, x): 283 return {dfun}(x, {p_args}, 1) 284 285 286 @vectorize(nopython=True) 287 def {cdf}({p_args}, x): 288 return {pfun}(x, {p_args}, 1, 0) 289 290 291 @vectorize(nopython=True) 292 def {ccdf}({p_args}, x): 293 return {pfun}(x, {p_args}, 0, 0) 294 295 296 @vectorize(nopython=True) 297 def {logcdf}({p_args}, x): 298 return {pfun}(x, {p_args}, 1, 1) 299 300 301 @vectorize(nopython=True) 302 def {logccdf}({p_args}, x): 303 return {pfun}(x, {p_args}, 0, 1) 304 305 306 @vectorize(nopython=True) 307 def {invcdf}({p_args}, q): 308 return {qfun}(q, {p_args}, 1, 0) 309 310 311 @vectorize(nopython=True) 312 def {invccdf}({p_args}, q): 313 return {qfun}(q, {p_args}, 0, 0) 314 315 316 @vectorize(nopython=True) 317 def {invlogcdf}({p_args}, lq): 318 return {qfun}(lq, {p_args}, 1, 1) 319 320 321 @vectorize(nopython=True) 322 def {invlogccdf}({p_args}, lq): 323 return {qfun}(lq, {p_args}, 0, 1) 324 325 """.format(**locals()) 326 327 # append code for specific class to main `univariate.py` file 328 with open(os.path.join("rvlib", "univariate.py"), "a") as f: 329 f.write(textwrap.dedent(code)) 330 331 if not has_rand: 332 # end here if we don't have rand. I put it in a `not has_rand` block 333 # to the rand_code can be at the right indentation level below 334 return 335 336 rand_code = """\ 337 {rfun} = _rmath_ffi.lib.{rfun} 338 339 @jit(nopython=True) 340 def {rand}({p_args}): 341 return {rfun}({p_args}) 342 343 """.format(**locals()) 344 with open(os.path.join("rvlib", "univariate.py"), "a") as f: 345 f.write(textwrap.dedent(rand_code)) 346 347 348# function to write out the distribution specific part 349def _write_class_specific(metadata, *pargs): 350 """ 351 Write out the distribution specific part of the class 352 which is not related to the imported Rmath functions. 353 354 Builds on _metadata_DIST and some derived locals. 355 """ 356 357 p_args = ", ".join(pargs) 358 359 p_args_self = ", ".join(["".join(("self.", par)) for par in pargs]) 360 361 data = locals() 362 data.update(metadata) 363 364 class_specific = """\ 365 366 @vectorize(nopython=True) 367 def {pyname}_mgf({p_args}, x): 368 return {mgf} 369 370 @vectorize(nopython=True) 371 def {pyname}_cf({p_args}, x): 372 return {cf} 373 374 # ------------- 375 # {name} 376 # ------------- 377 378 spec = [ 379 {spec} 380 ] 381 382 @jitclass(spec) 383 class {name}(): 384 385 # set docstring 386 __doc__ = _create_class_docstr(**mtdt['{name}']) 387 388 def __init__(self, {p_args}): 389 {p_args_self} = {p_args} 390 391 def __str__(self): 392 return "{string}" %(self.params) 393 394 def __repr__(self): 395 return self.__str__() 396 397 # =================== 398 # Parameter retrieval 399 # =================== 400 401 @property 402 def params(self): 403 \"""Return a tuple of parameters.\""" 404 return ({p_args_self}) 405 406 @property 407 def location(self): 408 \"""Return location parameter if exists.\""" 409 return {loc} 410 411 @property 412 def scale(self): 413 \"""Return scale parameter if exists.\""" 414 return {scale} 415 416 @property 417 def shape(self): 418 \"""Return shape parameter if exists.\""" 419 return {shape} 420 421 # ========== 422 # Statistics 423 # ========== 424 425 @property 426 def mean(self): 427 \"""Return the mean.\""" 428 return {mean} 429 430 @property 431 def median(self): 432 \"""Return the median.\""" 433 return {median} 434 435 @property 436 def mode(self): 437 \"""Return the mode.\""" 438 return {mode} 439 440 @property 441 def var(self): 442 \"""Return the variance.\""" 443 return {var} 444 445 @property 446 def std(self): 447 \"""Return the standard deviation.\""" 448 return {std} 449 450 @property 451 def skewness(self): 452 \"""Return the skewness.\""" 453 return {skewness} 454 455 @property 456 def kurtosis(self): 457 \"""Return the kurtosis.\""" 458 return {kurtosis} 459 460 @property 461 def isplatykurtic(self): 462 \"""Kurtosis being greater than zero.\""" 463 return self.kurtosis > 0 464 465 @property 466 def isleptokurtic(self): 467 \"""Kurtosis being smaller than zero.\""" 468 return self.kurtosis < 0 469 470 @property 471 def ismesokurtic(self): 472 \"""Kurtosis being equal to zero.\""" 473 return self.kurtosis == 0.0 474 475 @property 476 def entropy(self): 477 \"""Return the entropy.\""" 478 return {entropy} 479 480 def mgf(self, x): 481 \"""Evaluate the moment generating function at x.\""" 482 return {pyname}_mgf({p_args_self}, x) 483 484 def cf(self, x): 485 \"""Evaluate the characteristic function at x.\""" 486 return {pyname}_cf({p_args_self}, x) 487 488 # ========== 489 # Evaluation 490 # ========== 491 492 def insupport(self, x): 493 \"""When x is a scalar, return whether x is within 494 the support of the distribution. When x is an array, 495 return whether every element of x is within 496 the support of the distribution.\""" 497 return {insupport} 498 """.format(**data) 499 500 # append distribution specific code to `univariate.py` file 501 with open(os.path.join("rvlib", "univariate.py"), "a") as f: 502 f.write(textwrap.dedent(class_specific)) 503 504 505def _write_class_rmath(rname, pyname, *pargs): 506 """ 507 Call top level @vectorized evaluation methods from Rmath. 508 """ 509 510 # construct distribution specific function names 511 pdf = "{}_pdf".format(pyname) 512 cdf = "{}_cdf".format(pyname) 513 ccdf = "{}_ccdf".format(pyname) 514 515 logpdf = "{}_logpdf".format(pyname) 516 logcdf = "{}_logcdf".format(pyname) 517 logccdf = "{}_logccdf".format(pyname) 518 519 invcdf = "{}_invcdf".format(pyname) 520 invccdf = "{}_invccdf".format(pyname) 521 invlogcdf = "{}_invlogcdf".format(pyname) 522 invlogccdf = "{}_invlogccdf".format(pyname) 523 524 rand = "{}_rand".format(pyname) 525 526 # make sure all names are available 527 has_rand = True 528 if rname == "nbeta" or rname == "nf" or rname == "nt": 529 has_rand = False 530 531 # append 'self.' at the beginning of each parameter 532 p_args = ", ".join(["".join(("self.", par)) for par in pargs]) 533 534 loc_code = """\ 535 536 def pdf(self, x): 537 \"""The pdf value(s) evaluated at x.\""" 538 return {pdf}({p_args}, x) 539 540 def logpdf(self, x): 541 \"""The logarithm of the pdf value(s) evaluated at x.\""" 542 return {logpdf}({p_args}, x) 543 544 def loglikelihood(self, x): 545 \"""The log-likelihood of the distribution w.r.t. all 546 samples contained in array x.\""" 547 return sum({logpdf}({p_args}, x)) 548 549 def cdf(self, x): 550 \"""The cdf value(s) evaluated at x.\""" 551 return {cdf}({p_args}, x) 552 553 def ccdf(self, x): 554 \"""The complementary cdf evaluated at x, i.e. 1 - cdf(x).\""" 555 return {ccdf}({p_args}, x) 556 557 def logcdf(self, x): 558 \"""The logarithm of the cdf value(s) evaluated at x.\""" 559 return {logcdf}({p_args}, x) 560 561 def logccdf(self, x): 562 \"""The logarithm of the complementary cdf evaluated at x.\""" 563 return {logccdf}({p_args}, x) 564 565 def quantile(self, q): 566 \"""The quantile value evaluated at q.\""" 567 return {invcdf}({p_args}, q) 568 569 def cquantile(self, q): 570 \"""The complementary quantile value evaluated at q.\""" 571 return {invccdf}({p_args}, q) 572 573 def invlogcdf(self, lq): 574 \"""The inverse function of the logcdf.\""" 575 return {invlogcdf}({p_args}, lq) 576 577 def invlogccdf(self, lq): 578 \"""The inverse function of the logccdf.\""" 579 return {invlogccdf}({p_args}, lq) 580 """.format(**locals()) 581 582 # append code for class to main `univariate.py` file 583 with open(os.path.join("rvlib", "univariate.py"), "a") as f: 584 f.write(loc_code) 585 586 if not has_rand: 587 # end here if we don't have rand. I put it in a `not has_rand` block 588 # to the rand_code can be at the right indentation level below 589 return 590 591 rand_code = """\ 592 593 # ======== 594 # Sampling 595 # ======== 596 597 def rand(self, n): 598 \"""Generates a vector of n independent samples from the distribution.\""" 599 out = np.empty(n) 600 for i, _ in np.ndenumerate(out): 601 out[i] = {rand}({p_args}) 602 return out 603 604 """.format(**locals()) 605 with open(os.path.join("rvlib", "univariate.py"), "a") as f: 606 f.write(rand_code) 607 608 609 610def main(): 611 # Write out specials.py 612 _initiate_specials() 613 614 # Preamble for univariate.py 615 _initiate_univariate() 616 617 # Normal 618 _import_rmath("norm", "norm", "mu", "sigma") 619 _write_class_specific(mtdt["Normal"], "mu", "sigma") 620 _write_class_rmath("norm", "norm", "mu", "sigma") 621 622 # Chisq 623 _import_rmath("chisq", "chisq", "v") 624 _write_class_specific(mtdt["Chisq"], "v") 625 _write_class_rmath("chisq", "chisq", "v") 626 627 # Uniform 628 _import_rmath("unif", "unif", "a", "b") 629 _write_class_specific(mtdt["Uniform"], "a", "b") 630 _write_class_rmath("unif", "unif", "a", "b") 631 632 # T 633 _import_rmath("t", "tdist", "v") 634 _write_class_specific(mtdt["T"], "v") 635 _write_class_rmath("t", "tdist", "v") 636 637 # LogNormal 638 _import_rmath("lnorm", "lognormal", "mu", "sigma") 639 _write_class_specific(mtdt["LogNormal"], "mu", "sigma") 640 _write_class_rmath("lnorm", "lognormal", "mu", "sigma") 641 642 # F 643 _import_rmath("f", "fdist", "v1", "v2") 644 _write_class_specific(mtdt["F"], "v1", "v2") 645 _write_class_rmath("f", "fdist", "v1", "v2") 646 647 # Gamma 648 _import_rmath("gamma", "gamma", "alpha", "beta") 649 _write_class_specific(mtdt["Gamma"], "alpha", "beta") 650 _write_class_rmath("gamma", "gamma", "alpha", "beta") 651 652 # Beta 653 _import_rmath("beta", "beta", "alpha", "beta") 654 _write_class_specific(mtdt["Beta"], "alpha", "beta") 655 _write_class_rmath("beta", "beta", "alpha", "beta") 656 657 # Exponential 658 _import_rmath("exp", "exp", "theta") 659 _write_class_specific(mtdt["Exponential"], "theta") 660 _write_class_rmath("exp", "exp", "theta") 661 662 # Cauchy 663 _import_rmath("cauchy", "cauchy", "mu", "sigma") 664 _write_class_specific(mtdt["Cauchy"], "mu", "sigma") 665 _write_class_rmath("cauchy", "cauchy", "mu", "sigma") 666 667 # Poisson 668 _import_rmath("pois", "pois", "mu") 669 _write_class_specific(mtdt["Poisson"], "mu") 670 _write_class_rmath("pois", "pois", "mu") 671 672 # Geometric 673 _import_rmath("geom", "geom", "p") 674 _write_class_specific(mtdt["Geometric"], "p") 675 _write_class_rmath("geom", "geom", "p") 676 677 # Binomial 678 _import_rmath("binom", "binom", "n", "p") 679 _write_class_specific(mtdt["Binomial"], "n", "p") 680 _write_class_rmath("binom", "binom", "n", "p") 681 682 # Logistic 683 _import_rmath("logis", "logis", "mu", "theta") 684 _write_class_specific(mtdt["Logistic"], "mu", "theta") 685 _write_class_rmath("logis", "logis", "mu", "theta") 686 687 # Weibull 688 _import_rmath("weibull", "weibull", "alpha", "theta") 689 _write_class_specific(mtdt["Weibull"], "alpha", "theta") 690 _write_class_rmath("weibull", "weibull", "alpha", "theta") 691 692 # Hypergeometric 693 _import_rmath("hyper", "hyper", "s", "f", "n") 694 _write_class_specific(mtdt["Hypergeometric"], "s", "f", "n") 695 _write_class_rmath("hyper", "hyper", "s", "f", "n") 696 697 # NegativeBinomial 698 _import_rmath("nbinom", "nbinom", "r", "p") 699 _write_class_specific(mtdt["NegativeBinomial"], "r", "p") 700 _write_class_rmath("nbinom", "nbinom", "r", "p") 701 702with open(os.path.join("rvlib", "metadata.yaml"), 'r') as ymlfile: 703 mtdt = yaml.safe_load(ymlfile) 704 main() 705