1import glob
2import os
3import textwrap
4
5import yaml
6
7# =========================================
8# Write out file for special functions
9# =========================================
10
11
12def _initiate_specials():
13    '''
14    Initiate python file for special functions which are present in
15    the Rmath.h file  -- used mainly for characteristic functions
16    '''
17
18    pre_code = """\
19    \"""
20    Special functions used mainly to evaluate characteristic
21    functions of various distributions.
22
23    @authors :  Daniel Csaba <daniel.csaba@nyu.edu>
24                Spencer Lyon <spencer.lyon@stern.nyu.edu>
25    @date : 2016-07-26
26    \"""
27
28    from . import _rmath_ffi
29    from numba import vectorize, jit
30    from numba.core.typing import cffi_utils as cffi_support
31
32    cffi_support.register_module(_rmath_ffi)
33
34    # ---------------
35    # gamma function
36    # ---------------
37
38    gammafn = _rmath_ffi.lib.gammafn
39
40    @vectorize(nopython=True)
41    def gamma(x):
42        return gammafn(x)
43
44    # ---------------------
45    # log of gamma function
46    # ---------------------
47
48    lgammafn = _rmath_ffi.lib.lgammafn
49
50    @vectorize(nopython=True)
51    def lgamma(x):
52        return lgammafn(x)
53
54    # ----------------
55    # digamma function
56    # ----------------
57
58    digammafn = _rmath_ffi.lib.digamma
59
60    @vectorize(nopython=True)
61    def digamma(x):
62        return digammafn(x)
63
64    # -------------
65    # beta funciton
66    # -------------
67
68    betafn = _rmath_ffi.lib.beta
69
70    @vectorize(nopython=True)
71    def beta(x, y):
72        return betafn(x, y)
73
74    # -------------------------------------------
75    # modified Bessel function of the second kind
76    # -------------------------------------------
77
78    bessel_k_fn = _rmath_ffi.lib.bessel_k
79
80    @vectorize(nopython=True)
81    def bessel_k(nu, x):
82        return bessel_k_fn(x, nu, 1)
83
84    # ----------------------------------
85    # seed setting for the random number
86    # generator of the Rmath library
87    # ----------------------------------
88
89    set_seed_rmath = _rmath_ffi.lib.set_seed
90
91    def set_seed(x, y):
92        return set_seed_rmath(x, y)
93    """
94    with open(os.path.join("rvlib", "specials.py"), "w") as f:
95        f.write(textwrap.dedent(pre_code))
96
97
98# =========================================
99# write out preamble for univariate classes
100# =========================================
101
102
103def _initiate_univariate():
104    '''
105    Initiate python file which collects all the
106    classes for different univariate distributions.
107    '''
108
109    # Define code which appears for all classes
110    pre_code = """\
111    \"""
112    Univariate distributions.
113
114    @authors :  Daniel Csaba <daniel.csaba@nyu.edu>
115                Spencer Lyon <spencer.lyon@stern.nyu.edu>
116    @date : 2016-07-26
117    \"""
118
119    from os.path import join, dirname, abspath
120    from numba import vectorize, jit
121    from numba.experimental import jitclass
122    from numba import int32, float32
123
124    import numpy as np
125    from .specials import gamma, lgamma, digamma, beta, bessel_k, set_seed
126
127    from . import _rmath_ffi
128    from numba.core.typing import cffi_utils as cffi_support
129
130    cffi_support.register_module(_rmath_ffi)
131
132    # shut down divide by zero warnings for now
133    import warnings
134    warnings.filterwarnings("ignore")
135
136    import yaml
137    fn = join(dirname(abspath(__file__)), "metadata.yaml")
138    with open(fn, 'r') as ymlfile:
139        mtdt = yaml.safe_load(ymlfile)
140
141    # --------------------------------------------------
142    # docstring following Spencer Lyon's distcan package
143    # https://github.com/spencerlyon2/distcan.git
144    # --------------------------------------------------
145
146    univariate_class_docstr = r\"""
147    Construct a distribution representing {name_doc} random variables. The
148    probability density function of the distribution is given by
149
150    .. math::
151
152        {pdf_tex}
153
154    Parameters
155    ----------
156    {param_list}
157
158    Attributes
159    ----------
160    {param_attributes}
161    location: scalar(float)
162        location of the distribution
163    scale: scalar(float)
164        scale of the distribution
165    shape: scalar(float)
166        shape of the distribution
167    mean :  scalar(float)
168        mean of the distribution
169    median: scalar(float)
170        median of the distribution
171    mode :  scalar(float)
172        mode of the distribution
173    var :  scalar(float)
174        variance of the distribution
175    std :  scalar(float)
176        standard deviation of the distribution
177    skewness :  scalar(float)
178        skewness of the distribution
179    kurtosis :  scalar(float)
180        kurtosis of the distribution
181    isplatykurtic :  Boolean
182        boolean indicating if kurtosis > 0
183    isleptokurtic :  bool
184        boolean indicating if kurtosis < 0
185    ismesokurtic :  bool
186        boolean indicating if kurtosis == 0
187    entropy :  scalar(float)
188        entropy value of the distribution
189    \"""
190
191    param_str = "{name_doc} : {kind}\\n    {descr}"
192
193
194    def _create_param_list_str(names, descrs, kinds="scalar(float)"):
195
196        names = (names, ) if isinstance(names, str) else names
197        names = (names, ) if isinstance(names, str) else names
198
199        if isinstance(kinds, (list, tuple)):
200            if len(names) != len(kinds):
201                raise ValueError("Must have same number of names and kinds")
202
203        if isinstance(kinds, str):
204            kinds = [kinds for i in range(len(names))]
205
206        if len(descrs) != len(names):
207            raise ValueError("Must have same number of names and descrs")
208
209
210        params = []
211        for i in range(len(names)):
212            n, k, d = names[i], kinds[i], descrs[i]
213            params.append(param_str.format(name_doc=n, kind=k, descr=d))
214
215        return str.join("\\n", params)
216
217
218    def _create_class_docstr(name_doc, param_names, param_descrs,
219                             param_kinds="scalar(float)",
220                             pdf_tex=r"\\text{not given}", **kwargs):
221        param_list = _create_param_list_str(param_names, param_descrs,
222                                            param_kinds)
223
224        param_attributes = str.join(", ", param_names) + " : See Parameters"
225
226        return univariate_class_docstr.format(**locals())
227    """
228    with open(os.path.join("rvlib", "univariate.py"), "w") as f:
229        f.write(textwrap.dedent(pre_code))
230
231
232# globals are called from the textwrapper
233# with distribution specific content
234
235def _import_rmath(rname, pyname, *pargs):
236    """
237    Map the `_rmath.ffi.lib.*` function names to match the Julia API.
238    """
239
240    # extract Rmath.h function names
241    dfun = "d{}".format(rname)
242    pfun = "p{}".format(rname)
243    qfun = "q{}".format(rname)
244    rfun = "r{}".format(rname)
245
246    # construct python function names
247    pdf = "{}_pdf".format(pyname)
248    cdf = "{}_cdf".format(pyname)
249    ccdf = "{}_ccdf".format(pyname)
250
251    logpdf = "{}_logpdf".format(pyname)
252    logcdf = "{}_logcdf".format(pyname)
253    logccdf = "{}_logccdf".format(pyname)
254
255    invcdf = "{}_invcdf".format(pyname)
256    invccdf = "{}_invccdf".format(pyname)
257    invlogcdf = "{}_invlogcdf".format(pyname)
258    invlogccdf = "{}_invlogccdf".format(pyname)
259
260    rand = "{}_rand".format(pyname)
261
262    # make sure all names are available
263    has_rand = True
264    if rname == "nbeta" or rname == "nf" or rname == "nt":
265        has_rand = False
266
267    p_args = ", ".join(pargs)
268
269    code = """\
270
271    # ============================= NEW DISTRIBUTION =================================
272    {dfun} = _rmath_ffi.lib.{dfun}
273    {pfun} = _rmath_ffi.lib.{pfun}
274    {qfun} = _rmath_ffi.lib.{qfun}
275
276    @vectorize(nopython=True)
277    def {pdf}({p_args}, x):
278        return {dfun}(x, {p_args}, 0)
279
280
281    @vectorize(nopython=True)
282    def {logpdf}({p_args}, x):
283        return {dfun}(x, {p_args}, 1)
284
285
286    @vectorize(nopython=True)
287    def {cdf}({p_args}, x):
288        return {pfun}(x, {p_args}, 1, 0)
289
290
291    @vectorize(nopython=True)
292    def {ccdf}({p_args}, x):
293        return {pfun}(x, {p_args}, 0, 0)
294
295
296    @vectorize(nopython=True)
297    def {logcdf}({p_args}, x):
298        return {pfun}(x, {p_args}, 1, 1)
299
300
301    @vectorize(nopython=True)
302    def {logccdf}({p_args}, x):
303        return {pfun}(x, {p_args}, 0, 1)
304
305
306    @vectorize(nopython=True)
307    def {invcdf}({p_args}, q):
308        return {qfun}(q, {p_args}, 1, 0)
309
310
311    @vectorize(nopython=True)
312    def {invccdf}({p_args}, q):
313        return {qfun}(q, {p_args}, 0, 0)
314
315
316    @vectorize(nopython=True)
317    def {invlogcdf}({p_args}, lq):
318        return {qfun}(lq, {p_args}, 1, 1)
319
320
321    @vectorize(nopython=True)
322    def {invlogccdf}({p_args}, lq):
323        return {qfun}(lq, {p_args}, 0, 1)
324
325    """.format(**locals())
326
327    # append code for specific class to main `univariate.py` file
328    with open(os.path.join("rvlib", "univariate.py"), "a") as f:
329        f.write(textwrap.dedent(code))
330
331    if not has_rand:
332        # end here if we don't have rand. I put it in a `not has_rand` block
333        # to the rand_code can be at the right indentation level below
334        return
335
336    rand_code = """\
337    {rfun} = _rmath_ffi.lib.{rfun}
338
339    @jit(nopython=True)
340    def {rand}({p_args}):
341        return {rfun}({p_args})
342
343    """.format(**locals())
344    with open(os.path.join("rvlib", "univariate.py"), "a") as f:
345        f.write(textwrap.dedent(rand_code))
346
347
348# function to write out the distribution specific part
349def _write_class_specific(metadata, *pargs):
350    """
351    Write out the distribution specific part of the class
352    which is not related to the imported Rmath functions.
353
354    Builds on _metadata_DIST and some derived locals.
355    """
356
357    p_args = ", ".join(pargs)
358
359    p_args_self = ", ".join(["".join(("self.", par)) for par in pargs])
360
361    data = locals()
362    data.update(metadata)
363
364    class_specific = """\
365
366    @vectorize(nopython=True)
367    def {pyname}_mgf({p_args}, x):
368        return {mgf}
369
370    @vectorize(nopython=True)
371    def {pyname}_cf({p_args}, x):
372        return {cf}
373
374    # -------------
375    #  {name}
376    # -------------
377
378    spec = [
379        {spec}
380    ]
381
382    @jitclass(spec)
383    class {name}():
384
385        # set docstring
386        __doc__ = _create_class_docstr(**mtdt['{name}'])
387
388        def __init__(self, {p_args}):
389            {p_args_self} = {p_args}
390
391        def __str__(self):
392            return "{string}" %(self.params)
393
394        def __repr__(self):
395            return self.__str__()
396
397        # ===================
398        # Parameter retrieval
399        # ===================
400
401        @property
402        def params(self):
403            \"""Return a tuple of parameters.\"""
404            return ({p_args_self})
405
406        @property
407        def location(self):
408            \"""Return location parameter if exists.\"""
409            return {loc}
410
411        @property
412        def scale(self):
413            \"""Return scale parameter if exists.\"""
414            return {scale}
415
416        @property
417        def shape(self):
418            \"""Return shape parameter if exists.\"""
419            return {shape}
420
421        # ==========
422        # Statistics
423        # ==========
424
425        @property
426        def mean(self):
427            \"""Return the mean.\"""
428            return {mean}
429
430        @property
431        def median(self):
432            \"""Return the median.\"""
433            return {median}
434
435        @property
436        def mode(self):
437            \"""Return the mode.\"""
438            return {mode}
439
440        @property
441        def var(self):
442            \"""Return the variance.\"""
443            return {var}
444
445        @property
446        def std(self):
447            \"""Return the standard deviation.\"""
448            return {std}
449
450        @property
451        def skewness(self):
452            \"""Return the skewness.\"""
453            return {skewness}
454
455        @property
456        def kurtosis(self):
457            \"""Return the kurtosis.\"""
458            return {kurtosis}
459
460        @property
461        def isplatykurtic(self):
462            \"""Kurtosis being greater than zero.\"""
463            return self.kurtosis > 0
464
465        @property
466        def isleptokurtic(self):
467            \"""Kurtosis being smaller than zero.\"""
468            return self.kurtosis < 0
469
470        @property
471        def ismesokurtic(self):
472            \"""Kurtosis being equal to zero.\"""
473            return self.kurtosis == 0.0
474
475        @property
476        def entropy(self):
477            \"""Return the entropy.\"""
478            return {entropy}
479
480        def mgf(self, x):
481            \"""Evaluate the moment generating function at x.\"""
482            return {pyname}_mgf({p_args_self}, x)
483
484        def cf(self, x):
485            \"""Evaluate the characteristic function at x.\"""
486            return {pyname}_cf({p_args_self}, x)
487
488        # ==========
489        # Evaluation
490        # ==========
491
492        def insupport(self, x):
493            \"""When x is a scalar, return whether x is within
494            the support of the distribution. When x is an array,
495            return whether every element of x is within
496            the support of the distribution.\"""
497            return {insupport}
498        """.format(**data)
499
500    # append distribution specific code to `univariate.py` file
501    with open(os.path.join("rvlib", "univariate.py"), "a") as f:
502        f.write(textwrap.dedent(class_specific))
503
504
505def _write_class_rmath(rname, pyname, *pargs):
506    """
507    Call top level @vectorized evaluation methods from Rmath.
508    """
509
510    # construct distribution specific function names
511    pdf = "{}_pdf".format(pyname)
512    cdf = "{}_cdf".format(pyname)
513    ccdf = "{}_ccdf".format(pyname)
514
515    logpdf = "{}_logpdf".format(pyname)
516    logcdf = "{}_logcdf".format(pyname)
517    logccdf = "{}_logccdf".format(pyname)
518
519    invcdf = "{}_invcdf".format(pyname)
520    invccdf = "{}_invccdf".format(pyname)
521    invlogcdf = "{}_invlogcdf".format(pyname)
522    invlogccdf = "{}_invlogccdf".format(pyname)
523
524    rand = "{}_rand".format(pyname)
525
526    # make sure all names are available
527    has_rand = True
528    if rname == "nbeta" or rname == "nf" or rname == "nt":
529        has_rand = False
530
531    # append 'self.' at the beginning of each parameter
532    p_args = ", ".join(["".join(("self.", par)) for par in pargs])
533
534    loc_code = """\
535
536    def pdf(self, x):
537        \"""The pdf value(s) evaluated at x.\"""
538        return {pdf}({p_args}, x)
539
540    def logpdf(self, x):
541        \"""The logarithm of the pdf value(s) evaluated at x.\"""
542        return {logpdf}({p_args}, x)
543
544    def loglikelihood(self, x):
545        \"""The log-likelihood of the distribution w.r.t. all
546        samples contained in array x.\"""
547        return sum({logpdf}({p_args}, x))
548
549    def cdf(self, x):
550        \"""The cdf value(s) evaluated at x.\"""
551        return {cdf}({p_args}, x)
552
553    def ccdf(self, x):
554        \"""The complementary cdf evaluated at x, i.e. 1 - cdf(x).\"""
555        return {ccdf}({p_args}, x)
556
557    def logcdf(self, x):
558        \"""The logarithm of the cdf value(s) evaluated at x.\"""
559        return {logcdf}({p_args}, x)
560
561    def logccdf(self, x):
562        \"""The logarithm of the complementary cdf evaluated at x.\"""
563        return {logccdf}({p_args}, x)
564
565    def quantile(self, q):
566        \"""The quantile value evaluated at q.\"""
567        return {invcdf}({p_args}, q)
568
569    def cquantile(self, q):
570        \"""The complementary quantile value evaluated at q.\"""
571        return {invccdf}({p_args}, q)
572
573    def invlogcdf(self, lq):
574        \"""The inverse function of the logcdf.\"""
575        return {invlogcdf}({p_args}, lq)
576
577    def invlogccdf(self, lq):
578        \"""The inverse function of the logccdf.\"""
579        return {invlogccdf}({p_args}, lq)
580    """.format(**locals())
581
582    # append code for class to main `univariate.py` file
583    with open(os.path.join("rvlib", "univariate.py"), "a") as f:
584        f.write(loc_code)
585
586    if not has_rand:
587        # end here if we don't have rand. I put it in a `not has_rand` block
588        # to the rand_code can be at the right indentation level below
589        return
590
591    rand_code = """\
592
593    # ========
594    # Sampling
595    # ========
596
597    def rand(self, n):
598        \"""Generates a vector of n independent samples from the distribution.\"""
599        out = np.empty(n)
600        for i, _ in np.ndenumerate(out):
601            out[i] = {rand}({p_args})
602        return out
603
604    """.format(**locals())
605    with open(os.path.join("rvlib", "univariate.py"), "a") as f:
606        f.write(rand_code)
607
608
609
610def main():
611    # Write out specials.py
612    _initiate_specials()
613
614    # Preamble for univariate.py
615    _initiate_univariate()
616
617    # Normal
618    _import_rmath("norm", "norm", "mu", "sigma")
619    _write_class_specific(mtdt["Normal"], "mu", "sigma")
620    _write_class_rmath("norm",  "norm", "mu", "sigma")
621
622    # Chisq
623    _import_rmath("chisq", "chisq", "v")
624    _write_class_specific(mtdt["Chisq"], "v")
625    _write_class_rmath("chisq",  "chisq", "v")
626
627    # Uniform
628    _import_rmath("unif", "unif", "a", "b")
629    _write_class_specific(mtdt["Uniform"], "a", "b")
630    _write_class_rmath("unif",  "unif", "a", "b")
631
632    # T
633    _import_rmath("t", "tdist", "v")
634    _write_class_specific(mtdt["T"], "v")
635    _write_class_rmath("t", "tdist", "v")
636
637    # LogNormal
638    _import_rmath("lnorm", "lognormal", "mu", "sigma")
639    _write_class_specific(mtdt["LogNormal"], "mu", "sigma")
640    _write_class_rmath("lnorm", "lognormal", "mu", "sigma")
641
642    # F
643    _import_rmath("f", "fdist", "v1", "v2")
644    _write_class_specific(mtdt["F"], "v1", "v2")
645    _write_class_rmath("f", "fdist", "v1", "v2")
646
647    # Gamma
648    _import_rmath("gamma", "gamma", "alpha", "beta")
649    _write_class_specific(mtdt["Gamma"], "alpha", "beta")
650    _write_class_rmath("gamma", "gamma", "alpha", "beta")
651
652    # Beta
653    _import_rmath("beta", "beta", "alpha", "beta")
654    _write_class_specific(mtdt["Beta"], "alpha", "beta")
655    _write_class_rmath("beta", "beta", "alpha", "beta")
656
657    # Exponential
658    _import_rmath("exp", "exp", "theta")
659    _write_class_specific(mtdt["Exponential"], "theta")
660    _write_class_rmath("exp", "exp", "theta")
661
662    # Cauchy
663    _import_rmath("cauchy", "cauchy", "mu", "sigma")
664    _write_class_specific(mtdt["Cauchy"], "mu", "sigma")
665    _write_class_rmath("cauchy", "cauchy", "mu", "sigma")
666
667    # Poisson
668    _import_rmath("pois", "pois", "mu")
669    _write_class_specific(mtdt["Poisson"], "mu")
670    _write_class_rmath("pois", "pois", "mu")
671
672    # Geometric
673    _import_rmath("geom", "geom", "p")
674    _write_class_specific(mtdt["Geometric"], "p")
675    _write_class_rmath("geom", "geom", "p")
676
677    # Binomial
678    _import_rmath("binom", "binom", "n", "p")
679    _write_class_specific(mtdt["Binomial"], "n", "p")
680    _write_class_rmath("binom", "binom", "n", "p")
681
682    # Logistic
683    _import_rmath("logis", "logis", "mu", "theta")
684    _write_class_specific(mtdt["Logistic"], "mu", "theta")
685    _write_class_rmath("logis", "logis", "mu", "theta")
686
687    # Weibull
688    _import_rmath("weibull", "weibull", "alpha", "theta")
689    _write_class_specific(mtdt["Weibull"], "alpha", "theta")
690    _write_class_rmath("weibull", "weibull", "alpha", "theta")
691
692    # Hypergeometric
693    _import_rmath("hyper", "hyper", "s", "f", "n")
694    _write_class_specific(mtdt["Hypergeometric"], "s", "f", "n")
695    _write_class_rmath("hyper", "hyper", "s", "f", "n")
696
697    # NegativeBinomial
698    _import_rmath("nbinom", "nbinom", "r", "p")
699    _write_class_specific(mtdt["NegativeBinomial"], "r", "p")
700    _write_class_rmath("nbinom", "nbinom", "r", "p")
701
702with open(os.path.join("rvlib", "metadata.yaml"), 'r') as ymlfile:
703    mtdt = yaml.safe_load(ymlfile)
704    main()
705