1#-----------------------------------------------------------------------------
2# Copyright (c) 2017, PyStan developers
3#
4# This file is licensed under Version 3.0 of the GNU General Public
5# License. See LICENSE for a text of the license.
6#-----------------------------------------------------------------------------
7
8import numpy as np
9import re
10import pkg_resources
11import io
12
# Module-level cache filled by build(). `lookuptable` is a dict that maps a
# foreign function name (e.g. "R.dnorm" or "numpy.hstack") to an array of row
# indices into `stanftable`. It stays None until build() has run, which
# lookup() uses as the signal to build the tables on first use.
lookuptable = None
# Structured NumPy array parsed from lookuptable/stan-functions.txt by
# build(); rows are selected from it with the indices stored in `lookuptable`.
stanftable = None
15
def lookup(name, min_similarity_ratio=.75):
    """
    Look up a Stan function with similar functionality to a Python
    function (or even an R function, see examples). If the function is
    not present on the lookup table, then attempts to find a similar one
    and prints the results. This function requires package `pandas`.

    Parameters
    -----------
    name : str
        Name of the function one wants to look for.
    min_similarity_ratio : float
        In case no exact match is found on the lookup table, the
        function will attempt to find similar names using
        `difflib.SequenceMatcher.ratio()`, and then results with
        calculated ratio below `min_similarity_ratio` will be discarded.

    Examples
    ---------
    #Look up for a Stan function similar to scipy.stats.skewnorm
    lookup("scipy.stats.skewnorm")
    #Look up for a Stan function similar to R dnorm
    lookup("R.dnorm")
    #Look up for a Stan function similar to numpy.hstack
    lookup("numpy.hstack")
    #List Stan log probability mass functions
    lookup("lpmfs")
    #List Stan log cumulative density functions
    lookup("lcdfs")

    Returns
    ---------
    A pandas.core.frame.DataFrame if an exact or at least one similar
    entry is found and it maps to one or more Stan functions.
    A str message if the entry exists on the lookup table but maps to
    no Stan function.
    None if neither an exact nor a similar entry is found.

    Raises
    ---------
    ImportError
        If matching Stan functions were found but `pandas` is not
        installed.
    """
    # The tables are built lazily on first use.
    if lookuptable is None:
        build()

    if name not in lookuptable:
        from difflib import SequenceMatcher
        from operator import itemgetter
        print("No match for " + name + " in the lookup table.")

        # Score every known key against the requested name, keep those at
        # or above the threshold, and order them from least to most
        # similar. sorted() is stable, so ties keep the table order.
        scored = ((key, SequenceMatcher(a=name, b=key).ratio())
                  for key in lookuptable)
        similars = sorted((pair for pair in scored
                           if pair[1] >= min_similarity_ratio),
                          key=itemgetter(1))

        if not similars:
            print("And no similar entry found. You may try to decrease "
                  "the min_similarity_ratio parameter.")
            return None

        print("But the following similar entries were found: ")
        for key, ratio in similars:
            print(key + " ===> with similarity "
                  "ratio of " + str(round(ratio, 3)))

        # Ascending order: the last element is the most similar key.
        best = similars[-1][0]
        print("Will return results for entry"
              " " + best + " "
              "(which is the most similar entry found).")
        return lookup(best)

    entries = stanftable[lookuptable[name]]
    if not len(entries):
        # Known name, but no Stan function matched its pattern.
        return "Found no equivalent Stan function available for " + name

    try:
        import pandas as pd
    except ImportError:
        raise ImportError('Package pandas is required to use this '
                          'function.')

    return pd.DataFrame(entries)
90
91
92
def build():
    """
    Build the module-level tables used by `lookup`.

    Reads three resource files shipped under ``lookuptable/`` next to
    this module:

    - ``stan-functions.txt``: ';'-separated table of Stan functions,
      parsed into a structured array and stored in `stanftable`.
    - ``R.txt``: R source from which (pattern, R function name) pairs
      are extracted with a regular expression; R names are stored with
      an ``"R."`` prefix.
    - ``python.txt``: '; '-separated (pattern, Python name) pairs.

    Each pattern is applied with `re.match` to the ``StanFunction``
    column; the indices of the matching rows are stored in the dict
    `lookuptable` under the corresponding R/Python name.

    Returns
    ---------
    None. The results are stored in the globals `lookuptable` and
    `stanftable`.
    """
    global lookuptable
    global stanftable

    def load_table_file(fname):
        # Wrap the resource bytes in a file-like object so it can be fed
        # to np.genfromtxt or iterated line by line.
        fbytes = pkg_resources.resource_string(__name__,
                                               "lookuptable/" + fname)
        return io.BytesIO(fbytes)

    stanfunctions_file = load_table_file("stan-functions.txt")
    rfunctions_file = load_table_file("R.txt")
    pythontb_file = load_table_file("python.txt")

    stanftb = np.genfromtxt(stanfunctions_file, delimiter=';',
                            names=True, skip_header=True,
                            dtype=['<U200', '<U200', '<U200', "int"])
    rpl_textbar = np.vectorize(lambda x: x.replace("\\textbar \\", "|"))
    stanftb['Arguments'] = rpl_textbar(stanftb['Arguments'])

    StanFunction = stanftb["StanFunction"]

    # Auto-extract R functions. Raw strings keep the regex text identical
    # to the original while avoiding invalid string escape sequences.
    # Only the first alternative group is capturing, so a match of the
    # trailing NA_character_ alternative is reported by findall as ''.
    r_pattern = re.compile(
        r'('
        r'(?<=RFunction\[StanFunction == ").+?(?=")'
        r'|(?<=grepl\(").+?(?=", StanFunction\))'
        r'|(?<= \<\- ").+?(?="\)))'
        r'|NA\_character\_')
    rmatches = [r_pattern.findall(line.decode("utf-8"))
                for line in rfunctions_file]

    # Keep only lines that produced exactly (pattern, R name). The "R."
    # prefix is added before building the array: assigning longer strings
    # into an existing fixed-width NumPy string array would silently
    # truncate them.
    r_pairs = [(pattern, "R." + rname)
               for pattern, rname in (m for m in rmatches if len(m) == 2)]
    # reshape keeps a (0, 2) shape when no R pair was found, so vstack
    # below still works.
    tomatch = np.array(r_pairs, dtype=str).reshape(-1, 2)

    # Get packages lookup table for Python packages
    pymatches = np.genfromtxt(pythontb_file, delimiter='; ', dtype=str)
    tomatch = np.vstack((tomatch, pymatches))

    lookuptb = dict()
    for pattern, key in tomatch:
        matcher = re.compile(pattern)
        matched = [matcher.match(fn) is not None for fn in StanFunction]
        lookuptb[key] = np.where(matched)[0]

    # debug: list of rmatches that got wrong
    # print([m for m in rmatches if len(m) not in (0, 2)])

    # debug: list of nodes without matches on lookup table
    # print([k for k in lookuptb if len(lookuptb[k]) == 0])

    stanftable = stanftb
    lookuptable = lookuptb
144