#-----------------------------------------------------------------------------
# Copyright (c) 2017, PyStan developers
#
# This file is licensed under Version 3.0 of the GNU General Public
# License. See LICENSE for a text of the license.
#-----------------------------------------------------------------------------
"""Look up Stan functions equivalent to Python (or R) functions."""

import io
import re

import numpy as np

try:
    import pkg_resources
except ImportError:
    # Only `build()` needs pkg_resources; keep the module importable without
    # it and report the missing dependency at the point of use instead.
    pkg_resources = None

# Module-level caches, filled in lazily by `build()` on the first `lookup()`.
# lookuptable: dict mapping a function name (e.g. "numpy.hstack", "R.dnorm")
#     to an integer index array into `stanftable`.
# stanftable: structured array read from "lookuptable/stan-functions.txt".
lookuptable = None
stanftable = None


def lookup(name, min_similarity_ratio=.75):
    """
    Look up a Stan function with similar functionality to a Python
    function (or even an R function, see examples). If the function is
    not present in the lookup table, attempt to find a similar entry,
    print the candidates and return the results for the most similar
    one. This function requires package `pandas`.

    Parameters
    -----------
    name : str
        Name of the function one wants to look for.
    min_similarity_ratio : float
        In case no exact match is found in the lookup table, the
        function will attempt to find similar names using
        `difflib.SequenceMatcher.ratio()`, and results with a
        calculated ratio below `min_similarity_ratio` are discarded.

    Examples
    ---------
    #Look up a Stan function similar to scipy.stats.skewnorm
    lookup("scipy.stats.skewnorm")
    #Look up a Stan function similar to R dnorm
    lookup("R.dnorm")
    #Look up a Stan function similar to numpy.hstack
    lookup("numpy.hstack")
    #List Stan log probability mass functions
    lookup("lpmfs")
    #List Stan log cumulative density functions
    lookup("lcdfs")

    Returns
    ---------
    A pandas.core.frame.DataFrame if an exact or at least one similar
    entry is found, None if neither is found. If the entry exists in
    the lookup table but has no Stan function associated with it, a
    str message saying so is returned instead.

    Raises
    ---------
    ImportError
        If a result has to be returned and `pandas` is not installed.
    """
    if lookuptable is None:
        build()

    if name not in lookuptable:
        return _lookup_similar(name, min_similarity_ratio)

    entries = stanftable[lookuptable[name]]
    if not len(entries):
        return "Found no equivalent Stan function available for " + name

    try:
        import pandas as pd
    except ImportError:
        raise ImportError('Package pandas is required to use this '
                          'function.')

    return pd.DataFrame(entries)


def _lookup_similar(name, min_similarity_ratio):
    """Fuzzy fallback of `lookup`: report close keys, return the best one."""
    from difflib import SequenceMatcher
    from operator import itemgetter

    print("No match for " + name + " in the lookup table.")

    scored = [(key, SequenceMatcher(a=name, b=key).ratio())
              for key in lookuptable]
    # Ascending (stable) sort: the most similar entry ends up last.
    similars = sorted((pair for pair in scored
                       if pair[1] >= min_similarity_ratio),
                      key=itemgetter(1))

    if not similars:
        print("And no similar entry found. You may try to decrease "
              "the min_similarity_ratio parameter.")
        return None

    print("But the following similar entries were found: ")
    for key, ratio in similars:
        print(key + " ===> with similarity "
              "ratio of " + str(round(ratio, 3)))

    best = similars[-1][0]
    print("Will return results for entry"
          " " + best + " "
          "(which is the most similar entry found).")
    return lookup(best)


def build():
    """
    Load the packaged lookup tables and fill the module-level caches.

    Reads "lookuptable/stan-functions.txt", "lookuptable/R.txt" and
    "lookuptable/python.txt" (resolved relative to this module through
    `pkg_resources`), then sets the globals `stanftable` (structured
    array of Stan functions) and `lookuptable` (dict mapping each R or
    Python function name to the indices of the rows of `stanftable`
    whose ``StanFunction`` field matches the associated regular
    expression). R function names are stored with an ``"R."`` prefix.

    Raises
    ---------
    ImportError
        If `pkg_resources` (setuptools) is not available.
    """
    global lookuptable
    global stanftable

    if pkg_resources is None:
        raise ImportError('Package setuptools (pkg_resources) is required '
                          'to load the lookup tables.')

    def load_table_file(fname):
        fname = "lookuptable/" + fname
        fbytes = pkg_resources.resource_string(__name__, fname)
        return io.BytesIO(fbytes)

    stanfunctions_file = load_table_file("stan-functions.txt")
    rfunctions_file = load_table_file("R.txt")
    pythontb_file = load_table_file("python.txt")

    stanftb = np.genfromtxt(stanfunctions_file, delimiter=';',
                            names=True, skip_header=True,
                            dtype=['<U200', '<U200', '<U200', "int"])
    rpl_textbar = np.vectorize(lambda x: x.replace("\\textbar \\", "|"))
    stanftb['Arguments'] = rpl_textbar(stanftb['Arguments'])

    StanFunction = stanftb["StanFunction"]

    #Auto-extract R functions
    # Raw strings throughout: the pattern is unchanged, but no longer relies
    # on invalid string escapes such as "\[" or "\_" being passed through.
    r_pattern = re.compile(
        r'('
        r'(?<=RFunction\[StanFunction == ").+?(?=")'
        r'|(?<=grepl\(").+?(?=", StanFunction\))'
        r'|(?<= \<\- ").+?(?="\)))'
        r'|NA\_character\_')
    rmatches = [r_pattern.findall(line.decode("utf-8"))
                for line in rfunctions_file]
    # Only lines yielding exactly (Stan pattern, R name) are usable. The "R."
    # prefix is added *before* building the array: assigning longer strings
    # into an existing fixed-width unicode array would silently truncate them.
    r_pairs = [(found[0], "R." + found[1])
               for found in rmatches if len(found) == 2]
    # reshape keeps the array 2-D even when no R entry was extracted.
    tomatch = np.array(r_pairs, dtype=str).reshape(-1, 2)

    #Get packages lookup table for Python packages
    pymatches = np.genfromtxt(pythontb_file, delimiter='; ', dtype=str)
    tomatch = np.vstack((tomatch, pymatches))

    lookuptb = dict()
    for stan_pattern, key in tomatch:
        matcher = re.compile(stan_pattern)
        hits = np.array([matcher.match(fn) is not None
                         for fn in StanFunction], dtype=bool)
        lookuptb[key] = np.where(hits)[0]

    #debug: list of rmatches that got wrong
    #print(list(filter(lambda x: len(x) != 2 and len(x) != 0,
    #                  rmatches)))

    #debug: list of nodes without matches on lookup table
    #for k in lookuptb:
    #    if len(lookuptb[k]) == 0:
    #        print(k)

    stanftable = stanftb
    lookuptable = lookuptb