1""" 2Negative Binomial Regression 3 4This is an algopy implementation of the statsmodels example 5http://statsmodels.sourceforge.net/devel/examples/generated/example_gmle.html 6. 7""" 8 9import functools 10 11import numpy 12import scipy.optimize 13import algopy 14import numdifftools 15import pandas 16import patsy 17 18g_url = 'http://vincentarelbundock.github.com/Rdatasets/csv/COUNT/medpar.csv' 19 20def get_aic(y, X, theta): 21 return 2*len(theta) + 2*get_neg_ll(y, X, theta) 22 23def get_neg_ll(y, X, theta): 24 alpha = theta[-1] 25 beta = theta[:-1] 26 a = alpha * algopy.exp(algopy.dot(X, beta)) 27 ll = algopy.sum( 28 -y*algopy.log1p(1/a) + 29 -algopy.log1p(a) / alpha + 30 algopy.special.gammaln(y + 1/alpha) + 31 -algopy.special.gammaln(y + 1) + 32 -algopy.special.gammaln(1/alpha)) 33 neg_ll = -ll 34 return neg_ll 35 36def eval_grad(f, theta): 37 theta = algopy.UTPM.init_jacobian(theta) 38 return algopy.UTPM.extract_jacobian(f(theta)) 39 40def eval_hess(f, theta): 41 theta = algopy.UTPM.init_hessian(theta) 42 return algopy.UTPM.extract_hessian(len(theta), f(theta)) 43 44def main(): 45 46 # read the data from the internet into numpy arrays 47 medpar = pandas.read_csv(g_url) 48 y_patsy, X_patsy = patsy.dmatrices('los~type2+type3+hmo+white', medpar) 49 y = numpy.array(y_patsy).flatten() 50 X = numpy.array(X_patsy) 51 52 # define the objective function and the autodiff gradient and hessian 53 f = functools.partial(get_neg_ll, y, X) 54 g = functools.partial(eval_grad, f) 55 h = functools.partial(eval_hess, f) 56 57 # init the search for max likelihood parameters 58 theta0 = numpy.array([ 59 numpy.log(numpy.mean(y)), 60 0, 0, 0, 0, 61 0.5, 62 ], dtype=float) 63 64 # do the max likelihood search 65 results = scipy.optimize.fmin_ncg( 66 f, 67 theta0, 68 fprime=g, 69 fhess=h, 70 avextol=1e-6, 71 ) 72 73 # compute the hessian a couple of different ways 74 algopy_hessian = h(results) 75 num_hessian = numdifftools.Hessian(f)(results) 76 77 # report the results of the search including aic and standard error 78 print('search results:') 79 print(results) 80 print() 81 print('aic:') 82 print(get_aic(y, X, results)) 83 print() 84 print('standard error using observed fisher information,') 85 print('with hessian computed using algopy:') 86 print(numpy.sqrt(numpy.diag(scipy.linalg.inv(algopy_hessian)))) 87 print() 88 print('standard error using observed fisher information,') 89 print('with hessian computed using numdifftools:') 90 print(numpy.sqrt(numpy.diag(scipy.linalg.inv(num_hessian)))) 91 print() 92 93 94if __name__ == '__main__': 95 main() 96 97