1"""
2Negative Binomial Regression
3
4This is an algopy implementation of the statsmodels example
5http://statsmodels.sourceforge.net/devel/examples/generated/example_gmle.html
6.
7"""
8
9import functools
10
11import numpy
12import scipy.optimize
13import algopy
14import numdifftools
15import pandas
16import patsy
17
# URL of the medpar hospital length-of-stay count dataset (Rdatasets mirror)
g_url = 'http://vincentarelbundock.github.com/Rdatasets/csv/COUNT/medpar.csv'
19
def get_aic(y, X, theta):
    """Return the Akaike information criterion for the fitted model.

    AIC = 2*k + 2*negative-log-likelihood, where k is the number of
    estimated parameters (regression coefficients plus dispersion).
    """
    n_params = len(theta)
    neg_ll = get_neg_ll(y, X, theta)
    return 2 * n_params + 2 * neg_ll
22
def get_neg_ll(y, X, theta):
    """Negative log likelihood of the NB2 negative binomial model.

    The parameter vector theta packs the regression coefficients first
    and the dispersion parameter alpha in its last entry.  The algopy
    functions are used throughout so the expression is differentiable
    by forward-mode AD.
    """
    beta = theta[:-1]
    alpha = theta[-1]
    # a = alpha * mu, where mu = exp(X beta) is the conditional mean
    a = alpha * algopy.exp(algopy.dot(X, beta))
    per_obs_ll = (
        -y * algopy.log1p(1 / a)
        - algopy.log1p(a) / alpha
        + algopy.special.gammaln(y + 1 / alpha)
        - algopy.special.gammaln(y + 1)
        - algopy.special.gammaln(1 / alpha))
    return -algopy.sum(per_obs_ll)
35
def eval_grad(f, theta):
    """Evaluate the gradient of f at theta using algopy forward-mode AD."""
    x = algopy.UTPM.init_jacobian(theta)
    value = f(x)
    return algopy.UTPM.extract_jacobian(value)
39
def eval_hess(f, theta):
    """Evaluate the Hessian of f at theta using algopy forward-mode AD."""
    x = algopy.UTPM.init_hessian(theta)
    value = f(x)
    return algopy.UTPM.extract_hessian(len(x), value)
43
def main():
    """Fit a negative binomial regression to the medpar data by ML.

    Downloads the dataset, runs a Newton-CG maximum-likelihood search
    driven by algopy gradients and Hessians, then prints the estimates,
    the AIC, and standard errors derived from the observed Fisher
    information (inverse Hessian of the negative log likelihood).
    """

    # read the data from the internet into numpy arrays
    medpar = pandas.read_csv(g_url)
    y_patsy, X_patsy = patsy.dmatrices('los~type2+type3+hmo+white', medpar)
    y = numpy.array(y_patsy).flatten()
    X = numpy.array(X_patsy)

    # define the objective function and the autodiff gradient and hessian
    f = functools.partial(get_neg_ll, y, X)
    g = functools.partial(eval_grad, f)
    h = functools.partial(eval_hess, f)

    # init the search for max likelihood parameters:
    # intercept at log(mean(y)), slopes at zero, dispersion alpha at 0.5
    theta0 = numpy.array([
        numpy.log(numpy.mean(y)),
        0, 0, 0, 0,
        0.5,
        ], dtype=float)

    # do the max likelihood search
    results = scipy.optimize.fmin_ncg(
            f,
            theta0,
            fprime=g,
            fhess=h,
            avextol=1e-6,
            )

    # compute the hessian a couple of different ways
    algopy_hessian = h(results)
    num_hessian = numdifftools.Hessian(f)(results)

    # report the results of the search including aic and standard error
    # NOTE: the original called scipy.linalg.inv, but scipy.linalg is
    # never imported (only scipy.optimize is) and was available only by
    # accidental transitive import; numpy.linalg.inv is equivalent here
    # and is guaranteed to be in scope.
    print('search results:')
    print(results)
    print()
    print('aic:')
    print(get_aic(y, X, results))
    print()
    print('standard error using observed fisher information,')
    print('with hessian computed using algopy:')
    print(numpy.sqrt(numpy.diag(numpy.linalg.inv(algopy_hessian))))
    print()
    print('standard error using observed fisher information,')
    print('with hessian computed using numdifftools:')
    print(numpy.sqrt(numpy.diag(numpy.linalg.inv(num_hessian))))
    print()
92
93
# run the analysis only when executed as a script, not on import
if __name__ == '__main__':
    main()
96
97