1"""
2Empirical likelihood inference on descriptive statistics
3
4This module conducts hypothesis tests and constructs confidence
5intervals for the mean, variance, skewness, kurtosis and correlation.
6
7If matplotlib is installed, this module can also generate multivariate
8confidence region plots as well as mean-variance contour plots.
9
10See _OptFuncts docstring for technical details and optimization variable
11definitions.
12
13General References:
14------------------
15Owen, A. (2001). "Empirical Likelihood." Chapman and Hall
16
17"""
import numpy as np
from scipy import optimize
from scipy.stats import chi2, skew, kurtosis
from statsmodels.base.optimizer import _fit_newton
import itertools
from statsmodels.graphics import utils


def DescStat(endog):
    """
    Returns an instance to conduct inference on descriptive statistics
    via empirical likelihood.  See DescStatUV and DescStatMV for more
    information.

    Parameters
    ----------
    endog : ndarray
        Array of data

    Returns
    -------
    DescStat instance
        If endog is one-dimensional or has a single column (k=1), returns
        a univariate instance, DescStatUV.  If endog has k>1 columns,
        returns a multivariate instance, DescStatMV.
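
    Examples
    --------
    A minimal illustration; the data below are random draws, so outputs
    are omitted:

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> uv_data = np.random.standard_normal(50)
    >>> desc_uv = sm.emplike.DescStat(uv_data)        # k=1 -> DescStatUV
    >>> mv_data = np.random.standard_normal((50, 2))
    >>> desc_mv = sm.emplike.DescStat(mv_data)        # k=2 -> DescStatMV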
40    """
41    if endog.ndim == 1:
42        endog = endog.reshape(len(endog), 1)
43    if endog.shape[1] == 1:
44        return DescStatUV(endog)
45    if endog.shape[1] > 1:
46        return DescStatMV(endog)
47
48
class _OptFuncts(object):
    """
    A class that holds functions that are optimized/solved.

    The general setup of the class is simple.  Any method that starts with
    _opt_ creates a vector of estimating equations named est_vect such that
    np.dot(p, est_vect) = 0, where p is the 1 x n array of observation
    weights and est_vect is n x k.  Then _modif_newton is called to
    determine the optimal p by solving for the Lagrange multiplier
    (eta) in the profile likelihood maximization problem.  In the presence
    of nuisance parameters, the _opt_ functions are optimized over to
    profile out the nuisance parameters.

    Any method starting with _ci_limits calculates the log likelihood
    ratio for a specific value of a parameter and then subtracts a
    pre-specified critical value.  This is solved so that llr - crit = 0.
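
    Notes
    -----
    As a sketch of the underlying problem (Owen, 2001), the weights solve

        max sum(log(n * p_i))
        s.t. sum(p_i) = 1 and sum(p_i * g(x_i)) = 0,

    where g denotes the vector of estimating equations.  The solution has
    the form p_i = 1 / (n * (1 + eta'g(x_i))), with eta the Lagrange
    multiplier that the _opt_ methods solve for.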
65    """
66
67    def __init__(self, endog):
68        pass
69
    def _log_star(self, eta, est_vect, weights, nobs):
        """
        Transforms the log of observation probabilities in terms of the
        Lagrange multiplier to the log 'star' of the probabilities.

        Parameters
        ----------
        eta : float
            Lagrange multiplier

        est_vect : ndarray (n,k)
            Estimating equations vector

        weights : 1darray
            Observation weights

        nobs : int
            Number of observations

        Returns
        -------
        data_star : ndarray
            The weighted logstar of the estimating equations

        Notes
        -----
        This function is only a placeholder for _fit_newton.
        The function value is not used in optimization and the optimal value
        is disregarded when computing the log likelihood ratio.
        """
        data_star = np.log(weights) + (np.sum(weights) +
                                       np.dot(est_vect, eta))
        idx = data_star < 1. / nobs
        not_idx = ~idx
        # Below the threshold 1/nobs, replace log with its second-order
        # Taylor expansion so the objective stays finite and concave
        nx = nobs * data_star[idx]
        data_star[idx] = np.log(1. / nobs) - 1.5 + nx * (2. - nx / 2)
        data_star[not_idx] = np.log(data_star[not_idx])
        return data_star

    def _hess(self, eta, est_vect, weights, nobs):
        """
        Calculates the hessian of a weighted empirical likelihood
        problem.

        Parameters
        ----------
        eta : ndarray, (1,m)
            Lagrange multiplier in the profile likelihood maximization

        est_vect : ndarray (n,k)
            Estimating equations vector

        weights : 1darray
            Observation weights

        nobs : int
            Number of observations

        Returns
        -------
        hess : m x m array
            Weighted hessian used in _modif_newton
        """
        data_star_doub_prime = np.sum(weights) + np.dot(est_vect, eta)
        idx = data_star_doub_prime < 1. / nobs
        not_idx = ~idx
        data_star_doub_prime[idx] = - nobs ** 2
        data_star_doub_prime[not_idx] = - (data_star_doub_prime[not_idx]) ** -2
        wtd_dsdp = weights * data_star_doub_prime
        return np.dot(est_vect.T, wtd_dsdp[:, None] * est_vect)

    def _grad(self, eta, est_vect, weights, nobs):
        """
        Calculates the gradient of a weighted empirical likelihood
        problem.

        Parameters
        ----------
        eta : ndarray, (1,m)
            Lagrange multiplier in the profile likelihood maximization

        est_vect : ndarray, (n,k)
            Estimating equations vector

        weights : 1darray
            Observation weights

        nobs : int
            Number of observations

        Returns
        -------
        gradient : ndarray (m,1)
            The gradient used in _modif_newton
        """
        data_star_prime = np.sum(weights) + np.dot(est_vect, eta)
        idx = data_star_prime < 1. / nobs
        not_idx = ~idx
        data_star_prime[idx] = nobs * (2 - nobs * data_star_prime[idx])
        data_star_prime[not_idx] = 1. / data_star_prime[not_idx]
        return np.dot(weights * data_star_prime, est_vect)

    def _modif_newton(self, eta, est_vect, weights):
        """
        Modified Newton's method for maximizing the log 'star' equation.  This
        function calls _fit_newton to find the optimal values of eta.

        Parameters
        ----------
        eta : ndarray, (1,m)
            Lagrange multiplier in the profile likelihood maximization

        est_vect : ndarray, (n,k)
            Estimating equations vector

        weights : 1darray
            Observation weights

        Returns
        -------
        params : 1xm array
            Lagrange multiplier that maximizes the log-likelihood
        """
        nobs = len(est_vect)
        # Minimize the negative log-star objective with analytic
        # gradient and hessian
        f = lambda x0: - np.sum(self._log_star(x0, est_vect, weights, nobs))
        grad = lambda x0: - self._grad(x0, est_vect, weights, nobs)
        hess = lambda x0: - self._hess(x0, est_vect, weights, nobs)
        kwds = {'tol': 1e-8}
        eta = eta.squeeze()
        res = _fit_newton(f, grad, eta, (), kwds, hess=hess, maxiter=50,
                          disp=0)
        return res[0]

    def _find_eta(self, eta):
        """
        Finding the root of sum((x_i - mu0) / (1 + eta * (x_i - mu0)))
        solves for eta when computing the ELR for the univariate mean.

        Parameters
        ----------
        eta : float
            Lagrange multiplier in the empirical likelihood maximization

        Returns
        -------
        f : float
            Value of the estimating equation at eta.  The root of this
            function gives the optimal Lagrange multiplier.
        """
        return np.sum((self.endog - self.mu0) /
                      (1. + eta * (self.endog - self.mu0)))

    def _ci_limits_mu(self, mu):
        """
        Calculates the difference between the log likelihood of mu and a
        specified critical value.

        Parameters
        ----------
        mu : float
            Hypothesized value of the mean.

        Returns
        -------
        diff : float
            The difference between the log likelihood value of mu and
            a specified critical value.
        """
        return self.test_mean(mu)[0] - self.r0

    def _find_gamma(self, gamma):
        """
        Finds gamma that satisfies
        -2 * sum(log(n * w(gamma))) - r0 = 0

        Used for confidence intervals for the mean.

        Parameters
        ----------
        gamma : float
            Lagrange multiplier when computing confidence interval

        Returns
        -------
        diff : float
            The difference between the log-likelihood when the Lagrange
            multiplier is gamma and a pre-specified critical value
        """
        denom = np.sum((self.endog - gamma) ** -1)
        new_weights = (self.endog - gamma) ** -1 / denom
        return -2 * np.sum(np.log(self.nobs * new_weights)) - \
            self.r0

    def _opt_var(self, nuisance_mu, pval=False):
        """
        This is the function to be optimized over a nuisance mean parameter
        to determine the likelihood ratio for the variance.

        Parameters
        ----------
        nuisance_mu : float
            Value of a nuisance mean parameter

        pval : bool
            If True, returns the p-value instead of the likelihood ratio.
            Used for contour plotting.  Default is False

        Returns
        -------
        llr : float
            -2 x log likelihood of a pre-specified variance holding the
            nuisance parameter constant
        """
        endog = self.endog
        nobs = self.nobs
        sig_data = ((endog - nuisance_mu) ** 2
                    - self.sig2_0)
        mu_data = (endog - nuisance_mu)
        est_vect = np.column_stack((mu_data, sig_data))
        eta_star = self._modif_newton(np.array([1. / nobs,
                                               1. / nobs]), est_vect,
                                                np.ones(nobs) * (1. / nobs))

        denom = 1 + np.dot(eta_star, est_vect.T)
        self.new_weights = 1. / nobs * 1. / denom
        llr = np.sum(np.log(nobs * self.new_weights))
        if pval:  # Used for contour plotting
            return chi2.sf(-2 * llr, 1)
        return -2 * llr

    def _ci_limits_var(self, var):
        """
        Used to determine the confidence intervals for the variance.
        It calls test_var and, when called by an optimizer, finds the
        value of var where the log likelihood ratio equals
        chi2.ppf(1 - significance level, 1).

        Parameters
        ----------
        var : float
            Hypothesized value of the variance

        Returns
        -------
        diff : float
            The difference between the log likelihood ratio at var and a
            pre-specified critical value.
        """
        return self.test_var(var)[0] - self.r0

    def _opt_skew(self, nuis_params):
        """
        Called by test_skew.  This function is optimized over
        nuisance parameters mu and sigma.

        Parameters
        ----------
        nuis_params : 1darray
            An array with a nuisance mean and variance parameter

        Returns
        -------
        llr : float
            The log likelihood ratio of a pre-specified skewness holding
            the nuisance parameters constant.
        """
        endog = self.endog
        nobs = self.nobs
        mu_data = endog - nuis_params[0]
        sig_data = ((endog - nuis_params[0]) ** 2) - nuis_params[1]
        skew_data = ((((endog - nuis_params[0]) ** 3) /
                    (nuis_params[1] ** 1.5))) - self.skew0
        est_vect = np.column_stack((mu_data, sig_data, skew_data))
        eta_star = self._modif_newton(np.array([1. / nobs,
                                               1. / nobs,
                                               1. / nobs]), est_vect,
                                               np.ones(nobs) * (1. / nobs))
        denom = 1. + np.dot(eta_star, est_vect.T)
        self.new_weights = 1. / nobs * 1. / denom
        llr = np.sum(np.log(nobs * self.new_weights))
        return -2 * llr

    def _opt_kurt(self, nuis_params):
        """
        Called by test_kurt.  This function is optimized over
        nuisance parameters mu and sigma.

        Parameters
        ----------
        nuis_params : 1darray
            An array with a nuisance mean and variance parameter

        Returns
        -------
        llr : float
            The log likelihood ratio of a pre-specified kurtosis holding the
            nuisance parameters constant
        """
        endog = self.endog
        nobs = self.nobs
        mu_data = endog - nuis_params[0]
        sig_data = ((endog - nuis_params[0]) ** 2) - nuis_params[1]
        kurt_data = (((((endog - nuis_params[0]) ** 4) /
                    (nuis_params[1] ** 2))) - 3) - self.kurt0
        est_vect = np.column_stack((mu_data, sig_data, kurt_data))
        eta_star = self._modif_newton(np.array([1. / nobs,
                                               1. / nobs,
                                               1. / nobs]), est_vect,
                                               np.ones(nobs) * (1. / nobs))
        denom = 1 + np.dot(eta_star, est_vect.T)
        self.new_weights = 1. / nobs * 1. / denom
        llr = np.sum(np.log(nobs * self.new_weights))
        return -2 * llr

    def _opt_skew_kurt(self, nuis_params):
        """
        Called by test_joint_skew_kurt.  This function is optimized over
        nuisance parameters mu and sigma.

        Parameters
        ----------
        nuis_params : 1darray
            An array with a nuisance mean and variance parameter

        Returns
        -------
        llr : float
            The log likelihood ratio of a pre-specified skewness and
            kurtosis holding the nuisance parameters constant.
        """
        endog = self.endog
        nobs = self.nobs
        mu_data = endog - nuis_params[0]
        sig_data = ((endog - nuis_params[0]) ** 2) - nuis_params[1]
        skew_data = ((((endog - nuis_params[0]) ** 3) /
                    (nuis_params[1] ** 1.5))) - self.skew0
        kurt_data = (((((endog - nuis_params[0]) ** 4) /
                    (nuis_params[1] ** 2))) - 3) - self.kurt0
        est_vect = np.column_stack((mu_data, sig_data, skew_data, kurt_data))
        eta_star = self._modif_newton(np.array([1. / nobs,
                                               1. / nobs,
                                               1. / nobs,
                                               1. / nobs]), est_vect,
                                               np.ones(nobs) * (1. / nobs))
        denom = 1. + np.dot(eta_star, est_vect.T)
        self.new_weights = 1. / nobs * 1. / denom
        llr = np.sum(np.log(nobs * self.new_weights))
        return -2 * llr

    def _ci_limits_skew(self, skew0):
        """
        Parameters
        ----------
        skew0 : float
            Hypothesized value of skewness

        Returns
        -------
        diff : float
            The difference between the log likelihood ratio at skew0 and a
            pre-specified critical value.
        """
        return self.test_skew(skew0)[0] - self.r0

    def _ci_limits_kurt(self, kurt0):
        """
        Parameters
        ----------
        kurt0 : float
            Hypothesized value of kurtosis

        Returns
        -------
        diff : float
            The difference between the log likelihood ratio at kurt0 and a
            pre-specified critical value.
        """
        return self.test_kurt(kurt0)[0] - self.r0

    def _opt_correl(self, nuis_params, corr0, endog, nobs, x0, weights0):
        """
        Parameters
        ----------
        nuis_params : 1darray
            Array containing two nuisance means and two nuisance variances

        corr0 : float
            Hypothesized value of the correlation coefficient

        endog : ndarray
            Data to be analyzed

        nobs : int
            Number of observations

        x0 : ndarray
            Starting value for the Lagrange multiplier

        weights0 : ndarray
            Starting observation weights

        Returns
        -------
        llr : float
            The log-likelihood of the correlation coefficient holding nuisance
            parameters constant
        """
        mu1_data, mu2_data = (endog - nuis_params[::2]).T
        sig1_data = mu1_data ** 2 - nuis_params[1]
        sig2_data = mu2_data ** 2 - nuis_params[3]
        correl_data = ((mu1_data * mu2_data) - corr0 *
                    (nuis_params[1] * nuis_params[3]) ** .5)
        est_vect = np.column_stack((mu1_data, sig1_data,
                                    mu2_data, sig2_data, correl_data))
        eta_star = self._modif_newton(x0, est_vect, weights0)
        denom = 1. + np.dot(est_vect, eta_star)
        self.new_weights = 1. / nobs * 1. / denom
        llr = np.sum(np.log(nobs * self.new_weights))
        return -2 * llr

    def _ci_limits_corr(self, corr):
        return self.test_corr(corr)[0] - self.r0


class DescStatUV(_OptFuncts):
    """
    A class to compute confidence intervals and hypothesis tests involving
    the mean, variance, kurtosis and skewness of a univariate random
    variable.

    Parameters
    ----------
    endog : 1darray
        Data to be analyzed

    Attributes
    ----------
    endog : 1darray
        Data to be analyzed

    nobs : int
        Number of observations
    """

    def __init__(self, endog):
        self.endog = np.squeeze(endog)
        self.nobs = endog.shape[0]

    def test_mean(self, mu0, return_weights=False):
        """
        Returns -2 x log-likelihood ratio, p-value and weights
        for a hypothesis test of the mean.

        Parameters
        ----------
        mu0 : float
            Mean value to be tested

        return_weights : bool
            If return_weights is True, the function also returns
            the weights of the observations under the null hypothesis.
            Default is False

        Returns
        -------
        test_results : tuple
            The -2 x log-likelihood ratio and p-value of mu0, plus the
            weights if return_weights is True
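
        Examples
        --------
        A minimal illustration; the data are a random draw, so the exact
        statistic and p-value are omitted:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(data)
        >>> llr, pval = el_analysis.test_mean(0)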
508        """
509        self.mu0 = mu0
510        endog = self.endog
511        nobs = self.nobs
512        eta_min = (1. - (1. / nobs)) / (self.mu0 - max(endog))
513        eta_max = (1. - (1. / nobs)) / (self.mu0 - min(endog))
514        eta_star = optimize.brentq(self._find_eta, eta_min, eta_max)
515        new_weights = (1. / nobs) * 1. / (1. + eta_star * (endog - self.mu0))
516        llr = -2 * np.sum(np.log(nobs * new_weights))
517        if return_weights:
518            return llr, chi2.sf(llr, 1), new_weights
519        else:
520            return llr, chi2.sf(llr, 1)
521
    def ci_mean(self, sig=.05, method='gamma', epsilon=10 ** -8,
                gamma_low=-10 ** 10, gamma_high=10 ** 10):
        """
        Returns the confidence interval for the mean.

        Parameters
        ----------
        sig : float
            Significance level. Default is .05

        method : str
            Root finding method.  Can be 'nested-brent' or
            'gamma'.  Default is 'gamma'

            'gamma' tries to solve for the gamma parameter in the
            Lagrangian (see Owen pg 22) and then determines the weights.

            'nested-brent' uses Brent's method to find the confidence
            intervals but must maximize the likelihood ratio on every
            iteration.

            'gamma' is generally much faster.  If the optimization does not
            converge, try expanding the gamma_high and gamma_low
            variables.

        epsilon : float
            When using 'nested-brent', amount to decrease (increase)
            from the maximum (minimum) of the data when
            starting the search.  This is to protect against the
            likelihood ratio being zero at the maximum (minimum)
            value of the data.  If the data are very small in absolute value
            (<10 ``**`` -6), consider shrinking epsilon

            When using 'gamma', amount to decrease (increase) the
            minimum (maximum) by to start the search for gamma.
            If the function returns the error 'f(a) and f(b) must have
            different signs', consider lowering epsilon.

        gamma_low : float
            Lower bound for gamma when finding lower limit.
            If the function returns the error 'f(a) and f(b) must have
            different signs', consider lowering gamma_low.

        gamma_high : float
            Upper bound for gamma when finding upper limit.
            If the function returns the error 'f(a) and f(b) must have
            different signs', consider raising gamma_high.

        Returns
        -------
        Interval : tuple
            Confidence interval for the mean
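
        Examples
        --------
        A minimal illustration; with a standard normal draw the interval
        should bracket a value near zero, but the exact endpoints depend
        on the data:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(data)
        >>> ci_low, ci_high = el_analysis.ci_mean()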
574        """
575        endog = self.endog
576        sig = 1 - sig
577        if method == 'nested-brent':
578            self.r0 = chi2.ppf(sig, 1)
579            middle = np.mean(endog)
580            epsilon_u = (max(endog) - np.mean(endog)) * epsilon
581            epsilon_l = (np.mean(endog) - min(endog)) * epsilon
582            ulim = optimize.brentq(self._ci_limits_mu, middle,
583                max(endog) - epsilon_u)
584            llim = optimize.brentq(self._ci_limits_mu, middle,
585                min(endog) + epsilon_l)
586            return llim, ulim
587
588        if method == 'gamma':
589            self.r0 = chi2.ppf(sig, 1)
590            gamma_star_l = optimize.brentq(self._find_gamma, gamma_low,
591                min(endog) - epsilon)
592            gamma_star_u = optimize.brentq(self._find_gamma, \
593                         max(endog) + epsilon, gamma_high)
594            weights_low = ((endog - gamma_star_l) ** -1) / \
595                np.sum((endog - gamma_star_l) ** -1)
596            weights_high = ((endog - gamma_star_u) ** -1) / \
597                np.sum((endog - gamma_star_u) ** -1)
598            mu_low = np.sum(weights_low * endog)
599            mu_high = np.sum(weights_high * endog)
600            return mu_low,  mu_high
601
    def test_var(self, sig2_0, return_weights=False):
        """
        Returns -2 x log-likelihood ratio and the p-value for the
        hypothesized variance.

        Parameters
        ----------
        sig2_0 : float
            Hypothesized variance to be tested

        return_weights : bool
            If True, returns the weights that maximize the
            likelihood of observing sig2_0. Default is False

        Returns
        -------
        test_results : tuple
            The -2 x log-likelihood ratio and the p-value of sig2_0

        Examples
        --------
        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> random_numbers = np.random.standard_normal(1000)*100
        >>> el_analysis = sm.emplike.DescStat(random_numbers)
        >>> hyp_test = el_analysis.test_var(9500)
        """
        self.sig2_0 = sig2_0
        mu_max = max(self.endog)
        mu_min = min(self.endog)
        # Profile out the nuisance mean over the range of the data
        llr = optimize.fminbound(self._opt_var, mu_min, mu_max,
                                 full_output=1)[1]
        p_val = chi2.sf(llr, 1)
        if return_weights:
            return llr, p_val, self.new_weights.T
        else:
            return llr, p_val

    def ci_var(self, lower_bound=None, upper_bound=None, sig=.05):
        """
        Returns the confidence interval for the variance.

        Parameters
        ----------
        lower_bound : float
            The minimum value the lower confidence interval can
            take. The p-value from test_var(lower_bound) must be lower
            than 1 - significance level. Default is the .99 confidence
            limit assuming normality

        upper_bound : float
            The maximum value the upper confidence interval
            can take. The p-value from test_var(upper_bound) must be lower
            than 1 - significance level.  Default is the .99 confidence
            limit assuming normality

        sig : float
            The significance level. Default is .05

        Returns
        -------
        Interval : tuple
            Confidence interval for the variance

        Examples
        --------
        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> random_numbers = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(random_numbers)
        >>> el_analysis.ci_var()
        (0.7539322567470305, 1.229998852496268)
        >>> el_analysis.ci_var(.5, 2)
        (0.7539322567469926, 1.2299988524962664)

        Notes
        -----
        If the function returns the error 'f(a) and f(b) must have
        different signs', consider lowering lower_bound and raising
        upper_bound.
        """
        endog = self.endog
        if upper_bound is None:
            # .99 confidence limit for the variance assuming normality
            upper_bound = ((self.nobs - 1) * endog.var()) / \
              (chi2.ppf(.0001, self.nobs - 1))
        if lower_bound is None:
            lower_bound = ((self.nobs - 1) * endog.var()) / \
              (chi2.ppf(.9999, self.nobs - 1))
        self.r0 = chi2.ppf(1 - sig, 1)
        llim = optimize.brentq(self._ci_limits_var, lower_bound, endog.var())
        ulim = optimize.brentq(self._ci_limits_var, endog.var(), upper_bound)
        return llim, ulim

    def plot_contour(self, mu_low, mu_high, var_low, var_high, mu_step,
                     var_step,
                     levs=[.2, .1, .05, .01, .001]):
        """
        Returns a plot of the confidence region for a univariate
        mean and variance.

        Parameters
        ----------
        mu_low : float
            Lowest value of the mean to plot

        mu_high : float
            Highest value of the mean to plot

        var_low : float
            Lowest value of the variance to plot

        var_high : float
            Highest value of the variance to plot

        mu_step : float
            Increments to evaluate the mean

        var_step : float
            Increments to evaluate the variance

        levs : list
            Significance levels at which the contour lines are drawn.
            Default is [.2, .1, .05, .01, .001]

        Returns
        -------
        Figure
            The contour plot
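
        Examples
        --------
        A minimal sketch (requires matplotlib; the plotting bounds below
        are illustrative choices for standard normal data):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(data)
        >>> fig = el_analysis.plot_contour(-.5, .5, .5, 2., .05, .05)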
730        """
731        fig, ax = utils.create_mpl_ax()
732        ax.set_ylabel('Variance')
733        ax.set_xlabel('Mean')
734        mu_vect = list(np.arange(mu_low, mu_high, mu_step))
735        var_vect = list(np.arange(var_low, var_high, var_step))
736        z = []
737        for sig0 in var_vect:
738            self.sig2_0 = sig0
739            for mu0 in mu_vect:
740                z.append(self._opt_var(mu0, pval=True))
741        z = np.asarray(z).reshape(len(var_vect), len(mu_vect))
742        ax.contour(mu_vect, var_vect, z, levels=levs)
743        return fig
744
    def test_skew(self, skew0, return_weights=False):
        """
        Returns -2 x log-likelihood and p-value for the hypothesized
        skewness.

        Parameters
        ----------
        skew0 : float
            Skewness value to be tested

        return_weights : bool
            If True, function also returns the weights that
            maximize the likelihood ratio. Default is False.

        Returns
        -------
        test_results : tuple
            The -2 x log-likelihood ratio and p-value of skew0
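
        Examples
        --------
        A minimal illustration; the data are a random draw, so the exact
        statistic and p-value are omitted:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(data)
        >>> llr, pval = el_analysis.test_skew(0)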
763        """
764        self.skew0 = skew0
765        start_nuisance = np.array([self.endog.mean(),
766                                       self.endog.var()])
767
768        llr = optimize.fmin_powell(self._opt_skew, start_nuisance,
769                                     full_output=1, disp=0)[1]
770        p_val = chi2.sf(llr, 1)
771        if return_weights:
772            return llr, p_val,  self.new_weights.T
773        return llr, p_val
774
    def test_kurt(self, kurt0, return_weights=False):
        """
        Returns -2 x log-likelihood and the p-value for the hypothesized
        kurtosis.

        Parameters
        ----------
        kurt0 : float
            Kurtosis value to be tested

        return_weights : bool
            If True, function also returns the weights that
            maximize the likelihood ratio. Default is False.

        Returns
        -------
        test_results : tuple
            The -2 x log-likelihood ratio and p-value of kurt0
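
        Examples
        --------
        A minimal illustration; the data are a random draw, so the exact
        statistic and p-value are omitted:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(data)
        >>> llr, pval = el_analysis.test_kurt(0)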
793        """
794        self.kurt0 = kurt0
795        start_nuisance = np.array([self.endog.mean(),
796                                       self.endog.var()])
797
798        llr = optimize.fmin_powell(self._opt_kurt, start_nuisance,
799                                     full_output=1, disp=0)[1]
800        p_val = chi2.sf(llr, 1)
801        if return_weights:
802            return llr, p_val, self.new_weights.T
803        return llr, p_val
804
    def test_joint_skew_kurt(self, skew0, kurt0, return_weights=False):
        """
        Returns -2 x log-likelihood and the p-value for the joint
        hypothesis test for skewness and kurtosis.

        Parameters
        ----------
        skew0 : float
            Skewness value to be tested
        kurt0 : float
            Kurtosis value to be tested

        return_weights : bool
            If True, function also returns the weights that
            maximize the likelihood ratio. Default is False.

        Returns
        -------
        test_results : tuple
            The -2 x log-likelihood ratio and p-value of the joint
            hypothesis test.
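
        Examples
        --------
        A minimal illustration; the data are a random draw, so the exact
        statistic and p-value are omitted:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(data)
        >>> llr, pval = el_analysis.test_joint_skew_kurt(0, 0)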
825        """
826        self.skew0 = skew0
827        self.kurt0 = kurt0
828        start_nuisance = np.array([self.endog.mean(),
829                                       self.endog.var()])
830
831        llr = optimize.fmin_powell(self._opt_skew_kurt, start_nuisance,
832                                     full_output=1, disp=0)[1]
833        p_val = chi2.sf(llr, 2)
834        if return_weights:
835            return llr, p_val, self.new_weights.T
836        return llr, p_val
837
    def ci_skew(self, sig=.05, upper_bound=None, lower_bound=None):
        """
        Returns the confidence interval for skewness.

        Parameters
        ----------
        sig : float
            The significance level.  Default is .05

        upper_bound : float
            Maximum value of skewness the upper limit can be.
            Default is the .99 confidence limit assuming normality.

        lower_bound : float
            Minimum value of skewness the lower limit can be.
            Default is the .99 confidence limit assuming normality.

        Returns
        -------
        Interval : tuple
            Confidence interval for the skewness

        Notes
        -----
        If the function returns the error 'f(a) and f(b) must have
        different signs', consider expanding the lower and upper bounds.
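
        Examples
        --------
        A minimal illustration; the endpoints depend on the random draw:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(data)
        >>> ci_low, ci_high = el_analysis.ci_skew()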
864        """
865        nobs = self.nobs
866        endog = self.endog
867        if upper_bound is None:
868            upper_bound = skew(endog) + \
869            2.5 * ((6. * nobs * (nobs - 1.)) / \
870              ((nobs - 2.) * (nobs + 1.) * \
871               (nobs + 3.))) ** .5
872        if lower_bound is None:
873            lower_bound = skew(endog) - \
874            2.5 * ((6. * nobs * (nobs - 1.)) / \
875              ((nobs - 2.) * (nobs + 1.) * \
876               (nobs + 3.))) ** .5
877        self.r0 = chi2.ppf(1 - sig, 1)
878        llim = optimize.brentq(self._ci_limits_skew, lower_bound, skew(endog))
879        ulim = optimize.brentq(self._ci_limits_skew, skew(endog), upper_bound)
880        return llim, ulim
881
    def ci_kurt(self, sig=.05, upper_bound=None, lower_bound=None):
        """
        Returns the confidence interval for kurtosis.

        Parameters
        ----------
        sig : float
            The significance level.  Default is .05

        upper_bound : float
            Maximum value of kurtosis the upper limit can be.
            Default is the .99 confidence limit assuming normality.

        lower_bound : float
            Minimum value of kurtosis the lower limit can be.
            Default is the .99 confidence limit assuming normality.

        Returns
        -------
        Interval : tuple
            Lower and upper confidence limit

        Notes
        -----
        For small n, upper_bound and lower_bound may have to be
        provided by the user.  Consider using test_kurt to find
        values close to the desired significance level.

        If the function returns the error 'f(a) and f(b) must have
        different signs', consider expanding the bounds.
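
        Examples
        --------
        A minimal illustration; the endpoints depend on the random draw:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(data)
        >>> ci_low, ci_high = el_analysis.ci_kurt()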
913        """
914        endog = self.endog
915        nobs = self.nobs
916        if upper_bound is None:
917            upper_bound = kurtosis(endog) + \
918            (2.5 * (2. * ((6. * nobs * (nobs - 1.)) / \
919              ((nobs - 2.) * (nobs + 1.) * \
920               (nobs + 3.))) ** .5) * \
921               (((nobs ** 2.) - 1.) / ((nobs - 3.) *\
922                 (nobs + 5.))) ** .5)
923        if lower_bound is None:
924            lower_bound = kurtosis(endog) - \
925            (2.5 * (2. * ((6. * nobs * (nobs - 1.)) / \
926              ((nobs - 2.) * (nobs + 1.) * \
927               (nobs + 3.))) ** .5) * \
928               (((nobs ** 2.) - 1.) / ((nobs - 3.) *\
929                 (nobs + 5.))) ** .5)
930        self.r0 = chi2.ppf(1 - sig, 1)
931        llim = optimize.brentq(self._ci_limits_kurt, lower_bound, \
932                             kurtosis(endog))
933        ulim = optimize.brentq(self._ci_limits_kurt, kurtosis(endog), \
934                             upper_bound)
935        return llim, ulim
936
937
class DescStatMV(_OptFuncts):
    """
    A class for conducting inference on multivariate means and correlation.

    Parameters
    ----------
    endog : ndarray
        Data to be analyzed

    Attributes
    ----------
    endog : ndarray
        Data to be analyzed

    nobs : int
        Number of observations
    """

    def __init__(self, endog):
        self.endog = endog
        self.nobs = endog.shape[0]

    def mv_test_mean(self, mu_array, return_weights=False):
        """
        Returns -2 x log likelihood and the p-value
        for a multivariate hypothesis test of the mean.

        Parameters
        ----------
        mu_array : 1d array
            Hypothesized values for the mean.  Must have same number of
            elements as columns in endog

        return_weights : bool
            If True, returns the weights that maximize the
            likelihood of mu_array. Default is False.

        Returns
        -------
        test_results : tuple
            The -2 x log-likelihood ratio and p-value for mu_array
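
        Examples
        --------
        A minimal illustration; the data are a random draw, so the exact
        statistic and p-value are omitted:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> two_rvs = np.random.standard_normal((50, 2))
        >>> el_analysis = sm.emplike.DescStat(two_rvs)
        >>> llr, pval = el_analysis.mv_test_mean(np.array([0., 0.]))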
979        """
980        endog = self.endog
981        nobs = self.nobs
982        if len(mu_array) != endog.shape[1]:
983            raise ValueError('mu_array must have the same number of '
984                             'elements as the columns of the data.')
985        mu_array = mu_array.reshape(1, endog.shape[1])
986        means = np.ones((endog.shape[0], endog.shape[1]))
987        means = mu_array * means
988        est_vect = endog - means
989        start_vals = 1. / nobs * np.ones(endog.shape[1])
990        eta_star = self._modif_newton(start_vals, est_vect,
991                                      np.ones(nobs) * (1. / nobs))
992        denom = 1 + np.dot(eta_star, est_vect.T)
993        self.new_weights = 1 / nobs * 1 / denom
994        llr = -2 * np.sum(np.log(nobs * self.new_weights))
995        p_val = chi2.sf(llr, mu_array.shape[1])
996        if return_weights:
997            return llr, p_val,  self.new_weights.T
998        else:
999            return llr, p_val
1000
    def mv_mean_contour(self, mu1_low, mu1_upp, mu2_low, mu2_upp, step1, step2,
                        levs=(.001, .01, .05, .1, .2), var1_name=None,
                        var2_name=None, plot_dta=False):
        """
        Creates a confidence region plot for the mean of bivariate data.

        Parameters
        ----------
        mu1_low : float
            Minimum value of the mean for variable 1
        mu1_upp : float
            Maximum value of the mean for variable 1
        mu2_low : float
            Minimum value of the mean for variable 2
        mu2_upp : float
            Maximum value of the mean for variable 2
        step1 : float
            Increment of evaluations for variable 1
        step2 : float
            Increment of evaluations for variable 2
        levs : list
            Levels to be drawn on the contour plot.
            Default is (.001, .01, .05, .1, .2)
        var1_name : str
            Name of variable 1 to be plotted on the x-axis
        var2_name : str
            Name of variable 2 to be plotted on the y-axis
        plot_dta : bool
            If True, makes a scatter plot of the data on
            top of the contour plot. Default is False.

        Notes
        -----
        The smaller the step size, the more accurate the intervals
        will be.

        If the function returns the error 'optimization failed', consider
        narrowing the boundaries of the plot.

        Examples
        --------
        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> two_rvs = np.random.standard_normal((20, 2))
        >>> el_analysis = sm.emplike.DescStat(two_rvs)
        >>> contourp = el_analysis.mv_mean_contour(-2, 2, -2, 2, .1, .1)
        >>> contourp.show()
1047        """
1048        if self.endog.shape[1] != 2:
1049            raise ValueError('Data must contain exactly two variables')
1050        fig, ax = utils.create_mpl_ax()
1051        if var2_name is None:
1052            ax.set_ylabel('Variable 2')
1053        else:
1054            ax.set_ylabel(var2_name)
1055        if var1_name is None:
1056            ax.set_xlabel('Variable 1')
1057        else:
1058            ax.set_xlabel(var1_name)
1059        x = np.arange(mu1_low, mu1_upp, step1)
1060        y = np.arange(mu2_low, mu2_upp, step2)
1061        pairs = itertools.product(x, y)
1062        z = []
1063        for i in pairs:
1064            z.append(self.mv_test_mean(np.asarray(i))[0])
1065        X, Y = np.meshgrid(x, y)
1066        z = np.asarray(z)
1067        z = z.reshape(X.shape[1], Y.shape[0])
1068        ax.contour(x, y, z.T, levels=levs)
1069        if plot_dta:
1070            ax.plot(self.endog[:, 0], self.endog[:, 1], 'bo')
1071        return fig
1072
    def test_corr(self, corr0, return_weights=0):
        """
        Returns -2 x log-likelihood ratio and p-value for the
        correlation coefficient between 2 variables.

        Parameters
        ----------
        corr0 : float
            Hypothesized value to be tested

        return_weights : bool
            If True, returns the weights that maximize
            the log-likelihood at the hypothesized value

        Returns
        -------
        test_results : tuple
            The -2 x log-likelihood ratio and p-value of corr0
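
        Examples
        --------
        A minimal illustration; the data are a random draw, so the exact
        statistic and p-value are omitted:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> two_rvs = np.random.standard_normal((50, 2))
        >>> el_analysis = sm.emplike.DescStat(two_rvs)
        >>> llr, pval = el_analysis.test_corr(0)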
1086        """
1087        nobs = self.nobs
1088        endog = self.endog
1089        if endog.shape[1] != 2:
1090            raise NotImplementedError('Correlation matrix not yet implemented')
1091        nuis0 = np.array([endog[:, 0].mean(),
1092                              endog[:, 0].var(),
1093                              endog[:, 1].mean(),
1094                              endog[:, 1].var()])
1095
1096        x0 = np.zeros(5)
1097        weights0 = np.array([1. / nobs] * int(nobs))
1098        args = (corr0, endog, nobs, x0, weights0)
1099        llr = optimize.fmin(self._opt_correl, nuis0, args=args,
1100                                     full_output=1, disp=0)[1]
1101        p_val = chi2.sf(llr, 1)
1102        if return_weights:
1103            return llr, p_val, self.new_weights.T
1104        return llr, p_val
1105
    def ci_corr(self, sig=.05, upper_bound=None, lower_bound=None):
        """
        Returns the confidence intervals for the correlation coefficient.

        Parameters
        ----------
        sig : float
            The significance level.  Default is .05

        upper_bound : float
            Maximum value the upper confidence limit can be.
            Default is the 99% confidence limit assuming normality.

        lower_bound : float
            Minimum value the lower confidence limit can be.
            Default is the 99% confidence limit assuming normality.

        Returns
        -------
        interval : tuple
            Confidence interval for the correlation
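
        Examples
        --------
        A minimal illustration; the endpoints depend on the random draw:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> two_rvs = np.random.standard_normal((50, 2))
        >>> el_analysis = sm.emplike.DescStat(two_rvs)
        >>> ci_low, ci_high = el_analysis.ci_corr()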
1127        """
1128        endog = self.endog
1129        nobs = self.nobs
1130        self.r0 = chi2.ppf(1 - sig, 1)
1131        point_est = np.corrcoef(endog[:, 0], endog[:, 1])[0, 1]
1132        if upper_bound is None:
1133            upper_bound = min(.999, point_est + \
1134                          2.5 * ((1. - point_est ** 2.) / \
1135                          (nobs - 2.)) ** .5)
1136
1137        if lower_bound is None:
1138            lower_bound = max(- .999, point_est - \
1139                          2.5 * (np.sqrt((1. - point_est ** 2.) / \
1140                          (nobs - 2.))))
1141
1142        llim = optimize.brenth(self._ci_limits_corr, lower_bound, point_est)
1143        ulim = optimize.brenth(self._ci_limits_corr, point_est, upper_bound)
1144        return llim, ulim
1145