PerformanceAnalytics/R/SmoothingIndex.R

#' calculate Normalized Getmansky Smoothing Index
#'
#' Proposed by Getmansky et al to provide a normalized measure of "liquidity
#' risk."
#'
#' To measure the effects of smoothing, Getmansky, Lo, et al (2004) define a
#' "smoothing profile" as a vector of coefficients for an MLE fit on returns
#' using a two-period moving-average process.
#'
#' The moving-average process of order \eqn{k=2} (specified using
#' \code{MAorder}) gives the observed return \eqn{R^o_t = \theta_0 R_t +
#' \theta_1 R_{t-1} + \theta_2 R_{t-2}}, under the constraint that the sum of
#' the coefficients is equal to 1. In R, the \code{arima} function allows us to
#' create an MA(2)
#' model using an "ARIMA(p,d,q)" model, where \eqn{p} is the number of
#' autoregressive terms (AR), \eqn{d} is the degree of differencing, and
#' \eqn{q} is the number of lagged forecast errors (MA) in the prediction
#' equation.  The \code{order} parameter allows us to specify the three
#' components \eqn{(p, d, q)} as an argument, e.g., \code{order = c(0, 0, 2)}.
#' The \code{method} specifies how to fit the model, in this case using maximum
#' likelihood estimation (MLE) in a fashion similar to the estimation of
#' standard moving-average time series models, using:
#'
#' \code{arima(ra, order=c(0,0,2), method="ML", transform.pars=TRUE,
#' include.mean=FALSE)}
#'
#' \code{include.mean}: Getmansky, et al. (2004) p 555 "By applying the above
#' procedure to observed de-meaned returns...", so we set that parameter to
#' 'FALSE'.
#'
#' \code{transform.pars}: ibid, "we impose the additional restriction that the
#' estimated MA(k) process be invertible," so we set the parameter to 'TRUE'.
#'
#' The coefficients, \eqn{\theta_{j}}, are then normalized to sum to 1 and
#' interpreted as a "weighted average of the fund's true returns over the most
#' recent \eqn{k + 1} periods, including the current period."
#'
#' If these weights are disproportionately centered on a small number of lags,
#' relatively little serial correlation will be induced. However, if the
#' weights are evenly distributed among many lags, this would show higher
#' serial correlation.
#'
#' The paper notes that because \eqn{\theta_j \in [0, 1]}, \eqn{\xi} is also
#' confined to the unit interval, and is minimized when all the
#' \eqn{\theta_j}'s are identical.  That implies a value of \eqn{1/(k + 1)} for
#' \eqn{\xi}, and a maximum value of \eqn{\xi = 1} when one coefficient is 1
#' and the rest are 0.  In the context of smoothed returns, a lower value of
#' \eqn{\xi} implies more smoothing, and the upper bound of 1 implies no
#' smoothing.
#'
#' The "smoothing index", represented as \eqn{\xi}, is calculated the same way
#' as the Herfindahl index: \eqn{\xi = \sum_{j=0}^{k} \theta_j^2}.  The
#' Herfindahl measure, \eqn{\sum_j y_j^2}, is well known in the industrial
#' organization literature as a measure of the concentration of firms in a
#' given industry where \eqn{y_j} represents the market share of firm \eqn{j}.
#'
#' This method (as well as the implementation described in the paper), does not
#' enforce \eqn{\theta_j \in [0, 1]}, so \eqn{\xi} is not limited to that range
#' either.  All we can say is that lower values are "less liquid" and higher
#' values are "more liquid" or mis-specified.  In this function, setting the
#' parameter neg.thetas = FALSE does enforce the limitation, eliminating
#' negative autocorrelation coefficients from the calculation (the papers below
#' do not make an economic case for eliminating negative autocorrelation,
#' however).
#'
#' Interpretation of the resulting value is difficult.  All we can say is that
#' lower values appear to have autocorrelation structure like we might expect
#' of "less liquid" instruments.  Higher values appear "more liquid" or are
#' poorly fit or mis-specified.
#'
#' @param R an xts, vector, matrix, data frame, timeSeries or zoo object of
#' asset returns
#' @param neg.thetas if FALSE, function removes negative coefficients (thetas)
#' when calculating the index
#' @param MAorder specify the number of periods used to calculate the moving
#' average, defaults to 2
#' @param verbose if TRUE, return a list containing the Thetas in addition to
#' the smoothing index
#' @param \dots any other passthru parameters
#' @section Acknowledgments: Thanks to Dr. Stefan Albrecht, CFA, for invaluable
#' input.
#' @author Peter Carl
#' @references Chan, Nicholas, Mila Getmansky, Shane M. Haas, and Andrew W. Lo.
#' 2005. Systemic Risk and Hedge Funds. NBER Working Paper Series (11200).
#' Getmansky, Mila, Andrew W. Lo, and Igor Makarov. 2004. An Econometric Model
#' of Serial Correlation and Illiquidity in Hedge Fund Returns. Journal of
#' Financial Economics (74): 529-609.
###keywords ts multivariate distribution models
#' @examples
#'
#' data(managers)
#' data(edhec)
#' SmoothingIndex(managers[,1,drop=FALSE])
#' SmoothingIndex(managers[,1:8])
#' SmoothingIndex(edhec)
#'
#' @export
SmoothingIndex <-
function (R, neg.thetas = FALSE, MAorder = 2, verbose = FALSE, ...)
{ # @author Peter Carl

    # Description:
    # Normalized Getmansky smoothing index: fit an MA(MAorder) model to a
    # return series, rescale the MA coefficients so they sum to one, and
    # return the sum of their squares (a Herfindahl-style concentration
    # measure).
    #
    # Arguments:
    # R           a return series (vector), or a multi-column object that
    #             checkData() can convert to a matrix
    # neg.thetas  if FALSE, negative MA coefficients are floored at zero
    #             before normalization; if TRUE they are used as estimated
    # MAorder     order k of the moving-average model
    # verbose     if TRUE, return list(SmoothingIndex =, Thetas =) instead
    #             of the bare index
    # ...         passed through to checkData()
    #
    # Value:
    # For a vector: the smoothing index (or the verbose list).  For
    # multi-column input: one result per column; when there is more than
    # one column the results are arranged as a one-row matrix with row name
    # "Smoothing Index" and the original column names.

    # Function:
    if (!is.vector(R)) {
        # Multi-column input: convert to a matrix and recurse on each
        # column, which then takes the single-series path below.
        R <- checkData(R, method = "matrix", ... = ...)
        result <- apply(R, 2, SmoothingIndex, neg.thetas = neg.thetas,
                        MAorder = MAorder, verbose = verbose, ... = ...)
        if (length(result) == 1)
            return(result)
        dim(result) <- c(1, NCOL(R))
        colnames(result) <- colnames(R)
        rownames(result) <- "Smoothing Index"
        return(result)
    }

    # Single series.
    #
    # First, create a maximum likelihood estimation fit for an MA process.
    # R is handed to arima() unchanged; any NA handling is left to arima().
    #
    # include.mean: Getmansky, et al. JFE 2004 p 555 "By applying the above
    # procedure to observed de-meaned returns...", so set parameter to FALSE
    # transform.pars: ibid, "we impose the additional restriction that the
    # estimated MA(k) process be invertible." so set the parameter to TRUE
    fit <- arima(R, order = c(0, 0, MAorder), method = "ML",
                 transform.pars = TRUE, include.mean = FALSE)

    # coef(fit) holds the MAorder moving-average coefficients and no
    # intercept; the implicit lag-0 coefficient of 1 is added below.
    ma.coefs <- coef(fit)
    if (!neg.thetas) {
        # Floor negative coefficients at zero.  pmax() is vectorized and
        # copes with an empty coefficient vector (MAorder = 0), where an
        # index loop over 1:length(...) would write NA.  unname() keeps
        # the thetas unnamed on this path.
        ma.coefs <- pmax(0, unname(ma.coefs))
    }

    # Page 555:
    #
    # "Because of the scaling property Eq. (52) of the MA(k) likelihood
    # function, a simple procedure for obtaining estimates of our smoothing
    # model with the normalization Eq. (49) is to transform estimates
    # (\theta; sigma) from standard MA(k) estimation packages such as SAS or
    # RATS by dividing each \theta_i by 1 + \theta_1 + \theta_2 ... \theta_k
    # and multiplying sigma by the same factor. The likelihood function
    # remains unchanged but the transformed smoothing coefficients will now
    # satisfy Eq. (49)."
    #
    # Normalization as suggested by Dr. Stefan Albrecht, CFA.
    thetas <- c(1, ma.coefs) / (1 + sum(ma.coefs))

    # Calculated as the Herfindahl index would be; referred to as \xi in
    # the paper:
    #
    # "Because \theta_j \in [0, 1], \xi is also confined to the unit
    # interval, and is minimized when all the \theta_j's are identical,
    # which implies a value of 1/(k + 1) for \xi, and is maximized when one
    # coefficient is 1 and the rest are 0, in which case \xi = 1. In the
    # context of smoothed returns, a lower value of \xi implies more
    # smoothing, and the upper bound of 1 implies no smoothing, hence we
    # shall refer to \xi as a "smoothing index"."
    #
    # That's fine, except that this method (as described in the paper) does
    # not enforce \theta_j \in [0, 1] unless neg.thetas = FALSE, so \xi is
    # not limited to that range either.  All we can say is that lower values
    # are "less liquid" and higher values are "more liquid" or mis-specified.
    smoothing.index <- sum(thetas^2)

    if (verbose)
        return(list(SmoothingIndex = smoothing.index, Thetas = thetas))
    smoothing.index
}

###############################################################################
# R (http://r-project.org/) Econometrics for Performance and Risk Analysis
#
# Copyright (c) 2004-2020 Peter Carl and Brian G. Peterson
#
# This R package is distributed under the terms of the GNU Public License (GPL)
# for full details see the file COPYING
#
# $Id$
#
###############################################################################