1"""Internal helper files for user output."""
2
3__author__ = ("Luc Anselin luc.anselin@asu.edu, "
4              "David C. Folch david.folch@asu.edu, "
5              "Levi John Wolf levi.john.wolf@gmail.com, "
6              "Jing Yao jingyao@asu.edu")
7import numpy as np
8import copy as COPY
9from . import diagnostics
10from . import sputils as spu
11from libpysal import weights
12from scipy.sparse.csr import csr_matrix
13
14
def set_name_ds(name_ds):
    """Resolve the dataset name shown in regression output.

    Parameters
    ----------
    name_ds     : string
                  User provided dataset name; may be None or empty.

    Returns
    -------
    name_ds     : string
                  The supplied name, or the generic 'unknown' when the
                  user gave no (or an empty) name.

    """
    return name_ds if name_ds else 'unknown'
34
35
def set_name_y(name_y):
    """Resolve the dependent variable name shown in regression output.

    Parameters
    ----------
    name_y      : string
                  User provided dependent variable name; may be None or
                  empty.

    Returns
    -------
    name_y      : string
                  The supplied name, or the generic 'dep_var' when the
                  user gave no (or an empty) name.

    """
    return name_y if name_y else 'dep_var'
53
54
def set_name_x(name_x, x, constant=False):
    """Resolve the independent variable names shown in regression output.

    Parameters
    ----------
    name_x      : list of strings
                  User provided exogenous variable names; may be None or
                  empty, in which case generic names are generated.
    x           : array
                  Exogenous variables. Only consulted (via ``x.shape[1]``)
                  when generic names have to be generated.
    constant    : boolean
                  If False (default), one fewer generic name than x has
                  columns is generated and 'CONSTANT' is inserted at the
                  front of the list.
                  If True, one generic name per column of x is generated
                  and no 'CONSTANT' entry is added.

    Returns
    -------
    name_x      : list of strings
                  A new list; the list passed by the user is not modified.

    """
    if name_x:
        # Work on a copy so the caller's list is never mutated.
        labels = name_x[:]
    else:
        n_generic = x.shape[1] - 1 + int(constant)
        labels = ['var_' + str(k) for k in range(1, n_generic + 1)]
    if constant:
        return labels
    labels.insert(0, 'CONSTANT')
    return labels
82
83
def set_name_yend(name_yend, yend):
    """Resolve the endogenous variable names shown in regression output.

    Parameters
    ----------
    name_yend   : list of strings
                  User provided endogenous variable names; may be None or
                  empty.
    yend        : array
                  Endogenous variables, or None when the model has none.
                  The length of its first row sets how many generic names
                  are generated.

    Returns
    -------
    name_yend   : list of strings
                  Empty list when yend is None; otherwise a copy of the
                  user's names, or generic 'endogenous_<k>' names.

    """
    if yend is None:
        return []
    if name_yend:
        return name_yend[:]
    n_endog = len(yend[0])
    return ['endogenous_' + str(k) for k in range(1, n_endog + 1)]
105
106
def set_name_q(name_q, q):
    """Resolve the external instrument names shown in regression output.

    Parameters
    ----------
    name_q      : list of strings
                  User provided instrument names; may be None or empty.
    q           : array
                  Array of instruments, or None when the model has none.
                  The length of its first row sets how many generic names
                  are generated.

    Returns
    -------
    name_q      : list of strings
                  Empty list when q is None; otherwise a copy of the
                  user's names, or generic 'instrument_<k>' names.

    """
    if q is None:
        return []
    if name_q:
        return name_q[:]
    n_instruments = len(q[0])
    return ['instrument_' + str(k) for k in range(1, n_instruments + 1)]
130
131
def set_name_yend_sp(name_y):
    """Build the name of the spatial lag of the dependent variable.

    Parameters
    ----------
    name_y      : string
                  Dependent variable name.

    Returns
    -------
    name_yend_sp : string
                   name_y prefixed with 'W_'.

    """
    lag_prefix = 'W_'
    return lag_prefix + name_y
147
148
def set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=False):
    """Build the names of the spatial instruments (spatially lagged
    variables) used in regression output.

    Parameters
    ----------
    name_x      : list of strings
                  Exogenous variable names; the first entry is treated as
                  the constant and skipped unless force_all is True.
    w_lags      : int
                  Number of spatial lag orders to generate names for.
    name_q      : list of strings
                  Instrument names; only used when lag_q is true.
    lag_q       : boolean
                  If true, the instruments in name_q are lagged as well.
    force_all   : boolean
                  If True, every entry of name_x is lagged, including the
                  first one. Default is False.

    Returns
    -------
    name_q_sp   : list of strings
                  'W_<name>' for every lagged variable, followed by
                  'W<i>_<name>' for each order i from 2 up to w_lags.

    """
    base = name_x if force_all else name_x[1:]  # slice drops the constant
    if lag_q:
        base = base + name_q
    lagged = ['W_' + var for var in base]
    if w_lags > 1:
        # Higher orders are grouped by order, then by variable.
        lagged.extend('W' + str(order) + '_' + var
                      for order in range(2, w_lags + 1)
                      for var in base)
    return lagged
179
180
def set_name_h(name_x, name_q):
    """Build the full list of instrument names for regression output.

    Parameters
    ----------
    name_x      : list of strings
                  Exogenous variable names.
    name_q      : list of strings
                  Instrument variable names.

    Returns
    -------
    name_h      : list of strings
                  name_x followed by name_q, as a new list.

    """
    name_h = name_x + name_q
    return name_h
198
199
def set_robust(robust):
    """Resolve the label for the robust option shown in regression output.

    Parameters
    ----------
    robust      : string or None
                  Object passed by the user to a regression class.

    Returns
    -------
    robust      : string
                  The value passed in, or 'unadjusted' when it is None
                  (or otherwise falsy). No validation is done here.

    """
    return robust or 'unadjusted'
218
219
def set_name_w(name_w, w):
    """Resolve the weights matrix name shown in regression output.

    Parameters
    ----------
    name_w      : string
                  Name passed in by user. Default is None.
    w           : W object
                  Weights object passed in by user, or None.

    Returns
    -------
    name_w      : string or None
                  None when no weights object was given; otherwise the
                  user's name, or the generic 'unknown' when name_w is
                  None.

    """
    # Identity checks: equality against None would be dispatched to the
    # object's own comparison, which for array-like objects is elementwise
    # and cannot be used as a condition.
    if w is None:
        return None
    return 'unknown' if name_w is None else name_w
243
244
def set_name_multi(multireg, multi_set, name_multiID, y, x, name_y, name_x, name_ds, title, name_w, robust, endog=False, sp_lag=False):
    """Attach titles and (generic or user provided) variable names to each
    regression object in a collection, prefixing names with the key of the
    regression they belong to.

    Parameters
    ----------
    multireg    : mapping
                  Regression objects indexed by the keys in multi_set;
                  attributes are set on each object in place.
    multi_set   : iterable
                  Keys of the regressions to label.
    name_multiID : string
                  Name of the variable identifying the regressions;
                  'unknown' is used when empty.
    y           : array
                  Not used by this function.
    x           : array
                  Exogenous variables; used only to generate generic names
                  when name_x is empty.
    name_y      : string
                  Dependent variable name.
    name_x      : list of strings
                  Exogenous variable names.
    name_ds     : string
                  Dataset name.
    title       : string
                  Title prefix; the regression key is appended to it.
    name_w      : string
                  Weights name, stored as given.
    robust      : string or None
                  Robust option; stored through set_robust.
    endog       : tuple
                  If the regression object contains endogenous variables, endog must have the
                  following parameters in the following order: (yend, q, name_yend, name_q)
    sp_lag       : tuple
                  If the regression object contains spatial lag, sp_lag must have the
                  following parameters in the following order: (w_lags, lag_q)

    Returns
    -------
    multireg    : mapping
                  The same object that was passed in.

    """
    name_ds = set_name_ds(name_ds)
    name_y = set_name_y(name_y)
    name_x = set_name_x(name_x, x)
    name_multiID = set_name_ds(name_multiID)
    robust_label = set_robust(robust)  # same for every regression
    has_instruments = bool(endog or sp_lag)
    if has_instruments:
        if endog:
            name_yend = set_name_yend(endog[2], endog[0])
            name_q = set_name_q(endog[3], endog[1])
        else:
            # Spatial lag requested without extra endogenous variables:
            # start from empty lists instead of indexing into `False`.
            name_yend, name_q = [], []
    for r in multi_set:
        reg = multireg[r]
        reg.title = title + "%s" % r
        reg.name_ds = name_ds
        reg.robust = robust_label
        reg.name_w = name_w
        reg.name_y = '%s_%s' % (str(r), name_y)
        reg.name_x = ['%s_%s' % (str(r), i) for i in name_x]
        reg.name_multiID = name_multiID
        if has_instruments:
            reg.name_yend = ['%s_%s' % (str(r), i) for i in name_yend]
            reg.name_q = ['%s_%s' % (str(r), i) for i in name_q]
            if sp_lag:
                reg.name_yend.append(set_name_yend_sp(reg.name_y))
                # The spatial instrument names are built from name_q as it
                # stands before this extension.
                reg.name_q.extend(
                    set_name_q_sp(reg.name_x, sp_lag[0], reg.name_q, sp_lag[1]))
            reg.name_z = reg.name_x + reg.name_yend
            reg.name_h = reg.name_x + reg.name_q
    return multireg
284
285
def check_arrays(*arrays):
    """Check if the objects passed by a user to a regression class are
    correctly structured. If the user's data is correctly formed this function
    returns the number of observations, if not then an exception is raised.
    Note, this does not check for model setup, simply the shape and types of
    the objects.

    Parameters
    ----------
    *arrays : anything
              Objects passed by the user to a regression class; any type
              object can be passed and any number of objects can be passed.
              Entries that are None are skipped.

    Returns
    -------
    Returns : int
              number of observations (rows shared by all arrays)

    Raises
    ------
    Exception
        If an object is neither a numpy array nor a csr matrix, has zero or
        more than two dimensions, has more columns than rows, contains
        non-finite values, if the arrays differ in number of rows, or if no
        array at all was provided.

    Examples
    --------

    >>> import numpy as np
    >>> import libpysal
    >>> from spreg import check_arrays
    >>> db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r')
    >>> # Extract CRIME column from the dbf file
    >>> y = np.array(db.by_col("CRIME"))
    >>> y = np.reshape(y, (49,1))
    >>> X = []
    >>> X.append(db.by_col("INC"))
    >>> X.append(db.by_col("HOVAL"))
    >>> X = np.array(X).T
    >>> n = check_arrays(y, X)
    >>> print(n)
    49

    """
    rows = []
    for arr in arrays:
        if arr is None:
            continue
        if not isinstance(arr, (np.ndarray, csr_matrix)):
            raise Exception("all input data must be either numpy arrays or sparse csr matrices")
        shape = arr.shape
        # 0-d arrays have no rows to count; reject them explicitly rather
        # than failing on shape[0] below.
        if len(shape) > 2 or len(shape) == 0:
            raise Exception("all input arrays must have two dimensions")
        if len(shape) == 1:
            # A flat vector is treated as a single column.
            shape = (shape[0], 1)
        if shape[0] < shape[1]:
            raise Exception("one or more input arrays have more columns than rows")
        if not spu.spisfinite(arr):
            raise Exception("one or more input arrays have missing/NaN values")
        rows.append(shape[0])
    if not rows:
        # Nothing to validate: report it instead of an IndexError on rows[0].
        raise Exception("no input arrays were provided")
    if len(set(rows)) > 1:
        raise Exception("arrays not all of same length")
    return rows[0]
341
342
def check_y(y, n):
    """Check if the y object passed by a user to a regression class is
    correctly structured. If the user's data is correctly formed this function
    returns y as an (n, 1) array, if not then an exception is raised. Note,
    this does not check for model setup, simply the shape and types of the
    objects.

    Parameters
    ----------
    y       : anything
              Object passed by the user to a regression class; any type
              object can be passed

    n       : int
              number of observations

    Returns
    -------
    y       : array
              The array passed by the user; a one-dimensional array is
              returned reshaped to (n, 1).

    Raises
    ------
    Exception
        If y is not a numpy array, has more than two dimensions, or cannot
        be expressed as an (n, 1) column.

    Examples
    --------

    >>> import numpy as np
    >>> import libpysal
    >>> from spreg import check_y
    >>> db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r')

    # Extract CRIME column from the dbf file

    >>> y = np.array(db.by_col("CRIME"))
    >>> y = np.reshape(y, (49,1))
    >>> y = check_y(y, 49)

    # should not raise an exception

    """
    if not isinstance(y, np.ndarray):
        raise Exception("y must be a numpy array")
    if len(y.shape) > 2:
        raise Exception("all input arrays must have two dimensions")
    if len(y.shape) == 1:
        try:
            y = y.reshape(n, 1)
        except (ValueError, TypeError) as err:
            # ValueError: length differs from n; TypeError: n is not usable
            # as a dimension. Other exceptions are left to propagate.
            raise Exception("y must be a single column array matching the length of other arrays") from err
    if y.shape != (n, 1):
        raise Exception("y must be a single column array matching the length of other arrays")
    return y
394
def check_weights(w, y, w_required=False, time=False):
    """Check if the w parameter passed by the user is a libpysal.W object and
    check that its dimensionality matches the y parameter.  Note that this
    check is not performed if w set to None.

    Parameters
    ----------
    w       : any python object
              Object passed by the user to a regression class; any type
              object can be passed
    y       : numpy array
              Any shape numpy array can be passed. Note: if y passed
              check_arrays, then it will be valid for this function
    w_required : boolean
                 True if a W matrix is required, False (default) if not.
    time    : boolean
              True if data contains a time dimension, in which case the
              row-count comparison between w and y is skipped.
              False (default) if not.

    Returns
    -------
    Returns : nothing
              Nothing is returned

    Raises
    ------
    Exception
        If w is required but missing, if w.n differs from the number of
        rows of y (and time is false), or if any diagonal entry of w is
        non-zero.

    Notes
    -----
    An object that is not a libpysal W only triggers a warning; it is still
    checked through its ``n`` and ``sparse`` attributes.

    Examples
    --------
    >>> import numpy as np
    >>> import libpysal
    >>> from spreg import check_weights
    >>> db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r')
    >>> # Extract CRIME column from the dbf file
    >>> y = np.array(db.by_col("CRIME"))
    >>> y = np.reshape(y, (49,1))
    >>> X = []
    >>> X.append(db.by_col("INC"))
    >>> X.append(db.by_col("HOVAL"))
    >>> X = np.array(X).T
    >>> w = libpysal.io.open(libpysal.examples.get_path("columbus.gal"), 'r').read()
    >>> check_weights(w, y)

    # should not raise an exception

    """
    # Identity checks against None: equality would be dispatched to the
    # object's own comparison operators.
    if w is None:
        if w_required:
            raise Exception("A weights matrix w must be provided to run this method.")
        return
    if not isinstance(w, weights.W):
        from warnings import warn
        warn("w must be API-compatible pysal weights object")
    if w.n != y.shape[0] and not time:
        raise Exception("y must have n rows, and w must be an nxn PySAL W object")
    diag = w.sparse.diagonal()
    # Smallest and largest entries both zero <=> every diagonal entry is zero.
    if diag.min() != 0 or diag.max() != 0:
        raise Exception("All entries on diagonal must equal 0.")
452
453
def check_robust(robust, wk):
    """Check if the combination of robust and wk parameters passed by the user
    are valid. Note: this does not check if the W object is a valid adaptive
    kernel weights matrix needed for the HAC.

    Parameters
    ----------
    robust  : string or None
              Object passed by the user to a regression class. Compared
              case-insensitively against 'hac', 'white' and 'ogmm';
              nothing is checked when it is None or empty.
    wk      : any python object
              Object passed by the user to a regression class; any type
              object can be passed

    Returns
    -------
    Returns : nothing
              Nothing is returned

    Raises
    ------
    Exception
        If robust is not one of the accepted options, if 'hac' is requested
        without a Kernel weights object or with kernel weights whose
        diagonal is not all 1 or whose entries fall outside [0, 1], or if
        'white'/'ogmm' is requested together with a wk object.

    Examples
    --------
    >>> import numpy as np
    >>> import libpysal
    >>> from spreg import check_robust
    >>> db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r')
    >>> # Extract CRIME column from the dbf file
    >>> y = np.array(db.by_col("CRIME"))
    >>> y = np.reshape(y, (49,1))
    >>> X = []
    >>> X.append(db.by_col("INC"))
    >>> X.append(db.by_col("HOVAL"))
    >>> X = np.array(X).T
    >>> wk = None
    >>> check_robust('White', wk)

    # should not raise an exception

    """
    if not robust:
        return
    option = robust.lower()
    if option == 'hac':
        if not isinstance(wk, weights.Kernel):
            raise Exception("HAC requires that wk be a Kernel Weights object")
        diag = wk.sparse.diagonal()
        # Extremes both equal to 1 <=> every diagonal entry equals 1.
        if diag.min() < 1.0 or diag.max() > 1.0:
            raise Exception("All entries on diagonal of kernel weights matrix must equal 1.")
        # ensure the stored weights of every observation lie in [0, 1]
        wegt = wk.weights
        for i in wk.id_order:
            vals = wegt[i]
            vmin = min(vals)
            vmax = max(vals)
            if vmin < 0.0:
                raise Exception("Off-diagonal entries must be greater than or equal to 0.")
            if vmax > 1.0:
                # NOTE: we are not checking for the case of exactly 1.0 ###
                raise Exception("Off-diagonal entries must be less than 1.")
    elif option in ('white', 'ogmm'):
        if wk:
            raise Exception("White requires that wk be set to None")
    else:
        raise Exception("invalid value passed to robust, see docs for valid options")
519
520
def check_spat_diag(spat_diag, w):
    """Make sure a weights object is available whenever the user requests
    spatial diagnostics.

    Parameters
    ----------
    spat_diag   : boolean
                  Value passed by a user to a regression class
    w           : any python object
                  Object passed by the user to a regression class; any type
                  object can be passed

    Returns
    -------
    Returns : nothing
              Nothing is returned

    Raises
    ------
    Exception
        If spatial diagnostics are requested and w is not a libpysal W
        object.

    Examples
    --------
    >>> import numpy as np
    >>> import libpysal
    >>> from spreg import check_spat_diag
    >>> db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r')
    >>> # Extract CRIME column from the dbf file
    >>> y = np.array(db.by_col("CRIME"))
    >>> y = np.reshape(y, (49,1))
    >>> X = []
    >>> X.append(db.by_col("INC"))
    >>> X.append(db.by_col("HOVAL"))
    >>> X = np.array(X).T
    >>> w = libpysal.io.open(libpysal.examples.get_path("columbus.gal"), 'r').read()
    >>> check_spat_diag(True, w)

    # should not raise an exception

    """
    # w is only inspected when diagnostics were actually requested.
    if spat_diag and not isinstance(w, weights.W):
        raise Exception("w must be a libpysal.W object to run spatial diagnostics")
560
561
def check_regimes(reg_set, N=None, K=None):
    """Check if there are at least two regimes and, when the sample size
    and number of variables are given, enough observations per regime.

    Parameters
    ----------
    reg_set     : list
                  List of the regimes IDs
    N           : int
                  Number of observations. Default is None.
    K           : int
                  Number of variables. Default is None.

    Returns
    -------
    Returns : nothing
              Nothing is returned

    Raises
    ------
    Exception
        If fewer than two regimes are given, or if the average number of
        observations per regime (N / number of regimes) is below K + 1.

    """
    n_regimes = len(reg_set)
    if n_regimes < 2:
        raise Exception("At least 2 regimes are needed to run regimes methods. Please check your regimes variable.")
    if N is None or K is None:
        # The defaults carry no size information; skip the observation
        # check instead of doing arithmetic on None.
        return
    if 1.0 * N / n_regimes < K + 1:
        raise Exception("There aren't enough observations for the given number of regimes and variables. Please check your regimes variable.")
580
581
def check_constant(x, name_x=None, just_rem=False):
    """Drop every constant column from the X matrix and, unless told
    otherwise, put a single column of ones in front of what remains.

    Parameters
    ----------
    x           : array
                  Value passed by a user to a regression class; a numpy
                  array, or an object handled through its ``max``/``min``
                  column reductions (the sparse branch).
    name_x      : list of strings
                  Names of independent variables
    just_rem    : boolean
                  If False (default), remove all constants and add a vector of ones
                  If True, just remove all constants

    Returns
    -------
    x_constant : array
                 Matrix with independent variables plus constant (no
                 constant is added when just_rem is True)
    name_x     : list of strings
                 Copy of the names of independent variables, without the
                 names of any dropped variable
    warn       : string or None
                 Message describing the variables removed, or None when
                 nothing was removed

    Examples
    --------
    >>> import numpy as np
    >>> import libpysal
    >>> from spreg import check_constant
    >>> db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r')
    >>> X = []
    >>> X.append(db.by_col("INC"))
    >>> X.append(db.by_col("HOVAL"))
    >>> X = np.array(X).T
    >>> x_constant,name_x,warn = check_constant(X)
    >>> x_constant.shape
    (49, 3)

    """
    # Shallow copies keep the caller's matrix and name list untouched.
    x_constant = COPY.copy(x)
    keep_x = COPY.copy(name_x)
    warn = None

    # Column-wise range (max - min); a zero range marks a constant column.
    dense = isinstance(x_constant, np.ndarray)
    if dense:
        diffs = np.ptp(x_constant, axis=0)
    else:
        col_max = x_constant.max(axis=0).toarray()
        col_min = x_constant.min(axis=0).toarray()
        diffs = (col_max - col_min)[0]

    is_const = diffs == 0
    n_const = sum(is_const)

    if n_const > 0:
        const_idx = np.nonzero(is_const)[0]
        varying_idx = np.nonzero(diffs > 0)[0]
        if dense:
            x_constant = np.delete(x_constant, const_idx, 1)
        else:
            x_constant = x_constant[:, varying_idx]

        if keep_x:
            rem_x = [keep_x[i] for i in const_idx]
            warn = 'Variable(s) ' + str(rem_x) + ' removed for being constant.'
            keep_x[:] = [keep_x[i] for i in varying_idx]
        elif n_const == 1:
            warn = 'One variable has been removed for being constant.'
        else:
            warn = str(n_const) + ' variables have been removed for being constant.'

    if just_rem:
        return x_constant, keep_x, warn
    ones = np.ones((x_constant.shape[0], 1))
    return spu.sphstack(ones, x_constant), keep_x, warn
641
def _test():
    """Run the doctest examples embedded in this module's docstrings."""
    from doctest import testmod
    testmod()


if __name__ == "__main__":
    # Executed as a script: run the embedded doctests.
    _test()
648