1"""
2Spatial lag operations.
3"""
4__author__ = "Sergio J. Rey <srey@asu.edu>, David C. Folch <david.folch@asu.edu>, Levi John Wolf <ljw2@asu.edu"
5__all__ = ["lag_spatial", "lag_categorical"]
6
7import numpy as np
8
9
10def lag_spatial(w, y):
11    """
12    Spatial lag operator.
13
14    If w is row standardized, returns the average of each observation's neighbors;
15    if not, returns the weighted sum of each observation's neighbors.
16
17    Parameters
18    ----------
19
20    w                   : W
21                          libpysal spatial weightsobject
22    y                   : array
23                          numpy array with dimensionality conforming to w (see examples)
24
25    Returns
26    -------
27
28    wy                  : array
29                          array of numeric values for the spatial lag
30
31    Examples
32    --------
33
34    Setup a 9x9 binary spatial weights matrix and vector of data; compute the
35    spatial lag of the vector.
36
37    >>> import libpysal
38    >>> import numpy as np
39    >>> w = libpysal.weights.lat2W(3, 3)
40    >>> y = np.arange(9)
41    >>> yl = libpysal.weights.lag_spatial(w, y)
42    >>> yl
43    array([ 4.,  6.,  6., 10., 16., 14., 10., 18., 12.])
44
45    Row standardize the weights matrix and recompute the spatial lag
46
47    >>> w.transform = 'r'
48    >>> yl = libpysal.weights.lag_spatial(w, y)
49    >>> yl
50    array([2.        , 2.        , 3.        , 3.33333333, 4.        ,
51           4.66666667, 5.        , 6.        , 6.        ])
52
53
54    Explicitly define data vector as 9x1 and recompute the spatial lag
55
56    >>> y.shape = (9, 1)
57    >>> yl = libpysal.weights.lag_spatial(w, y)
58    >>> yl
59    array([[2.        ],
60           [2.        ],
61           [3.        ],
62           [3.33333333],
63           [4.        ],
64           [4.66666667],
65           [5.        ],
66           [6.        ],
67           [6.        ]])
68
69
70    Take the spatial lag of a 9x2 data matrix
71
72    >>> yr = np.arange(8, -1, -1)
73    >>> yr.shape = (9, 1)
74    >>> x = np.hstack((y, yr))
75    >>> yl = libpysal.weights.lag_spatial(w, x)
76    >>> yl
77    array([[2.        , 6.        ],
78           [2.        , 6.        ],
79           [3.        , 5.        ],
80           [3.33333333, 4.66666667],
81           [4.        , 4.        ],
82           [4.66666667, 3.33333333],
83           [5.        , 3.        ],
84           [6.        , 2.        ],
85           [6.        , 2.        ]])
86
87    """
88    return w.sparse * y
89
90
91def lag_categorical(w, y, ties="tryself"):
92    """
93    Spatial lag operator for categorical variables.
94
95    Constructs the most common categories of neighboring observations, weighted
96    by their weight strength.
97
98    Parameters
99    ----------
100
101    w                   : W
102                          PySAL spatial weightsobject
103    y                   : iterable
104                          iterable collection of categories (either int or
105                          string) with dimensionality conforming to w (see examples)
106    ties                : str
107                          string describing the method to use when resolving
108                          ties. By default, the option is "tryself",
109                          and the category of the focal observation
110                          is included with its neighbors to try
111                          and break a tie. If this does not resolve the tie,
112                          a winner is chosen randomly. To just use random choice to
113                          break ties, pass "random" instead.
114    Returns
115    -------
116    an (n x k) column vector containing the most common neighboring observation
117
118    Notes
119    -----
120    This works on any array where the number of unique elements along the column
121    axis is less than the number of elements in the array, for any dtype.
122    That means the routine should work on any dtype that np.unique() can
123    compare.
124
125    Examples
126    --------
127
128    Set up a 9x9 weights matrix describing a 3x3 regular lattice. Lag one list of
129    categorical variables with no ties.
130
131    >>> import libpysal
132    >>> import numpy as np
133    >>> np.random.seed(12345)
134    >>> w = libpysal.weights.lat2W(3, 3)
135    >>> y = ['a','b','a','b','c','b','c','b','c']
136    >>> y_l = libpysal.weights.lag_categorical(w, y)
137    >>> np.array_equal(y_l, np.array(['b', 'a', 'b', 'c', 'b', 'c', 'b', 'c', 'b']))
138    True
139
140    Explicitly reshape y into a (9x1) array and calculate lag again
141
142    >>> yvect = np.array(y).reshape(9,1)
143    >>> yvect_l = libpysal.weights.lag_categorical(w,yvect)
144    >>> check = np.array( [ [i] for i in  ['b', 'a', 'b', 'c', 'b', 'c', 'b', 'c', 'b']] )
145    >>> np.array_equal(yvect_l, check)
146    True
147
148    compute the lag of a 9x2 matrix of categories
149
150    >>> y2 = ['a', 'c', 'c', 'd', 'b', 'a', 'd', 'd', 'c']
151    >>> ym = np.vstack((y,y2)).T
152    >>> ym_lag = libpysal.weights.lag_categorical(w,ym)
153    >>> check = np.array([['b', 'd'], ['a', 'c'], ['b', 'c'], ['c', 'd'], ['b', 'd'], ['c', 'c'], ['b', 'd'], ['c', 'd'], ['b', 'c']])
154    >>> np.array_equal(check, ym_lag)
155    True
156
157    """
158    if isinstance(y, list):
159        y = np.array(y)
160    orig_shape = y.shape
161    if len(orig_shape) > 1:
162        if orig_shape[1] > 1:
163            return np.vstack([lag_categorical(w, col) for col in y.T]).T
164    y = y.flatten()
165    output = np.zeros_like(y)
166    labels = np.unique(y)
167    normalized_labels = np.zeros(y.shape, dtype=np.int)
168    for i, label in enumerate(labels):
169        normalized_labels[y == label] = i
170    for focal_name, neighbors in w:
171        focal_idx = w.id2i[focal_name]
172        neighborhood_tally = np.zeros(labels.shape)
173        for neighb_name, weight in list(neighbors.items()):
174            neighb_idx = w.id2i[neighb_name]
175            neighb_label = normalized_labels[neighb_idx]
176            neighborhood_tally[neighb_label] += weight
177        out_label_idx = _resolve_ties(
178            focal_idx, normalized_labels, neighborhood_tally, neighbors, ties, w
179        )
180        output[focal_idx] = labels[out_label_idx]
181    return output.reshape(orig_shape)
182
183
184def _resolve_ties(idx, normalized_labels, tally, neighbors, method, w):
185    """
186    Helper function to resolve ties if lag is multimodal
187
188    first, if this function gets called when there's actually no tie, then the
189    correct value will be picked.
190
191    if 'random' is selected as the method, a random tiebeaker is picked
192
193    if 'tryself' is selected, then the observation's own value will be used in
194    an attempt to break the tie, but if it fails, a random tiebreaker will be
195    selected.
196
197    Arguments
198    ---------
199    idx                 : int
200                          index (aligned with `normalized_labels`) of the
201                          current observation being resolved.
202    normalized_labels   : (n,) array of ints
203                          normalized array of labels for each observation
204    tally               : (p,) array of floats
205                          current tally of neighbors' labels around `idx` to resolve.
206    neighbors           : dict of (neighbor_name : weight)
207                          the elements of the weights object, identical to w[idx]
208    method              : string
209                          configuration option to use a specific tiebreaking method.
210                          supported options are:
211                          1. tryself: Use the focal observation's label to tiebreak.
212                                      If this doesn't successfully break the tie,
213                                      (which only occurs if it induces a new tie),
214                                      decide randomly.
215                          2. random: Resolve the tie randomly amongst winners.
216                          3. lowest: Pick the lowest-value label amongst winners.
217                          4. highest: Pick the highest-value label amongst winners.
218    w                   : pysal.W object
219                          a PySAL weights object aligned with normalized_labels.
220
221    Returns
222    -------
223    integer denoting which label to use to label the observation.
224    """
225    (ties,) = np.where(tally == tally.max())  # returns a tuple for flat arrays
226    if len(tally[tally == tally.max()]) <= 1:  # no tie, pick the highest
227        return np.argmax(tally).astype(int)
228    elif method.lower() == "random":  # choose randomly from tally
229        return np.random.choice(np.squeeze(ties)).astype(int)
230    elif method.lower() == "lowest":  # pick lowest tied value
231        return ties[0].astype(int)
232    elif method.lower() == "highest":  # pick highest tied value
233        return ties[-1].astype(int)
234    elif (
235        method.lower() == "tryself"
236    ):  # add self-label as observation, try again, random if fail
237        mean_neighbor_value = np.mean(list(neighbors.values()))
238        tally[normalized_labels[idx]] += mean_neighbor_value
239        return _resolve_ties(idx, normalized_labels, tally, neighbors, "random", w)
240    else:
241        raise KeyError("Tie-breaking method for categorical lag not recognized")
242