1""" 2Spatial lag operations. 3""" 4__author__ = "Sergio J. Rey <srey@asu.edu>, David C. Folch <david.folch@asu.edu>, Levi John Wolf <ljw2@asu.edu" 5__all__ = ["lag_spatial", "lag_categorical"] 6 7import numpy as np 8 9 10def lag_spatial(w, y): 11 """ 12 Spatial lag operator. 13 14 If w is row standardized, returns the average of each observation's neighbors; 15 if not, returns the weighted sum of each observation's neighbors. 16 17 Parameters 18 ---------- 19 20 w : W 21 libpysal spatial weightsobject 22 y : array 23 numpy array with dimensionality conforming to w (see examples) 24 25 Returns 26 ------- 27 28 wy : array 29 array of numeric values for the spatial lag 30 31 Examples 32 -------- 33 34 Setup a 9x9 binary spatial weights matrix and vector of data; compute the 35 spatial lag of the vector. 36 37 >>> import libpysal 38 >>> import numpy as np 39 >>> w = libpysal.weights.lat2W(3, 3) 40 >>> y = np.arange(9) 41 >>> yl = libpysal.weights.lag_spatial(w, y) 42 >>> yl 43 array([ 4., 6., 6., 10., 16., 14., 10., 18., 12.]) 44 45 Row standardize the weights matrix and recompute the spatial lag 46 47 >>> w.transform = 'r' 48 >>> yl = libpysal.weights.lag_spatial(w, y) 49 >>> yl 50 array([2. , 2. , 3. , 3.33333333, 4. , 51 4.66666667, 5. , 6. , 6. ]) 52 53 54 Explicitly define data vector as 9x1 and recompute the spatial lag 55 56 >>> y.shape = (9, 1) 57 >>> yl = libpysal.weights.lag_spatial(w, y) 58 >>> yl 59 array([[2. ], 60 [2. ], 61 [3. ], 62 [3.33333333], 63 [4. ], 64 [4.66666667], 65 [5. ], 66 [6. ], 67 [6. ]]) 68 69 70 Take the spatial lag of a 9x2 data matrix 71 72 >>> yr = np.arange(8, -1, -1) 73 >>> yr.shape = (9, 1) 74 >>> x = np.hstack((y, yr)) 75 >>> yl = libpysal.weights.lag_spatial(w, x) 76 >>> yl 77 array([[2. , 6. ], 78 [2. , 6. ], 79 [3. , 5. ], 80 [3.33333333, 4.66666667], 81 [4. , 4. ], 82 [4.66666667, 3.33333333], 83 [5. , 3. ], 84 [6. , 2. ], 85 [6. , 2. ]]) 86 87 """ 88 return w.sparse * y 89 90 91def lag_categorical(w, y, ties="tryself"): 92 """ 93 Spatial lag operator for categorical variables. 94 95 Constructs the most common categories of neighboring observations, weighted 96 by their weight strength. 97 98 Parameters 99 ---------- 100 101 w : W 102 PySAL spatial weightsobject 103 y : iterable 104 iterable collection of categories (either int or 105 string) with dimensionality conforming to w (see examples) 106 ties : str 107 string describing the method to use when resolving 108 ties. By default, the option is "tryself", 109 and the category of the focal observation 110 is included with its neighbors to try 111 and break a tie. If this does not resolve the tie, 112 a winner is chosen randomly. To just use random choice to 113 break ties, pass "random" instead. 114 Returns 115 ------- 116 an (n x k) column vector containing the most common neighboring observation 117 118 Notes 119 ----- 120 This works on any array where the number of unique elements along the column 121 axis is less than the number of elements in the array, for any dtype. 122 That means the routine should work on any dtype that np.unique() can 123 compare. 124 125 Examples 126 -------- 127 128 Set up a 9x9 weights matrix describing a 3x3 regular lattice. Lag one list of 129 categorical variables with no ties. 130 131 >>> import libpysal 132 >>> import numpy as np 133 >>> np.random.seed(12345) 134 >>> w = libpysal.weights.lat2W(3, 3) 135 >>> y = ['a','b','a','b','c','b','c','b','c'] 136 >>> y_l = libpysal.weights.lag_categorical(w, y) 137 >>> np.array_equal(y_l, np.array(['b', 'a', 'b', 'c', 'b', 'c', 'b', 'c', 'b'])) 138 True 139 140 Explicitly reshape y into a (9x1) array and calculate lag again 141 142 >>> yvect = np.array(y).reshape(9,1) 143 >>> yvect_l = libpysal.weights.lag_categorical(w,yvect) 144 >>> check = np.array( [ [i] for i in ['b', 'a', 'b', 'c', 'b', 'c', 'b', 'c', 'b']] ) 145 >>> np.array_equal(yvect_l, check) 146 True 147 148 compute the lag of a 9x2 matrix of categories 149 150 >>> y2 = ['a', 'c', 'c', 'd', 'b', 'a', 'd', 'd', 'c'] 151 >>> ym = np.vstack((y,y2)).T 152 >>> ym_lag = libpysal.weights.lag_categorical(w,ym) 153 >>> check = np.array([['b', 'd'], ['a', 'c'], ['b', 'c'], ['c', 'd'], ['b', 'd'], ['c', 'c'], ['b', 'd'], ['c', 'd'], ['b', 'c']]) 154 >>> np.array_equal(check, ym_lag) 155 True 156 157 """ 158 if isinstance(y, list): 159 y = np.array(y) 160 orig_shape = y.shape 161 if len(orig_shape) > 1: 162 if orig_shape[1] > 1: 163 return np.vstack([lag_categorical(w, col) for col in y.T]).T 164 y = y.flatten() 165 output = np.zeros_like(y) 166 labels = np.unique(y) 167 normalized_labels = np.zeros(y.shape, dtype=np.int) 168 for i, label in enumerate(labels): 169 normalized_labels[y == label] = i 170 for focal_name, neighbors in w: 171 focal_idx = w.id2i[focal_name] 172 neighborhood_tally = np.zeros(labels.shape) 173 for neighb_name, weight in list(neighbors.items()): 174 neighb_idx = w.id2i[neighb_name] 175 neighb_label = normalized_labels[neighb_idx] 176 neighborhood_tally[neighb_label] += weight 177 out_label_idx = _resolve_ties( 178 focal_idx, normalized_labels, neighborhood_tally, neighbors, ties, w 179 ) 180 output[focal_idx] = labels[out_label_idx] 181 return output.reshape(orig_shape) 182 183 184def _resolve_ties(idx, normalized_labels, tally, neighbors, method, w): 185 """ 186 Helper function to resolve ties if lag is multimodal 187 188 first, if this function gets called when there's actually no tie, then the 189 correct value will be picked. 190 191 if 'random' is selected as the method, a random tiebeaker is picked 192 193 if 'tryself' is selected, then the observation's own value will be used in 194 an attempt to break the tie, but if it fails, a random tiebreaker will be 195 selected. 196 197 Arguments 198 --------- 199 idx : int 200 index (aligned with `normalized_labels`) of the 201 current observation being resolved. 202 normalized_labels : (n,) array of ints 203 normalized array of labels for each observation 204 tally : (p,) array of floats 205 current tally of neighbors' labels around `idx` to resolve. 206 neighbors : dict of (neighbor_name : weight) 207 the elements of the weights object, identical to w[idx] 208 method : string 209 configuration option to use a specific tiebreaking method. 210 supported options are: 211 1. tryself: Use the focal observation's label to tiebreak. 212 If this doesn't successfully break the tie, 213 (which only occurs if it induces a new tie), 214 decide randomly. 215 2. random: Resolve the tie randomly amongst winners. 216 3. lowest: Pick the lowest-value label amongst winners. 217 4. highest: Pick the highest-value label amongst winners. 218 w : pysal.W object 219 a PySAL weights object aligned with normalized_labels. 220 221 Returns 222 ------- 223 integer denoting which label to use to label the observation. 224 """ 225 (ties,) = np.where(tally == tally.max()) # returns a tuple for flat arrays 226 if len(tally[tally == tally.max()]) <= 1: # no tie, pick the highest 227 return np.argmax(tally).astype(int) 228 elif method.lower() == "random": # choose randomly from tally 229 return np.random.choice(np.squeeze(ties)).astype(int) 230 elif method.lower() == "lowest": # pick lowest tied value 231 return ties[0].astype(int) 232 elif method.lower() == "highest": # pick highest tied value 233 return ties[-1].astype(int) 234 elif ( 235 method.lower() == "tryself" 236 ): # add self-label as observation, try again, random if fail 237 mean_neighbor_value = np.mean(list(neighbors.values())) 238 tally[normalized_labels[idx]] += mean_neighbor_value 239 return _resolve_ties(idx, normalized_labels, tally, neighbors, "random", w) 240 else: 241 raise KeyError("Tie-breaking method for categorical lag not recognized") 242