"""Internal helper files for user output."""

__author__ = ("Luc Anselin luc.anselin@asu.edu, "
              "David C. Folch david.folch@asu.edu, "
              "Levi John Wolf levi.john.wolf@gmail.com, "
              "Jing Yao jingyao@asu.edu")
import numpy as np
import copy as COPY
from . import diagnostics
from . import sputils as spu
from libpysal import weights
# ``scipy.sparse.csr`` is a deprecated private namespace; ``csr_matrix`` is
# publicly exported from ``scipy.sparse``.
from scipy.sparse import csr_matrix


def set_name_ds(name_ds):
    """Set the dataset name in regression; return generic name if user
    provides no explicit name.

    Parameters
    ----------
    name_ds     : string
                  User provided dataset name.

    Returns
    -------
    name_ds     : string
                  The name passed in, or 'unknown' if it was empty/None.

    """
    if not name_ds:
        name_ds = 'unknown'
    return name_ds


def set_name_y(name_y):
    """Set the dependent variable name in regression; return generic name
    if user provides no explicit name.

    Parameters
    ----------
    name_y      : string
                  User provided dependent variable name.

    Returns
    -------
    name_y      : string
                  The name passed in, or 'dep_var' if it was empty/None.

    """
    if not name_y:
        name_y = 'dep_var'
    return name_y


def set_name_x(name_x, x, constant=False):
    """Set the independent variable names in regression; return generic
    names if user provides no explicit names.

    Parameters
    ----------
    name_x      : list of string
                  User provided exogenous variable names.
    x           : array
                  User provided exogenous variables including the constant.
    constant    : boolean
                  If False (default), the constant name is not included in
                  the name_x list yet, so 'CONSTANT' is inserted at the
                  front of the names (and one fewer generic name than
                  columns of x is generated). If True, one generic name
                  per column of x is generated and nothing is prepended.

    Returns
    -------
    name_x      : list of strings
                  A new list; the list passed by the user is not modified.

    """
    if not name_x:
        name_x = ['var_' + str(i + 1)
                  for i in range(x.shape[1] - 1 + int(constant))]
    else:
        # Work on a copy so the insert below never mutates the caller's list.
        name_x = name_x[:]
    if not constant:
        name_x.insert(0, 'CONSTANT')
    return name_x


def set_name_yend(name_yend, yend):
    """Set the endogenous variable names in regression; return generic
    names if user provides no explicit names.

    Parameters
    ----------
    name_yend   : list of strings
                  User provided endogenous variable names.
    yend        : array or None
                  Endogenous variables; the length of its first row sets
                  how many generic names are generated.

    Returns
    -------
    name_yend   : list of strings
                  Empty list if yend is None.

    """
    if yend is None:
        return []
    if not name_yend:
        return ['endogenous_' + str(i + 1) for i in range(len(yend[0]))]
    return name_yend[:]


def set_name_q(name_q, q):
    """Set the external instrument names in regression; return generic
    names if user provides no explicit names.

    Parameters
    ----------
    name_q      : list of strings
                  User provided instrument names.
    q           : array or None
                  Array of instruments; the length of its first row sets
                  how many generic names are generated.

    Returns
    -------
    name_q      : list of strings
                  Empty list if q is None.

    """
    if q is None:
        return []
    if not name_q:
        return ['instrument_' + str(i + 1) for i in range(len(q[0]))]
    return name_q[:]


def set_name_yend_sp(name_y):
    """Set the spatial lag name in regression.

    Parameters
    ----------
    name_y          : string
                      Dependent variable name.

    Returns
    -------
    name_yend_sp    : string
                      name_y prefixed with 'W_'.

    """
    return 'W_' + name_y


def set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=False):
    """Set the spatial instrument names in regression.

    Parameters
    ----------
    name_x      : list of strings
                  Exogenous variable names; the first entry is treated as
                  the constant and dropped unless force_all is True.
    w_lags      : int
                  Number of spatial lags of the instruments.
    name_q      : list of strings
                  Instrument names; only used if lag_q is True.
    lag_q       : boolean
                  If True, the names in name_q are lagged as well.
    force_all   : boolean
                  If True, every entry of name_x is used (nothing is
                  dropped). Default is False.

    Returns
    -------
    name_q_sp   : list of strings
                  'W_<name>' for the first lag, then 'W<i>_<name>' for
                  each lag order i from 2 to w_lags.

    """
    if force_all:
        names = name_x
    else:
        names = name_x[1:]  # drop the constant
    if lag_q:
        names = names + name_q
    sp_inst_names = ['W_' + j for j in names]
    # range() is empty when w_lags <= 1, so no explicit guard is needed.
    for i in range(2, w_lags + 1):
        sp_inst_names.extend('W' + str(i) + '_' + j for j in names)
    return sp_inst_names


def set_name_h(name_x, name_q):
    """Set the full instruments names in regression.

    Parameters
    ----------
    name_x      : list of strings
                  Exogenous variable names.
    name_q      : list of strings
                  Instrument variable names.

    Returns
    -------
    name_h      : list of strings
                  name_x followed by name_q.

    """
    return name_x + name_q


def set_robust(robust):
    """Return generic name if user passes None to the robust parameter in a
    regression. Note: already verified that the name is valid in
    check_robust() if the user passed anything besides None to robust.

    Parameters
    ----------
    robust      : string or None
                  Object passed by the user to a regression class

    Returns
    -------
    robust      : string
                  The value passed in, or 'unadjusted' if it was
                  empty/None.

    """
    if not robust:
        return 'unadjusted'
    return robust


def set_name_w(name_w, w):
    """Return the name of the weights matrix; return generic name if a
    weights object is given but the user provides no explicit name.

    Parameters
    ----------
    name_w      : string
                  Name passed in by user. Default is None.
    w           : W object
                  pysal W object passed in by user

    Returns
    -------
    name_w      : string or None
                  None if w is None; otherwise name_w, or 'unknown' if
                  name_w is None.

    """
    # Identity tests: ``!= None`` would dispatch to the object's own
    # comparison operators, which is not what is meant here.
    if w is None:
        return None
    if name_w is not None:
        return name_w
    return 'unknown'


def set_name_multi(multireg, multi_set, name_multiID, y, x, name_y, name_x, name_ds, title, name_w, robust, endog=False, sp_lag=False):
    """Returns multiple regression objects with generic names

    Parameters
    ----------
    multireg        : mapping
                      Regression objects indexed by the entries of
                      multi_set; the naming attributes of each object are
                      assigned in place.
    multi_set       : iterable
                      Keys of multireg to process.
    name_multiID    : string
                      Name of the variable identifying the regressions;
                      'unknown' is used if empty/None.
    y               : array
                      Not used by this function.
    x               : array
                      Exogenous variables, used to build generic variable
                      names when name_x is empty.
    name_y          : string
                      Dependent variable name.
    name_x          : list of strings
                      Exogenous variable names.
    name_ds         : string
                      Dataset name.
    title           : string
                      Title prefix; each key is appended to it.
    name_w          : string
                      Weights name, stored unchanged on every object.
    robust          : string or None
                      Passed through set_robust().
    endog           : tuple
                      If the regression object contains endogenous variables, endog must have the
                      following parameters in the following order: (yend, q, name_yend, name_q)
    sp_lag          : tuple
                      If the regression object contains spatial lag, sp_lag must have the
                      following parameters in the following order: (w_lags, lag_q)

    Returns
    -------
    multireg        : mapping
                      The same object that was passed in.

    """
    name_ds = set_name_ds(name_ds)
    name_y = set_name_y(name_y)
    name_x = set_name_x(name_x, x)
    name_multiID = set_name_ds(name_multiID)
    has_endog_names = bool(endog or sp_lag)
    if has_endog_names:
        if endog:
            name_yend = set_name_yend(endog[2], endog[0])
            name_q = set_name_q(endog[3], endog[1])
        else:
            # Spatial lag requested without any other endogenous variables:
            # start from empty lists instead of indexing into ``False``.
            name_yend, name_q = [], []
    robust_name = set_robust(robust)
    for r in multi_set:
        reg = multireg[r]
        reg.title = title + "%s" % r
        reg.name_ds = name_ds
        reg.robust = robust_name
        reg.name_w = name_w
        reg.name_y = '%s_%s' % (str(r), name_y)
        reg.name_x = ['%s_%s' % (str(r), i) for i in name_x]
        reg.name_multiID = name_multiID
        if has_endog_names:
            reg.name_yend = ['%s_%s' % (str(r), i) for i in name_yend]
            reg.name_q = ['%s_%s' % (str(r), i) for i in name_q]
            if sp_lag:
                reg.name_yend.append(set_name_yend_sp(reg.name_y))
                reg.name_q.extend(
                    set_name_q_sp(reg.name_x, sp_lag[0], reg.name_q, sp_lag[1]))
            reg.name_z = reg.name_x + reg.name_yend
            reg.name_h = reg.name_x + reg.name_q
    return multireg


def check_arrays(*arrays):
    """Check if the objects passed by a user to a regression class are
    correctly structured. If the user's data is correctly formed this function
    returns the number of observations, if not then an exception is raised.
    Note, this does not check for model setup, simply the shape and types of
    the objects.

    Parameters
    ----------
    *arrays : anything
              Objects passed by the user to a regression class; any type
              object can be passed and any number of objects can be passed.
              Entries that are None are skipped.

    Returns
    -------
    Returns : int
              number of observations

    Raises
    ------
    Exception
              If an object is neither a numpy array nor a sparse csr
              matrix, has more than two dimensions, has more columns than
              rows, contains non-finite values, or if the objects do not
              all have the same number of rows.
    IndexError
              If no non-None object was passed.

    Examples
    --------

    >>> import numpy as np
    >>> import libpysal
    >>> from spreg import check_arrays
    >>> db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r')
    >>> # Extract CRIME column from the dbf file
    >>> y = np.array(db.by_col("CRIME"))
    >>> y = np.reshape(y, (49,1))
    >>> X = []
    >>> X.append(db.by_col("INC"))
    >>> X.append(db.by_col("HOVAL"))
    >>> X = np.array(X).T
    >>> n = check_arrays(y, X)
    >>> print(n)
    49

    """
    rows = []
    for i in arrays:
        if i is None:
            continue
        if not isinstance(i, (np.ndarray, csr_matrix)):
            raise Exception("all input data must be either numpy arrays or sparse csr matrices")
        shape = i.shape
        if len(shape) > 2:
            raise Exception("all input arrays must have two dimensions")
        if len(shape) == 1:
            # Treat a 1-d array as a single column.
            shape = (shape[0], 1)
        if shape[0] < shape[1]:
            raise Exception("one or more input arrays have more columns than rows")
        if not spu.spisfinite(i):
            raise Exception("one or more input arrays have missing/NaN values")
        rows.append(shape[0])
    if len(set(rows)) > 1:
        raise Exception("arrays not all of same length")
    return rows[0]


def check_y(y, n):
    """Check if the y object passed by a user to a regression class is
    correctly structured. If the user's data is correctly formed this function
    returns y (reshaped to a column if it was one-dimensional), if not then
    an exception is raised. Note, this does not check for model setup, simply
    the shape and types of the objects.

    Parameters
    ----------
    y       : anything
              Object passed by the user to a regression class; any type
              object can be passed

    n       : int
              number of observations

    Returns
    -------
    y       : array
              The input with shape (n, 1)

    Raises
    ------
    Exception
              If y is not a numpy array, has more than two dimensions, or
              cannot be expressed as an (n, 1) array.

    Examples
    --------

    >>> import numpy as np
    >>> import libpysal
    >>> from spreg import check_y
    >>> db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r')

    # Extract CRIME column from the dbf file

    >>> y = np.array(db.by_col("CRIME"))
    >>> y = np.reshape(y, (49,1))
    >>> y = check_y(y, 49)

    # should not raise an exception

    """
    if not isinstance(y, np.ndarray):
        raise Exception("y must be a numpy array")
    shape = y.shape
    if len(shape) > 2:
        raise Exception("all input arrays must have two dimensions")
    if len(shape) == 1:
        try:
            y = y.reshape(n, 1)
        except (ValueError, TypeError):
            raise Exception("y must be a single column array matching the length of other arrays")
    if y.shape != (n, 1):
        raise Exception("y must be a single column array matching the length of other arrays")
    return y


def check_weights(w, y, w_required=False, time=False):
    """Check if the w parameter passed by the user is a libpysal.W object and
    check that its dimensionality matches the y parameter.  Note that this
    check is not performed if w set to None.

    Parameters
    ----------
    w           : any python object
                  Object passed by the user to a regression class; any type
                  object can be passed
    y           : numpy array
                  Any shape numpy array can be passed. Note: if y passed
                  check_arrays, then it will be valid for this function
    w_required  : boolean
                  True if a W matrix is required, False (default) if not.
    time        : boolean
                  True if data contains a time dimension, in which case the
                  comparison of w.n against the rows of y is skipped.
                  False (default) if not.

    Returns
    -------
    Returns : nothing
              Nothing is returned

    Raises
    ------
    Exception
              If w is required but None, if w.n does not match the number
              of rows of y (and time is False), or if the diagonal of
              w.sparse has a non-zero entry.

    Examples
    --------
    >>> import numpy as np
    >>> import libpysal
    >>> from spreg import check_weights
    >>> db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r')
    >>> # Extract CRIME column from the dbf file
    >>> y = np.array(db.by_col("CRIME"))
    >>> y = np.reshape(y, (49,1))
    >>> X = []
    >>> X.append(db.by_col("INC"))
    >>> X.append(db.by_col("HOVAL"))
    >>> X = np.array(X).T
    >>> w = libpysal.io.open(libpysal.examples.get_path("columbus.gal"), 'r').read()
    >>> check_weights(w, y)

    # should not raise an exception

    """
    if not w_required and w is None:
        return
    if w is None:
        raise Exception("A weights matrix w must be provided to run this method.")
    if not isinstance(w, weights.W):
        # Only warn: objects that merely mimic the W API are still accepted.
        from warnings import warn
        warn("w must be API-compatible pysal weights object")
    if w.n != y.shape[0] and not time:
        raise Exception("y must have n rows, and w must be an nxn PySAL W object")
    diag = w.sparse.diagonal()
    # check to make sure all entries equal 0
    if diag.min() != 0 or diag.max() != 0:
        raise Exception("All entries on diagonal must equal 0.")


def check_robust(robust, wk):
    """Check if the combination of robust and wk parameters passed by the user
    are valid. Note: this does not check if the W object is a valid adaptive
    kernel weights matrix needed for the HAC.

    Parameters
    ----------
    robust  : string or None
              Object passed by the user to a regression class; compared
              case-insensitively against 'hac', 'white' and 'ogmm'.
    wk      : any python object
              Object passed by the user to a regression class; any type
              object can be passed

    Returns
    -------
    Returns : nothing
              Nothing is returned

    Raises
    ------
    Exception
              If robust is 'hac' and wk is not a Kernel weights object, its
              diagonal is not all 1, or a weight lies outside [0, 1]; if
              robust is 'white' or 'ogmm' and wk is truthy; or if robust
              is any other non-empty value.

    Examples
    --------
    >>> import numpy as np
    >>> import libpysal
    >>> from spreg import check_robust
    >>> db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r')
    >>> # Extract CRIME column from the dbf file
    >>> y = np.array(db.by_col("CRIME"))
    >>> y = np.reshape(y, (49,1))
    >>> X = []
    >>> X.append(db.by_col("INC"))
    >>> X.append(db.by_col("HOVAL"))
    >>> X = np.array(X).T
    >>> wk = None
    >>> check_robust('White', wk)

    # should not raise an exception

    """
    if not robust:
        return
    method = robust.lower()
    if method == 'hac':
        if not isinstance(wk, weights.Kernel):
            raise Exception("HAC requires that wk be a Kernel Weights object")
        diag = wk.sparse.diagonal()
        # check to make sure all entries equal 1
        if diag.min() < 1.0 or diag.max() > 1.0:
            raise Exception("All entries on diagonal of kernel weights matrix must equal 1.")
        # ensure off-diagonal entries are in the set of real numbers [0,1)
        wegt = wk.weights
        for i in wk.id_order:
            vals = wegt[i]
            vmin = min(vals)
            vmax = max(vals)
            if vmin < 0.0:
                raise Exception("Off-diagonal entries must be greater than or equal to 0.")
            if vmax > 1.0:
                # NOTE: we are not checking for the case of exactly 1.0 ###
                raise Exception("Off-diagonal entries must be less than 1.")
    elif method in ('white', 'ogmm'):
        if wk:
            raise Exception("White requires that wk be set to None")
    else:
        raise Exception("invalid value passed to robust, see docs for valid options")
def check_spat_diag(spat_diag, w):
    """Check if there is a w parameter passed by the user if the user also
    requests spatial diagnostics.

    Parameters
    ----------
    spat_diag   : boolean
                  Value passed by a user to a regression class
    w           : any python object
                  Object passed by the user to a regression class; any type
                  object can be passed

    Returns
    -------
    Returns : nothing
              Nothing is returned

    Raises
    ------
    Exception
              If spat_diag is truthy and w is not a libpysal weights.W
              instance.

    Examples
    --------
    >>> import numpy as np
    >>> import libpysal
    >>> from spreg import check_spat_diag
    >>> db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r')
    >>> # Extract CRIME column from the dbf file
    >>> y = np.array(db.by_col("CRIME"))
    >>> y = np.reshape(y, (49,1))
    >>> X = []
    >>> X.append(db.by_col("INC"))
    >>> X.append(db.by_col("HOVAL"))
    >>> X = np.array(X).T
    >>> w = libpysal.io.open(libpysal.examples.get_path("columbus.gal"), 'r').read()
    >>> check_spat_diag(True, w)

    # should not raise an exception

    """
    if spat_diag and not isinstance(w, weights.W):
        raise Exception("w must be a libpysal.W object to run spatial diagnostics")


def check_regimes(reg_set, N=None, K=None):
    """Check if there are at least two regimes and, when N and K are
    given, enough observations per regime.

    Parameters
    ----------
    reg_set : list
              List of the regimes IDs
    N       : int or None
              Number of observations. Default is None.
    K       : int or None
              Number of variables. Default is None.

    Returns
    -------
    Returns : nothing
              Nothing is returned

    Raises
    ------
    Exception
              If fewer than two regimes are given, or if the average
              number of observations per regime (N / number of regimes)
              is smaller than K + 1.

    """
    if len(reg_set) < 2:
        raise Exception("At least 2 regimes are needed to run regimes methods. Please check your regimes variable.")
    # The observations-per-regime check needs both N and K; with the
    # declared defaults (None) the arithmetic below would raise TypeError.
    if N is None or K is None:
        return
    if 1.0 * N / len(reg_set) < K + 1:
        raise Exception("There aren't enough observations for the given number of regimes and variables. Please check your regimes variable.")


def check_constant(x, name_x=None, just_rem=False):
    """Check if the X matrix contains a constant. If it does, drop the
    constant and replace by a vector of ones.

    Parameters
    ----------
    x           : array
                  Value passed by a user to a regression class. Anything
                  that is not a numpy array is handled through the sparse
                  interface (``max``/``min`` along axis 0 and ``toarray``).
    name_x      : list of strings
                  Names of independent variables
    just_rem    : boolean
                  If False (default), remove all constants and add a vector of ones
                  If True, just remove all constants

    Returns
    -------
    x_constant  : array
                  Matrix with independent variables plus constant
                  (no constant column is added if just_rem is True)
    name_x      : list of strings
                  Names of independent variables (updated if any variable
                  dropped); a copy, the list passed in is not modified.
                  None if name_x was None.
    warn        : string or None
                  Message describing the removed variables, or None if
                  nothing was removed.

    Examples
    --------
    >>> import numpy as np
    >>> import libpysal
    >>> from spreg import check_constant
    >>> db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r')
    >>> X = []
    >>> X.append(db.by_col("INC"))
    >>> X.append(db.by_col("HOVAL"))
    >>> X = np.array(X).T
    >>> x_constant,name_x,warn = check_constant(X)
    >>> x_constant.shape
    (49, 3)

    """
    x_constant = COPY.copy(x)
    keep_x = COPY.copy(name_x)
    warn = None
    is_dense = isinstance(x_constant, np.ndarray)
    # Per-column range (max - min); a column is constant when its range is 0.
    if is_dense:
        diffs = np.ptp(x_constant, axis=0)
    else:
        diffs = (x_constant.max(axis=0).toarray() - x_constant.min(axis=0).toarray())[0]
    const_idx = np.nonzero(diffs == 0)[0]
    n_const = int(np.count_nonzero(diffs == 0))

    if n_const > 0:
        varying_idx = np.nonzero(diffs > 0)[0]
        if is_dense:
            x_constant = np.delete(x_constant, const_idx, 1)
        else:
            x_constant = x_constant[:, varying_idx]
        if keep_x:
            rem_x = [keep_x[i] for i in const_idx]
            warn = 'Variable(s) ' + str(rem_x) + ' removed for being constant.'
            keep_x[:] = [keep_x[i] for i in varying_idx]
        elif n_const == 1:
            warn = 'One variable has been removed for being constant.'
        else:
            warn = str(n_const) + ' variables have been removed for being constant.'
    if just_rem:
        return x_constant, keep_x, warn
    return spu.sphstack(np.ones((x_constant.shape[0], 1)), x_constant), keep_x, warn


def _test():
    """Run the doctests embedded in this module."""
    import doctest
    doctest.testmod()


if __name__ == '__main__':
    _test()