1 SUBROUTINE SWILK (INIT, X, N, N1, N2, A, W, PW, IFAULT) 2C 3C ALGORITHM AS R94 APPL. STATIST. (1995) VOL.44, NO.4 4C 5C Calculates the Shapiro-Wilk W test and its significance level 6C 7C IFAULT error code details from the R94 paper: 8C - 0 for no fault 9C - 1 if N1 < 3 10C - 2 if N > 5000 (a non-fatal error) 11C - 3 if N2 < N/2, so insufficient storage for A 12C - 4 if N1 > N or (N1 < N and N < 20) 13C - 5 if the proportion censored (N-N1)/N > 0.8 14C - 6 if the data have zero range (if sorted on input) 15C 16 INTEGER N, N1, N2, IFAULT 17 REAL X(*), A(*), PW, W 18 REAL C1(6), C2(6), C3(4), C4(4), C5(4), C6(3), C7(2) 19 REAL C8(2), C9(2), G(2) 20 REAL Z90, Z95, Z99, ZM, ZSS, BF1, XX90, XX95, ZERO, ONE, TWO 21 REAL THREE, SQRTH, QTR, TH, SMALL, PI6, STQR 22 REAL SUMM2, SSUMM2, FAC, RSN, AN, AN25, A1, A2, DELTA, RANGE 23 REAL SA, SX, SSX, SSA, SAX, ASA, XSX, SSASSX, W1, Y, XX, XI 24 REAL GAMMA, M, S, LD, BF, Z90F, Z95F, Z99F, ZFM, ZSD, ZBAR 25C 26C Auxiliary routines 27C 28 REAL PPND, POLY 29 DOUBLE PRECISION ALNORM 30C 31 INTEGER NCENS, NN2, I, I1, J 32 LOGICAL INIT, UPPER 33C 34 DATA C1 /0.0E0, 0.221157E0, -0.147981E0, -0.207119E1, 35 * 0.4434685E1, -0.2706056E1/ 36 DATA C2 /0.0E0, 0.42981E-1, -0.293762E0, -0.1752461E1, 37 * 0.5682633E1, -0.3582633E1/ 38 DATA C3 /0.5440E0, -0.39978E0, 0.25054E-1, -0.6714E-3/ 39 DATA C4 /0.13822E1, -0.77857E0, 0.62767E-1, -0.20322E-2/ 40 DATA C5 /-0.15861E1, -0.31082E0, -0.83751E-1, 0.38915E-2/ 41 DATA C6 /-0.4803E0, -0.82676E-1, 0.30302E-2/ 42 DATA C7 /0.164E0, 0.533E0/ 43 DATA C8 /0.1736E0, 0.315E0/ 44 DATA C9 /0.256E0, -0.635E-2/ 45 DATA G /-0.2273E1, 0.459E0/ 46 DATA Z90, Z95, Z99 /0.12816E1, 0.16449E1, 0.23263E1/ 47 DATA ZM, ZSS /0.17509E1, 0.56268E0/ 48 DATA BF1 /0.8378E0/, XX90, XX95 /0.556E0, 0.622E0/ 49 DATA ZERO /0.0E0/, ONE/1.0E0/, TWO/2.0E0/, THREE/3.0E0/ 50 DATA SQRTH /0.70711E0/, QTR/0.25E0/, TH/0.375E0/, SMALL/1E-19/ 51 DATA PI6 /0.1909859E1/, STQR/0.1047198E1/, UPPER/.TRUE./ 52C 53 PW = ONE 54 IF (W .GE. ZERO) W = ONE 55 AN = N 56 IFAULT = 3 57 NN2 = N/2 58 IF (N2 .LT. NN2) RETURN 59 IFAULT = 1 60 IF (N .LT. 3) RETURN 61C 62C If INIT is false, calculates coefficients for the test 63C 64 IF (.NOT. INIT) THEN 65 IF (N .EQ. 3) THEN 66 A(1) = SQRTH 67 ELSE 68 AN25 = AN + QTR 69 SUMM2 = ZERO 70 DO 30 I = 1, N2 71 A(I) = PPND((REAL(I) - TH)/AN25,IFAULT) 72 SUMM2 = SUMM2 + A(I) ** 2 7330 CONTINUE 74 SUMM2 = SUMM2 * TWO 75 SSUMM2 = SQRT(SUMM2) 76 RSN = ONE / SQRT(AN) 77 A1 = POLY(C1, 6, RSN) - A(1) / SSUMM2 78C 79C Normalize coefficients 80C 81 IF (N .GT. 5) THEN 82 I1 = 3 83 A2 = -A(2)/SSUMM2 + POLY(C2,6,RSN) 84 FAC = SQRT((SUMM2 - TWO * A(1) ** 2 - TWO * 85 * A(2) ** 2)/(ONE - TWO * A1 ** 2 - TWO * A2 ** 2)) 86 A(1) = A1 87 A(2) = A2 88 ELSE 89 I1 = 2 90 FAC = SQRT((SUMM2 - TWO * A(1) ** 2)/ 91 * (ONE - TWO * A1 ** 2)) 92 A(1) = A1 93 END IF 94 DO 40 I = I1, NN2 95 A(I) = -A(I)/FAC 96 40 CONTINUE 97 END IF 98 INIT = .TRUE. 99 END IF 100 IF (N1 .LT. 3) RETURN 101 NCENS = N - N1 102 IFAULT = 4 103 IF (NCENS .LT. 0 .OR. (NCENS .GT. 0 .AND. N .LT. 20)) RETURN 104 IFAULT = 5 105 DELTA = FLOAT(NCENS)/AN 106 IF (DELTA .GT. 0.8) RETURN 107C 108C If W input as negative, calculate significance level of -W 109C 110 IF (W .LT. ZERO) THEN 111 W1 = ONE + W 112 IFAULT = 0 113 GOTO 70 114 END IF 115C 116C Check for zero range 117C 118 IFAULT = 6 119 RANGE = X(N1) - X(1) 120 IF (RANGE .LT. SMALL) RETURN 121C 122C Check for correct sort order on range - scaled X 123C 124 IFAULT = 7 125 XX = X(1)/RANGE 126 SX = XX 127 SA = -A(1) 128 J = N - 1 129 DO 50 I = 2, N1 130 XI = X(I)/RANGE 131CCCCC IF (XX-XI .GT. SMALL) PRINT *,' ANYTHING' 132 SX = SX + XI 133 IF (I .NE. J) SA = SA + SIGN(1, I - J) * A(MIN(I, J)) 134 XX = XI 135 J = J - 1 13650 CONTINUE 137 IFAULT = 0 138 IF (N .GT. 5000) IFAULT = 2 139C 140C Calculate W statistic as squared correlation 141C between data and coefficients 142C 143 SA = SA/N1 144 SX = SX/N1 145 SSA = ZERO 146 SSX = ZERO 147 SAX = ZERO 148 J = N 149 DO 60 I = 1, N1 150 IF (I .NE. J) THEN 151 ASA = SIGN(1, I - J) * A(MIN(I, J)) - SA 152 ELSE 153 ASA = -SA 154 END IF 155 XSX = X(I)/RANGE - SX 156 SSA = SSA + ASA * ASA 157 SSX = SSX + XSX * XSX 158 SAX = SAX + ASA * XSX 159 J = J - 1 160 60 CONTINUE 161C 162C W1 equals (1-W) calculated to avoid excessive rounding error 163C for W very near 1 (a potential problem in very large samples) 164C 165 SSASSX = SQRT(SSA * SSX) 166 W1 = (SSASSX - SAX) * (SSASSX + SAX)/(SSA * SSX) 167 70 W = ONE - W1 168C 169C Calculate significance level for W (exact for N=3) 170C 171 IF (N .EQ. 3) THEN 172 PW = PI6 * (ASIN(SQRT(W)) - STQR) 173 RETURN 174 END IF 175 Y = LOG(W1) 176 XX = LOG(AN) 177 M = ZERO 178 S = ONE 179 IF (N .LE. 11) THEN 180 GAMMA = POLY(G, 2, AN) 181 IF (Y .GE. GAMMA) THEN 182 PW = SMALL 183 RETURN 184 END IF 185 Y = -LOG(GAMMA - Y) 186 M = POLY(C3, 4, AN) 187 S = EXP(POLY(C4, 4, AN)) 188 ELSE 189 M = POLY(C5, 4, XX) 190 S = EXP(POLY(C6, 3, XX)) 191 END IF 192 IF (NCENS .GT. 0) THEN 193C 194C Censoring by proportion NCENS/N. Calculate mean and sd 195C of normal equivalent deviate of W. 196C 197 LD = -LOG(DELTA) 198 BF = ONE + XX * BF1 199 Z90F = Z90 + BF * POLY(C7, 2, XX90 ** XX) ** LD 200 Z95F = Z95 + BF * POLY(C8, 2, XX95 ** XX) ** LD 201 Z99F = Z99 + BF * POLY(C9, 2, XX) ** LD 202C 203C Regress Z90F,...,Z99F on normal deviates Z90,...,Z99 to get 204C pseudo-mean and pseudo-sd of z as the slope and intercept 205C 206 ZFM = (Z90F + Z95F + Z99F)/THREE 207 ZSD = (Z90*(Z90F-ZFM)+Z95*(Z95F-ZFM)+Z99*(Z99F-ZFM))/ZSS 208 ZBAR = ZFM - ZSD * ZM 209 M = M + ZBAR * S 210 S = S * ZSD 211 END IF 212 PW = REAL(ALNORM(DBLE((Y - M)/S), UPPER)) 213C 214 RETURN 215 END 216 217 DOUBLE PRECISION FUNCTION ALNORM(X, UPPER) 218C 219C EVALUATES THE TAIL AREA OF THE STANDARDIZED NORMAL CURVE FROM 220C X TO INFINITY IF UPPER IS .TRUE. OR FROM MINUS INFINITY TO X 221C IF UPPER IS .FALSE. 222C 223C NOTE NOVEMBER 2001: MODIFY UTZERO. ALTHOUGH NOT NECESSARY 224C WHEN USING ALNORM FOR SIMPLY COMPUTING PERCENT POINTS, 225C EXTENDING RANGE IS HELPFUL FOR USE WITH FUNCTIONS THAT 226C USE ALNORM IN INTERMEDIATE COMPUTATIONS. 227C 228 DOUBLE PRECISION LTONE,UTZERO,ZERO,HALF,ONE,CON, 229 $ A1,A2,A3,A4,A5,A6,A7,B1,B2, 230 $ B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,X,Y,Z,ZEXP 231 LOGICAL UPPER,UP 232C 233C LTONE AND UTZERO MUST BE SET TO SUIT THE PARTICULAR COMPUTER 234C 235CCCCC DATA LTONE, UTZERO /7.0D0, 18.66D0/ 236 DATA LTONE, UTZERO /7.0D0, 38.00D0/ 237 DATA ZERO,HALF,ONE,CON /0.0D0,0.5D0,1.0D0,1.28D0/ 238 DATA A1, A2, A3, 239 $ A4, A5, A6, 240 $ A7 241 $ /0.398942280444D0, 0.399903438504D0, 5.75885480458D0, 242 $ 29.8213557808D0, 2.62433121679D0, 48.6959930692D0, 243 $ 5.92885724438D0/ 244 DATA B1, B2, B3, 245 $ B4, B5, B6, 246 $ B7, B8, B9, 247 $ B10, B11, B12 248 $ /0.398942280385D0, 3.8052D-8, 1.00000615302D0, 249 $ 3.98064794D-4, 1.98615381364D0, 0.151679116635D0, 250 $ 5.29330324926D0, 4.8385912808D0, 15.1508972451D0, 251 $ 0.742380924027D0, 30.789933034D0, 3.99019417011D0/ 252C 253 ZEXP(Z) = DEXP(Z) 254C 255 UP = UPPER 256 Z = X 257 IF (Z .GE. ZERO) GOTO 10 258 UP = .NOT. UP 259 Z = -Z 260 10 IF (Z .LE. LTONE .OR. UP .AND. Z .LE. UTZERO) GOTO 20 261 ALNORM = ZERO 262 GOTO 40 263 20 Y = HALF * Z * Z 264 IF (Z .GT. CON) GOTO 30 265C 266 ALNORM = HALF - Z * (A1- A2 * Y / (Y + A3- A4 / (Y + A5 + A6 / 267 $ (Y + A7)))) 268 GOTO 40 269C 270 30 ALNORM = B1* ZEXP(-Y)/(Z - B2 + B3/ (Z +B4 +B5/(Z -B6 +B7/ 271 $ (Z +B8 -B9/ (Z +B10 +B11/ (Z + B12)))))) 272C 273 40 IF (.NOT. UP) ALNORM = ONE - ALNORM 274 RETURN 275 END 276 277 REAL FUNCTION PPND(P, IFAULT) 278C 279C ALGORITHM AS 111 APPL. STATIST. (1977), VOL.26, NO.1 280C 281C PRODUCES NORMAL DEVIATE CORRESPONDING TO LOWER TAIL AREA OF P 282C REAL VERSION FOR EPS = 2 **(-31) 283C THE HASH SUMS ARE THE SUMS OF THE MODULI OF THE COEFFICIENTS 284C THEY HAVE NO INHERENT MEANINGS BUT ARE INCLUDED FOR USE IN 285C CHECKING TRANSCRIPTIONS 286C STANDARD FUNCTIONS ABS, ALOG AND SQRT ARE USED 287C 288C NOTE: WE COULD USE DATAPLOT NORPPF, BUT VARIOUS APPLIED 289C STATISTICS ALGORITHMS USE THIS. SO WE PROVIDE IT TO 290C MAKE USE OF APPLIED STATISTICS ALGORITHMS EASIER. 291C 292 REAL ZERO, SPLIT, HALF, ONE 293 REAL A0, A1, A2, A3, B1, B2, B3, B4, C0, C1, C2, C3, D1, D2 294 REAL P, Q, R 295 INTEGER IFAULT 296 DATA ZERO /0.0E0/, HALF/0.5E0/, ONE/1.0E0/ 297 DATA SPLIT /0.42E0/ 298 DATA A0 / 2.50662823884E0/ 299 DATA A1 / -18.61500062529E0/ 300 DATA A2 / 41.39119773534E0/ 301 DATA A3 / -25.44106049637E0/ 302 DATA B1 / -8.47351093090E0/ 303 DATA B2 / 23.08336743743E0/ 304 DATA B3 / -21.06224101826E0/ 305 DATA B4 / 3.13082909833E0/ 306 DATA C0 / -2.78718931138E0/ 307 DATA C1 / -2.29796479134E0/ 308 DATA C2 / 4.85014127135E0/ 309 DATA C3 / 2.32121276858E0/ 310 DATA D1 / 3.54388924762E0/ 311 DATA D2 / 1.63706781897E0/ 312C 313 IFAULT = 0 314 Q = P - HALF 315 IF (ABS(Q) .GT. SPLIT) GOTO 1 316 R = Q*Q 317 PPND = Q * (((A3*R + A2)*R + A1) * R + A0) / 318 * ((((B4*R + B3)*R + B2) * R + B1) * R + ONE) 319 RETURN 3201 R = P 321 IF (Q .GT. ZERO)R = ONE - P 322 IF (R .LE. ZERO) GOTO 2 323 R = SQRT(-ALOG(R)) 324 PPND = (((C3 * R + C2) * R + C1) * R + C0)/ 325 * ((D2*R + D1) * R + ONE) 326 IF (Q .LT. ZERO) PPND = -PPND 327 RETURN 3282 IFAULT = 1 329 PPND = ZERO 330 RETURN 331 END 332 333 REAL FUNCTION POLY(C, NORD, X) 334C 335C 336C ALGORITHM AS 181.2 APPL. STATIST. (1982) VOL. 31, NO. 2 337C 338C CALCULATES THE ALGEBRAIC POLYNOMIAL OF ORDER NORED-1 WITH 339C ARRAY OF COEFFICIENTS C. ZERO ORDER COEFFICIENT IS C(1) 340C 341 REAL C(NORD) 342 POLY = C(1) 343 IF(NORD.EQ.1) RETURN 344 P = X*C(NORD) 345 IF(NORD.EQ.2) GOTO 20 346 N2 = NORD-2 347 J = N2+1 348 DO 10 I = 1,N2 349 P = (P+C(J))*X 350 J = J-1 351 10 CONTINUE 352 20 POLY = POLY+P 353 RETURN 354 END 355