1      SUBROUTINE SWILK (INIT, X, N, N1, N2, A, W, PW, IFAULT)
2C
3C        ALGORITHM AS R94 APPL. STATIST. (1995) VOL.44, NO.4
4C
5C        Calculates the Shapiro-Wilk W test and its significance level
6C
7C        IFAULT error code details from the R94 paper:
8C        - 0 for no fault
9C        - 1 if N1 < 3
10C        - 2 if N > 5000 (a non-fatal error)
11C        - 3 if N2 < N/2, so insufficient storage for A
12C        - 4 if N1 > N or (N1 < N and N < 20)
13C        - 5 if the proportion censored (N-N1)/N > 0.8
14C        - 6 if the data have zero range (if sorted on input)
15C
16      INTEGER N, N1, N2, IFAULT
17      REAL X(*), A(*), PW, W
18      REAL C1(6), C2(6), C3(4), C4(4), C5(4), C6(3), C7(2)
19      REAL C8(2), C9(2), G(2)
20      REAL Z90, Z95, Z99, ZM, ZSS, BF1, XX90, XX95, ZERO, ONE, TWO
21      REAL THREE, SQRTH, QTR, TH, SMALL, PI6, STQR
22      REAL SUMM2, SSUMM2, FAC, RSN, AN, AN25, A1, A2, DELTA, RANGE
23      REAL SA, SX, SSX, SSA, SAX, ASA, XSX, SSASSX, W1, Y, XX, XI
24      REAL GAMMA, M, S, LD, BF, Z90F, Z95F, Z99F, ZFM, ZSD, ZBAR
25C
26C        Auxiliary routines
27C
28      REAL PPND, POLY
29      DOUBLE PRECISION ALNORM
30C
31      INTEGER NCENS, NN2, I, I1, J
32      LOGICAL INIT, UPPER
33C
34      DATA C1 /0.0E0, 0.221157E0, -0.147981E0, -0.207119E1,
35     *     0.4434685E1, -0.2706056E1/
36      DATA C2 /0.0E0, 0.42981E-1, -0.293762E0, -0.1752461E1,
37     *     0.5682633E1, -0.3582633E1/
38      DATA C3 /0.5440E0, -0.39978E0, 0.25054E-1, -0.6714E-3/
39      DATA C4 /0.13822E1, -0.77857E0, 0.62767E-1, -0.20322E-2/
40      DATA C5 /-0.15861E1, -0.31082E0, -0.83751E-1, 0.38915E-2/
41      DATA C6 /-0.4803E0, -0.82676E-1, 0.30302E-2/
42      DATA C7 /0.164E0, 0.533E0/
43      DATA C8 /0.1736E0, 0.315E0/
44      DATA C9 /0.256E0, -0.635E-2/
45      DATA G  /-0.2273E1, 0.459E0/
46      DATA Z90, Z95, Z99 /0.12816E1, 0.16449E1, 0.23263E1/
47      DATA ZM, ZSS /0.17509E1, 0.56268E0/
48      DATA BF1 /0.8378E0/, XX90, XX95 /0.556E0, 0.622E0/
49      DATA ZERO /0.0E0/, ONE/1.0E0/, TWO/2.0E0/, THREE/3.0E0/
50      DATA SQRTH /0.70711E0/, QTR/0.25E0/, TH/0.375E0/, SMALL/1E-19/
51      DATA PI6 /0.1909859E1/, STQR/0.1047198E1/, UPPER/.TRUE./
52C
53      PW  =  ONE
54      IF (W .GE. ZERO) W = ONE
55      AN = N
56      IFAULT = 3
57      NN2 = N/2
58      IF (N2 .LT. NN2) RETURN
59      IFAULT = 1
60      IF (N .LT. 3) RETURN
61C
62C        If INIT is false, calculates coefficients for the test
63C
64      IF (.NOT. INIT) THEN
65        IF (N .EQ. 3) THEN
66           A(1) = SQRTH
67        ELSE
68           AN25 = AN + QTR
69           SUMM2 = ZERO
70           DO 30 I = 1, N2
71              A(I) = PPND((REAL(I) - TH)/AN25,IFAULT)
72              SUMM2 = SUMM2 + A(I) ** 2
7330          CONTINUE
74           SUMM2 = SUMM2 * TWO
75           SSUMM2 = SQRT(SUMM2)
76           RSN = ONE / SQRT(AN)
77           A1 = POLY(C1, 6, RSN) - A(1) / SSUMM2
78C
79C        Normalize coefficients
80C
81           IF (N .GT. 5) THEN
82              I1 = 3
83              A2 = -A(2)/SSUMM2 + POLY(C2,6,RSN)
84              FAC = SQRT((SUMM2 - TWO * A(1) ** 2 - TWO *
85     *               A(2) ** 2)/(ONE - TWO * A1 ** 2 - TWO * A2 ** 2))
86              A(1) = A1
87              A(2) = A2
88           ELSE
89              I1 = 2
90              FAC = SQRT((SUMM2 - TWO * A(1) ** 2)/
91     *                   (ONE - TWO * A1 ** 2))
92              A(1) = A1
93           END IF
94           DO 40 I = I1, NN2
95              A(I) = -A(I)/FAC
96   40       CONTINUE
97        END IF
98        INIT = .TRUE.
99      END IF
100      IF (N1 .LT. 3) RETURN
101      NCENS = N - N1
102      IFAULT = 4
103      IF (NCENS .LT. 0 .OR. (NCENS .GT. 0 .AND. N .LT. 20)) RETURN
104      IFAULT = 5
105      DELTA = FLOAT(NCENS)/AN
106      IF (DELTA .GT. 0.8) RETURN
107C
108C        If W input as negative, calculate significance level of -W
109C
110      IF (W .LT. ZERO) THEN
111        W1 = ONE + W
112        IFAULT = 0
113        GOTO 70
114      END IF
115C
116C        Check for zero range
117C
118      IFAULT = 6
119      RANGE = X(N1) - X(1)
120      IF (RANGE .LT. SMALL) RETURN
121C
122C        Check for correct sort order on range - scaled X
123C
124      IFAULT = 7
125      XX = X(1)/RANGE
126      SX = XX
127      SA = -A(1)
128      J = N - 1
129      DO 50 I = 2, N1
130        XI = X(I)/RANGE
131CCCCC   IF (XX-XI .GT. SMALL) PRINT *,' ANYTHING'
132        SX = SX + XI
133        IF (I .NE. J) SA = SA + SIGN(1, I - J) * A(MIN(I, J))
134        XX = XI
135        J = J - 1
13650    CONTINUE
137      IFAULT = 0
138      IF (N .GT. 5000) IFAULT = 2
139C
140C        Calculate W statistic as squared correlation
141C        between data and coefficients
142C
143      SA = SA/N1
144      SX = SX/N1
145      SSA = ZERO
146      SSX = ZERO
147      SAX = ZERO
148      J = N
149      DO 60 I = 1, N1
150        IF (I .NE. J) THEN
151           ASA = SIGN(1, I - J) * A(MIN(I, J)) - SA
152        ELSE
153           ASA = -SA
154        END IF
155        XSX = X(I)/RANGE - SX
156        SSA = SSA + ASA * ASA
157        SSX = SSX + XSX * XSX
158        SAX = SAX + ASA * XSX
159        J = J - 1
160   60 CONTINUE
161C
162C        W1 equals (1-W) calculated to avoid excessive rounding error
163C        for W very near 1 (a potential problem in very large samples)
164C
165      SSASSX = SQRT(SSA * SSX)
166      W1 = (SSASSX - SAX) * (SSASSX + SAX)/(SSA * SSX)
167   70 W = ONE - W1
168C
169C        Calculate significance level for W (exact for N=3)
170C
171      IF (N .EQ. 3) THEN
172         PW = PI6 * (ASIN(SQRT(W)) - STQR)
173         RETURN
174      END IF
175      Y = LOG(W1)
176      XX = LOG(AN)
177      M = ZERO
178      S = ONE
179      IF (N .LE. 11) THEN
180        GAMMA = POLY(G, 2, AN)
181        IF (Y .GE. GAMMA) THEN
182           PW = SMALL
183           RETURN
184        END IF
185        Y = -LOG(GAMMA - Y)
186        M = POLY(C3, 4, AN)
187        S = EXP(POLY(C4, 4, AN))
188      ELSE
189        M = POLY(C5, 4, XX)
190        S = EXP(POLY(C6, 3, XX))
191      END IF
192      IF (NCENS .GT. 0) THEN
193C
194C        Censoring by proportion NCENS/N.  Calculate mean and sd
195C        of normal equivalent deviate of W.
196C
197        LD = -LOG(DELTA)
198        BF = ONE + XX * BF1
199        Z90F = Z90 + BF * POLY(C7, 2, XX90 ** XX) ** LD
200        Z95F = Z95 + BF * POLY(C8, 2, XX95 ** XX) ** LD
201        Z99F = Z99 + BF * POLY(C9, 2, XX) ** LD
202C
203C        Regress Z90F,...,Z99F on normal deviates Z90,...,Z99 to get
204C        pseudo-mean and pseudo-sd of z as the slope and intercept
205C
206        ZFM = (Z90F + Z95F + Z99F)/THREE
207        ZSD = (Z90*(Z90F-ZFM)+Z95*(Z95F-ZFM)+Z99*(Z99F-ZFM))/ZSS
208        ZBAR = ZFM - ZSD * ZM
209        M = M + ZBAR * S
210        S = S * ZSD
211      END IF
212      PW = REAL(ALNORM(DBLE((Y - M)/S), UPPER))
213C
214      RETURN
215      END
216
217      DOUBLE PRECISION FUNCTION ALNORM(X, UPPER)
218C
219C       EVALUATES THE TAIL AREA OF THE STANDARDIZED NORMAL CURVE FROM
220C       X TO INFINITY IF UPPER IS .TRUE. OR FROM MINUS INFINITY TO X
221C       IF UPPER IS .FALSE.
222C
223C  NOTE NOVEMBER 2001: MODIFY UTZERO.  ALTHOUGH NOT NECESSARY
224C  WHEN USING ALNORM FOR SIMPLY COMPUTING PERCENT POINTS,
225C  EXTENDING RANGE IS HELPFUL FOR USE WITH FUNCTIONS THAT
226C  USE ALNORM IN INTERMEDIATE COMPUTATIONS.
227C
228      DOUBLE PRECISION LTONE,UTZERO,ZERO,HALF,ONE,CON,
229     $ A1,A2,A3,A4,A5,A6,A7,B1,B2,
230     $ B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,X,Y,Z,ZEXP
231      LOGICAL UPPER,UP
232C
233C       LTONE AND UTZERO MUST BE SET TO SUIT THE PARTICULAR COMPUTER
234C
235CCCCC DATA LTONE, UTZERO /7.0D0, 18.66D0/
236      DATA LTONE, UTZERO /7.0D0, 38.00D0/
237      DATA ZERO,HALF,ONE,CON /0.0D0,0.5D0,1.0D0,1.28D0/
238      DATA          A1,             A2,            A3,
239     $              A4,             A5,            A6,
240     $              A7
241     $ /0.398942280444D0, 0.399903438504D0, 5.75885480458D0,
242     $   29.8213557808D0,  2.62433121679D0, 48.6959930692D0,
243     $   5.92885724438D0/
244      DATA          B1,             B2,             B3,
245     $              B4,             B5,             B6,
246     $              B7,             B8,             B9,
247     $             B10,            B11,            B12
248     $ /0.398942280385D0,      3.8052D-8,    1.00000615302D0,
249     $   3.98064794D-4,     1.98615381364D0, 0.151679116635D0,
250     $   5.29330324926D0,   4.8385912808D0,  15.1508972451D0,
251     $  0.742380924027D0,   30.789933034D0,  3.99019417011D0/
252C
253      ZEXP(Z) = DEXP(Z)
254C
255      UP = UPPER
256      Z = X
257      IF (Z .GE. ZERO) GOTO 10
258      UP = .NOT. UP
259      Z = -Z
260  10  IF (Z .LE. LTONE .OR. UP .AND. Z .LE. UTZERO) GOTO 20
261      ALNORM = ZERO
262      GOTO 40
263  20  Y = HALF * Z * Z
264      IF (Z .GT. CON) GOTO 30
265C
266      ALNORM = HALF - Z * (A1- A2 * Y / (Y + A3- A4 / (Y + A5 + A6 /
267     $ (Y + A7))))
268      GOTO 40
269C
270  30  ALNORM = B1* ZEXP(-Y)/(Z - B2 + B3/ (Z +B4 +B5/(Z -B6 +B7/
271     $ (Z +B8 -B9/ (Z +B10 +B11/ (Z + B12))))))
272C
273  40  IF (.NOT. UP) ALNORM = ONE - ALNORM
274      RETURN
275      END
276
277      REAL FUNCTION PPND(P, IFAULT)
278C
279C  ALGORITHM AS 111  APPL. STATIST. (1977), VOL.26, NO.1
280C
281C  PRODUCES NORMAL DEVIATE CORRESPONDING TO LOWER TAIL AREA OF P
282C  REAL VERSION FOR EPS = 2 **(-31)
283C  THE HASH SUMS ARE THE SUMS OF THE MODULI OF THE COEFFICIENTS
284C  THEY HAVE NO INHERENT MEANINGS BUT ARE INCLUDED FOR USE IN
285C  CHECKING TRANSCRIPTIONS
286C  STANDARD FUNCTIONS ABS, ALOG AND SQRT ARE USED
287C
288C  NOTE: WE COULD USE DATAPLOT NORPPF, BUT VARIOUS APPLIED
289C        STATISTICS ALGORITHMS USE THIS.  SO WE PROVIDE IT TO
290C        MAKE USE OF APPLIED STATISTICS ALGORITHMS EASIER.
291C
292      REAL ZERO, SPLIT, HALF, ONE
293      REAL A0, A1, A2, A3, B1, B2, B3, B4, C0, C1, C2, C3, D1, D2
294      REAL P, Q, R
295      INTEGER IFAULT
296      DATA ZERO /0.0E0/, HALF/0.5E0/, ONE/1.0E0/
297      DATA SPLIT /0.42E0/
298      DATA A0 / 2.50662823884E0/
299      DATA A1 / -18.61500062529E0/
300      DATA A2 / 41.39119773534E0/
301      DATA A3 / -25.44106049637E0/
302      DATA B1 / -8.47351093090E0/
303      DATA B2 / 23.08336743743E0/
304      DATA B3 / -21.06224101826E0/
305      DATA B4 / 3.13082909833E0/
306      DATA C0 / -2.78718931138E0/
307      DATA C1 / -2.29796479134E0/
308      DATA C2 / 4.85014127135E0/
309      DATA C3 / 2.32121276858E0/
310      DATA D1 / 3.54388924762E0/
311      DATA D2 / 1.63706781897E0/
312C
313      IFAULT = 0
314      Q = P - HALF
315      IF (ABS(Q) .GT. SPLIT) GOTO 1
316      R = Q*Q
317      PPND = Q * (((A3*R + A2)*R + A1) * R + A0) /
318     *  ((((B4*R + B3)*R + B2) * R + B1) * R + ONE)
319      RETURN
3201     R = P
321      IF (Q .GT. ZERO)R = ONE - P
322      IF (R .LE. ZERO) GOTO 2
323      R = SQRT(-ALOG(R))
324      PPND = (((C3 * R + C2) * R + C1) * R + C0)/
325     *  ((D2*R + D1) * R + ONE)
326      IF (Q .LT. ZERO) PPND = -PPND
327      RETURN
3282     IFAULT = 1
329      PPND = ZERO
330      RETURN
331      END
332
333      REAL FUNCTION POLY(C, NORD, X)
334C
335C
336C        ALGORITHM AS 181.2   APPL. STATIST.  (1982) VOL. 31, NO. 2
337C
338C        CALCULATES THE ALGEBRAIC POLYNOMIAL OF ORDER NORED-1 WITH
339C        ARRAY OF COEFFICIENTS C.  ZERO ORDER COEFFICIENT IS C(1)
340C
341      REAL C(NORD)
342      POLY = C(1)
343      IF(NORD.EQ.1) RETURN
344      P = X*C(NORD)
345      IF(NORD.EQ.2) GOTO 20
346      N2 = NORD-2
347      J = N2+1
348      DO 10 I = 1,N2
349      P = (P+C(J))*X
350      J = J-1
351   10 CONTINUE
352   20 POLY = POLY+P
353      RETURN
354      END
355