1! { dg-do compile } 2! { dg-require-effective-target vect_double } 3! { dg-options "-O3 --param vect-max-peeling-for-alignment=0 -fpredictive-commoning -fdump-tree-pcom-details" } 4! { dg-additional-options "-mprefer-avx128" { target { i?86-*-* x86_64-*-* } } } 5! { dg-additional-options "-mzarch" { target { s390*-*-* } } } 6 7******* RESID COMPUTES THE RESIDUAL: R = V - AU 8* 9* THIS SIMPLE IMPLEMENTATION COSTS 27A + 4M PER RESULT, WHERE 10* A AND M DENOTE THE COSTS OF ADDITION (OR SUBTRACTION) AND 11* MULTIPLICATION, RESPECTIVELY. BY USING SEVERAL TWO-DIMENSIONAL 12* BUFFERS ONE CAN REDUCE THIS COST TO 13A + 4M IN THE GENERAL 13* CASE, OR 10A + 3M WHEN THE COEFFICIENT A(1) IS ZERO. 14* 15 SUBROUTINE RESID(U,V,R,N,A) 16 INTEGER N 17 REAL*8 U(N,N,N),V(N,N,N),R(N,N,N),A(0:3) 18 INTEGER I3, I2, I1 19C 20 DO 600 I3=2,N-1 21 DO 600 I2=2,N-1 22 DO 600 I1=2,N-1 23 600 R(I1,I2,I3)=V(I1,I2,I3) 24 > -A(0)*( U(I1, I2, I3 ) ) 25 > -A(1)*( U(I1-1,I2, I3 ) + U(I1+1,I2, I3 ) 26 > + U(I1, I2-1,I3 ) + U(I1, I2+1,I3 ) 27 > + U(I1, I2, I3-1) + U(I1, I2, I3+1) ) 28 > -A(2)*( U(I1-1,I2-1,I3 ) + U(I1+1,I2-1,I3 ) 29 > + U(I1-1,I2+1,I3 ) + U(I1+1,I2+1,I3 ) 30 > + U(I1, I2-1,I3-1) + U(I1, I2+1,I3-1) 31 > + U(I1, I2-1,I3+1) + U(I1, I2+1,I3+1) 32 > + U(I1-1,I2, I3-1) + U(I1-1,I2, I3+1) 33 > + U(I1+1,I2, I3-1) + U(I1+1,I2, I3+1) ) 34 > -A(3)*( U(I1-1,I2-1,I3-1) + U(I1+1,I2-1,I3-1) 35 > + U(I1-1,I2+1,I3-1) + U(I1+1,I2+1,I3-1) 36 > + U(I1-1,I2-1,I3+1) + U(I1+1,I2-1,I3+1) 37 > + U(I1-1,I2+1,I3+1) + U(I1+1,I2+1,I3+1) ) 38C 39 RETURN 40 END 41! we want to check that predictive commoning did something on the 42! vectorized loop. If vector factor is 2, the vectorized loop can 43! be predictive commoned, we check if predictive commoning PHI node 44! is created with vector(2) type. 45! { dg-final { scan-tree-dump "Executing predictive commoning without unrolling" "pcom" { xfail vect_variable_length } } } 46! { dg-final { scan-tree-dump "vectp_u.*__lsm.* = PHI <.*vectp_u.*__lsm" "pcom" { xfail vect_variable_length } } } 47