1! { dg-do compile }
2! { dg-require-effective-target vect_cond_mixed }
3! { dg-require-effective-target vect_double }
4! { dg-additional-options "-O3 -ffast-math -floop-interchange -fdump-tree-linterchange-details" }
5! vect_cond_mixed lies on x86: by default we cannot do vcond[_eq]v2div2df there, so -msse4.1 is added below.
6! { dg-additional-options "-msse4.1" { target { x86_64-*-* i?86-*-* } } }
7
! mat_times_vec: block 7-point-stencil matrix-vector product y = A*x
! on an nx*ny*nz grid with nb unknowns per grid point.  Neighbour
! indices wrap around in every direction.
!
!   y            result, shape (nb,nx,ny,nz); every element is
!                overwritten
!   x            input vector, shape (nb,nx,ny,nz); only read
!   a            nb-by-nb block applied to x at the point itself
!   axp, axm     blocks applied to x at the i+1 / i-1 neighbour
!   ayp, aym     blocks applied to x at the j+1 / j-1 neighbour
!   azp, azm     blocks applied to x at the k+1 / k-1 neighbour
!   nb,nx,ny,nz  array extents
8        subroutine mat_times_vec(y,x,a,axp,ayp,azp,axm,aym,azm,
9     $  nb,nx,ny,nz)
10        implicit none
! kit is declared here but never referenced below.
11        integer nb,nx,ny,nz,i,j,k,m,l,kit,im1,ip1,jm1,jp1,km1,kp1
12
13        real*8 y(nb,nx,ny,nz),x(nb,nx,ny,nz)
14
15        real*8 a(nb,nb,nx,ny,nz),
16     1  axp(nb,nb,nx,ny,nz),ayp(nb,nb,nx,ny,nz),azp(nb,nb,nx,ny,nz),
17     2  axm(nb,nb,nx,ny,nz),aym(nb,nb,nx,ny,nz),azm(nb,nb,nx,ny,nz)
18
19
! Sweep the grid.  In each direction the "minus" index maps 1 to the
! extent and the "plus" index maps the extent back to 1.
20      do k=1,nz
21         km1=mod(k+nz-2,nz)+1
22         kp1=mod(k,nz)+1
23         do j=1,ny
24            jm1=mod(j+ny-2,ny)+1
25            jp1=mod(j,ny)+1
26            do i=1,nx
27               im1=mod(i+nx-2,nx)+1
28               ip1=mod(i,nx)+1
! Per grid point: clear y(l,i,j,k), then accumulate over m the seven
! nb-by-nb blocks applied to x at the point and its six neighbours.
29               do l=1,nb
30                  y(l,i,j,k)=0.0d0
! l is the leftmost (fastest-varying) subscript of y and of every
! coefficient array, while this innermost m loop walks their second
! subscript.
! NOTE(review): presumably this l/m pair is what the dg-final
! "is interchanged" scan targets -- confirm against the dump.
31                  do m=1,nb
32                     y(l,i,j,k)=y(l,i,j,k)+
33     1               a(l,m,i,j,k)*x(m,i,j,k)+
34     2               axp(l,m,i,j,k)*x(m,ip1,j,k)+
35     3               ayp(l,m,i,j,k)*x(m,i,jp1,k)+
36     4               azp(l,m,i,j,k)*x(m,i,j,kp1)+
37     5               axm(l,m,i,j,k)*x(m,im1,j,k)+
38     6               aym(l,m,i,j,k)*x(m,i,jm1,k)+
39     7               azm(l,m,i,j,k)*x(m,i,j,km1)
40                  enddo
41               enddo
42            enddo
43         enddo
44        enddo
45        return
46        end
47
48! verify we can vectorize the inner loop after interchange
49! { dg-final { scan-tree-dump-times "is interchanged" 1 "linterchange" } }
50! { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } }
51