1! { dg-options "-O2" } 2! { dg-skip-if "AArch64 tiny code model does not support programs larger than 1MiB" {aarch64_tiny} {"*"} {""} } 3 4program strassen_matmul 5 use omp_lib 6 integer, parameter :: N = 1024 7 double precision, save :: A(N,N), B(N,N), C(N,N), D(N,N) 8 double precision :: start, end 9 10 call random_seed 11 call random_number (A) 12 call random_number (B) 13 start = omp_get_wtime () 14 C = matmul (A, B) 15 end = omp_get_wtime () 16 write(*,'(a, f10.6)') ' Time for matmul = ', end - start 17 D = 0 18 start = omp_get_wtime () 19 call strassen (A, B, D, N) 20 end = omp_get_wtime () 21 write(*,'(a, f10.6)') ' Time for Strassen = ', end - start 22 if (sqrt (sum ((C - D) ** 2)) / N .gt. 0.1) call abort 23 D = 0 24 start = omp_get_wtime () 25!$omp parallel 26!$omp single 27 call strassen (A, B, D, N) 28!$omp end single nowait 29!$omp end parallel 30 end = omp_get_wtime () 31 write(*,'(a, f10.6)') ' Time for Strassen MP = ', end - start 32 if (sqrt (sum ((C - D) ** 2)) / N .gt. 0.1) call abort 33 34contains 35 36 recursive subroutine strassen (A, B, C, N) 37 integer, intent(in) :: N 38 double precision, intent(in) :: A(N,N), B(N,N) 39 double precision, intent(out) :: C(N,N) 40 double precision :: T(N/2,N/2,7) 41 integer :: K, L 42 43 if (iand (N,1) .ne. 0 .or. N < 64) then 44 C = matmul (A, B) 45 return 46 end if 47 K = N / 2 48 L = N / 2 + 1 49!$omp task shared (A, B, T) 50 call strassen (A(:K,:K) + A(L:,L:), B(:K,:K) + B(L:,L:), T(:,:,1), K) 51!$omp end task 52!$omp task shared (A, B, T) 53 call strassen (A(L:,:K) + A(L:,L:), B(:K,:K), T(:,:,2), K) 54!$omp end task 55!$omp task shared (A, B, T) 56 call strassen (A(:K,:K), B(:K,L:) - B(L:,L:), T(:,:,3), K) 57!$omp end task 58!$omp task shared (A, B, T) 59 call strassen (A(L:,L:), B(L:,:K) - B(:K,:K), T(:,:,4), K) 60!$omp end task 61!$omp task shared (A, B, T) 62 call strassen (A(:K,:K) + A(:K,L:), B(L:,L:), T(:,:,5), K) 63!$omp end task 64!$omp task shared (A, B, T) 65 call strassen (A(L:,:K) - A(:K,:K), B(:K,:K) + B(:K,L:), T(:,:,6), K) 66!$omp end task 67!$omp task shared (A, B, T) 68 call strassen (A(:K,L:) - A(L:,L:), B(L:,:K) + B(L:,L:), T(:,:,7), K) 69!$omp end task 70!$omp taskwait 71 C(:K,:K) = T(:,:,1) + T(:,:,4) - T(:,:,5) + T(:,:,7) 72 C(L:,:K) = T(:,:,2) + T(:,:,4) 73 C(:K,L:) = T(:,:,3) + T(:,:,5) 74 C(L:,L:) = T(:,:,1) - T(:,:,2) + T(:,:,3) + T(:,:,6) 75 end subroutine strassen 76end 77