1! 2! Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved. 3! 4! Licensed under the Apache License, Version 2.0 (the "License"); 5! you may not use this file except in compliance with the License. 6! You may obtain a copy of the License at 7! 8! http://www.apache.org/licenses/LICENSE-2.0 9! 10! Unless required by applicable law or agreed to in writing, software 11! distributed under the License is distributed on an "AS IS" BASIS, 12! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13! See the License for the specific language governing permissions and 14! limitations under the License. 15! 16 17 18! directives.h -- contains preprocessor directives for F90 rte files 19 20#include "mmul_dir.h" 21 22subroutine ftn_gather_cmplx16( ta, a, lda, alpha, buffer, bufrows, bufcols ) 23 implicit none 24 25 integer*8 lda 26 complex*16 :: a( lda,* ), alpha 27 integer :: bufrows, bufcols 28 integer :: i, j, ndx, ndxsave 29 complex*16 :: buffer(bufrows * bufcols) 30 integer :: ta 31 ! 32 ! This routine gathers the matrix into l1 chunks. The purpose is much as it 33 ! is for the transpose case, and works much like transpose_real8() 34 ! 35 ! What do the parameters mean? 36 ! buffer: buffer array 37 ! a: matrix to be gathered 38 ! bufcols: number of rows in matrix a to gather 39 ! bufrowss: number of cols in matrix a to gather 40 ! lda: number of rows in matrix a 41 ! Note that we don't care what the dimensions of a are. We assume that the 42 ! calling function has done this correctly 43 ! 44 45 ndx = 0 46 if( ta .eq. 2 )then ! conjugate the data 47 if( alpha .eq. ( 1.0, 0.0 ) ) then 48 do j = 1, bufcols 49 do i = 1, bufrows 50 buffer( ndx + i ) = conjg( a( i, j ) ) 51 enddo 52 ndx = ndx + bufrows 53 enddo 54 else 55 do j = 1, bufcols 56 do i = 1, bufrows 57 buffer( ndx + i ) = alpha * conjg( a( i, j ) ) 58 enddo 59 ndx = ndx + bufrows 60 enddo 61 endif 62 else 63 if( alpha .eq. ( 1.0, 0.0 ) ) then 64 do j = 1, bufcols 65 do i = 1, bufrows 66 buffer( ndx + i ) = a( i, j ) 67 enddo 68 ndx = ndx + bufrows 69 enddo 70 else 71 do j = 1, bufcols 72 do i = 1, bufrows 73 buffer( ndx + i ) = alpha * a( i, j ) 74 enddo 75 ndx = ndx + bufrows 76 enddo 77 endif 78 endif 79 ! write( *, * ) ( a(1, j ), j = 1, bufcols ) 80 ! write( *, * )( buffer( i ), i = 1, bufrows * bufcols ) 81 return 82end subroutine ftn_gather_cmplx16 83