1!
2! Copyright (c) 2012-2018, NVIDIA CORPORATION.  All rights reserved.
3!
4! Licensed under the Apache License, Version 2.0 (the "License");
5! you may not use this file except in compliance with the License.
6! You may obtain a copy of the License at
7!
8!     http://www.apache.org/licenses/LICENSE-2.0
9!
10! Unless required by applicable law or agreed to in writing, software
11! distributed under the License is distributed on an "AS IS" BASIS,
12! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13! See the License for the specific language governing permissions and
14! limitations under the License.
15!
16
17
18! directives.h -- contains preprocessor directives for F90 rte files
19
20#include "mmul_dir.h"
21
22subroutine ftn_gather_cmplx16( ta, a, lda, alpha, buffer, bufrows, bufcols )
23  implicit none
24
25  integer*8 lda
26  complex*16 :: a( lda,* ), alpha
27  integer :: bufrows, bufcols
28  integer :: i, j, ndx, ndxsave
29  complex*16 :: buffer(bufrows * bufcols)
30  integer   :: ta
31  !
32  ! This routine gathers the matrix into l1 chunks. The purpose is much as it
33  ! is for the transpose case, and works much like transpose_real8()
34  !
35  !   What do the parameters mean?
36  !   buffer: buffer array
37  !   a: matrix to be gathered
38  !   bufcols: number of rows in matrix a to gather
39  !   bufrowss: number of cols in matrix a to gather
40  !   lda: number of rows in matrix a
41  !   Note that we don't care what the dimensions of a are. We assume that the
42  !   calling function has done this correctly
43  !
44
45  ndx = 0
46  if( ta .eq. 2 )then ! conjugate the data
47     if( alpha .eq. ( 1.0, 0.0 ) ) then
48        do j = 1, bufcols
49           do i = 1, bufrows
50              buffer( ndx + i ) = conjg(  a( i, j ) )
51           enddo
52           ndx = ndx + bufrows
53        enddo
54     else
55        do j = 1, bufcols
56           do i = 1, bufrows
57              buffer( ndx + i ) = alpha * conjg(  a( i, j ) )
58           enddo
59           ndx = ndx + bufrows
60        enddo
61     endif
62  else
63     if( alpha .eq. ( 1.0, 0.0 ) ) then
64        do j = 1, bufcols
65           do i = 1, bufrows
66              buffer( ndx + i ) = a( i, j )
67           enddo
68           ndx = ndx + bufrows
69        enddo
70     else
71        do j = 1, bufcols
72           do i = 1, bufrows
73              buffer( ndx + i ) = alpha * a( i, j )
74           enddo
75           ndx = ndx + bufrows
76        enddo
77     endif
78  endif
79     !      write( *, * ) ( a(1, j ), j = 1, bufcols )
80  !      write( *, * )( buffer( i ), i = 1, bufrows * bufcols )
81  return
82end subroutine ftn_gather_cmplx16
83