1!
2! Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.
3!
4! Licensed under the Apache License, Version 2.0 (the "License");
5! you may not use this file except in compliance with the License.
6! You may obtain a copy of the License at
7!
8!     http://www.apache.org/licenses/LICENSE-2.0
9!
10! Unless required by applicable law or agreed to in writing, software
11! distributed under the License is distributed on an "AS IS" BASIS,
12! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13! See the License for the specific language governing permissions and
14! limitations under the License.
15!
16
17
18#include "mmul_dir.h"
19
20
21subroutine ftn_gather_real4( a, lda, alpha, buffer, bufrows, bufcols )
22  implicit none
23
24  integer*8 lda
25  real*4 :: a( lda,* ), alpha
26  integer :: bufrows, bufcols
27  integer i, j, ndx, ndxsave
28  real*4 :: buffer(bufrows * bufcols)
29
30  !
31  ! This routine gathers the matrix into l1 chunks. The purpose is much as it
32  ! is for the transpose case, and works much like transpose_real8()
33  !
34  !   What do the parameters mean?
35  !   buffer: buffer array
36  !   a: matrix to be gathered
37  !   bufcols: number of rows in matrix a to gather
38  !   bufrowss: number of cols in matrix a to gather
39  !   lda: number of rows in matrix a
40  !   Note that we don't care what the dimensions of a are. We assume that the
41  !   calling function has done this correctly
42  !
43
44  ndx = 0
45  do j = 1, bufcols
46     do i = 1, bufrows
47        buffer( ndx + i ) = alpha * a( i, j )
48     enddo
49     ndx = ndx + bufrows
50  enddo
51  !      write( *, * ) ( a(1, j ), j = 1, bufcols )
52  !      write( *, * )( buffer( i ), i = 1, bufrows * bufcols )
53  return
54end subroutine ftn_gather_real4
55