1 !
2 ! Copyright (c) 2012-2018, NVIDIA CORPORATION.  All rights reserved.
3 !
4 ! Licensed under the Apache License, Version 2.0 (the "License");
5 ! you may not use this file except in compliance with the License.
6 ! You may obtain a copy of the License at
7 !
8 !     http://www.apache.org/licenses/LICENSE-2.0
9 !
10 ! Unless required by applicable law or agreed to in writing, software
11 ! distributed under the License is distributed on an "AS IS" BASIS,
12 ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ! See the License for the specific language governing permissions and
14 ! limitations under the License.
15 !
16 
17 
18 #include "mmul_dir.h"
19 
20 
21   !
22   ! Global variables
23   !
24   integer*8 :: mra, ncb, kab, lda, ldb, ldc
25   real*4, dimension( lda, * )::a
26   real*4, dimension( ldb, * )::b
27   real*4, dimension( ldc, * )::c
28   real*4 :: alpha, beta, one = 1.0
29 
30   !
31   ! local variables
32   !
33   integer*8  :: colsa, rowsa, rowsb, colsb
34   integer*8  :: i, j, jb, k, ak, bk, jend
35   integer*8  :: ar, ar_sav,  ac, ac_sav, br, bc
36   integer*8  :: ndxa, ndxasav
37   integer*8  :: ndxb, ndxbsav, ndxb0, ndxb1, ndxb2, ndxb3
38   integer*8  :: colachunk, colachunks, colbchunk, colbchunks
39   integer*8  :: rowchunk, rowchunks
40   integer*8  :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end
41   integer*8  :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end
42   integer*8  :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav
43   real*4   :: temp, temp0, temp1, temp2, temp3
44   real*4   :: bufatemp, bufbtemp
45   real*8   :: time_start, time_end, ttime, all_time
46 
47   integer, parameter :: bufrows = 512, bufcols = 8192
48 !  integer, parameter :: bufrows = 2, bufcols = 3
49 !  real*4, dimension( bufrows * bufcols ) :: buffera, bufferb
50   real*4, allocatable, dimension(:) :: buffera, bufferb
51 
52   !Minimun number of multiplications needed to activate the blocked optimization.
53 #ifdef TARGET_X8664
54   integer, parameter :: min_blocked_mult = 5000
55 #elif TARGET_LINUX_POWER
56   integer, parameter :: min_blocked_mult = 10000
57 #else
58   #warning untuned matrix multiplication parameter
59   integer, parameter :: min_blocked_mult = 5000
60 #endif
61