!
! Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved.
!
! Licensed under the Apache License, Version 2.0 (the "License");
! you may not use this file except in compliance with the License.
! You may obtain a copy of the License at
!
!     http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing, software
! distributed under the License is distributed on an "AS IS" BASIS,
! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
! See the License for the specific language governing permissions and
! limitations under the License.
!


#include "mmul_dir.h"


!
! Global variables
!
! NOTE(review): these look like the dummy arguments of the routine that
! includes this fragment -- confirm against the including source.
!
! Extents and leading dimensions (64-bit integers).  The leading dimensions
! are declared before the arrays because they appear in the array bounds.
integer*8 :: mra, ncb, kab
integer*8 :: lda, ldb, ldc

! Single-precision matrices, assumed-size in the second dimension.
real*4 :: a(lda, *)
real*4 :: b(ldb, *)
real*4 :: c(ldc, *)

! Single-precision scalars.  "one" carries an initializer and therefore has
! the SAVE attribute.
real*4 :: alpha, beta
real*4 :: one = 1.0

!
! local variables
!
! Matrix extents.
integer*8 :: colsa, rowsa, rowsb, colsb

! Counters and limits.
integer*8 :: i, j, jb, k
integer*8 :: ak, bk, jend

! Row/column positions and their saved copies.
integer*8 :: ar, ar_sav
integer*8 :: ac, ac_sav
integer*8 :: br, bc

! Linear indices and their saved copies.
integer*8 :: ndxa, ndxasav
integer*8 :: ndxb, ndxbsav
integer*8 :: ndxb0, ndxb1, ndxb2, ndxb3

! Chunk bookkeeping.
integer*8 :: colachunk, colachunks
integer*8 :: colbchunk, colbchunks
integer*8 :: rowchunk, rowchunks
integer*8 :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end
integer*8 :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end

! Buffer positions and their saved copies.
integer*8 :: bufr, bufr_sav
integer*8 :: bufca, bufca_sav
integer*8 :: bufcb, bufcb_sav

! Single-precision temporaries.
real*4 :: temp, temp0, temp1, temp2, temp3
real*4 :: bufatemp, bufbtemp

! Double-precision timing values.
real*8 :: time_start, time_end, ttime, all_time

! Buffer dimensions.  The commented-out lines are the alternative tiny sizes
! and the fixed-size buffer declaration kept from the original source.
integer, parameter :: bufrows = 512
integer, parameter :: bufcols = 8192
! integer, parameter :: bufrows = 2, bufcols = 3
! real*4, dimension( bufrows * bufcols ) :: buffera, bufferb

! Rank-1 single-precision buffers; this fragment only declares them.
real*4, allocatable :: buffera(:)
real*4, allocatable :: bufferb(:)

! Minimum number of multiplications needed to activate the blocked optimization.
! Per-target value of min_blocked_mult, chosen at preprocessing time.
! Both target macros are tested with defined() so that a macro defined
! without a value (a bare "#define TARGET_LINUX_POWER") selects its branch
! instead of producing an empty "#elif" expression.  Any other target reuses
! the 5000 value and emits a build-time warning that it is untuned.
#if defined(TARGET_X8664)
integer, parameter :: min_blocked_mult = 5000
#elif defined(TARGET_LINUX_POWER)
integer, parameter :: min_blocked_mult = 10000
#else
#warning untuned matrix multiplication parameter
integer, parameter :: min_blocked_mult = 5000
#endif
