/* ************************************************************************
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ************************************************************************/

static const char *GEMM_HELPER = "
//
// getBlockNumber() - Converts a linear block ID into a (bidY, bidX) block coordinate
//
// Parameters:
//   nBlocks - number of blocks per row/column of the block grid; it is used as a
//             divisor (division and modulo), so it must be non-zero
//   blockID - linear block index to decompose
//   bidY    - output: Y coordinate of the block
//   bidX    - output: X coordinate of the block
//   flag    - read only when HERK is not defined: non-zero makes bidY the fastest
//             varying coordinate, zero makes bidX the fastest varying coordinate
//
// When HERK is defined, blockID is instead interpreted as an index into a
// triangular arrangement of blocks and flag is ignored.
//
void getBlockNumber(uint nBlocks, uint blockID, uint *bidY, uint *bidX, uint flag)
{
    #ifndef HERK
    {
        if(flag) //Column Major ordering for NT kernels
        {
            *bidY = ( blockID % ( nBlocks));
            *bidX = ( blockID / ( nBlocks));
        }
        else //Row Major ordering for TN kernels
        {
            *bidX = ( blockID % ( nBlocks));
            *bidY = ( blockID / ( nBlocks));
        }
    }
    #else
    {
        // NOTE(review): volatile is presumably a workaround for a compiler issue;
        // confirm before removing it.
        volatile uint _i = 0, _j = 0;
        // Search for the row _j that contains blockID, starting at blockID / nBlocks.
        // (_j * (2 * nBlocks + 1 - _j)) / 2 is the number of blocks held by the first
        // _j rows of a triangle whose row r holds (nBlocks - r) blocks, so _i is the
        // position of blockID inside row _j, shifted by _j.
        for ( _j = (blockID / nBlocks); _j < nBlocks; _j++)
        {
            _i = blockID - ((_j*((2* nBlocks) + 1 - _j))/2) + _j;
            // _i is unsigned, so the (_i >= 0) test is always true; only the
            // upper bound can reject a candidate row.
            if ( _i < nBlocks && ( _i >= 0) )
            {
                break;
            }
        }
        // No range check is made on blockID: if no row is accepted, the values
        // left in _i and _j are still written to the outputs below.
        // HERK_LOWER_TRIANGLE swaps which of _i and _j becomes the Y coordinate.
        #ifdef HERK_LOWER_TRIANGLE
            *bidY = _i;
            *bidX = _j;
        #else
            *bidY = _j;
            *bidX = _i;
        #endif
    }
    #endif
}

//
// mapWorkGroupToTileNumber() - Maps a workgroup number to a tile position in the output matrix.
// Groups the full tiles together and the half-tiles together, and maps the workgroup number
// such that full tiles are processed wholly by consecutive workgroups and half-tiles are
// processed by consecutive workgroups.
//
// Parameters:
//   M    - extent used to count the full tiles along Y
//   N    - extent used to count the tiles along X; N - 1 is computed on a uint,
//          so N is assumed to be at least 1
//   bidY - output: Y position of the tile for this workgroup
//   bidX - output: X position of the tile for this workgroup
//
// The workgroup number is read from get_group_id(0). The WIDTH, ITEMX and ITEMY
// tokens are template placeholders, presumably substituted before this source is
// compiled (TODO confirm against the kernel generator).
//
// ASSUMPTION:
//    Assumes column-major numbering of workgroups.
//
// Observation:
//    This new grouping yielded worse results than normal column-major order.
//    Tested with the GEMM NN kernel. So, we will not be using this function.
//    It is kept here just for the sake of completeness.
//
void mapWorkGroupToTileNumber(uint M, uint N, uint *bidY, uint *bidX)
{
    uint fullTilesOnY, numTilesOnX;
    uint relativeGroupId;

    // Ceiling of N divided by the tile width along X (valid for N >= 1).
    numTilesOnX = ((N-1) / ((get_local_size(0) / %WIDTH) * %ITEMX)) + 1;
    // Number of complete tiles along Y; any remainder of M is not counted here.
    fullTilesOnY = (M / (%WIDTH * %ITEMY));
    if (get_group_id(0) < (numTilesOnX * fullTilesOnY) )
    {
        // Workgroup falls in the full-tile region: bidY varies fastest.
        *bidY = ( get_group_id(0) % ( fullTilesOnY));
        *bidX = ( get_group_id(0) / ( fullTilesOnY));
    } else {
        // Remaining workgroups are laid out along X on the row that follows
        // the last full-tile row.
        relativeGroupId = get_group_id(0) - (numTilesOnX * fullTilesOnY);
        *bidY = fullTilesOnY;
        *bidX = relativeGroupId;
    }
}
";
