1/* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17static const char *GEMM_HELPER = "
//
// getBlockNumber() - Converts a linear block ID into a (bidY, bidX) block position
//
// nBlocks : without HERK, the divisor used to split blockID into two indices;
//           with HERK, the number of blocks along each side of the triangle
// blockID : linear block number to convert
// bidY    : receives the block index along Y
// bidX    : receives the block index along X
// flag    : only read when HERK is not defined. Non-zero selects column-major
//           numbering (NT kernels), zero selects row-major numbering (TN kernels)
//
// With HERK defined, blockID counts the blocks of a triangle row by row.
// Row _j starts at column _j and holds (nBlocks - _j) blocks, so its first
// block has linear offset _j*(2*nBlocks + 1 - _j)/2. The pair (_j, _i) is
// written as (bidY, bidX), or swapped when HERK_LOWER_TRIANGLE is defined.
// blockID is expected to be below nBlocks*(nBlocks + 1)/2; for larger values
// no row matches and the outputs are not meaningful.
//
void getBlockNumber(uint nBlocks, uint blockID, uint *bidY, uint *bidX, uint flag)
{
    #ifndef HERK
    {
        if(flag) //Column Major ordering for NT kernels
        {
            *bidY = ( blockID % ( nBlocks));
            *bidX = ( blockID / ( nBlocks));
        }
        else //Row Major ordering for TN kernels
        {
            *bidX = ( blockID % ( nBlocks));
            *bidY = ( blockID / ( nBlocks));
        }
    }
    #else
    {
        // NOTE(review): volatile is kept from the original code; presumably a
        // compiler workaround - confirm before removing it.
        volatile uint _i = 0, _j = 0;

        // Row _j never starts later than blockID / nBlocks, so the search can
        // begin there instead of at row 0.
        for ( _j = (blockID / nBlocks); _j < nBlocks; _j++)
        {
            // Column of blockID if it lies in row _j
            _i = blockID - ((_j*((2* nBlocks) + 1 - _j))/2) + _j;

            // _i is unsigned, so the range test alone decides the match;
            // a separate comparison against zero would always be true.
            if ( _i < nBlocks )
            {
                break;
            }
        }
        #ifdef HERK_LOWER_TRIANGLE
            *bidY = _i;
            *bidX = _j;
        #else
            *bidY = _j;
            *bidX = _i;
        #endif
    }
    #endif
}
54
55//
56// mapWorkGroupToTileNumber() - Maps a workgroup number to a Tile position in output matrix
57// Groups the full tiles together and half-tiles together and maps the workgroup number
58// such that full tiles are processed wholly by consecutive workgroups and half-tiles are
59// processed by consecutive workgroups
60//
61// ASSUMPTION:
62//  Assumes column major numbering of workgroup
63//
64// Observation:
65//  This new grouping yielded worse results than normal column-major order.
66//  Tested with GEMM NN kernel. So, we will not be using this function.
67//  This is here just for completeness sake
68//
69void mapWorkGroupToTileNumber(uint M, uint N, uint *bidY, uint *bidX)
70{
71    uint fullTilesOnY, numTilesOnX;
72    uint relativeGroupId;
73
74    numTilesOnX = ((N-1) / ((get_local_size(0) / %WIDTH) * %ITEMX)) + 1;
75	fullTilesOnY = (M / (%WIDTH * %ITEMY));
76    if (get_group_id(0) < (numTilesOnX * fullTilesOnY) )
77    {
78	    *bidY = ( get_group_id(0) % ( fullTilesOnY));
79	    *bidX = ( get_group_id(0) / ( fullTilesOnY));
80    } else {
81        relativeGroupId = get_group_id(0) - (numTilesOnX * fullTilesOnY);
82        *bidY = fullTilesOnY;
83        *bidX = relativeGroupId;
84    }
85}
86";
87
88