/*******************************************************************************
* Copyright 2019-2021 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
16 
17 #if defined(FP16) || defined(FP32) || defined(F16F16F32) || defined(BF16BF16F32)
18 INST_TEST_CASE(TestGEMM,
19         test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {}, true,
20                 dnnl_invalid_arguments},
21         test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {}, true,
22                 dnnl_invalid_arguments},
23         test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {}, true,
24                 dnnl_invalid_arguments},
25         test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {}, true,
26                 dnnl_invalid_arguments},
27 
28         test_params {'N', 'N', 1, 1, 1, 1.0, 0.0, 4, 4, 4},
29         test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
30         test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
31         test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
32         test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
33         test_params {'N', 'n', 31, 21, 11, 2.0, 1.5, 61, 51, 81},
34         test_params {'n', 'T', 31, 21, 11, 2.0, 1.5, 61, 51, 81},
35         test_params {'T', 'N', 31, 21, 11, 2.0, 1.5, 61, 51, 81},
36         test_params {'t', 't', 31, 21, 11, 2.0, 1.5, 61, 51, 81},
37         test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100},
38         test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100},
39         test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100},
40         test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100},
41         test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2},
42         test_params {'t', 't', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2},
43 
44         make_test_params_with_offset(
45                 {1, 2, 3}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, 100, 100, 100),
46         make_test_params_with_offset(
47                 {30, 20, 10}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 100, 100, 100),
48 
49         test_params {'n', 'n', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000},
50         test_params {'n', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000},
51         test_params {'t', 'n', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000},
52         test_params {'t', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000},
53         test_params {'n', 't', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000},
54         test_params {'n', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000},
55         test_params {'t', 't', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000},
56         test_params {'t', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000});
57 
58 CPU_INST_TEST_CASE(TestGEMV,
59         test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1, 1},
60         test_params {'n', 'n', 1, 3000, 2000, 1.0f, 0.0f, 2000, 3000, 3000},
61         test_params {'n', 'n', 1, 300, 8000, 1.0f, 0.0f, 8000, 300, 300},
62         test_params {'t', 'n', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1, 1},
63         test_params {'t', 'n', 200, 1, 8000, 1.0f, 0.0f, 200, 1, 1},
64         test_params {'t', 'n', 1, 3000, 2000, 1.0f, 0.0f, 1, 3000, 3000},
65         test_params {'t', 'n', 1, 300, 8000, 1.0f, 0.0f, 1, 300, 300},
66         test_params {'n', 't', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1000, 1},
67         test_params {'n', 't', 1, 3000, 2000, 1.0f, 0.0f, 2000, 2000, 3000},
68         test_params {'t', 't', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1000, 1},
69         test_params {'t', 't', 200, 1, 8000, 1.0f, 0.0f, 200, 8000, 1},
70         test_params {'t', 't', 1, 3000, 2000, 1.0f, 0.0f, 1, 2000, 3000},
71 
72         test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1010, 1, 30},
73         test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1010, 20, 1},
74         test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1010, 20, 30},
75         test_params {'n', 'n', 1, 3000, 2000, 1.0f, 0.0f, 2010, 3010, 3010},
76         test_params {'n', 'n', 1, 300, 8000, 1.0f, 0.0f, 8010, 310, 310},
77         test_params {'t', 'n', 2000, 1, 1000, 1.0f, 0.0f, 2010, 20, 30},
78         test_params {'t', 'n', 200, 1, 8000, 1.0f, 0.0f, 210, 20, 30},
79         test_params {'t', 'n', 1, 3000, 2000, 1.0f, 0.0f, 20, 3010, 3010},
80         test_params {'t', 'n', 1, 300, 8000, 1.0f, 0.0f, 20, 310, 310},
81         test_params {'n', 't', 2000, 1, 1000, 1.0f, 0.0f, 1010, 1010, 20},
82         test_params {'n', 't', 1, 3000, 2000, 1.0f, 0.0f, 2010, 2010, 3010},
83         test_params {'t', 't', 2000, 1, 1000, 1.0f, 0.0f, 2010, 1010, 20},
84         test_params {'t', 't', 200, 1, 8000, 1.0f, 0.0f, 210, 8010, 20},
85         test_params {'t', 't', 1, 3000, 2000, 1.0f, 0.0f, 20, 2010, 3010},
86 
87         test_params {'n', 'n', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1, 1},
88         test_params {'n', 'n', 1, 3000, 2000, 1.0f, 1.0f, 2000, 3000, 3000},
89         test_params {'n', 'n', 1, 300, 8000, 1.0f, 1.0f, 8000, 300, 300},
90         test_params {'t', 'n', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1, 1},
91         test_params {'t', 'n', 200, 1, 8000, 1.0f, 1.0f, 200, 1, 1},
92         test_params {'t', 'n', 1, 3000, 2000, 1.0f, 1.0f, 1, 3000, 3000},
93         test_params {'t', 'n', 1, 300, 8000, 1.0f, 1.0f, 1, 300, 300},
94         test_params {'n', 't', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1000, 1},
95         test_params {'n', 't', 1, 3000, 2000, 1.0f, 1.0f, 2000, 2000, 3000},
96         test_params {'t', 't', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1000, 1},
97         test_params {'t', 't', 200, 1, 8000, 1.0f, 1.0f, 200, 8000, 1},
98         test_params {'t', 't', 1, 3000, 4000, 1.0f, 1.0f, 1, 4000, 3000});
99 
100 /**
101  * These cases are used to test the small-N avx-512 sgemm TN kernels.
102  * Note: The kernels assume a column major layout while the external
103  * APIs assume row major layout, so the M/N and transA/transB values
104  * are swapped.
105  */
106 CPU_INST_TEST_CASE(TestGEMM_smalln,
107         test_params {'n', 't', 5, 512, 512, 1.0f, 1.0f, 512, 512, 512},
108         test_params {'n', 't', 5, 512, 1536, 1.0f, 1.0f, 1536, 1536, 512},
109         test_params {'n', 't', 5, 512, 2048, 1.0f, 1.0f, 2048, 2048, 512},
110         test_params {'n', 't', 5, 2048, 512, 1.0f, 1.0f, 512, 512, 2048},
111         test_params {'n', 't', 7, 512, 512, 0.0f, 1.0f, 512, 512, 512},
112         test_params {'n', 't', 7, 512, 1536, 1.0f, 0.0f, 1536, 1536, 512},
113         test_params {'n', 't', 7, 512, 2048, 0.5f, 0.5f, 2048, 2048, 512},
114         test_params {'n', 't', 7, 2048, 512, 1.0f, 1.0f, 512, 512, 2048},
115         test_params {'n', 't', 4, 512, 512, 1.0f, 1.0f, 512, 512, 512},
116         test_params {'n', 't', 4, 512, 1536, 1.0f, 1.0f, 1536, 1536, 512},
117         test_params {'n', 't', 4, 512, 2048, 1.0f, 1.0f, 2048, 2048, 512},
118         test_params {'n', 't', 4, 2048, 512, 1.0f, 1.0f, 512, 512, 2048},
119         test_params {'n', 't', 8, 512, 512, 1.0f, 1.0f, 512, 512, 512},
120         test_params {'n', 't', 8, 512, 1536, 1.0f, 1.0f, 1536, 1536, 512},
121         test_params {'n', 't', 8, 512, 2048, 1.0f, 1.0f, 2048, 2048, 512},
122         test_params {'n', 't', 8, 2048, 512, 1.0f, 1.0f, 512, 512, 2048});
123 
#if defined(FP32) || defined(BF16BF16F32)
/*
 * TestGEMM_packed parameter sets, compiled only when FP32 or BF16BF16F32 is
 * defined.
 *
 * Each case supplies a pair of booleans, either as the twelfth test_params
 * field or as the first argument of make_test_params_pack.
 * NOTE(review): presumably {pack A, pack B} -- confirm against the
 * pack-params type, which is not defined in this file.
 */
INST_TEST_CASE(TestGEMM_packed,
        /* Cases ending in `true, dnnl_invalid_arguments` (presumably expected
         * to be rejected). */
        test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {false, true},
                true, dnnl_invalid_arguments},
        test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {true, false},
                true, dnnl_invalid_arguments},
        test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {true, true},
                true, dnnl_invalid_arguments},
        test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {true, true},
                true, dnnl_invalid_arguments},

        /* Small and medium cases. */
        make_test_params_pack(
                {true, false}, 'N', 'n', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81),
        make_test_params_pack(
                {false, true}, 'n', 'T', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81),
        make_test_params_pack(
                {true, false}, 'T', 'N', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81),
        make_test_params_pack(
                {true, true}, 't', 't', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81),
        make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f,
                100, 100, 100),
        make_test_params_pack(
                {true, true}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 100, 100, 100),
        make_test_params_pack(
                {true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f, 10000, 2, 2),
        make_test_params_pack(
                {true, true}, 'n', 'n', 100, 1, 100, 1.0f, 2.0f, 100, 100, 100),
        make_test_params_pack({true, false}, 'n', 'n', 1, 100, 100, 1.0f, 2.0f,
                100, 100, 100),
        make_test_params_pack({false, true}, 'n', 'n', 1, 100, 100, 1.0f, 2.0f,
                100, 100, 100),

        /* 3000-sized cases with {true, false}. */
        make_test_params_pack({true, false}, 'n', 'n', 3000, 3000, 3000, 1.0f,
                2.0f, 3000, 3000, 3000),
        make_test_params_pack({true, false}, 't', 'n', 3000, 3000, 3000, 1.0f,
                0.0f, 3000, 3000, 3000),
        make_test_params_pack({true, false}, 'n', 't', 3000, 3000, 3000, 1.0f,
                1.0f, 3000, 3000, 3000),
        make_test_params_pack({true, false}, 't', 't', 3000, 3000, 3000, 1.0f,
                2.0f, 3000, 3000, 3000),

        /* Large cases with {false, true} or {true, true}, including
         * non-square shapes. */
        make_test_params_pack({false, true}, 'n', 'n', 200, 20000, 2000, 1.0f,
                2.0f, 2000, 20000, 20000),
        make_test_params_pack({false, true}, 'n', 'n', 2000, 2000, 2000, 1.0f,
                2.0f, 2000, 2000, 2000),
        make_test_params_pack({true, true}, 'n', 'n', 2000, 5000, 2000, 1.0f,
                2.0f, 2000, 5000, 5000),
        make_test_params_pack({true, true}, 'n', 'n', 5000, 100, 2000, 1.0f,
                2.0f, 2000, 100, 100),
        make_test_params_pack({false, true}, 't', 'n', 2000, 2000, 2000, 1.0f,
                0.0f, 2000, 2000, 2000),
        make_test_params_pack({false, true}, 't', 'n', 2000, 5000, 2000, 1.0f,
                2.0f, 2000, 5000, 5000),
        make_test_params_pack({false, true}, 't', 'n', 5000, 100, 2000, 1.0f,
                2.0f, 5000, 100, 100),
        make_test_params_pack({false, true}, 'n', 't', 2000, 2000, 2000, 1.0f,
                1.0f, 2000, 2000, 2000),
        make_test_params_pack({false, true}, 't', 't', 2000, 2000, 2000, 1.0f,
                2.0f, 2000, 2000, 2000),
        make_test_params_pack({true, true}, 't', 't', 2000, 5000, 2000, 1.0f,
                2.0f, 2000, 2000, 5000),
        make_test_params_pack({true, true}, 't', 't', 5000, 100, 2000, 1.0f,
                2.0f, 5000, 2000, 100),

        /* Cases whose fifth value is 8000. */
        make_test_params_pack({true, false}, 'n', 'n', 150, 150, 8000, 1.0f,
                3.0f, 8000, 150, 150),
        make_test_params_pack({true, true}, 'n', 't', 200, 200, 8000, 1.0f,
                3.0f, 8000, 8000, 200),
        make_test_params_pack({false, true}, 't', 'n', 200, 300, 8000, 1.0f,
                3.0f, 200, 300, 300));
#endif
195 
196 #elif defined(BF16BF16BF16)
197 
198 INST_TEST_CASE(TestGEMM,
199         test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {}, true,
200                 dnnl_invalid_arguments},
201         test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {}, true,
202                 dnnl_invalid_arguments},
203         test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {}, true,
204                 dnnl_invalid_arguments},
205         test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {}, true,
206                 dnnl_invalid_arguments},
207 
208         test_params {'N', 'N', 1, 1, 1, 1.0, 0.0, 4, 4, 4},
209         test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
210         test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
211         test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
212         test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
213         test_params {'N', 'n', 31, 21, 11, 2.5, 1.5, 61, 51, 81},
214         test_params {'n', 'T', 31, 21, 11, 2.5, 1.5, 61, 51, 81},
215         test_params {'T', 'N', 31, 21, 11, 2.5, 1.5, 61, 51, 81},
216         test_params {'t', 't', 31, 21, 11, 2.5, 1.5, 61, 51, 81},
217         test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100},
218         test_params {'n', 't', 100, 2, 58, 1.0, 2.0, 100, 100, 100},
219         test_params {'t', 'n', 2, 100, 61, 1.0, 2.0, 100, 100, 100},
220         test_params {'t', 't', 2, 100, 60, 1.0, 2.0, 100, 100, 100},
221         test_params {'n', 'n', 2, 2, 11, 1.0, -1.0, 20, 2, 2},
222         test_params {'t', 't', 2, 2, 11, 1.0, -1.0, 2, 20, 2},
223 
224         make_test_params_with_offset(
225                 {1, 2, 3}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, 100, 100, 100),
226         make_test_params_with_offset(
227                 {30, 20, 10}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 100, 100, 100),
228 
229         test_params {'n', 'n', 2000, 2000, 20, 1.0, 0.0, 20, 2000, 2000},
230         test_params {'n', 'n', 3000, 3000, 30, 1.0, 0.0, 30, 3000, 3000},
231         test_params {'t', 'n', 2000, 2000, 20, 1.0, 0.0, 2000, 2000, 2000},
232         test_params {'t', 'n', 3000, 3000, 30, 1.0, 0.0, 3000, 3000, 3000},
233         test_params {'n', 't', 2000, 2000, 20, 1.0, 0.0, 20, 20, 2000},
234         test_params {'n', 't', 3000, 3000, 30, 1.0, 0.0, 30, 30, 3000},
235         test_params {'t', 't', 2000, 2000, 20, 1.0, 0.0, 2000, 20, 2000},
236         test_params {'t', 't', 3000, 3000, 30, 1.0, 0.0, 3000, 30, 3000});
237 
238 #else
239 constexpr test_igemm_params fix_use_oc = {'F', false, false, true};
240 constexpr test_igemm_params col_use_oc = {'C', false, false, true};
241 constexpr test_igemm_params row_use_oc = {'R', false, false, true};
242 
243 constexpr test_igemm_params fix_use_all_offsets = {'F', true, true, true};
244 constexpr test_igemm_params col_use_all_offsets = {'C', true, true, true};
245 constexpr test_igemm_params row_use_all_offsets = {'R', true, true, true};
246 
247 constexpr test_igemm_params fix_no_offsets = {'F', false, false, false};
248 constexpr test_igemm_params col_no_offsets = {'C', false, false, false};
249 constexpr test_igemm_params row_no_offsets = {'R', false, false, false};
250 
251 INST_TEST_CASE(TestGEMM_expected_failures,
252         test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {}, true,
253                 dnnl_invalid_arguments},
254         test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {}, true,
255                 dnnl_invalid_arguments},
256         test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {}, true,
257                 dnnl_invalid_arguments},
258         test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {}, true,
259                 dnnl_invalid_arguments},
260 
261         test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, fix_use_oc, {}, true,
262                 dnnl_invalid_arguments},
263         test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, fix_use_oc, {}, true,
264                 dnnl_invalid_arguments},
265         test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, fix_use_oc, {}, true,
266                 dnnl_invalid_arguments},
267         test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, fix_use_oc, {}, true,
268                 dnnl_invalid_arguments},
269 
270         test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, fix_use_all_offsets,
271                 {}, true, dnnl_invalid_arguments},
272         test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, fix_use_all_offsets,
273                 {}, true, dnnl_invalid_arguments},
274         test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, fix_use_all_offsets,
275                 {}, true, dnnl_invalid_arguments},
276         test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, fix_use_all_offsets,
277                 {}, true, dnnl_invalid_arguments},
278 
279         test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {true, true},
280                 true, dnnl_invalid_arguments},
281         test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {false, true},
282                 true, dnnl_invalid_arguments},
283         test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {true, false},
284                 true, dnnl_invalid_arguments},
285         test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {false, true},
286                 true, dnnl_invalid_arguments});
287 
288 INST_TEST_CASE(TestGEMM_general_cases_fix_offset,
289         test_params {'N', 'n', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc},
290         test_params {'n', 'T', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc},
291         test_params {'T', 'N', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc},
292         test_params {'t', 't', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc},
293         test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc},
294         test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc},
295         test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc},
296         test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc},
297         test_params {
298                 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, fix_use_oc},
299         test_params {
300                 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc},
301         test_params {
302                 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc},
303         test_params {
304                 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc},
305         test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, fix_use_oc},
306 
307         test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
308                 fix_use_all_offsets},
309         test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
310                 fix_use_all_offsets},
311         test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
312                 fix_use_all_offsets},
313         test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
314                 fix_use_all_offsets},
315         test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100,
316                 fix_use_all_offsets},
317         test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100,
318                 fix_use_all_offsets},
319         test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100,
320                 fix_use_all_offsets},
321         test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100,
322                 fix_use_all_offsets},
323         test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2,
324                 fix_use_all_offsets},
325 
326         test_params {
327                 'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets},
328         test_params {
329                 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets},
330         test_params {
331                 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets},
332         test_params {
333                 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets},
334         test_params {
335                 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, fix_no_offsets},
336         test_params {
337                 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets},
338         test_params {
339                 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets},
340         test_params {
341                 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets},
342         test_params {
343                 'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, fix_no_offsets});
344 
345 INST_TEST_CASE(TestGEMM_general_cases_col_offset,
346         test_params {'N', 'n', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc},
347         test_params {'n', 'T', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc},
348         test_params {'T', 'N', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc},
349         test_params {'t', 't', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc},
350         test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc},
351         test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc},
352         test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc},
353         test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc},
354         test_params {
355                 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, col_use_oc},
356         test_params {
357                 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, col_use_oc},
358         test_params {
359                 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_use_oc},
360         test_params {
361                 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_use_oc},
362         test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, col_use_oc},
363 
364         test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
365                 col_use_all_offsets},
366         test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
367                 col_use_all_offsets},
368         test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
369                 col_use_all_offsets},
370         test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
371                 col_use_all_offsets},
372         test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100,
373                 col_use_all_offsets},
374         test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100,
375                 col_use_all_offsets},
376         test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100,
377                 col_use_all_offsets},
378         test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100,
379                 col_use_all_offsets},
380         test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2,
381                 col_use_all_offsets},
382 
383         test_params {
384                 'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets},
385         test_params {
386                 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets},
387         test_params {
388                 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets},
389         test_params {
390                 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets},
391         test_params {
392                 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, col_no_offsets},
393         test_params {
394                 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets},
395         test_params {
396                 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets},
397         test_params {
398                 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets},
399         test_params {
400                 'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, col_no_offsets});
401 
402 INST_TEST_CASE(TestGEMM_general_cases_row_offset,
403         test_params {'N', 'n', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc},
404         test_params {'n', 'T', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc},
405         test_params {'T', 'N', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc},
406         test_params {'t', 't', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc},
407         test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc},
408         test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc},
409         test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc},
410         test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc},
411         test_params {
412                 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, row_use_oc},
413         test_params {
414                 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, row_use_oc},
415         test_params {
416                 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_use_oc},
417         test_params {
418                 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_use_oc},
419         test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, row_use_oc},
420 
421         test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
422                 row_use_all_offsets},
423         test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
424                 row_use_all_offsets},
425         test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
426                 row_use_all_offsets},
427         test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
428                 row_use_all_offsets},
429         test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100,
430                 row_use_all_offsets},
431         test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100,
432                 row_use_all_offsets},
433         test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100,
434                 row_use_all_offsets},
435         test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100,
436                 row_use_all_offsets},
437         test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2,
438                 row_use_all_offsets},
439 
440         test_params {
441                 'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets},
442         test_params {
443                 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets},
444         test_params {
445                 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets},
446         test_params {
447                 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets},
448         test_params {
449                 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, row_no_offsets},
450         test_params {
451                 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets},
452         test_params {
453                 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets},
454         test_params {
455                 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets},
456         test_params {
457                 'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, row_no_offsets});
458 
459 CPU_INST_TEST_CASE(TestGEMM_fractional_scales_fix_offset,
460         /* alpha and beta have non-zero fractional part */
461         test_params {
462                 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, fix_use_oc},
463         test_params {
464                 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, fix_use_oc},
465         test_params {
466                 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, fix_use_oc},
467         test_params {
468                 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, fix_use_oc},
469         test_params {
470                 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, fix_use_oc},
471         test_params {
472                 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, fix_use_oc},
473         test_params {
474                 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, fix_use_oc},
475         test_params {
476                 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, fix_use_oc},
477         test_params {
478                 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, fix_use_oc},
479 
480         test_params {'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80,
481                 fix_use_all_offsets},
482         test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120,
483                 fix_use_all_offsets},
484         test_params {'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80,
485                 fix_use_all_offsets},
486         test_params {'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80,
487                 fix_use_all_offsets},
488         test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100,
489                 fix_use_all_offsets},
490         test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100,
491                 fix_use_all_offsets},
492         test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100,
493                 fix_use_all_offsets},
494         test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100,
495                 fix_use_all_offsets},
496         test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2,
497                 fix_use_all_offsets},
498 
499         test_params {
500                 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, fix_no_offsets},
501         test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120,
502                 fix_no_offsets},
503         test_params {
504                 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, fix_no_offsets},
505         test_params {
506                 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, fix_no_offsets},
507         test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100,
508                 fix_no_offsets},
509         test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100,
510                 fix_no_offsets},
511         test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100,
512                 fix_no_offsets},
513         test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100,
514                 fix_no_offsets},
515         test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2,
516                 fix_no_offsets});
517 
518 CPU_INST_TEST_CASE(TestGEMM_fractional_scales_col_offset,
519         /* alpha and beta have non-zero fractional part */
520         test_params {
521                 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, col_use_oc},
522         test_params {
523                 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, col_use_oc},
524         test_params {
525                 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, col_use_oc},
526         test_params {
527                 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, col_use_oc},
528         test_params {
529                 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, col_use_oc},
530         test_params {
531                 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, col_use_oc},
532         test_params {
533                 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, col_use_oc},
534         test_params {
535                 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, col_use_oc},
536         test_params {
537                 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, col_use_oc},
538 
539         test_params {'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80,
540                 col_use_all_offsets},
541         test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120,
542                 col_use_all_offsets},
543         test_params {'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80,
544                 col_use_all_offsets},
545         test_params {'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80,
546                 col_use_all_offsets},
547         test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100,
548                 col_use_all_offsets},
549         test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100,
550                 col_use_all_offsets},
551         test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100,
552                 col_use_all_offsets},
553         test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100,
554                 col_use_all_offsets},
555         test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2,
556                 col_use_all_offsets},
557 
558         test_params {
559                 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, col_no_offsets},
560         test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120,
561                 col_no_offsets},
562         test_params {
563                 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, col_no_offsets},
564         test_params {
565                 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, col_no_offsets},
566         test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100,
567                 col_no_offsets},
568         test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100,
569                 col_no_offsets},
570         test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100,
571                 col_no_offsets},
572         test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100,
573                 col_no_offsets},
574         test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2,
575                 col_no_offsets});
576 
577 CPU_INST_TEST_CASE(TestGEMM_fractional_scales_row_offset,
578         /* alpha and beta have non-zero fractional part */
579         test_params {
580                 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, row_use_oc},
581         test_params {
582                 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, row_use_oc},
583         test_params {
584                 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, row_use_oc},
585         test_params {
586                 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, row_use_oc},
587         test_params {
588                 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, row_use_oc},
589         test_params {
590                 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, row_use_oc},
591         test_params {
592                 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, row_use_oc},
593         test_params {
594                 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, row_use_oc},
595         test_params {
596                 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, row_use_oc},
597 
598         test_params {'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80,
599                 row_use_all_offsets},
600         test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120,
601                 row_use_all_offsets},
602         test_params {'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80,
603                 row_use_all_offsets},
604         test_params {'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80,
605                 row_use_all_offsets},
606         test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100,
607                 row_use_all_offsets},
608         test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100,
609                 row_use_all_offsets},
610         test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100,
611                 row_use_all_offsets},
612         test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100,
613                 row_use_all_offsets},
614         test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2,
615                 row_use_all_offsets},
616 
617         test_params {
618                 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, row_no_offsets},
619         test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120,
620                 row_no_offsets},
621         test_params {
622                 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, row_no_offsets},
623         test_params {
624                 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, row_no_offsets},
625         test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100,
626                 row_no_offsets},
627         test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100,
628                 row_no_offsets},
629         test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100,
630                 row_no_offsets},
631         test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100,
632                 row_no_offsets},
633         test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2,
634                 row_no_offsets});
635 
636 CPU_INST_TEST_CASE(TestGEMV,
637         test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1, 1,
638                 fix_no_offsets},
639         test_params {'n', 'n', 1, 3000, 2000, 1.0f, 0.0f, 2000, 3000, 3000,
640                 fix_no_offsets},
641         test_params {'t', 'n', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1, 1,
642                 fix_no_offsets},
643         test_params {'t', 'n', 1, 3000, 2000, 1.0f, 0.0f, 1, 3000, 3000,
644                 fix_no_offsets},
645         test_params {'n', 't', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1000, 1,
646                 fix_no_offsets},
647         test_params {'n', 't', 1, 3000, 2000, 1.0f, 0.0f, 2000, 2000, 3000,
648                 fix_no_offsets},
649         test_params {'t', 't', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1000, 1,
650                 fix_no_offsets},
651         test_params {'t', 't', 1, 3000, 2000, 1.0f, 0.0f, 1, 2000, 3000,
652                 fix_no_offsets},
653 
654         test_params {'n', 'n', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1, 1,
655                 fix_no_offsets},
656         test_params {'n', 'n', 1, 3000, 2000, 1.0f, 1.0f, 2000, 3000, 3000,
657                 fix_no_offsets},
658         test_params {'t', 'n', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1, 1,
659                 fix_no_offsets},
660         test_params {'t', 'n', 1, 3000, 2000, 1.0f, 1.0f, 1, 3000, 3000,
661                 fix_no_offsets},
662         test_params {'n', 't', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1000, 1,
663                 fix_no_offsets},
664         test_params {'n', 't', 1, 3000, 2000, 1.0f, 1.0f, 2000, 2000, 3000,
665                 fix_no_offsets},
666         test_params {'t', 't', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1000, 1,
667                 fix_no_offsets},
668         test_params {'t', 't', 1, 3000, 2000, 1.0f, 1.0f, 1, 2000, 3000,
669                 fix_no_offsets},
670 
671         test_params {'n', 'n', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1, 1,
672                 {'F', true, false, false}},
673         test_params {'n', 'n', 1, 3000, 2000, 1.0f, 1.0f, 2000, 3000, 3000,
674                 {'F', true, true, false}},
675         test_params {'t', 'n', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1, 1,
676                 {'F', false, true, false}},
677         test_params {'t', 'n', 1, 3000, 2000, 1.0f, 1.0f, 1, 3000, 3000,
678                 {'F', true, false, true}},
679         test_params {'n', 't', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1000, 1,
680                 {'F', false, true, true}},
681         test_params {'n', 't', 1, 3000, 2000, 1.0f, 1.0f, 2000, 2000, 3000,
682                 {'F', true, true, false}},
683         test_params {'t', 't', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1000, 1,
684                 {'F', true, false, false}},
685         test_params {'t', 't', 1, 3000, 2000, 1.0f, 1.0f, 1, 2000, 3000,
686                 {'F', false, true, false}});
687 
688 CPU_INST_TEST_CASE(TestGEMV_kblocking,
689         test_params {
690                 't', 'n', 20, 1, 7000, 1.0f, 0.0f, 20, 1, 500, fix_no_offsets},
691         test_params {'t', 't', 50, 1, 7000, 1.0f, 0.0f, 50, 7000, 500,
692                 fix_no_offsets},
693         test_params {'t', 'n', 400, 1, 7000, 1.0f, 0.0f, 400, 1, 500,
694                 fix_no_offsets},
695         test_params {'t', 't', 500, 1, 7000, 1.0f, 0.0f, 500, 7000, 500,
696                 fix_no_offsets},
697         test_params {
698                 't', 'n', 20, 1, 7000, 1.0f, 1.0f, 20, 1, 500, fix_no_offsets},
699         test_params {'t', 't', 50, 1, 7000, 1.0f, 1.0f, 50, 7000, 500,
700                 fix_no_offsets},
701         test_params {'t', 'n', 500, 1, 7000, 1.0f, 1.0f, 500, 1, 500,
702                 fix_no_offsets},
703         test_params {'t', 't', 500, 1, 7000, 1.0f, 1.0f, 500, 7000, 500,
704                 fix_no_offsets},
705 
706         test_params {'n', 'n', 1, 40, 7000, 1.0f, 0.0f, 7000, 40, 500,
707                 fix_no_offsets},
708         test_params {'t', 'n', 1, 10, 7000, 1.0f, 0.0f, 7000, 10, 10,
709                 fix_no_offsets},
710         test_params {'n', 'n', 1, 400, 7000, 1.0f, 0.0f, 7000, 400, 500,
711                 fix_no_offsets},
712         test_params {'t', 'n', 1, 100, 7000, 1.0f, 0.0f, 7000, 100, 500,
713                 fix_no_offsets},
714         test_params {'n', 'n', 1, 40, 7000, 1.0f, 1.0f, 7000, 40, 500,
715                 fix_no_offsets},
716         test_params {'t', 'n', 1, 10, 7000, 1.0f, 1.0f, 7000, 10, 500,
717                 fix_no_offsets},
718         test_params {'n', 'n', 1, 400, 7000, 1.0f, 1.0f, 7000, 400, 500,
719                 fix_no_offsets},
720         test_params {'t', 'n', 1, 550, 7000, 1.0f, 1.0f, 7000, 550, 550,
721                 fix_no_offsets});
722 
723 CPU_INST_TEST_CASE(TestGEMM_packed,
724         make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f,
725                 60, 50, 80, fix_use_oc),
726         make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f,
727                 60, 50, 80, fix_use_oc),
728         make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f,
729                 60, 50, 80, fix_use_oc),
730         make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f,
731                 60, 50, 80, fix_use_oc),
732 
733         make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f,
734                 60, 50, 80, fix_no_offsets),
735         make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f,
736                 60, 50, 80, fix_no_offsets),
737         make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f,
738                 60, 50, 80, fix_no_offsets),
739         make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f,
740                 60, 50, 80, fix_no_offsets),
741 
742         make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f,
743                 100, 100, 100, fix_use_oc),
744         make_test_params_pack({true, false}, 'n', 't', 100, 2, 100, 1.0f, 2.0f,
745                 100, 100, 100, fix_use_oc),
746         make_test_params_pack({true, true}, 't', 'n', 2, 100, 100, 1.0f, 2.0f,
747                 100, 100, 100, fix_use_oc),
748         make_test_params_pack({false, true}, 't', 't', 2, 100, 100, 1.0f, 2.0f,
749                 100, 100, 100, fix_use_oc),
750         make_test_params_pack({true, false}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f,
751                 10000, 2, 2, fix_use_oc),
752 
753         make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f,
754                 100, 100, 100, row_use_oc),
755         make_test_params_pack({true, false}, 'n', 't', 100, 2, 100, 1.0f, 2.0f,
756                 100, 100, 100, row_use_oc),
757         make_test_params_pack({true, true}, 't', 'n', 2, 100, 100, 1.0f, 2.0f,
758                 100, 100, 100, row_use_oc),
759         make_test_params_pack({false, true}, 't', 't', 2, 100, 100, 1.0f, 2.0f,
760                 100, 100, 100, row_use_oc),
761 
762         make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f,
763                 100, 100, 100, row_no_offsets),
764         make_test_params_pack({true, false}, 'n', 't', 100, 1, 100, 1.0f, 2.0f,
765                 100, 100, 100, row_no_offsets),
766         make_test_params_pack({true, true}, 't', 'n', 1, 100, 100, 1.0f, 2.0f,
767                 100, 100, 100, row_no_offsets),
768         make_test_params_pack({false, true}, 't', 't', 1, 100, 100, 1.0f, 2.0f,
769                 100, 100, 100, row_no_offsets),
770 
771         make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f,
772                 60, 50, 80, row_use_oc),
773         make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f,
774                 60, 50, 80, row_use_oc),
775         make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f,
776                 60, 50, 80, row_use_oc),
777         make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f,
778                 60, 50, 80, row_use_oc),
779         make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f,
780                 10000, 2, 2, row_use_oc),
781 
782         make_test_params_pack({true, false}, 'n', 't', 100, 2, 100, 1.0f, 2.0f,
783                 100, 100, 100, col_use_oc),
784         make_test_params_pack({true, true}, 't', 'n', 2, 100, 100, 1.0f, 2.0f,
785                 100, 100, 100, col_use_oc),
786         make_test_params_pack({false, true}, 't', 't', 2, 100, 100, 1.0f, 2.0f,
787                 100, 100, 100, col_use_oc),
788         make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f,
789                 10000, 2, 2, col_use_oc),
790 
791         make_test_params_pack({true, false}, 'n', 't', 100, 1, 100, 1.0f, 2.0f,
792                 100, 100, 100, col_no_offsets),
793         make_test_params_pack({true, true}, 't', 'n', 1, 100, 100, 1.0f, 2.0f,
794                 100, 100, 100, col_no_offsets),
795         make_test_params_pack({false, true}, 't', 't', 1, 100, 100, 1.0f, 2.0f,
796                 100, 100, 100, col_no_offsets),
797         make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f,
798                 10000, 2, 2, col_no_offsets),
799 
800         make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f,
801                 60, 50, 80, col_use_oc),
802         make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f,
803                 60, 50, 80, col_use_oc),
804         make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f,
805                 60, 50, 80, col_use_oc),
806         make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f,
807                 60, 50, 80, col_use_oc),
808         make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f,
809                 10000, 2, 2, col_use_oc),
810 
811         make_test_params_pack({false, true}, 'N', 'n', 200, 1, 200, 1.0f, 1.0f,
812                 200, 200, 200, fix_no_offsets),
813         make_test_params_pack({true, false}, 't', 'N', 200, 1, 200, 1.0f, 0.0f,
814                 200, 200, 200, fix_no_offsets),
815         make_test_params_pack({true, true}, 'T', 'N', 1, 200, 200, 1.0f, 1.0f,
816                 1, 200, 200, fix_no_offsets),
817         make_test_params_pack({false, true}, 'n', 'T', 1, 200, 200, 1.0f, 0.0f,
818                 200, 200, 200, fix_no_offsets));
819 
820 CPU_INST_TEST_CASE(TestGEMM_heavy,
821         test_params {'n', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000,
822                 fix_use_oc},
823         test_params {'t', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000,
824                 fix_use_oc},
825         test_params {'n', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000,
826                 fix_use_oc},
827         test_params {'t', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000,
828                 fix_use_oc});
829 
830 CPU_INST_TEST_CASE(TestGEMM_packed_heavy,
831         make_test_params_pack({false, true}, 'n', 'n', 3000, 3000, 3000, 1.0f,
832                 0.0f, 3000, 3000, 3000, fix_use_oc),
833         make_test_params_pack({true, false}, 't', 'n', 3000, 3000, 3000, 1.0f,
834                 0.0f, 3000, 3000, 3000, fix_use_oc),
835         make_test_params_pack({true, true}, 'n', 't', 3000, 3000, 3000, 1.0f,
836                 0.0f, 3000, 3000, 3000, row_use_oc),
837         make_test_params_pack({true, true}, 't', 't', 3000, 3000, 3000, 1.0f,
838                 0.0f, 3000, 3000, 3000, row_use_oc),
839 
840         make_test_params_pack({true, true}, 'n', 'n', 2000, 5000, 2000, 1.0f,
841                 1.35f, 2000, 5000, 5000, col_use_oc),
842         make_test_params_pack({false, true}, 't', 'n', 2000, 5000, 2000, 1.0f,
843                 1.77f, 2000, 5000, 5000, col_use_oc),
844 
845         make_test_params_pack({false, true}, 'n', 'n', 200, 20000, 2000, 1.0f,
846                 2.0f, 2000, 20000, 20000, fix_use_oc),
847         make_test_params_pack({true, true}, 'n', 'n', 200, 20000, 2000, 1.0f,
848                 2.0f, 2000, 20000, 20000, row_use_oc),
849         make_test_params_pack({true, false}, 'n', 'n', 200, 20000, 2000, 1.0f,
850                 2.0f, 2000, 20000, 20000, col_use_oc),
851 
852         make_test_params_pack({true, true}, 'n', 'n', 5000, 100, 2000, 1.0f,
853                 2.0f, 2000, 100, 100, row_use_oc),
854         make_test_params_pack({false, true}, 't', 'n', 5000, 100, 2000, 1.0f,
855                 2.0f, 5000, 100, 100, col_use_oc),
856 
857         make_test_params_pack({true, false}, 'n', 'n', 150, 150, 8000, 1.0f,
858                 1.7f, 8000, 150, 150, fix_use_oc),
859         make_test_params_pack({true, true}, 'n', 't', 200, 200, 8000, 1.0f,
860                 3.0f, 8000, 8000, 200, row_use_oc),
861         make_test_params_pack({false, true}, 't', 'n', 200, 300, 8000, 1.0f,
862                 0.0f, 200, 300, 300, col_use_oc));
863 
864 #endif
865