1 /******************************************************************************* 2 * Copyright 2019-2021 Intel Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 *******************************************************************************/ 16 17 #if defined(FP16) || defined(FP32) || defined(F16F16F32) || defined(BF16BF16F32) 18 INST_TEST_CASE(TestGEMM, 19 test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {}, true, 20 dnnl_invalid_arguments}, 21 test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {}, true, 22 dnnl_invalid_arguments}, 23 test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {}, true, 24 dnnl_invalid_arguments}, 25 test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {}, true, 26 dnnl_invalid_arguments}, 27 28 test_params {'N', 'N', 1, 1, 1, 1.0, 0.0, 4, 4, 4}, 29 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, 30 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, 31 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, 32 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, 33 test_params {'N', 'n', 31, 21, 11, 2.0, 1.5, 61, 51, 81}, 34 test_params {'n', 'T', 31, 21, 11, 2.0, 1.5, 61, 51, 81}, 35 test_params {'T', 'N', 31, 21, 11, 2.0, 1.5, 61, 51, 81}, 36 test_params {'t', 't', 31, 21, 11, 2.0, 1.5, 61, 51, 81}, 37 test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100}, 38 test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100}, 39 test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100}, 40 test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100}, 41 test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2}, 42 test_params {'t', 't', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2}, 43 44 make_test_params_with_offset( 45 {1, 2, 3}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, 100, 100, 100), 46 make_test_params_with_offset( 47 {30, 20, 10}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 100, 100, 100), 48 49 test_params {'n', 'n', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000}, 50 test_params {'n', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000}, 51 test_params {'t', 'n', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000}, 52 test_params {'t', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000}, 53 test_params {'n', 't', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000}, 54 test_params {'n', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000}, 55 test_params {'t', 't', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000}, 56 test_params {'t', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000}); 57 58 CPU_INST_TEST_CASE(TestGEMV, 59 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1, 1}, 60 test_params {'n', 'n', 1, 3000, 2000, 1.0f, 0.0f, 2000, 3000, 3000}, 61 test_params {'n', 'n', 1, 300, 8000, 1.0f, 0.0f, 8000, 300, 300}, 62 test_params {'t', 'n', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1, 1}, 63 test_params {'t', 'n', 200, 1, 8000, 1.0f, 0.0f, 200, 1, 1}, 64 test_params {'t', 'n', 1, 3000, 2000, 1.0f, 0.0f, 1, 3000, 3000}, 65 test_params {'t', 'n', 1, 300, 8000, 1.0f, 0.0f, 1, 300, 300}, 66 test_params {'n', 't', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1000, 1}, 67 test_params {'n', 't', 1, 3000, 2000, 1.0f, 0.0f, 2000, 2000, 3000}, 68 test_params {'t', 't', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1000, 1}, 69 test_params {'t', 't', 200, 1, 8000, 1.0f, 0.0f, 200, 8000, 1}, 70 test_params {'t', 't', 1, 3000, 2000, 1.0f, 0.0f, 1, 2000, 3000}, 71 72 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1010, 1, 30}, 73 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1010, 20, 1}, 74 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1010, 20, 30}, 75 test_params {'n', 'n', 1, 3000, 2000, 1.0f, 0.0f, 2010, 3010, 3010}, 76 test_params {'n', 'n', 1, 300, 8000, 1.0f, 0.0f, 8010, 310, 310}, 77 test_params {'t', 'n', 2000, 1, 1000, 1.0f, 0.0f, 2010, 20, 30}, 78 test_params {'t', 'n', 200, 1, 8000, 1.0f, 0.0f, 210, 20, 30}, 79 test_params {'t', 'n', 1, 3000, 2000, 1.0f, 0.0f, 20, 3010, 3010}, 80 test_params {'t', 'n', 1, 300, 8000, 1.0f, 0.0f, 20, 310, 310}, 81 test_params {'n', 't', 2000, 1, 1000, 1.0f, 0.0f, 1010, 1010, 20}, 82 test_params {'n', 't', 1, 3000, 2000, 1.0f, 0.0f, 2010, 2010, 3010}, 83 test_params {'t', 't', 2000, 1, 1000, 1.0f, 0.0f, 2010, 1010, 20}, 84 test_params {'t', 't', 200, 1, 8000, 1.0f, 0.0f, 210, 8010, 20}, 85 test_params {'t', 't', 1, 3000, 2000, 1.0f, 0.0f, 20, 2010, 3010}, 86 87 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1, 1}, 88 test_params {'n', 'n', 1, 3000, 2000, 1.0f, 1.0f, 2000, 3000, 3000}, 89 test_params {'n', 'n', 1, 300, 8000, 1.0f, 1.0f, 8000, 300, 300}, 90 test_params {'t', 'n', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1, 1}, 91 test_params {'t', 'n', 200, 1, 8000, 1.0f, 1.0f, 200, 1, 1}, 92 test_params {'t', 'n', 1, 3000, 2000, 1.0f, 1.0f, 1, 3000, 3000}, 93 test_params {'t', 'n', 1, 300, 8000, 1.0f, 1.0f, 1, 300, 300}, 94 test_params {'n', 't', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1000, 1}, 95 test_params {'n', 't', 1, 3000, 2000, 1.0f, 1.0f, 2000, 2000, 3000}, 96 test_params {'t', 't', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1000, 1}, 97 test_params {'t', 't', 200, 1, 8000, 1.0f, 1.0f, 200, 8000, 1}, 98 test_params {'t', 't', 1, 3000, 4000, 1.0f, 1.0f, 1, 4000, 3000}); 99 100 /** 101 * These cases are used to test the small-N avx-512 sgemm TN kernels. 102 * Note: The kernels assume a column major layout while the external 103 * APIs assume row major layout, so the M/N and transA/transB values 104 * are swapped. 105 */ 106 CPU_INST_TEST_CASE(TestGEMM_smalln, 107 test_params {'n', 't', 5, 512, 512, 1.0f, 1.0f, 512, 512, 512}, 108 test_params {'n', 't', 5, 512, 1536, 1.0f, 1.0f, 1536, 1536, 512}, 109 test_params {'n', 't', 5, 512, 2048, 1.0f, 1.0f, 2048, 2048, 512}, 110 test_params {'n', 't', 5, 2048, 512, 1.0f, 1.0f, 512, 512, 2048}, 111 test_params {'n', 't', 7, 512, 512, 0.0f, 1.0f, 512, 512, 512}, 112 test_params {'n', 't', 7, 512, 1536, 1.0f, 0.0f, 1536, 1536, 512}, 113 test_params {'n', 't', 7, 512, 2048, 0.5f, 0.5f, 2048, 2048, 512}, 114 test_params {'n', 't', 7, 2048, 512, 1.0f, 1.0f, 512, 512, 2048}, 115 test_params {'n', 't', 4, 512, 512, 1.0f, 1.0f, 512, 512, 512}, 116 test_params {'n', 't', 4, 512, 1536, 1.0f, 1.0f, 1536, 1536, 512}, 117 test_params {'n', 't', 4, 512, 2048, 1.0f, 1.0f, 2048, 2048, 512}, 118 test_params {'n', 't', 4, 2048, 512, 1.0f, 1.0f, 512, 512, 2048}, 119 test_params {'n', 't', 8, 512, 512, 1.0f, 1.0f, 512, 512, 512}, 120 test_params {'n', 't', 8, 512, 1536, 1.0f, 1.0f, 1536, 1536, 512}, 121 test_params {'n', 't', 8, 512, 2048, 1.0f, 1.0f, 2048, 2048, 512}, 122 test_params {'n', 't', 8, 2048, 512, 1.0f, 1.0f, 512, 512, 2048}); 123 124 #if defined(FP32) || defined(BF16BF16F32) 125 INST_TEST_CASE(TestGEMM_packed, 126 test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {false, true}, 127 true, dnnl_invalid_arguments}, 128 test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {true, false}, 129 true, dnnl_invalid_arguments}, 130 test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {true, true}, 131 true, dnnl_invalid_arguments}, 132 test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {true, true}, 133 true, dnnl_invalid_arguments}, 134 135 make_test_params_pack( 136 {true, false}, 'N', 'n', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81), 137 make_test_params_pack( 138 {false, true}, 'n', 'T', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81), 139 make_test_params_pack( 140 {true, false}, 'T', 'N', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81), 141 make_test_params_pack( 142 {true, true}, 't', 't', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81), 143 make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, 144 100, 100, 100), 145 make_test_params_pack( 146 {true, true}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 100, 100, 100), 147 make_test_params_pack( 148 {true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f, 10000, 2, 2), 149 make_test_params_pack( 150 {true, true}, 'n', 'n', 100, 1, 100, 1.0f, 2.0f, 100, 100, 100), 151 make_test_params_pack({true, false}, 'n', 'n', 1, 100, 100, 1.0f, 2.0f, 152 100, 100, 100), 153 make_test_params_pack({false, true}, 'n', 'n', 1, 100, 100, 1.0f, 2.0f, 154 100, 100, 100), 155 156 make_test_params_pack({true, false}, 'n', 'n', 3000, 3000, 3000, 1.0f, 157 2.0f, 3000, 3000, 3000), 158 make_test_params_pack({true, false}, 't', 'n', 3000, 3000, 3000, 1.0f, 159 0.0f, 3000, 3000, 3000), 160 make_test_params_pack({true, false}, 'n', 't', 3000, 3000, 3000, 1.0f, 161 1.0f, 3000, 3000, 3000), 162 make_test_params_pack({true, false}, 't', 't', 3000, 3000, 3000, 1.0f, 163 2.0f, 3000, 3000, 3000), 164 165 make_test_params_pack({false, true}, 'n', 'n', 200, 20000, 2000, 1.0f, 166 2.0f, 2000, 20000, 20000), 167 make_test_params_pack({false, true}, 'n', 'n', 2000, 2000, 2000, 1.0f, 168 2.0f, 2000, 2000, 2000), 169 make_test_params_pack({true, true}, 'n', 'n', 2000, 5000, 2000, 1.0f, 170 2.0f, 2000, 5000, 5000), 171 make_test_params_pack({true, true}, 'n', 'n', 5000, 100, 2000, 1.0f, 172 2.0f, 2000, 100, 100), 173 make_test_params_pack({false, true}, 't', 'n', 2000, 2000, 2000, 1.0f, 174 0.0f, 2000, 2000, 2000), 175 make_test_params_pack({false, true}, 't', 'n', 2000, 5000, 2000, 1.0f, 176 2.0f, 2000, 5000, 5000), 177 make_test_params_pack({false, true}, 't', 'n', 5000, 100, 2000, 1.0f, 178 2.0f, 5000, 100, 100), 179 make_test_params_pack({false, true}, 'n', 't', 2000, 2000, 2000, 1.0f, 180 1.0f, 2000, 2000, 2000), 181 make_test_params_pack({false, true}, 't', 't', 2000, 2000, 2000, 1.0f, 182 2.0f, 2000, 2000, 2000), 183 make_test_params_pack({true, true}, 't', 't', 2000, 5000, 2000, 1.0f, 184 2.0f, 2000, 2000, 5000), 185 make_test_params_pack({true, true}, 't', 't', 5000, 100, 2000, 1.0f, 186 2.0f, 5000, 2000, 100), 187 188 make_test_params_pack({true, false}, 'n', 'n', 150, 150, 8000, 1.0f, 189 3.0f, 8000, 150, 150), 190 make_test_params_pack({true, true}, 'n', 't', 200, 200, 8000, 1.0f, 191 3.0f, 8000, 8000, 200), 192 make_test_params_pack({false, true}, 't', 'n', 200, 300, 8000, 1.0f, 193 3.0f, 200, 300, 300)); 194 #endif 195 196 #elif defined(BF16BF16BF16) 197 198 INST_TEST_CASE(TestGEMM, 199 test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {}, true, 200 dnnl_invalid_arguments}, 201 test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {}, true, 202 dnnl_invalid_arguments}, 203 test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {}, true, 204 dnnl_invalid_arguments}, 205 test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {}, true, 206 dnnl_invalid_arguments}, 207 208 test_params {'N', 'N', 1, 1, 1, 1.0, 0.0, 4, 4, 4}, 209 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, 210 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, 211 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, 212 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, 213 test_params {'N', 'n', 31, 21, 11, 2.5, 1.5, 61, 51, 81}, 214 test_params {'n', 'T', 31, 21, 11, 2.5, 1.5, 61, 51, 81}, 215 test_params {'T', 'N', 31, 21, 11, 2.5, 1.5, 61, 51, 81}, 216 test_params {'t', 't', 31, 21, 11, 2.5, 1.5, 61, 51, 81}, 217 test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100}, 218 test_params {'n', 't', 100, 2, 58, 1.0, 2.0, 100, 100, 100}, 219 test_params {'t', 'n', 2, 100, 61, 1.0, 2.0, 100, 100, 100}, 220 test_params {'t', 't', 2, 100, 60, 1.0, 2.0, 100, 100, 100}, 221 test_params {'n', 'n', 2, 2, 11, 1.0, -1.0, 20, 2, 2}, 222 test_params {'t', 't', 2, 2, 11, 1.0, -1.0, 2, 20, 2}, 223 224 make_test_params_with_offset( 225 {1, 2, 3}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, 100, 100, 100), 226 make_test_params_with_offset( 227 {30, 20, 10}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 100, 100, 100), 228 229 test_params {'n', 'n', 2000, 2000, 20, 1.0, 0.0, 20, 2000, 2000}, 230 test_params {'n', 'n', 3000, 3000, 30, 1.0, 0.0, 30, 3000, 3000}, 231 test_params {'t', 'n', 2000, 2000, 20, 1.0, 0.0, 2000, 2000, 2000}, 232 test_params {'t', 'n', 3000, 3000, 30, 1.0, 0.0, 3000, 3000, 3000}, 233 test_params {'n', 't', 2000, 2000, 20, 1.0, 0.0, 20, 20, 2000}, 234 test_params {'n', 't', 3000, 3000, 30, 1.0, 0.0, 30, 30, 3000}, 235 test_params {'t', 't', 2000, 2000, 20, 1.0, 0.0, 2000, 20, 2000}, 236 test_params {'t', 't', 3000, 3000, 30, 1.0, 0.0, 3000, 30, 3000}); 237 238 #else 239 constexpr test_igemm_params fix_use_oc = {'F', false, false, true}; 240 constexpr test_igemm_params col_use_oc = {'C', false, false, true}; 241 constexpr test_igemm_params row_use_oc = {'R', false, false, true}; 242 243 constexpr test_igemm_params fix_use_all_offsets = {'F', true, true, true}; 244 constexpr test_igemm_params col_use_all_offsets = {'C', true, true, true}; 245 constexpr test_igemm_params row_use_all_offsets = {'R', true, true, true}; 246 247 constexpr test_igemm_params fix_no_offsets = {'F', false, false, false}; 248 constexpr test_igemm_params col_no_offsets = {'C', false, false, false}; 249 constexpr test_igemm_params row_no_offsets = {'R', false, false, false}; 250 251 INST_TEST_CASE(TestGEMM_expected_failures, 252 test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {}, true, 253 dnnl_invalid_arguments}, 254 test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {}, true, 255 dnnl_invalid_arguments}, 256 test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {}, true, 257 dnnl_invalid_arguments}, 258 test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {}, true, 259 dnnl_invalid_arguments}, 260 261 test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, fix_use_oc, {}, true, 262 dnnl_invalid_arguments}, 263 test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, fix_use_oc, {}, true, 264 dnnl_invalid_arguments}, 265 test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, fix_use_oc, {}, true, 266 dnnl_invalid_arguments}, 267 test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, fix_use_oc, {}, true, 268 dnnl_invalid_arguments}, 269 270 test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, fix_use_all_offsets, 271 {}, true, dnnl_invalid_arguments}, 272 test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, fix_use_all_offsets, 273 {}, true, dnnl_invalid_arguments}, 274 test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, fix_use_all_offsets, 275 {}, true, dnnl_invalid_arguments}, 276 test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, fix_use_all_offsets, 277 {}, true, dnnl_invalid_arguments}, 278 279 test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {true, true}, 280 true, dnnl_invalid_arguments}, 281 test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {false, true}, 282 true, dnnl_invalid_arguments}, 283 test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {true, false}, 284 true, dnnl_invalid_arguments}, 285 test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {false, true}, 286 true, dnnl_invalid_arguments}); 287 288 INST_TEST_CASE(TestGEMM_general_cases_fix_offset, 289 test_params {'N', 'n', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc}, 290 test_params {'n', 'T', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc}, 291 test_params {'T', 'N', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc}, 292 test_params {'t', 't', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc}, 293 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc}, 294 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc}, 295 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc}, 296 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc}, 297 test_params { 298 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, fix_use_oc}, 299 test_params { 300 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc}, 301 test_params { 302 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc}, 303 test_params { 304 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc}, 305 test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, fix_use_oc}, 306 307 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, 308 fix_use_all_offsets}, 309 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, 310 fix_use_all_offsets}, 311 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, 312 fix_use_all_offsets}, 313 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, 314 fix_use_all_offsets}, 315 test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, 316 fix_use_all_offsets}, 317 test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, 318 fix_use_all_offsets}, 319 test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, 320 fix_use_all_offsets}, 321 test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, 322 fix_use_all_offsets}, 323 test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, 324 fix_use_all_offsets}, 325 326 test_params { 327 'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets}, 328 test_params { 329 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets}, 330 test_params { 331 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets}, 332 test_params { 333 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets}, 334 test_params { 335 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, fix_no_offsets}, 336 test_params { 337 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets}, 338 test_params { 339 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets}, 340 test_params { 341 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets}, 342 test_params { 343 'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, fix_no_offsets}); 344 345 INST_TEST_CASE(TestGEMM_general_cases_col_offset, 346 test_params {'N', 'n', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc}, 347 test_params {'n', 'T', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc}, 348 test_params {'T', 'N', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc}, 349 test_params {'t', 't', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc}, 350 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc}, 351 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc}, 352 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc}, 353 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc}, 354 test_params { 355 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, col_use_oc}, 356 test_params { 357 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, col_use_oc}, 358 test_params { 359 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_use_oc}, 360 test_params { 361 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_use_oc}, 362 test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, col_use_oc}, 363 364 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, 365 col_use_all_offsets}, 366 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, 367 col_use_all_offsets}, 368 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, 369 col_use_all_offsets}, 370 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, 371 col_use_all_offsets}, 372 test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, 373 col_use_all_offsets}, 374 test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, 375 col_use_all_offsets}, 376 test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, 377 col_use_all_offsets}, 378 test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, 379 col_use_all_offsets}, 380 test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, 381 col_use_all_offsets}, 382 383 test_params { 384 'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets}, 385 test_params { 386 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets}, 387 test_params { 388 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets}, 389 test_params { 390 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets}, 391 test_params { 392 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, col_no_offsets}, 393 test_params { 394 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets}, 395 test_params { 396 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets}, 397 test_params { 398 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets}, 399 test_params { 400 'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, col_no_offsets}); 401 402 INST_TEST_CASE(TestGEMM_general_cases_row_offset, 403 test_params {'N', 'n', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc}, 404 test_params {'n', 'T', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc}, 405 test_params {'T', 'N', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc}, 406 test_params {'t', 't', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc}, 407 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc}, 408 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc}, 409 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc}, 410 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc}, 411 test_params { 412 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, row_use_oc}, 413 test_params { 414 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, row_use_oc}, 415 test_params { 416 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_use_oc}, 417 test_params { 418 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_use_oc}, 419 test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, row_use_oc}, 420 421 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, 422 row_use_all_offsets}, 423 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, 424 row_use_all_offsets}, 425 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, 426 row_use_all_offsets}, 427 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, 428 row_use_all_offsets}, 429 test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, 430 row_use_all_offsets}, 431 test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, 432 row_use_all_offsets}, 433 test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, 434 row_use_all_offsets}, 435 test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, 436 row_use_all_offsets}, 437 test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, 438 row_use_all_offsets}, 439 440 test_params { 441 'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets}, 442 test_params { 443 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets}, 444 test_params { 445 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets}, 446 test_params { 447 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets}, 448 test_params { 449 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, row_no_offsets}, 450 test_params { 451 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets}, 452 test_params { 453 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets}, 454 test_params { 455 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets}, 456 test_params { 457 'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, row_no_offsets}); 458 459 CPU_INST_TEST_CASE(TestGEMM_fractional_scales_fix_offset, 460 /* alpha and beta have non-zero fractional part */ 461 test_params { 462 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, fix_use_oc}, 463 test_params { 464 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, fix_use_oc}, 465 test_params { 466 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, fix_use_oc}, 467 test_params { 468 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, fix_use_oc}, 469 test_params { 470 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, fix_use_oc}, 471 test_params { 472 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, fix_use_oc}, 473 test_params { 474 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, fix_use_oc}, 475 test_params { 476 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, fix_use_oc}, 477 test_params { 478 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, fix_use_oc}, 479 480 test_params {'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, 481 fix_use_all_offsets}, 482 test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, 483 fix_use_all_offsets}, 484 test_params {'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, 485 fix_use_all_offsets}, 486 test_params {'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, 487 fix_use_all_offsets}, 488 test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, 489 fix_use_all_offsets}, 490 test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, 491 fix_use_all_offsets}, 492 test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, 493 fix_use_all_offsets}, 494 test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, 495 fix_use_all_offsets}, 496 test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, 497 fix_use_all_offsets}, 498 499 test_params { 500 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, fix_no_offsets}, 501 test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, 502 fix_no_offsets}, 503 test_params { 504 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, fix_no_offsets}, 505 test_params { 506 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, fix_no_offsets}, 507 test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, 508 fix_no_offsets}, 509 test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, 510 fix_no_offsets}, 511 test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, 512 fix_no_offsets}, 513 test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, 514 fix_no_offsets}, 515 test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, 516 fix_no_offsets}); 517 518 CPU_INST_TEST_CASE(TestGEMM_fractional_scales_col_offset, 519 /* alpha and beta have non-zero fractional part */ 520 test_params { 521 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, col_use_oc}, 522 test_params { 523 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, col_use_oc}, 524 test_params { 525 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, col_use_oc}, 526 test_params { 527 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, col_use_oc}, 528 test_params { 529 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, col_use_oc}, 530 test_params { 531 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, col_use_oc}, 532 test_params { 533 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, col_use_oc}, 534 test_params { 535 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, col_use_oc}, 536 test_params { 537 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, col_use_oc}, 538 539 test_params {'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, 540 col_use_all_offsets}, 541 test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, 542 col_use_all_offsets}, 543 test_params {'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, 544 col_use_all_offsets}, 545 test_params {'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, 546 col_use_all_offsets}, 547 test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, 548 col_use_all_offsets}, 549 test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, 550 col_use_all_offsets}, 551 test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, 552 col_use_all_offsets}, 553 test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, 554 col_use_all_offsets}, 555 test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, 556 col_use_all_offsets}, 557 558 test_params { 559 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, col_no_offsets}, 560 test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, 561 col_no_offsets}, 562 test_params { 563 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, col_no_offsets}, 564 test_params { 565 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, col_no_offsets}, 566 test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, 567 col_no_offsets}, 568 test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, 569 col_no_offsets}, 570 test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, 571 col_no_offsets}, 572 test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, 573 col_no_offsets}, 574 test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, 575 col_no_offsets}); 576 577 CPU_INST_TEST_CASE(TestGEMM_fractional_scales_row_offset, 578 /* alpha and beta have non-zero fractional part */ 579 test_params { 580 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, row_use_oc}, 581 test_params { 582 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, row_use_oc}, 583 test_params { 584 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, row_use_oc}, 585 test_params { 586 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, row_use_oc}, 587 test_params { 588 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, row_use_oc}, 589 test_params { 590 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, row_use_oc}, 591 test_params { 592 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, row_use_oc}, 593 test_params { 594 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, row_use_oc}, 595 test_params { 596 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, row_use_oc}, 597 598 test_params {'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, 599 row_use_all_offsets}, 600 test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, 601 row_use_all_offsets}, 602 test_params {'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, 603 row_use_all_offsets}, 604 test_params {'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, 605 row_use_all_offsets}, 606 test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, 607 row_use_all_offsets}, 608 test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, 609 row_use_all_offsets}, 610 test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, 611 row_use_all_offsets}, 612 test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, 613 row_use_all_offsets}, 614 test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, 615 row_use_all_offsets}, 616 617 test_params { 618 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, row_no_offsets}, 619 test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, 620 row_no_offsets}, 621 test_params { 622 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, row_no_offsets}, 623 test_params { 624 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, row_no_offsets}, 625 test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, 626 row_no_offsets}, 627 test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, 628 row_no_offsets}, 629 test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, 630 row_no_offsets}, 631 test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, 632 row_no_offsets}, 633 test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, 634 row_no_offsets}); 635 636 CPU_INST_TEST_CASE(TestGEMV, 637 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1, 1, 638 fix_no_offsets}, 639 test_params {'n', 'n', 1, 3000, 2000, 1.0f, 0.0f, 2000, 3000, 3000, 640 fix_no_offsets}, 641 test_params {'t', 'n', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1, 1, 642 fix_no_offsets}, 643 test_params {'t', 'n', 1, 3000, 2000, 1.0f, 0.0f, 1, 3000, 3000, 644 fix_no_offsets}, 645 test_params {'n', 't', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1000, 1, 646 fix_no_offsets}, 647 test_params {'n', 't', 1, 3000, 2000, 1.0f, 0.0f, 2000, 2000, 3000, 648 fix_no_offsets}, 649 test_params {'t', 't', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1000, 1, 650 fix_no_offsets}, 651 test_params {'t', 't', 1, 3000, 2000, 1.0f, 0.0f, 1, 2000, 3000, 652 fix_no_offsets}, 653 654 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1, 1, 655 fix_no_offsets}, 656 test_params {'n', 'n', 1, 3000, 2000, 1.0f, 1.0f, 2000, 3000, 3000, 657 fix_no_offsets}, 658 test_params {'t', 'n', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1, 1, 659 fix_no_offsets}, 660 test_params {'t', 'n', 1, 3000, 2000, 1.0f, 1.0f, 1, 3000, 3000, 661 fix_no_offsets}, 662 test_params {'n', 't', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1000, 1, 663 fix_no_offsets}, 664 test_params {'n', 't', 1, 3000, 2000, 1.0f, 1.0f, 2000, 2000, 3000, 665 fix_no_offsets}, 666 test_params {'t', 't', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1000, 1, 667 fix_no_offsets}, 668 test_params {'t', 't', 1, 3000, 2000, 1.0f, 1.0f, 1, 2000, 3000, 669 fix_no_offsets}, 670 671 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1, 1, 672 {'F', true, false, false}}, 673 test_params {'n', 'n', 1, 3000, 2000, 1.0f, 1.0f, 2000, 3000, 3000, 674 {'F', true, true, false}}, 675 test_params {'t', 'n', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1, 1, 676 {'F', false, true, false}}, 677 test_params {'t', 'n', 1, 3000, 2000, 1.0f, 1.0f, 1, 3000, 3000, 678 {'F', true, false, true}}, 679 test_params {'n', 't', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1000, 1, 680 {'F', false, true, true}}, 681 test_params {'n', 't', 1, 3000, 2000, 1.0f, 1.0f, 2000, 2000, 3000, 682 {'F', true, true, false}}, 683 test_params {'t', 't', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1000, 1, 684 {'F', true, false, false}}, 685 test_params {'t', 't', 1, 3000, 2000, 1.0f, 1.0f, 1, 2000, 3000, 686 {'F', false, true, false}}); 687 688 CPU_INST_TEST_CASE(TestGEMV_kblocking, 689 test_params { 690 't', 'n', 20, 1, 7000, 1.0f, 0.0f, 20, 1, 500, fix_no_offsets}, 691 test_params {'t', 't', 50, 1, 7000, 1.0f, 0.0f, 50, 7000, 500, 692 fix_no_offsets}, 693 test_params {'t', 'n', 400, 1, 7000, 1.0f, 0.0f, 400, 1, 500, 694 fix_no_offsets}, 695 test_params {'t', 't', 500, 1, 7000, 1.0f, 0.0f, 500, 7000, 500, 696 fix_no_offsets}, 697 test_params { 698 't', 'n', 20, 1, 7000, 1.0f, 1.0f, 20, 1, 500, fix_no_offsets}, 699 test_params {'t', 't', 50, 1, 7000, 1.0f, 1.0f, 50, 7000, 500, 700 fix_no_offsets}, 701 test_params {'t', 'n', 500, 1, 7000, 1.0f, 1.0f, 500, 1, 500, 702 fix_no_offsets}, 703 test_params {'t', 't', 500, 1, 7000, 1.0f, 1.0f, 500, 7000, 500, 704 fix_no_offsets}, 705 706 test_params {'n', 'n', 1, 40, 7000, 1.0f, 0.0f, 7000, 40, 500, 707 fix_no_offsets}, 708 test_params {'t', 'n', 1, 10, 7000, 1.0f, 0.0f, 7000, 10, 10, 709 fix_no_offsets}, 710 test_params {'n', 'n', 1, 400, 7000, 1.0f, 0.0f, 7000, 400, 500, 711 fix_no_offsets}, 712 test_params {'t', 'n', 1, 100, 7000, 1.0f, 0.0f, 7000, 100, 500, 713 fix_no_offsets}, 714 test_params {'n', 'n', 1, 40, 7000, 1.0f, 1.0f, 7000, 40, 500, 715 fix_no_offsets}, 716 test_params {'t', 'n', 1, 10, 7000, 1.0f, 1.0f, 7000, 10, 500, 717 fix_no_offsets}, 718 test_params {'n', 'n', 1, 400, 7000, 1.0f, 1.0f, 7000, 400, 500, 719 fix_no_offsets}, 720 test_params {'t', 'n', 1, 550, 7000, 1.0f, 1.0f, 7000, 550, 550, 721 fix_no_offsets}); 722 723 CPU_INST_TEST_CASE(TestGEMM_packed, 724 make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f, 725 60, 50, 80, fix_use_oc), 726 make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f, 727 60, 50, 80, fix_use_oc), 728 make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f, 729 60, 50, 80, fix_use_oc), 730 make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f, 731 60, 50, 80, fix_use_oc), 732 733 make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f, 734 60, 50, 80, fix_no_offsets), 735 make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f, 736 60, 50, 80, fix_no_offsets), 737 make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f, 738 60, 50, 80, fix_no_offsets), 739 make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f, 740 60, 50, 80, fix_no_offsets), 741 742 make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, 743 100, 100, 100, fix_use_oc), 744 make_test_params_pack({true, false}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 745 100, 100, 100, fix_use_oc), 746 make_test_params_pack({true, true}, 't', 'n', 2, 100, 100, 1.0f, 2.0f, 747 100, 100, 100, fix_use_oc), 748 make_test_params_pack({false, true}, 't', 't', 2, 100, 100, 1.0f, 2.0f, 749 100, 100, 100, fix_use_oc), 750 make_test_params_pack({true, false}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f, 751 10000, 2, 2, fix_use_oc), 752 753 make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, 754 100, 100, 100, row_use_oc), 755 make_test_params_pack({true, false}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 756 100, 100, 100, row_use_oc), 757 make_test_params_pack({true, true}, 't', 'n', 2, 100, 100, 1.0f, 2.0f, 758 100, 100, 100, row_use_oc), 759 make_test_params_pack({false, true}, 't', 't', 2, 100, 100, 1.0f, 2.0f, 760 100, 100, 100, row_use_oc), 761 762 make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, 763 100, 100, 100, row_no_offsets), 764 make_test_params_pack({true, false}, 'n', 't', 100, 1, 100, 1.0f, 2.0f, 765 100, 100, 100, row_no_offsets), 766 make_test_params_pack({true, true}, 't', 'n', 1, 100, 100, 1.0f, 2.0f, 767 100, 100, 100, row_no_offsets), 768 make_test_params_pack({false, true}, 't', 't', 1, 100, 100, 1.0f, 2.0f, 769 100, 100, 100, row_no_offsets), 770 771 make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f, 772 60, 50, 80, row_use_oc), 773 make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f, 774 60, 50, 80, row_use_oc), 775 make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f, 776 60, 50, 80, row_use_oc), 777 make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f, 778 60, 50, 80, row_use_oc), 779 make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f, 780 10000, 2, 2, row_use_oc), 781 782 make_test_params_pack({true, false}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 783 100, 100, 100, col_use_oc), 784 make_test_params_pack({true, true}, 't', 'n', 2, 100, 100, 1.0f, 2.0f, 785 100, 100, 100, col_use_oc), 786 make_test_params_pack({false, true}, 't', 't', 2, 100, 100, 1.0f, 2.0f, 787 100, 100, 100, col_use_oc), 788 make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f, 789 10000, 2, 2, col_use_oc), 790 791 make_test_params_pack({true, false}, 'n', 't', 100, 1, 100, 1.0f, 2.0f, 792 100, 100, 100, col_no_offsets), 793 make_test_params_pack({true, true}, 't', 'n', 1, 100, 100, 1.0f, 2.0f, 794 100, 100, 100, col_no_offsets), 795 make_test_params_pack({false, true}, 't', 't', 1, 100, 100, 1.0f, 2.0f, 796 100, 100, 100, col_no_offsets), 797 make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f, 798 10000, 2, 2, col_no_offsets), 799 800 make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f, 801 60, 50, 80, col_use_oc), 802 make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f, 803 60, 50, 80, col_use_oc), 804 make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f, 805 60, 50, 80, col_use_oc), 806 make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f, 807 60, 50, 80, col_use_oc), 808 make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f, 809 10000, 2, 2, col_use_oc), 810 811 make_test_params_pack({false, true}, 'N', 'n', 200, 1, 200, 1.0f, 1.0f, 812 200, 200, 200, fix_no_offsets), 813 make_test_params_pack({true, false}, 't', 'N', 200, 1, 200, 1.0f, 0.0f, 814 200, 200, 200, fix_no_offsets), 815 make_test_params_pack({true, true}, 'T', 'N', 1, 200, 200, 1.0f, 1.0f, 816 1, 200, 200, fix_no_offsets), 817 make_test_params_pack({false, true}, 'n', 'T', 1, 200, 200, 1.0f, 0.0f, 818 200, 200, 200, fix_no_offsets)); 819 820 CPU_INST_TEST_CASE(TestGEMM_heavy, 821 test_params {'n', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, 822 fix_use_oc}, 823 test_params {'t', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, 824 fix_use_oc}, 825 test_params {'n', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, 826 fix_use_oc}, 827 test_params {'t', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, 828 fix_use_oc}); 829 830 CPU_INST_TEST_CASE(TestGEMM_packed_heavy, 831 make_test_params_pack({false, true}, 'n', 'n', 3000, 3000, 3000, 1.0f, 832 0.0f, 3000, 3000, 3000, fix_use_oc), 833 make_test_params_pack({true, false}, 't', 'n', 3000, 3000, 3000, 1.0f, 834 0.0f, 3000, 3000, 3000, fix_use_oc), 835 make_test_params_pack({true, true}, 'n', 't', 3000, 3000, 3000, 1.0f, 836 0.0f, 3000, 3000, 3000, row_use_oc), 837 make_test_params_pack({true, true}, 't', 't', 3000, 3000, 3000, 1.0f, 838 0.0f, 3000, 3000, 3000, row_use_oc), 839 840 make_test_params_pack({true, true}, 'n', 'n', 2000, 5000, 2000, 1.0f, 841 1.35f, 2000, 5000, 5000, col_use_oc), 842 make_test_params_pack({false, true}, 't', 'n', 2000, 5000, 2000, 1.0f, 843 1.77f, 2000, 5000, 5000, col_use_oc), 844 845 make_test_params_pack({false, true}, 'n', 'n', 200, 20000, 2000, 1.0f, 846 2.0f, 2000, 20000, 20000, fix_use_oc), 847 make_test_params_pack({true, true}, 'n', 'n', 200, 20000, 2000, 1.0f, 848 2.0f, 2000, 20000, 20000, row_use_oc), 849 make_test_params_pack({true, false}, 'n', 'n', 200, 20000, 2000, 1.0f, 850 2.0f, 2000, 20000, 20000, col_use_oc), 851 852 make_test_params_pack({true, true}, 'n', 'n', 5000, 100, 2000, 1.0f, 853 2.0f, 2000, 100, 100, row_use_oc), 854 make_test_params_pack({false, true}, 't', 'n', 5000, 100, 2000, 1.0f, 855 2.0f, 5000, 100, 100, col_use_oc), 856 857 make_test_params_pack({true, false}, 'n', 'n', 150, 150, 8000, 1.0f, 858 1.7f, 8000, 150, 150, fix_use_oc), 859 make_test_params_pack({true, true}, 'n', 't', 200, 200, 8000, 1.0f, 860 3.0f, 8000, 8000, 200, row_use_oc), 861 make_test_params_pack({false, true}, 't', 'n', 200, 300, 8000, 1.0f, 862 0.0f, 200, 300, 300, col_use_oc)); 863 864 #endif 865