/dports/math/libxsmm/libxsmm-1.16.3/src/template/ |
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_fused.tpl.c | 27 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) { 29 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C); 32 blocks = CB_BLOCKS; 70 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) { 72 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C); 75 blocks = CB_BLOCKS; 113 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) { 115 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C); 118 blocks = CB_BLOCKS; 156 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) { [all …]
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_diffused.tpl.c | 27 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) { 29 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C); 32 blocks = CB_BLOCKS; 47 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) { 49 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C); 52 blocks = CB_BLOCKS; 67 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) { 69 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C); 72 blocks = CB_BLOCKS; 87 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) { [all …]
|
H A D | libxsmm_dnn_rnncell_st_gru_fwd_nc_kcck.tpl.c | 13 libxsmm_blasint j, ik, ikb, in, ic, icb, inik, BF, CB, CB_BLOCKS, KB_BLOCKS; variable 99 CB_BLOCKS = cBlocks/BF; 117 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) { 119 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C); 122 blocks = CB_BLOCKS; 142 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) { 144 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C); 147 blocks = CB_BLOCKS; 187 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) { 189 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C); [all …]
|
H A D | libxsmm_dnn_rnncell_st_gru_fwd_nc_ck_generic.tpl.c | 13 libxsmm_blasint j, ik, ikb, in, ic, icb, inik, BF, CB, CB_BLOCKS, KB_BLOCKS, ikic, jk, jc; variable 133 CB_BLOCKS = cBlocks/BF; 180 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) { 182 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C); 185 blocks = CB_BLOCKS; 205 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) { 207 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C); 210 blocks = CB_BLOCKS; 250 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) { 252 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C); [all …]
|
H A D | libxsmm_dnn_rnncell_st_rnn_fwd_nc_kcck.tpl.c | 13 libxsmm_blasint i, ik, in, ic, inik, BF, CB, CB_BLOCKS, KB_BLOCKS; variable 66 CB_BLOCKS = cBlocks/BF; 68 assert(CB_BLOCKS <= 1024); 94 for (ic = 0; ic < CB_BLOCKS; ic++) { 96 A_array[ic] = &LIBXSMM_VLA_ACCESS(4, w, ik, ic + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk); 97 B_array[ic] = &LIBXSMM_VLA_ACCESS(3, x, i, in*bn, (ic + CB*CB_BLOCKS)*bc, N, C); 100 blocks = CB_BLOCKS;
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_fused_bf16.tpl.c | 26 blocksa = CB_BLOCKS; 43 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wi, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 44 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C), 71 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wc, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 72 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C), 100 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C), 128 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C), 215 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C), 243 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C), 271 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C), [all …]
|
H A D | libxsmm_dnn_fullyconnected_st_fwd_ncnc_kcck_generic.tpl.c | 46 int CB_BLOCKS = nBlocksIFm, BF = 1; variable 49 CB_BLOCKS = nBlocksIFm/BF; 50 blocks = CB_BLOCKS; 89 …batchreduce_kernel_beta( &LIBXSMM_VLA_ACCESS(4, filter, ofm1, ifm1*CB_BLOCKS, 0, 0, nBlocksIFm, ha… 90 … &LIBXSMM_VLA_ACCESS(4, input, mb1, ifm1*CB_BLOCKS, 0, 0, nBlocksIFm, handle->bn, handle->bc), 172 …batchreduce_kernel_beta( &LIBXSMM_VLA_ACCESS(4, filter, ofm1, ifm1*CB_BLOCKS, 0, 0, nBlocksIFm, ha… 173 … &LIBXSMM_VLA_ACCESS(4, input, mb1, ifm1*CB_BLOCKS, 0, 0, nBlocksIFm, handle->bn, handle->bc),
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_diffused_bf16.tpl.c | 27 blocks = CB_BLOCKS; 42 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wi, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 43 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C), 57 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wc, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 58 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C), 72 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wf, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 73 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C), 88 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wo, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 89 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C),
|
H A D | libxsmm_dnn_fullyconnected_st_fwd_ncnc_kcck_generic_bf16.tpl.c | 53 int CB_BLOCKS = nBlocksIFm, BF = 1; variable 56 CB_BLOCKS = nBlocksIFm/BF; 57 blocks = CB_BLOCKS; 90 …batchreduce_kernel( &LIBXSMM_VLA_ACCESS(5, filter, ofm1, ifm1*CB_BLOCKS, 0, 0, 0, nBlocksIFm, bc_l… 91 … &LIBXSMM_VLA_ACCESS(4, input, mb1, ifm1*CB_BLOCKS, 0, 0, nBlocksIFm, handle->bn, handle->bc), 241 …batchreduce_kernel( &LIBXSMM_VLA_ACCESS(5, filter, ofm1, ifm1*CB_BLOCKS, 0, 0, 0, nBlocksIFm, bc_l… 242 … &LIBXSMM_VLA_ACCESS(4, input, mb1, ifm1*CB_BLOCKS, 0, 0, nBlocksIFm, handle->bn, handle->bc),
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck.tpl.c | 16 libxsmm_blasint j, ik, ikb, in, ic, icb, inik, BF, CB, CB_BLOCKS, KB_BLOCKS; variable 120 CB_BLOCKS = cBlocks/BF;
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_bf16.tpl.c | 53 libxsmm_blasint j, ik, ikb, in, /*ic, icb,*/ inik, BF, CB, CB_BLOCKS, KB_BLOCKS; variable 196 CB_BLOCKS = cBlocks/BF;
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_ck_generic.tpl.c | 16 libxsmm_blasint j, ik, ikb, in, ic, icb, inik, BF, CB, CB_BLOCKS, KB_BLOCKS, ikic, jk, jc; variable 155 CB_BLOCKS = cBlocks/BF;
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_ck_generic_bf16.tpl.c | 53 libxsmm_blasint j, ik, ikb, in, ic, /*icb,*/ inik, BF, CB, CB_BLOCKS, KB_BLOCKS, ikic, jk, jc; variable 212 CB_BLOCKS = cBlocks/BF;
|
/dports/math/libxsmm/libxsmm-1.16.3/src/ |
H A D | libxsmm_dnn_rnncell.c | 94 libxsmm_blasint BF, CB_BLOCKS, KB_BLOCKS; in libxsmm_dnn_create_rnncell() local 115 CB_BLOCKS = cBlocks/BF; in libxsmm_dnn_create_rnncell() 121 …m_bsmmdispatch_reducebatch_strd_unroll( bk, bn, bc, stride_a, stride_b, CB_BLOCKS, &bk, &C, &K, NU… in libxsmm_dnn_create_rnncell()
|