Home
last modified time | relevance | path

Searched refs:CB_BLOCKS (Results 1 – 14 of 14) sorted by relevance

/dports/math/libxsmm/libxsmm-1.16.3/src/template/
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_fused.tpl.c27 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) {
29 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C);
32 blocks = CB_BLOCKS;
70 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) {
72 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C);
75 blocks = CB_BLOCKS;
113 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) {
115 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C);
118 blocks = CB_BLOCKS;
156 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) {
[all …]
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_diffused.tpl.c27 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) {
29 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C);
32 blocks = CB_BLOCKS;
47 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) {
49 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C);
52 blocks = CB_BLOCKS;
67 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) {
69 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C);
72 blocks = CB_BLOCKS;
87 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) {
[all …]
H A Dlibxsmm_dnn_rnncell_st_gru_fwd_nc_kcck.tpl.c13 libxsmm_blasint j, ik, ikb, in, ic, icb, inik, BF, CB, CB_BLOCKS, KB_BLOCKS; variable
99 CB_BLOCKS = cBlocks/BF;
117 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) {
119 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C);
122 blocks = CB_BLOCKS;
142 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) {
144 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C);
147 blocks = CB_BLOCKS;
187 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) {
189 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C);
[all …]
H A Dlibxsmm_dnn_rnncell_st_gru_fwd_nc_ck_generic.tpl.c13 libxsmm_blasint j, ik, ikb, in, ic, icb, inik, BF, CB, CB_BLOCKS, KB_BLOCKS, ikic, jk, jc; variable
133 CB_BLOCKS = cBlocks/BF;
180 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) {
182 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C);
185 blocks = CB_BLOCKS;
205 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) {
207 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C);
210 blocks = CB_BLOCKS;
250 for (icb = 0, ic = 0; icb < CB_BLOCKS; ic += bc, icb++) {
252 B_array[icb] = &LIBXSMM_VLA_ACCESS(3, x, j, in, ic + CB*CB_BLOCKS*bc, N, C);
[all …]
H A Dlibxsmm_dnn_rnncell_st_rnn_fwd_nc_kcck.tpl.c13 libxsmm_blasint i, ik, in, ic, inik, BF, CB, CB_BLOCKS, KB_BLOCKS; variable
66 CB_BLOCKS = cBlocks/BF;
68 assert(CB_BLOCKS <= 1024);
94 for (ic = 0; ic < CB_BLOCKS; ic++) {
96 A_array[ic] = &LIBXSMM_VLA_ACCESS(4, w, ik, ic + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
97 B_array[ic] = &LIBXSMM_VLA_ACCESS(3, x, i, in*bn, (ic + CB*CB_BLOCKS)*bc, N, C);
100 blocks = CB_BLOCKS;
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_fused_bf16.tpl.c26 blocksa = CB_BLOCKS;
43 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wi, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
44 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C),
71 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wc, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
72 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C),
100 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C),
128 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C),
215 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C),
243 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C),
271 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C),
[all …]
H A Dlibxsmm_dnn_fullyconnected_st_fwd_ncnc_kcck_generic.tpl.c46 int CB_BLOCKS = nBlocksIFm, BF = 1; variable
49 CB_BLOCKS = nBlocksIFm/BF;
50 blocks = CB_BLOCKS;
89 …batchreduce_kernel_beta( &LIBXSMM_VLA_ACCESS(4, filter, ofm1, ifm1*CB_BLOCKS, 0, 0, nBlocksIFm, ha…
90 … &LIBXSMM_VLA_ACCESS(4, input, mb1, ifm1*CB_BLOCKS, 0, 0, nBlocksIFm, handle->bn, handle->bc),
172 …batchreduce_kernel_beta( &LIBXSMM_VLA_ACCESS(4, filter, ofm1, ifm1*CB_BLOCKS, 0, 0, nBlocksIFm, ha…
173 … &LIBXSMM_VLA_ACCESS(4, input, mb1, ifm1*CB_BLOCKS, 0, 0, nBlocksIFm, handle->bn, handle->bc),
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_diffused_bf16.tpl.c27 blocks = CB_BLOCKS;
42 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wi, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
43 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C),
57 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wc, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
58 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C),
72 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wf, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
73 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C),
88 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wo, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
89 &LIBXSMM_VLA_ACCESS(3, x, j, in, CB*CB_BLOCKS*bc, N, C),
H A Dlibxsmm_dnn_fullyconnected_st_fwd_ncnc_kcck_generic_bf16.tpl.c53 int CB_BLOCKS = nBlocksIFm, BF = 1; variable
56 CB_BLOCKS = nBlocksIFm/BF;
57 blocks = CB_BLOCKS;
90 …batchreduce_kernel( &LIBXSMM_VLA_ACCESS(5, filter, ofm1, ifm1*CB_BLOCKS, 0, 0, 0, nBlocksIFm, bc_l…
91 … &LIBXSMM_VLA_ACCESS(4, input, mb1, ifm1*CB_BLOCKS, 0, 0, nBlocksIFm, handle->bn, handle->bc),
241 …batchreduce_kernel( &LIBXSMM_VLA_ACCESS(5, filter, ofm1, ifm1*CB_BLOCKS, 0, 0, 0, nBlocksIFm, bc_l…
242 … &LIBXSMM_VLA_ACCESS(4, input, mb1, ifm1*CB_BLOCKS, 0, 0, nBlocksIFm, handle->bn, handle->bc),
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck.tpl.c16 libxsmm_blasint j, ik, ikb, in, ic, icb, inik, BF, CB, CB_BLOCKS, KB_BLOCKS; variable
120 CB_BLOCKS = cBlocks/BF;
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_bf16.tpl.c53 libxsmm_blasint j, ik, ikb, in, /*ic, icb,*/ inik, BF, CB, CB_BLOCKS, KB_BLOCKS; variable
196 CB_BLOCKS = cBlocks/BF;
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_ck_generic.tpl.c16 libxsmm_blasint j, ik, ikb, in, ic, icb, inik, BF, CB, CB_BLOCKS, KB_BLOCKS, ikic, jk, jc; variable
155 CB_BLOCKS = cBlocks/BF;
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_ck_generic_bf16.tpl.c53 libxsmm_blasint j, ik, ikb, in, ic, /*icb,*/ inik, BF, CB, CB_BLOCKS, KB_BLOCKS, ikic, jk, jc; variable
212 CB_BLOCKS = cBlocks/BF;
/dports/math/libxsmm/libxsmm-1.16.3/src/
H A Dlibxsmm_dnn_rnncell.c94 libxsmm_blasint BF, CB_BLOCKS, KB_BLOCKS; in libxsmm_dnn_create_rnncell() local
115 CB_BLOCKS = cBlocks/BF; in libxsmm_dnn_create_rnncell()
121 …m_bsmmdispatch_reducebatch_strd_unroll( bk, bn, bc, stride_a, stride_b, CB_BLOCKS, &bk, &C, &K, NU… in libxsmm_dnn_create_rnncell()