/dports/math/libxsmm/libxsmm-1.16.3/src/template/ |
H A D | libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck_bf16.tpl.c | 25 const libxsmm_blasint cBlocks = C/bc; variable 121 LIBXSMM_VLA_DECL(5, element_filter_type, wi, wiD, cBlocks, bc_lp, bk, lpb); 122 LIBXSMM_VLA_DECL(5, element_filter_type, wc, wcD, cBlocks, bc_lp, bk, lpb); 123 LIBXSMM_VLA_DECL(5, element_filter_type, wf, wfD, cBlocks, bc_lp, bk, lpb); 124 LIBXSMM_VLA_DECL(5, element_filter_type, wo, woD, cBlocks, bc_lp, bk, lpb); 142 LIBXSMM_VLA_DECL(4, float, dwi, dwiD_scratch, cBlocks, bc, bk); 143 LIBXSMM_VLA_DECL(4, float, dwf, dwfD_scratch, cBlocks, bc, bk); 144 LIBXSMM_VLA_DECL(4, float, dwo, dwoD_scratch, cBlocks, bc, bk); 145 LIBXSMM_VLA_DECL(4, float, dwc, dwcD_scratch, cBlocks, bc, bk); 334 tmp.f = LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, jc, jk, cBlocks, bc, bk); [all …]
|
H A D | libxsmm_dnn_rnncell_st_rnn_fwd_ncnc_kcck.tpl.c | 31 libxsmm_blasint cBlocks = C/bc; variable 34 LIBXSMM_VLA_DECL(5, element_input_type, x, xt, nBlocks, cBlocks, bn, bc); 36 LIBXSMM_VLA_DECL(4, element_filter_type, w, wD, cBlocks, bc, bk); 92 for (ic = 0; ic < cBlocks; ic++) { 94 A_array[ii][jj][ic] = &LIBXSMM_VLA_ACCESS(4, w, ik, ic, 0, 0, cBlocks, bc, bk); 118 libxsmm_blasint total_blocks = in_tasks_per_thread*ik_tasks_per_thread*cBlocks; 148 blocks = cBlocks; 174 assert(cBlocks <= 1024); 191 for (ic = 0; ic < cBlocks; ic++) { 193 A_array[ic] = &LIBXSMM_VLA_ACCESS(4, w, ik, ic, 0, 0, cBlocks, bc, bk); [all …]
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck.tpl.c | 25 const libxsmm_blasint cBlocks = C/bc; variable 58 LIBXSMM_VLA_DECL(4, element_filter_type, wi, wiD, cBlocks, bc, bk); 59 LIBXSMM_VLA_DECL(4, element_filter_type, wf, wfD, cBlocks, bc, bk); 60 LIBXSMM_VLA_DECL(4, element_filter_type, wo, woD, cBlocks, bc, bk); 61 LIBXSMM_VLA_DECL(4, element_filter_type, wc, wcD, cBlocks, bc, bk); 105 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) { 111 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) { 120 CB_BLOCKS = cBlocks/BF;
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_ck_generic.tpl.c | 25 const libxsmm_blasint cBlocks = C/bc; variable 68 LIBXSMM_VLA_DECL(4, element_filter_type, wi, wiD_scratch, cBlocks, bc, bk); 69 LIBXSMM_VLA_DECL(4, element_filter_type, wf, wfD_scratch, cBlocks, bc, bk); 70 LIBXSMM_VLA_DECL(4, element_filter_type, wo, woD_scratch, cBlocks, bc, bk); 71 LIBXSMM_VLA_DECL(4, element_filter_type, wc, wcD_scratch, cBlocks, bc, bk); 140 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) { 146 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) { 155 CB_BLOCKS = cBlocks/BF; 168 …LIBXSMM_VLA_ACCESS(4, wi, ik, ic, jc, jk, cBlocks, bc, bk) = LIBXSMM_VLA_ACCESS(2, wi_ck, ic*bc+j… 169 …LIBXSMM_VLA_ACCESS(4, wc, ik, ic, jc, jk, cBlocks, bc, bk) = LIBXSMM_VLA_ACCESS(2, wc_ck, ic*bc+j… [all …]
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_ck_generic_bf16.tpl.c | 62 const libxsmm_blasint cBlocks = C/bc; variable 120 LIBXSMM_VLA_DECL(5, element_filter_type, wi, wiD_scratch, cBlocks, bc_lp, bk, lpb); 121 LIBXSMM_VLA_DECL(5, element_filter_type, wf, wfD_scratch, cBlocks, bc_lp, bk, lpb); 122 LIBXSMM_VLA_DECL(5, element_filter_type, wo, woD_scratch, cBlocks, bc_lp, bk, lpb); 123 LIBXSMM_VLA_DECL(5, element_filter_type, wc, wcD_scratch, cBlocks, bc_lp, bk, lpb); 197 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) { 203 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) { 212 CB_BLOCKS = cBlocks/BF; 225 …LIBXSMM_VLA_ACCESS(5, wi, ik, ic, jc/lpb, jk, jc%lpb, cBlocks, bc_lp, bk, lpb) = LIBXSMM_VLA_ACCE… 226 …LIBXSMM_VLA_ACCESS(5, wc, ik, ic, jc/lpb, jk, jc%lpb, cBlocks, bc_lp, bk, lpb) = LIBXSMM_VLA_ACCE… [all …]
|
H A D | libxsmm_dnn_rnncell_st_gru_fwd_nc_ck_generic.tpl.c | 23 const libxsmm_blasint cBlocks = C/bc; variable 57 LIBXSMM_VLA_DECL(4, element_filter_type, wi, wiD_scratch, cBlocks, bc, bk); 58 LIBXSMM_VLA_DECL(4, element_filter_type, wc, wcD_scratch, cBlocks, bc, bk); 59 LIBXSMM_VLA_DECL(4, element_filter_type, wf, wfD_scratch, cBlocks, bc, bk); 118 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) { 124 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) { 133 CB_BLOCKS = cBlocks/BF; 143 …LIBXSMM_VLA_ACCESS(4, wi, ik, ic, jc, jk, cBlocks, bc, bk) = LIBXSMM_VLA_ACCESS(2, wi_ck, ic*bc+j… 181 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wi, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk); 206 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wc, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk); [all …]
|
H A D | libxsmm_dnn_rnncell_st_gru_fwd_nc_kcck.tpl.c | 22 const libxsmm_blasint cBlocks = C/bc; variable 48 LIBXSMM_VLA_DECL(4, element_filter_type, wi, wiD, cBlocks, bc, bk); 49 LIBXSMM_VLA_DECL(4, element_filter_type, wc, wcD, cBlocks, bc, bk); 50 LIBXSMM_VLA_DECL(4, element_filter_type, wf, wfD, cBlocks, bc, bk); 84 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) { 90 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) { 99 CB_BLOCKS = cBlocks/BF; 118 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wi, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk); 143 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wc, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk); 188 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wf, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
|
H A D | libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck.tpl.c | 25 const libxsmm_blasint cBlocks = C/bc; variable 100 LIBXSMM_VLA_DECL(4, element_filter_type, wi, wiD, cBlocks, bc, bk); 101 LIBXSMM_VLA_DECL(4, element_filter_type, wf, wfD, cBlocks, bc, bk); 102 LIBXSMM_VLA_DECL(4, element_filter_type, wo, woD, cBlocks, bc, bk); 103 LIBXSMM_VLA_DECL(4, element_filter_type, wc, wcD, cBlocks, bc, bk); 119 LIBXSMM_VLA_DECL(4, element_filter_type, dwi, dwiD, cBlocks, bc, bk); 120 LIBXSMM_VLA_DECL(4, element_filter_type, dwf, dwfD, cBlocks, bc, bk); 121 LIBXSMM_VLA_DECL(4, element_filter_type, dwo, dwoD, cBlocks, bc, bk); 122 LIBXSMM_VLA_DECL(4, element_filter_type, dwc, dwcD, cBlocks, bc, bk); 261 …iT, ic, ik, jk, jc, kBlocks, bk, bc) = LIBXSMM_VLA_ACCESS(4, wi, ik, ic, jc, jk, cBlocks, bc, bk); [all …]
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_bf16.tpl.c | 62 const libxsmm_blasint cBlocks = C/bc; variable 112 LIBXSMM_VLA_DECL(5, element_filter_type, wi, wiD, cBlocks, bc_lp, bk, lpb); 113 LIBXSMM_VLA_DECL(5, element_filter_type, wf, wfD, cBlocks, bc_lp, bk, lpb); 114 LIBXSMM_VLA_DECL(5, element_filter_type, wo, woD, cBlocks, bc_lp, bk, lpb); 115 LIBXSMM_VLA_DECL(5, element_filter_type, wc, wcD, cBlocks, bc_lp, bk, lpb); 181 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) { 187 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) { 196 CB_BLOCKS = cBlocks/BF;
|
H A D | libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck_core_bf16.tpl.c | 216 &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks); 224 &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks); 232 &LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks); 240 &LIBXSMM_VLA_ACCESS(4, dwo, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks); 275 &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks); 279 &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks); 283 &LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks); 287 &LIBXSMM_VLA_ACCESS(4, dwo, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks);
|
H A D | libxsmm_dnn_rnncell_st_gru_bwdupd_nc_kcck.tpl.c | 22 const libxsmm_blasint cBlocks = C/bc; variable 80 LIBXSMM_VLA_DECL(4, element_filter_type, wi, wiD, cBlocks, bc, bk); 81 LIBXSMM_VLA_DECL(4, element_filter_type, wc, wcD, cBlocks, bc, bk); 82 LIBXSMM_VLA_DECL(4, element_filter_type, wf, wfD, cBlocks, bc, bk); 93 LIBXSMM_VLA_DECL(4, element_filter_type, dwi, dwiD, cBlocks, bc, bk); 94 LIBXSMM_VLA_DECL(4, element_filter_type, dwc, dwcD, cBlocks, bc, bk); 95 LIBXSMM_VLA_DECL(4, element_filter_type, dwf, dwfD, cBlocks, bc, bk); 531 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk)… 599 …batchreduce_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk… 605 …batchreduce_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk… [all …]
|
H A D | libxsmm_dnn_rnncell_st_rnn_fwd_nc_kcck.tpl.c | 31 libxsmm_blasint cBlocks = C/bc; variable 36 LIBXSMM_VLA_DECL(4, element_filter_type, w, wD, cBlocks, bc, bk); 66 CB_BLOCKS = cBlocks/BF; 96 A_array[ic] = &LIBXSMM_VLA_ACCESS(4, w, ik, ic + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
|
H A D | libxsmm_dnn_rnncell_st_gru_bwdupd_nc_ck_generic.tpl.c | 23 const libxsmm_blasint cBlocks = C/bc; variable 102 LIBXSMM_VLA_DECL(4, element_filter_type, dwi, dwiD_scratch, cBlocks, bc, bk); 103 LIBXSMM_VLA_DECL(4, element_filter_type, dwc, dwcD_scratch, cBlocks, bc, bk); 104 LIBXSMM_VLA_DECL(4, element_filter_type, dwf, dwfD_scratch, cBlocks, bc, bk); 464 …e_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks… 511 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk)… 523 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk)… 535 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, 0, 0, cBlocks, bc, bk)… 579 …batchreduce_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk… 585 …batchreduce_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk… [all …]
|
H A D | libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_ck_generic.tpl.c | 26 const libxsmm_blasint cBlocks = C/bc; variable 124 LIBXSMM_VLA_DECL(4, element_filter_type, dwi, dwiD_scratch, cBlocks, bc, bk); 125 LIBXSMM_VLA_DECL(4, element_filter_type, dwf, dwfD_scratch, cBlocks, bc, bk); 126 LIBXSMM_VLA_DECL(4, element_filter_type, dwo, dwoD_scratch, cBlocks, bc, bk); 127 LIBXSMM_VLA_DECL(4, element_filter_type, dwc, dwcD_scratch, cBlocks, bc, bk); 315 …CESS(2, dwi_ck, ic+jc, ik+jk , K4) = LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, jc, jk, cBlocks, bc, bk); 316 …CESS(2, dwc_ck, ic+jc, ik+jk , K4) = LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, jc, jk, cBlocks, bc, bk); 317 …CESS(2, dwf_ck, ic+jc, ik+jk , K4) = LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, jc, jk, cBlocks, bc, bk); 318 …CESS(2, dwo_ck, ic+jc, ik+jk , K4) = LIBXSMM_VLA_ACCESS(4, dwo, ikb, icb, jc, jk, cBlocks, bc, bk);
|
H A D | libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_ck_generic_bf16.tpl.c | 26 const libxsmm_blasint cBlocks = C/bc; variable 137 LIBXSMM_VLA_DECL(4, float, dwi, dwiD_scratch, cBlocks, bc, bk); 138 LIBXSMM_VLA_DECL(4, float, dwf, dwfD_scratch, cBlocks, bc, bk); 139 LIBXSMM_VLA_DECL(4, float, dwo, dwoD_scratch, cBlocks, bc, bk); 140 LIBXSMM_VLA_DECL(4, float, dwc, dwcD_scratch, cBlocks, bc, bk); 320 …LIBXSMM_INTRINSICS_MM512_LOAD_PS(&LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, jc, jk, cBlocks, bc, bk)))); 321 …LIBXSMM_INTRINSICS_MM512_LOAD_PS(&LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, jc, jk, cBlocks, bc, bk)))); 322 …LIBXSMM_INTRINSICS_MM512_LOAD_PS(&LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, jc, jk, cBlocks, bc, bk)))); 323 …LIBXSMM_INTRINSICS_MM512_LOAD_PS(&LIBXSMM_VLA_ACCESS(4, dwo, ikb, icb, jc, jk, cBlocks, bc, bk))));
|
H A D | libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck_core.tpl.c | 304 …e_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks… 316 …e_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks… 328 …e_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks… 362 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk)… 374 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk)… 386 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, 0, 0, cBlocks, bc, bk)… 398 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwo, ikb, icb, 0, 0, cBlocks, bc, bk)… 447 …batchreduce_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk… 453 …batchreduce_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk… 459 …batchreduce_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, 0, 0, cBlocks, bc, bk… [all …]
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_fused_bf16.tpl.c | 43 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wi, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 71 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wc, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 99 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wf, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 127 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wo, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 214 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wi, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 242 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wc, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 270 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wf, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 298 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wo, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_fused.tpl.c | 28 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wi, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk); 71 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wc, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk); 114 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wf, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk); 157 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wo, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_diffused.tpl.c | 28 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wi, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk); 48 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wc, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk); 68 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wf, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk); 88 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wo, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
|
H A D | libxsmm_dnn_rnncell_st_rnn_bwdupd_nc_kcck.tpl.c | 42 libxsmm_blasint cBlocks = C/bc; variable 53 LIBXSMM_VLA_DECL(4, element_filter_type, dw, dwD, cBlocks, bc, bk); 155 … wT, ic, ik, jk, jc, kBlocks, bk, bc) = LIBXSMM_VLA_ACCESS(4, w, ik, ic, jc, jk, cBlocks, bc, bk); 269 …batchreduce_kernelcz(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dw, ikb, icb, 0, 0, cBlocks, bc, bk)… 398 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dw, ikb, icb, 0, 0, cBlocks, bc, bk),…
|
H A D | libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_diffused_bf16.tpl.c | 42 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wi, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 57 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wc, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 72 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wf, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb… 88 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wo, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
|
/dports/math/libxsmm/libxsmm-1.16.3/samples/deeplearning/common/ |
H A D | dnn_common.h | 647 int cBlocks = C/bc; in matrix_copy_NC_to_NCNC() local 673 int cBlocks = C/bc; in matrix_copy_NCNC_to_NC() local 699 int cBlocks = C/bc; in matrix_copy_NC_to_NCNC_bf16() local 725 int cBlocks = C/bc; in matrix_copy_NCNC_to_NC_bf16() local 751 int cBlocks = C/bc; in matrix_copy_CK_to_KCCK() local 775 int cBlocks = C/bc; in matrix_copy_CK_to_CKKC() local 799 int cBlocks = C/bc; in matrix_copy_KC_to_KCCK() local 823 int cBlocks = C/bc; in matrix_copy_KCCK_to_KC() local 847 int cBlocks = C/bc; in matrix_copy_KCCK_to_CK() local 871 int cBlocks = C/bc; in matrix_copy_CK_to_KCCK_bf16() local [all …]
|
/dports/math/apache-commons-math/commons-math3-3.6.1-src/src/main/java/org/apache/commons/math3/linear/ |
H A D | QRDecomposition.java | 402 final int cBlocks = (columns + blockSize - 1) / blockSize; in solve() local 407 for (int kBlock = 0; kBlock < cBlocks; ++kBlock) { in solve() 447 final double[] xBlock = xBlocks[jBlock * cBlocks + kBlock]; in solve()
|
/dports/multimedia/kodi/xbmc-19.3-Matrix/lib/win32/Effects11/ |
H A D | EffectLoad.h | 123 …template<class T> HRESULT ReallocateBlockAssignments(T* &pBlocks, uint32_t cBlocks, T* pOldBlocks… 128 template<class T> uint32_t CalculateBlockAssignmentSize(T* &pBlocks, uint32_t cBlocks);
|
H A D | EffectLoad.cpp | 1952 size_t cBlocks = m_pHeader->Effect.cObjectVariables; in LoadObjectVariables() local 1954 for (size_t iBlock=0; iBlock<cBlocks; iBlock++) in LoadObjectVariables() 2286 uint32_t cBlocks; in LoadInterfaceVariables() local 2288 cBlocks = m_pHeader->cInterfaceVariables; in LoadInterfaceVariables() 2290 for (iBlock=0; iBlock<cBlocks; iBlock++) in LoadInterfaceVariables() 3362 template<class T> HRESULT CEffectLoader::ReallocateBlockAssignments(T* &pBlocks, uint32_t cBlocks,… in ReallocateBlockAssignments() argument 3367 for(size_t i=0; i<cBlocks; i++) in ReallocateBlockAssignments() 3477 …late<class T> uint32_t CEffectLoader::CalculateBlockAssignmentSize(T* &pBlocks, uint32_t cBlocks) in CalculateBlockAssignmentSize() argument 3481 for(size_t i=0; i<cBlocks; i++) in CalculateBlockAssignmentSize()
|