Home
last modified time | relevance | path

Searched refs:cBlocks (Results 1 – 25 of 27) sorted by relevance

12

/dports/math/libxsmm/libxsmm-1.16.3/src/template/
H A Dlibxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck_bf16.tpl.c25 const libxsmm_blasint cBlocks = C/bc; variable
121 LIBXSMM_VLA_DECL(5, element_filter_type, wi, wiD, cBlocks, bc_lp, bk, lpb);
122 LIBXSMM_VLA_DECL(5, element_filter_type, wc, wcD, cBlocks, bc_lp, bk, lpb);
123 LIBXSMM_VLA_DECL(5, element_filter_type, wf, wfD, cBlocks, bc_lp, bk, lpb);
124 LIBXSMM_VLA_DECL(5, element_filter_type, wo, woD, cBlocks, bc_lp, bk, lpb);
142 LIBXSMM_VLA_DECL(4, float, dwi, dwiD_scratch, cBlocks, bc, bk);
143 LIBXSMM_VLA_DECL(4, float, dwf, dwfD_scratch, cBlocks, bc, bk);
144 LIBXSMM_VLA_DECL(4, float, dwo, dwoD_scratch, cBlocks, bc, bk);
145 LIBXSMM_VLA_DECL(4, float, dwc, dwcD_scratch, cBlocks, bc, bk);
334 tmp.f = LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, jc, jk, cBlocks, bc, bk);
[all …]
H A Dlibxsmm_dnn_rnncell_st_rnn_fwd_ncnc_kcck.tpl.c31 libxsmm_blasint cBlocks = C/bc; variable
34 LIBXSMM_VLA_DECL(5, element_input_type, x, xt, nBlocks, cBlocks, bn, bc);
36 LIBXSMM_VLA_DECL(4, element_filter_type, w, wD, cBlocks, bc, bk);
92 for (ic = 0; ic < cBlocks; ic++) {
94 A_array[ii][jj][ic] = &LIBXSMM_VLA_ACCESS(4, w, ik, ic, 0, 0, cBlocks, bc, bk);
118 libxsmm_blasint total_blocks = in_tasks_per_thread*ik_tasks_per_thread*cBlocks;
148 blocks = cBlocks;
174 assert(cBlocks <= 1024);
191 for (ic = 0; ic < cBlocks; ic++) {
193 A_array[ic] = &LIBXSMM_VLA_ACCESS(4, w, ik, ic, 0, 0, cBlocks, bc, bk);
[all …]
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck.tpl.c25 const libxsmm_blasint cBlocks = C/bc; variable
58 LIBXSMM_VLA_DECL(4, element_filter_type, wi, wiD, cBlocks, bc, bk);
59 LIBXSMM_VLA_DECL(4, element_filter_type, wf, wfD, cBlocks, bc, bk);
60 LIBXSMM_VLA_DECL(4, element_filter_type, wo, woD, cBlocks, bc, bk);
61 LIBXSMM_VLA_DECL(4, element_filter_type, wc, wcD, cBlocks, bc, bk);
105 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) {
111 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) {
120 CB_BLOCKS = cBlocks/BF;
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_ck_generic.tpl.c25 const libxsmm_blasint cBlocks = C/bc; variable
68 LIBXSMM_VLA_DECL(4, element_filter_type, wi, wiD_scratch, cBlocks, bc, bk);
69 LIBXSMM_VLA_DECL(4, element_filter_type, wf, wfD_scratch, cBlocks, bc, bk);
70 LIBXSMM_VLA_DECL(4, element_filter_type, wo, woD_scratch, cBlocks, bc, bk);
71 LIBXSMM_VLA_DECL(4, element_filter_type, wc, wcD_scratch, cBlocks, bc, bk);
140 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) {
146 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) {
155 CB_BLOCKS = cBlocks/BF;
168 …LIBXSMM_VLA_ACCESS(4, wi, ik, ic, jc, jk, cBlocks, bc, bk) = LIBXSMM_VLA_ACCESS(2, wi_ck, ic*bc+j…
169 …LIBXSMM_VLA_ACCESS(4, wc, ik, ic, jc, jk, cBlocks, bc, bk) = LIBXSMM_VLA_ACCESS(2, wc_ck, ic*bc+j…
[all …]
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_ck_generic_bf16.tpl.c62 const libxsmm_blasint cBlocks = C/bc; variable
120 LIBXSMM_VLA_DECL(5, element_filter_type, wi, wiD_scratch, cBlocks, bc_lp, bk, lpb);
121 LIBXSMM_VLA_DECL(5, element_filter_type, wf, wfD_scratch, cBlocks, bc_lp, bk, lpb);
122 LIBXSMM_VLA_DECL(5, element_filter_type, wo, woD_scratch, cBlocks, bc_lp, bk, lpb);
123 LIBXSMM_VLA_DECL(5, element_filter_type, wc, wcD_scratch, cBlocks, bc_lp, bk, lpb);
197 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) {
203 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) {
212 CB_BLOCKS = cBlocks/BF;
225 …LIBXSMM_VLA_ACCESS(5, wi, ik, ic, jc/lpb, jk, jc%lpb, cBlocks, bc_lp, bk, lpb) = LIBXSMM_VLA_ACCE…
226 …LIBXSMM_VLA_ACCESS(5, wc, ik, ic, jc/lpb, jk, jc%lpb, cBlocks, bc_lp, bk, lpb) = LIBXSMM_VLA_ACCE…
[all …]
H A Dlibxsmm_dnn_rnncell_st_gru_fwd_nc_ck_generic.tpl.c23 const libxsmm_blasint cBlocks = C/bc; variable
57 LIBXSMM_VLA_DECL(4, element_filter_type, wi, wiD_scratch, cBlocks, bc, bk);
58 LIBXSMM_VLA_DECL(4, element_filter_type, wc, wcD_scratch, cBlocks, bc, bk);
59 LIBXSMM_VLA_DECL(4, element_filter_type, wf, wfD_scratch, cBlocks, bc, bk);
118 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) {
124 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) {
133 CB_BLOCKS = cBlocks/BF;
143 …LIBXSMM_VLA_ACCESS(4, wi, ik, ic, jc, jk, cBlocks, bc, bk) = LIBXSMM_VLA_ACCESS(2, wi_ck, ic*bc+j…
181 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wi, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
206 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wc, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
[all …]
H A Dlibxsmm_dnn_rnncell_st_gru_fwd_nc_kcck.tpl.c22 const libxsmm_blasint cBlocks = C/bc; variable
48 LIBXSMM_VLA_DECL(4, element_filter_type, wi, wiD, cBlocks, bc, bk);
49 LIBXSMM_VLA_DECL(4, element_filter_type, wc, wcD, cBlocks, bc, bk);
50 LIBXSMM_VLA_DECL(4, element_filter_type, wf, wfD, cBlocks, bc, bk);
84 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) {
90 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) {
99 CB_BLOCKS = cBlocks/BF;
118 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wi, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
143 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wc, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
188 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wf, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
H A Dlibxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck.tpl.c25 const libxsmm_blasint cBlocks = C/bc; variable
100 LIBXSMM_VLA_DECL(4, element_filter_type, wi, wiD, cBlocks, bc, bk);
101 LIBXSMM_VLA_DECL(4, element_filter_type, wf, wfD, cBlocks, bc, bk);
102 LIBXSMM_VLA_DECL(4, element_filter_type, wo, woD, cBlocks, bc, bk);
103 LIBXSMM_VLA_DECL(4, element_filter_type, wc, wcD, cBlocks, bc, bk);
119 LIBXSMM_VLA_DECL(4, element_filter_type, dwi, dwiD, cBlocks, bc, bk);
120 LIBXSMM_VLA_DECL(4, element_filter_type, dwf, dwfD, cBlocks, bc, bk);
121 LIBXSMM_VLA_DECL(4, element_filter_type, dwo, dwoD, cBlocks, bc, bk);
122 LIBXSMM_VLA_DECL(4, element_filter_type, dwc, dwcD, cBlocks, bc, bk);
261 …iT, ic, ik, jk, jc, kBlocks, bk, bc) = LIBXSMM_VLA_ACCESS(4, wi, ik, ic, jc, jk, cBlocks, bc, bk);
[all …]
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_bf16.tpl.c62 const libxsmm_blasint cBlocks = C/bc; variable
112 LIBXSMM_VLA_DECL(5, element_filter_type, wi, wiD, cBlocks, bc_lp, bk, lpb);
113 LIBXSMM_VLA_DECL(5, element_filter_type, wf, wfD, cBlocks, bc_lp, bk, lpb);
114 LIBXSMM_VLA_DECL(5, element_filter_type, wo, woD, cBlocks, bc_lp, bk, lpb);
115 LIBXSMM_VLA_DECL(5, element_filter_type, wc, wcD, cBlocks, bc_lp, bk, lpb);
181 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) {
187 while ( (cBlocks % BF != 0) || (kBlocks % BF != 0) ) {
196 CB_BLOCKS = cBlocks/BF;
H A Dlibxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck_core_bf16.tpl.c216 &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks);
224 &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks);
232 &LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks);
240 &LIBXSMM_VLA_ACCESS(4, dwo, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks);
275 &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks);
279 &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks);
283 &LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks);
287 &LIBXSMM_VLA_ACCESS(4, dwo, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks);
H A Dlibxsmm_dnn_rnncell_st_gru_bwdupd_nc_kcck.tpl.c22 const libxsmm_blasint cBlocks = C/bc; variable
80 LIBXSMM_VLA_DECL(4, element_filter_type, wi, wiD, cBlocks, bc, bk);
81 LIBXSMM_VLA_DECL(4, element_filter_type, wc, wcD, cBlocks, bc, bk);
82 LIBXSMM_VLA_DECL(4, element_filter_type, wf, wfD, cBlocks, bc, bk);
93 LIBXSMM_VLA_DECL(4, element_filter_type, dwi, dwiD, cBlocks, bc, bk);
94 LIBXSMM_VLA_DECL(4, element_filter_type, dwc, dwcD, cBlocks, bc, bk);
95 LIBXSMM_VLA_DECL(4, element_filter_type, dwf, dwfD, cBlocks, bc, bk);
531 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk)…
599 …batchreduce_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk…
605 …batchreduce_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk…
[all …]
H A Dlibxsmm_dnn_rnncell_st_rnn_fwd_nc_kcck.tpl.c31 libxsmm_blasint cBlocks = C/bc; variable
36 LIBXSMM_VLA_DECL(4, element_filter_type, w, wD, cBlocks, bc, bk);
66 CB_BLOCKS = cBlocks/BF;
96 A_array[ic] = &LIBXSMM_VLA_ACCESS(4, w, ik, ic + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
H A Dlibxsmm_dnn_rnncell_st_gru_bwdupd_nc_ck_generic.tpl.c23 const libxsmm_blasint cBlocks = C/bc; variable
102 LIBXSMM_VLA_DECL(4, element_filter_type, dwi, dwiD_scratch, cBlocks, bc, bk);
103 LIBXSMM_VLA_DECL(4, element_filter_type, dwc, dwcD_scratch, cBlocks, bc, bk);
104 LIBXSMM_VLA_DECL(4, element_filter_type, dwf, dwfD_scratch, cBlocks, bc, bk);
464 …e_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks…
511 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk)…
523 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk)…
535 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, 0, 0, cBlocks, bc, bk)…
579 …batchreduce_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk…
585 …batchreduce_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk…
[all …]
H A Dlibxsmm_dnn_rnncell_st_lstm_bwdupd_nc_ck_generic.tpl.c26 const libxsmm_blasint cBlocks = C/bc; variable
124 LIBXSMM_VLA_DECL(4, element_filter_type, dwi, dwiD_scratch, cBlocks, bc, bk);
125 LIBXSMM_VLA_DECL(4, element_filter_type, dwf, dwfD_scratch, cBlocks, bc, bk);
126 LIBXSMM_VLA_DECL(4, element_filter_type, dwo, dwoD_scratch, cBlocks, bc, bk);
127 LIBXSMM_VLA_DECL(4, element_filter_type, dwc, dwcD_scratch, cBlocks, bc, bk);
315 …CESS(2, dwi_ck, ic+jc, ik+jk , K4) = LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, jc, jk, cBlocks, bc, bk);
316 …CESS(2, dwc_ck, ic+jc, ik+jk , K4) = LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, jc, jk, cBlocks, bc, bk);
317 …CESS(2, dwf_ck, ic+jc, ik+jk , K4) = LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, jc, jk, cBlocks, bc, bk);
318 …CESS(2, dwo_ck, ic+jc, ik+jk , K4) = LIBXSMM_VLA_ACCESS(4, dwo, ikb, icb, jc, jk, cBlocks, bc, bk);
H A Dlibxsmm_dnn_rnncell_st_lstm_bwdupd_nc_ck_generic_bf16.tpl.c26 const libxsmm_blasint cBlocks = C/bc; variable
137 LIBXSMM_VLA_DECL(4, float, dwi, dwiD_scratch, cBlocks, bc, bk);
138 LIBXSMM_VLA_DECL(4, float, dwf, dwfD_scratch, cBlocks, bc, bk);
139 LIBXSMM_VLA_DECL(4, float, dwo, dwoD_scratch, cBlocks, bc, bk);
140 LIBXSMM_VLA_DECL(4, float, dwc, dwcD_scratch, cBlocks, bc, bk);
320 …LIBXSMM_INTRINSICS_MM512_LOAD_PS(&LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, jc, jk, cBlocks, bc, bk))));
321 …LIBXSMM_INTRINSICS_MM512_LOAD_PS(&LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, jc, jk, cBlocks, bc, bk))));
322 …LIBXSMM_INTRINSICS_MM512_LOAD_PS(&LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, jc, jk, cBlocks, bc, bk))));
323 …LIBXSMM_INTRINSICS_MM512_LOAD_PS(&LIBXSMM_VLA_ACCESS(4, dwo, ikb, icb, jc, jk, cBlocks, bc, bk))));
H A Dlibxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck_core.tpl.c304 …e_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks…
316 …e_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks…
328 …e_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, 0, 0, cBlocks, bc, bk), &blocks…
362 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk)…
374 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk)…
386 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, 0, 0, cBlocks, bc, bk)…
398 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwo, ikb, icb, 0, 0, cBlocks, bc, bk)…
447 …batchreduce_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwi, ikb, icb, 0, 0, cBlocks, bc, bk…
453 …batchreduce_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwc, ikb, icb, 0, 0, cBlocks, bc, bk…
459 …batchreduce_kernelc1(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dwf, ikb, icb, 0, 0, cBlocks, bc, bk…
[all …]
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_fused_bf16.tpl.c43 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wi, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
71 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wc, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
99 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wf, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
127 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wo, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
214 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wi, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
242 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wc, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
270 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wf, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
298 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wo, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_fused.tpl.c28 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wi, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
71 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wc, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
114 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wf, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
157 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wo, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_diffused.tpl.c28 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wi, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
48 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wc, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
68 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wf, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
88 A_array[icb] = &LIBXSMM_VLA_ACCESS(4, wo, ikb, icb + CB*CB_BLOCKS, 0, 0, cBlocks, bc, bk);
H A Dlibxsmm_dnn_rnncell_st_rnn_bwdupd_nc_kcck.tpl.c42 libxsmm_blasint cBlocks = C/bc; variable
53 LIBXSMM_VLA_DECL(4, element_filter_type, dw, dwD, cBlocks, bc, bk);
155 … wT, ic, ik, jk, jc, kBlocks, bk, bc) = LIBXSMM_VLA_ACCESS(4, w, ik, ic, jc, jk, cBlocks, bc, bk);
269 …batchreduce_kernelcz(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dw, ikb, icb, 0, 0, cBlocks, bc, bk)…
398 …batchreduce_kernelc(A_array, B_array, &LIBXSMM_VLA_ACCESS(4, dw, ikb, icb, 0, 0, cBlocks, bc, bk),…
H A Dlibxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_diffused_bf16.tpl.c42 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wi, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
57 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wc, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
72 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wf, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
88 …batchreduce_kernela(&LIBXSMM_VLA_ACCESS(5, wo, ikb, CB*CB_BLOCKS, 0, 0, 0, cBlocks, bc_lp, bk, lpb…
/dports/math/libxsmm/libxsmm-1.16.3/samples/deeplearning/common/
H A Ddnn_common.h647 int cBlocks = C/bc; in matrix_copy_NC_to_NCNC() local
673 int cBlocks = C/bc; in matrix_copy_NCNC_to_NC() local
699 int cBlocks = C/bc; in matrix_copy_NC_to_NCNC_bf16() local
725 int cBlocks = C/bc; in matrix_copy_NCNC_to_NC_bf16() local
751 int cBlocks = C/bc; in matrix_copy_CK_to_KCCK() local
775 int cBlocks = C/bc; in matrix_copy_CK_to_CKKC() local
799 int cBlocks = C/bc; in matrix_copy_KC_to_KCCK() local
823 int cBlocks = C/bc; in matrix_copy_KCCK_to_KC() local
847 int cBlocks = C/bc; in matrix_copy_KCCK_to_CK() local
871 int cBlocks = C/bc; in matrix_copy_CK_to_KCCK_bf16() local
[all …]
/dports/math/apache-commons-math/commons-math3-3.6.1-src/src/main/java/org/apache/commons/math3/linear/
H A DQRDecomposition.java402 final int cBlocks = (columns + blockSize - 1) / blockSize; in solve() local
407 for (int kBlock = 0; kBlock < cBlocks; ++kBlock) { in solve()
447 final double[] xBlock = xBlocks[jBlock * cBlocks + kBlock]; in solve()
/dports/multimedia/kodi/xbmc-19.3-Matrix/lib/win32/Effects11/
H A DEffectLoad.h123 …template<class T> HRESULT ReallocateBlockAssignments(T* &pBlocks, uint32_t cBlocks, T* pOldBlocks…
128 template<class T> uint32_t CalculateBlockAssignmentSize(T* &pBlocks, uint32_t cBlocks);
H A DEffectLoad.cpp1952 size_t cBlocks = m_pHeader->Effect.cObjectVariables; in LoadObjectVariables() local
1954 for (size_t iBlock=0; iBlock<cBlocks; iBlock++) in LoadObjectVariables()
2286 uint32_t cBlocks; in LoadInterfaceVariables() local
2288 cBlocks = m_pHeader->cInterfaceVariables; in LoadInterfaceVariables()
2290 for (iBlock=0; iBlock<cBlocks; iBlock++) in LoadInterfaceVariables()
3362 template<class T> HRESULT CEffectLoader::ReallocateBlockAssignments(T* &pBlocks, uint32_t cBlocks,… in ReallocateBlockAssignments() argument
3367 for(size_t i=0; i<cBlocks; i++) in ReallocateBlockAssignments()
3477 …late<class T> uint32_t CEffectLoader::CalculateBlockAssignmentSize(T* &pBlocks, uint32_t cBlocks) in CalculateBlockAssignmentSize() argument
3481 for(size_t i=0; i<cBlocks; i++) in CalculateBlockAssignmentSize()

12