Lines Matching refs:handle

15 libxsmm_blasint LDA = handle->blocksofm * handle->ofmblock;
16 …xsmm_blasint LDB = (handle->upd_pack_input == 1) ? handle->blocksifm * handle->ifmblock : handle->…
18 libxsmm_blasint LDC = handle->ofmblock;
21 libxsmm_blasint LDC = handle->blocksofm * handle->ofmblock;
24 …utput_type*)handle->grad_output->data + ((size_t)handle->desc.pad_h_out * handle->ofwp + handle->d…
25 …ut_type, output, (const element_output_type*)out, handle->ofhp, handle->ofwp, handle->blocksofm, h…
26 const int IFWP = (handle->upd_padding_copy == 1) ? handle->ifwp + 2*handle->desc.pad_w : handle->i…
27 const int IFHP = (handle->upd_padding_copy == 1) ? handle->ifhp + 2*handle->desc.pad_h : handle->i…
28handle->upd_padding_copy == 1) ? (element_input_type*) ((char*)handle->scratch + handle->upd_packi…
29 …ype, input, (element_input_type*) input_ptr_to_use, IFHP, IFWP, handle->blocksifm, handle->ifmbloc…
31 …l, (element_filter_type*)handle->grad_filter->data, handle->blocksifm, handle->desc.R, handle->des…
34 …, (element_filter_type*)handle->grad_filter->data, handle->desc.S, handle->blocksifm, handle->ifmb…
36handle->weight_copies == 1) ? (element_filter_type*)handle->grad_filter->data : (element_filter_ty…
38 …ate, (element_filter_type*)weight_ptr, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
41 …e, (element_filter_type*)weight_ptr, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
43 int prefetch_mode = (handle->desc.u == 2 || (handle->desc.R == 3 && handle->ofw == 7) ) ? libxsmm_g…
50 libxsmm_barrier_init(handle->barrier, ltid);
53 if (handle->upd_padding_copy == 1) {
54 …ype, input_src, (element_input_type*)handle->reg_input->data, handle->ifhp, handle->ifwp, handle->…
55 int imgpt = LIBXSMM_UPDIV(handle->desc.N, handle->desc.threads);
56 my_img_start = LIBXSMM_MIN(ltid * imgpt, handle->desc.N);
57 my_img_end = LIBXSMM_MIN((ltid+1) * imgpt, handle->desc.N);
59 my_ifm_end = handle->blocksifm;
64 for (ij = 0; ij < handle->ifhp+(2*handle->desc.pad_h); ij++) {
65 for (ii = 0; ii < handle->ifwp+(2*handle->desc.pad_w); ii++) {
66 …if ( (ij >= handle->desc.pad_h) && (ii >= handle->desc.pad_w) && (ij < handle->ifhp+handle->desc.p…
68 for (ifm2 = 0; ifm2 < handle->ifmblock; ifm2++) {
69 …XSMM_VLA_ACCESS(5, input, img, ij, ii, ifm1, ifm2, IFHP, IFWP, handle->blocksifm, handle->ifmbloc…
70 …, input_src, img, ij-handle->desc.pad_h, ii-handle->desc.pad_w, ifm1, ifm2, handle->ifhp, handle-…
74 for (ifm2 = 0; ifm2 < handle->ifmblock; ifm2++) {
75 …XSMM_VLA_ACCESS(5, input, img, ij, ii, ifm1, ifm2, IFHP, IFWP, handle->blocksifm, handle->ifmbloc…
82 libxsmm_barrier_wait(handle->barrier, ltid);
85 if (handle->upd_use_batchreduce == 0 && handle->upd_linearized_tasklist == 0) {
87 const int img_work = handle->desc.N;
88 …st int img_chunksize = (img_work % handle->desc.threads == 0) ? (img_work / handle->desc.threads) …
89 …nst float beta = ((img_chunksize == 1) && (handle->upd_ofh_rb == handle->ofh) && (handle->upd_ofw_…
90 …m_function gemm_kernel = libxsmm_smmdispatch(handle->ofmblock, handle->ifmblock, handle->upd_ofw_r…
95 …if (!((img_chunksize == 1) && (handle->upd_ofh_rb == handle->ofh) && (handle->upd_ofw_rb == handle
96 …memset(weight_ptr, 0, handle->desc.C * handle->desc.K * handle->desc.R * handle->desc.S * sizeof(e…
99 if (handle->upd_loop_order == 0) {
101 for (ofmb = 0; ofmb < handle->blocksofm; ofmb += handle->block_upd_ofm) {
102 for (ifmb = 0; ifmb < handle->blocksifm; ifmb += handle->block_upd_ifm) {
103 for (ojb = 0; ojb < handle->ofh; ojb += handle->upd_ofh_rb) {
104 … for (ofm1 = ofmb; ofm1 < LIBXSMM_MIN(ofmb+handle->block_upd_ofm, handle->blocksofm); ofm1++ ) {
105 … for (ifm1 = ifmb; ifm1 < LIBXSMM_MIN(ifmb+handle->block_upd_ifm, handle->blocksifm); ifm1++) {
106 … for (oj = ojb; oj < LIBXSMM_MIN(ojb+handle->upd_ofh_rb,handle->ofh); oj+= handle->upd_ofh_rb) {
107 for (oi = 0; oi < handle->ofw; oi += handle->upd_ofw_rb) {
108 for (kj = 0; kj < handle->desc.R; ++kj) {
109 for (ki = 0; ki < handle->desc.S; ++ki) {
110 ii = oi * handle->desc.u + ki;
111 ij = oj * handle->desc.v + kj;
113 …BXSMM_VLA_ACCESS(5, output, img, oj, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, h…
114 …&LIBXSMM_VLA_ACCESS(5, input, img, ij, ii, ifm1, 0, IFHP, IFWP, handle->blocksifm, handle->ifmbloc…
115 …ght_private, ofm1, ifm1, kj, ki, 0, 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
118 …BXSMM_VLA_ACCESS(5, output, img, oj, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, h…
119 …&LIBXSMM_VLA_ACCESS(5, input, img, ij, ii, ifm1, 0, IFHP, IFWP, handle->blocksifm, handle->ifmbloc…
120 …t_private, kj, ki, ifm1, 0, ofm1, 0, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
133 if (handle->upd_loop_order == 1) {
135 for (ifmb = 0; ifmb < handle->blocksifm; ifmb += handle->block_upd_ifm) {
136 for (ofmb = 0; ofmb < handle->blocksofm; ofmb += handle->block_upd_ofm) {
137 for (ojb = 0; ojb < handle->ofh; ojb += handle->upd_ofh_rb) {
138 … for (ifm1 = ifmb; ifm1 < LIBXSMM_MIN(ifmb+handle->block_upd_ifm, handle->blocksifm); ifm1++) {
139 … for (ofm1 = ofmb; ofm1 < LIBXSMM_MIN(ofmb+handle->block_upd_ofm, handle->blocksofm); ofm1++ ) {
140 … for (oj = ojb; oj < LIBXSMM_MIN(ojb+handle->upd_ofh_rb,handle->ofh); oj+= handle->upd_ofh_rb) {
141 for (oi = 0; oi < handle->ofw; oi += handle->upd_ofw_rb) {
142 for (kj = 0; kj < handle->desc.R; ++kj) {
143 for (ki = 0; ki < handle->desc.S; ++ki) {
144 ii = oi * handle->desc.u + ki;
145 ij = oj * handle->desc.v + kj;
147 …BXSMM_VLA_ACCESS(5, output, img, oj, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, h…
148 …&LIBXSMM_VLA_ACCESS(5, input, img, ij, ii, ifm1, 0, IFHP, IFWP, handle->blocksifm, handle->ifmbloc…
149 …ght_private, ofm1, ifm1, kj, ki, 0, 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
152 …BXSMM_VLA_ACCESS(5, output, img, oj, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, h…
153 …&LIBXSMM_VLA_ACCESS(5, input, img, ij, ii, ifm1, 0, IFHP, IFWP, handle->blocksifm, handle->ifmbloc…
154 …t_private, kj, ki, ifm1, 0, ofm1, 0, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
168 if (handle->upd_linearized_tasklist == 1) {
170 const int work = handle->desc.R * handle->desc.S * handle->blocksofm * handle->blocksifm;
171 …const int chunksize = (work % handle->desc.threads == 0) ? (work / handle->desc.threads) : (work /…
175 int Cb = handle->blocksifm;
177 int Kb = handle->blocksofm;
179 int R = handle->desc.R;
180 int S = handle->desc.S;
182 if (handle->upd_avoid_rim_fmas == 0) {
183 const int IFH = (handle->upd_pack_input == 1) ? handle->ifhp/handle->desc.u : IFHP;
184 const int IFW = (handle->upd_pack_input == 1) ? handle->ifwp/handle->desc.v : IFWP;
185 …ent_input_type *input_ptr_base = (handle->upd_pack_input == 1) ? (element_input_type*)((char*)hand…
186 …type, input_use, (element_input_type*)input_ptr_base, IFH, IFW, handle->blocksifm, handle->ifmbloc…
187 …const float beta = ((handle->desc.N == 1) && (handle->upd_ofh_rb == handle->ofh) && (handle->upd_o…
188 …m_function gemm_kernel = libxsmm_smmdispatch(handle->ofmblock, handle->ifmblock, handle->upd_ofw_r…
191 if (handle->upd_pack_input == 1) {
192 …ype, input_src, (element_input_type*)handle->reg_input->data, handle->ifhp, handle->ifwp, handle->…
193 …const int img_chunk = (handle->desc.N % handle->desc.threads == 0) ? handle->desc.N/handle->desc.t…
194 const int img_copy_start = LIBXSMM_MIN(ltid*img_chunk, handle->desc.N);
195 const int img_copy_end = LIBXSMM_MIN((ltid+1)*img_chunk, handle->desc.N);
198 for (ifm1 = 0; ifm1 < handle->blocksifm; ifm1++) {
199 for (oj = 0; oj < handle->ofh; oj++) {
200 for (oi = 0; oi < handle->ofw; oi++) {
201 ij = oj * handle->desc.u;
202 ii = oi * handle->desc.v;
204 for (ifm2 = 0; ifm2 < handle->ifmblock; ifm2++) {
205handle->blocksifm, handle->ifmblock) = LIBXSMM_VLA_ACCESS(5, input_src, img, ij, ii, ifm1, ifm2,
211 libxsmm_barrier_wait(handle->barrier, ltid);
215 …if (!((handle->desc.N == 1) && (handle->upd_ofh_rb == handle->ofh) && (handle->upd_ofw_rb == handl…
222 for (ifm2 = 0; ifm2 < handle->ifmblock; ifm2++) {
224 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++) {
226 …lobal, ofm1, ifm1, kj, ki, ifm2, ofm2, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
229 …bal, kj, ki, ifm1, ifm2, ofm1, ofm2, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
236 for (img = 0; img < handle->desc.N; img++) {
244 for (oj = 0; oj < handle->ofh; oj += handle->upd_ofh_rb) {
245 ij = oj * handle->desc.u + kj;
247 …BXSMM_VLA_ACCESS(5, output, img, oj, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, h…
248 …IBXSMM_VLA_ACCESS(5, input_use, img, ij, ii, ifm1, 0, IFH, IFW, handle->blocksifm, handle->ifmbloc…
249 …ight_global, ofm1, ifm1, kj, ki, 0, 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
252 …BXSMM_VLA_ACCESS(5, output, img, oj, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, h…
253 …IBXSMM_VLA_ACCESS(5, input_use, img, ij, ii, ifm1, 0, IFH, IFW, handle->blocksifm, handle->ifmbloc…
254 …ht_global, kj, ki, ifm1, 0, ofm1, 0, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
260 …const float beta = ((handle->upd_ofh_rb == handle->ofh) && (handle->upd_ofw_rb == handle->ofw)) ? …
261 … br_gemm_kernel = libxsmm_smmdispatch_reducebatch_addr(handle->ofmblock, handle->ifmblock, handle-…
262 …br_gemm_kernel2 = libxsmm_smmdispatch_reducebatch_addr(handle->ofmblock, handle->ifmblock, handle-…
271 ii = oi * handle->desc.u + ki;
272 ij = oj * handle->desc.v + kj;
274 img_block_size = handle->desc.N;
279 for (j_br = 1; j_br < handle->upd_ofh_rb; j_br++) {
280 …(5, output, img + img_br, oj + j_br, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, h…
281 …ACCESS(5, input, img + img_br, ij + j_br * handle->desc.u, ii, ifm1, 0, IFHP, IFWP, handle->blocks…
287 …ight_global, ofm1, ifm1, kj, ki, 0, 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
290 …ht_global, kj, ki, ifm1, 0, ofm1, 0, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
295 for (j_br = 0; j_br < handle->upd_ofh_rb; j_br++) {
296 …output, img + img_br, oj + j_br, oi + 1, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, h…
297 …CESS(5, input, img + img_br, ij + j_br * handle->desc.u, ii + 1, ifm1, 0, IFHP, IFWP, handle->bloc…
303 …ight_global, ofm1, ifm1, kj, ki, 0, 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
306 …ht_global, kj, ki, ifm1, 0, ofm1, 0, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
308 } else if (oi == handle->ofw-handle->fwd_ofw_rb && ki == handle->desc.S-1) {
311 for (j_br = 0; j_br < handle->upd_ofh_rb; j_br++) {
312 …(5, output, img + img_br, oj + j_br, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, h…
313 …ACCESS(5, input, img + img_br, ij + j_br * handle->desc.u, ii, ifm1, 0, IFHP, IFWP, handle->blocks…
319 …ight_global, ofm1, ifm1, kj, ki, 0, 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
322 …ht_global, kj, ki, ifm1, 0, ofm1, 0, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
325 if (kj == handle->desc.R-1) {
328 for (j_br = 0; j_br < handle->upd_ofh_rb-1; j_br++) {
329 …(5, output, img + img_br, oj + j_br, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, h…
330 …ACCESS(5, input, img + img_br, ij + j_br * handle->desc.u, ii, ifm1, 0, IFHP, IFWP, handle->blocks…
336 …ight_global, ofm1, ifm1, kj, ki, 0, 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
339 …ht_global, kj, ki, ifm1, 0, ofm1, 0, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
344 for (j_br = 0; j_br < handle->upd_ofh_rb; j_br++) {
345 …(5, output, img + img_br, oj + j_br, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, h…
346 …ACCESS(5, input, img + img_br, ij + j_br * handle->desc.u, ii, ifm1, 0, IFHP, IFWP, handle->blocks…
352 …ight_global, ofm1, ifm1, kj, ki, 0, 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
355 …ht_global, kj, ki, ifm1, 0, ofm1, 0, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
364handle->desc.threads == 27 && handle->desc.N == 27 && handle->ofw == 14 && handle->desc.R == 1 &&
365 int tile_id = ltid / LIBXSMM_UPDIV(handle->desc.threads, handle->weight_copies);
366 int tiles = handle->weight_copies;
367 int img_per_tile = LIBXSMM_UPDIV(handle->desc.N, tiles);
369 int ifms_per_thread = LIBXSMM_UPDIV(handle->blocksifm, group_size);
370 int ofms_per_thread = LIBXSMM_UPDIV(handle->blocksofm, group_size);
372 int my_R_end = handle->desc.R;
373 …const float beta = ((handle->upd_ofh_rb == handle->ofh) && (handle->upd_ofw_rb == handle->ofw)) ? …
374 … br_gemm_kernel = libxsmm_smmdispatch_reducebatch_addr(handle->ofmblock, handle->ifmblock, handle-…
376 …emm_kernel_flat = libxsmm_smmdispatch_reducebatch_addr(handle->ofmblock, handle->ifmblock, handle-…
377handle->weight_copies > 1) ? (element_filter_type*)((char*)handle->scratch + handle->upd_filter_sc…
379 …element_filter_type*)weight_ptr_group, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
382 …ement_filter_type*)weight_ptr_group, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
384 my_img_start = LIBXSMM_MIN(tile_id * img_per_tile, handle->desc.N);
385 my_img_end = LIBXSMM_MIN((tile_id+1) * img_per_tile, handle->desc.N);
386 my_ifm_start = LIBXSMM_MIN(my_in_tile_id * ifms_per_thread, handle->blocksifm );
387 my_ifm_end = LIBXSMM_MIN((my_in_tile_id+1) * ifms_per_thread, handle->blocksifm );
389 my_ofm_end = handle->blocksofm;
391 …if (handle->desc.threads == 27 && handle->desc.N == 27 && handle->desc.C == 256 && handle->desc.K …
392 my_ofm_start = LIBXSMM_MIN(my_in_tile_id * ofms_per_thread, handle->blocksofm);
393 my_ofm_end = LIBXSMM_MIN((my_in_tile_id+1) * ofms_per_thread, handle->blocksofm);
395 my_ifm_end = handle->blocksifm;
397 …if (handle->desc.threads == 27 && handle->desc.N == 27 && handle->desc.R == 3 && handle->desc.S ==…
398 int r_per_tile = LIBXSMM_UPDIV(handle->desc.R, group_size);
400 my_ifm_end = handle->blocksifm;
402 my_ofm_end = handle->blocksofm;
403 my_R_start = LIBXSMM_MIN(my_in_tile_id * r_per_tile, handle->desc.R);
404 my_R_end = LIBXSMM_MIN((my_in_tile_id+1) * r_per_tile, handle->desc.R);
410 if (handle->desc.N != handle->desc.threads) {
412 …const int work = handle->desc.R * handle->desc.S * handle->blocksofm * handle->blocksifm * handle-…
413 …const int chunksize = (work % handle->desc.threads == 0) ? (work / handle->desc.threads) : (work /…
417 int Cb = handle->blocksifm;
418 int Kb = handle->blocksofm;
419 int R = handle->desc.R;
420 int S = handle->desc.S;
421 const int IFH = (handle->upd_pack_input == 1) ? handle->ifhp/handle->desc.u : IFHP;
422 const int IFW = (handle->upd_pack_input == 1) ? handle->ifwp/handle->desc.v : IFWP;
423 …(handle->upd_pack_input == 1) ? (element_input_type*)((char*)handle->scratch + handle->upd_packing…
424 …type, input_use, (element_input_type*)input_ptr_base, IFH, IFW, handle->blocksifm, handle->ifmbloc…
427 if (handle->upd_pack_input == 1) {
428 …ype, input_src, (element_input_type*)handle->reg_input->data, handle->ifhp, handle->ifwp, handle->…
429 …const int img_chunk = (handle->desc.N % handle->desc.threads == 0) ? handle->desc.N/handle->desc.t…
430 const int img_copy_start = LIBXSMM_MIN(ltid*img_chunk, handle->desc.N);
431 const int img_copy_end = LIBXSMM_MIN((ltid+1)*img_chunk, handle->desc.N);
434 for (ifm1 = 0; ifm1 < handle->blocksifm; ifm1++) {
435 for (oj = 0; oj < handle->ofh; oj++) {
436 for (oi = 0; oi < handle->ofw; oi++) {
437 ij = oj * handle->desc.u;
438 ii = oi * handle->desc.v;
440 for (ifm2 = 0; ifm2 < handle->ifmblock; ifm2++) {
441handle->blocksifm, handle->ifmblock) = LIBXSMM_VLA_ACCESS(5, input_src, img, ij, ii, ifm1, ifm2,
447 libxsmm_barrier_wait(handle->barrier, ltid);
451 if (handle->upd_ofw_rb != handle->ofw) {
459handle->weight_copies > 1) ? (element_filter_type*)((char*)handle->scratch + handle->upd_filter_sc…
461 …ement_filter_type*)weight_ptr_current, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
464 …ent_filter_type*)weight_ptr_current, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
466 for (ifm2 = 0; ifm2 < handle->ifmblock; ifm2++) {
468 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++) {
470 …rrent, ofm1, ifm1, kj, ki, ifm2, ofm2, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
473 …ent, kj, ki, ifm1, ifm2, ofm1, ofm2, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
492handle->weight_copies > 1) ? (element_filter_type*)((char*)handle->scratch + handle->upd_filter_sc…
494 …ement_filter_type*)weight_ptr_current, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
497 …ent_filter_type*)weight_ptr_current, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
500 for (j_br = 0; j_br < handle->ofh; j_br++) {
501 …A_ACCESS(5, output, img , oj + j_br, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, h…
502 …_VLA_ACCESS(5, input_use, img, ij + j_br * handle->desc.u, ii, ifm1, 0, IFHP, IFWP, handle->blocks…
507 …ght_current, ofm1, ifm1, kj, ki, 0, 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
510 …t_current, kj, ki, ifm1, 0, ofm1, 0, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
516 if (!((handle->upd_ofh_rb == handle->ofh) && (handle->upd_ofw_rb == handle->ofw))) {
520 for (ki = 0; ki < handle->desc.S; ++ki) {
521 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++ ) {
522 for (ifm2 = 0; ifm2 < handle->ifmblock; ifm2++) {
524 …group, ofm1, ifm1, kj, ki, ifm2, ofm2, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
527 …oup, kj, ki, ifm1, ifm2, ofm1, ofm2, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
537 if (handle->upd_loop_order == 0) {
541 for (ojb = 0; ojb < handle->ofh; ojb += handle->upd_ofh_rb) {
544 … for (oj = ojb; oj < LIBXSMM_MIN(ojb+handle->upd_ofh_rb,handle->ofh); oj+= handle->upd_ofh_rb) {
545 for (oi = 0; oi < handle->ofw; oi += handle->upd_ofw_rb) {
547 for (ki = 0; ki < handle->desc.S; ++ki) {
548 ii = oi * handle->desc.u + ki;
549 ij = oj * handle->desc.v + kj;
552 for (j_br = 0; j_br < handle->upd_ofh_rb; j_br++) {
553 …(5, output, img + img_br, oj + j_br, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, h…
554 …ACCESS(5, input, img + img_br, ij + j_br * handle->desc.u, ii, ifm1, 0, IFHP, IFWP, handle->blocks…
560 …ivate_group, ofm1, ifm1, kj, ki, 0, 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
563 …ate_group, kj, ki, ifm1, 0, ofm1, 0, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
579 for (ojb = 0; ojb < handle->ofh; ojb += handle->upd_ofh_rb) {
582 … for (oj = ojb; oj < LIBXSMM_MIN(ojb+handle->upd_ofh_rb,handle->ofh); oj+= handle->upd_ofh_rb) {
583 for (oi = 0; oi < handle->ofw; oi += handle->upd_ofw_rb) {
585 for (ki = 0; ki < handle->desc.S; ++ki) {
586 ii = oi * handle->desc.u + ki;
587 ij = oj * handle->desc.v + kj;
590 for (j_br = 0; j_br < handle->upd_ofh_rb; j_br++) {
591 …(5, output, img + img_br, oj + j_br, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, h…
592 …ACCESS(5, input, img + img_br, ij + j_br * handle->desc.u, ii, ifm1, 0, IFHP, IFWP, handle->blocks…
598 …ivate_group, ofm1, ifm1, kj, ki, 0, 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->…
601 …ate_group, kj, ki, ifm1, 0, ofm1, 0, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->…
618 if (handle->weight_copies > 1) {
620 const int fm_blocking = (handle->ofmblock % 16 == 0) ? 16 : handle->ofmblock;
621 …t int reduce_work = handle->blocksofm * handle->blocksifm * handle->desc.R * handle->desc.S * (han…
622 …duce_chunksize = (reduce_work % handle->desc.threads == 0) ? (reduce_work / handle->desc.threads) …
627 libxsmm_barrier_wait(handle->barrier, ltid);
630 element_filter_type *weight_ptr_glb = (element_filter_type*) handle->grad_filter->data;
634 assert( handle->ofmblock <= 64 );
641 for ( ii = 0; ii < handle->weight_copies; ii++ ) {
642 …ter_type*)((char*)handle->scratch + handle->upd_filter_scratch_offset)+ ii * handle->desc.C * hand…
655 for ( ii = 0; ii < handle->weight_copies; ii++ ) {
656 …ght_ptr_src = (element_filter_type*)handle->scratch7 + ii * handle->desc.C * handle->desc.K * hand…
664 libxsmm_barrier_wait(handle->barrier, ltid);