Lines Matching refs:src

29 __device__ void nvvm_wmma_m16n16k16(int *src, int *dst,  in nvvm_wmma_m16n16k16()  argument
34 __hmma_m16n16k16_ld_a(dst, src, ldm, 0); in nvvm_wmma_m16n16k16()
37 __hmma_m16n16k16_ld_a(dst, src+1, ldm, 1); in nvvm_wmma_m16n16k16()
41 __hmma_m16n16k16_ld_b(dst, src, ldm, 0); in nvvm_wmma_m16n16k16()
44 __hmma_m16n16k16_ld_b(dst, src+2, ldm, 1); in nvvm_wmma_m16n16k16()
48 __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 0); in nvvm_wmma_m16n16k16()
51 __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 1); in nvvm_wmma_m16n16k16()
62 __hmma_m16n16k16_st_c_f16(dst, src, ldm, 0); in nvvm_wmma_m16n16k16()
65 __hmma_m16n16k16_st_c_f16(dst, src, ldm, 1); in nvvm_wmma_m16n16k16()
76 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 0); in nvvm_wmma_m16n16k16()
79 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 1); in nvvm_wmma_m16n16k16()
82 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 0); in nvvm_wmma_m16n16k16()
85 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 1); in nvvm_wmma_m16n16k16()
88 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 0); in nvvm_wmma_m16n16k16()
91 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 1); in nvvm_wmma_m16n16k16()
94 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 0); in nvvm_wmma_m16n16k16()
97 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 1); in nvvm_wmma_m16n16k16()
101 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 0); in nvvm_wmma_m16n16k16()
104 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 1); in nvvm_wmma_m16n16k16()
107 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 0); in nvvm_wmma_m16n16k16()
110 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 1); in nvvm_wmma_m16n16k16()
113 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 0); in nvvm_wmma_m16n16k16()
116 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 1); in nvvm_wmma_m16n16k16()
119 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 0); in nvvm_wmma_m16n16k16()
122 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 1); in nvvm_wmma_m16n16k16()
126 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 0); in nvvm_wmma_m16n16k16()
129 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 1); in nvvm_wmma_m16n16k16()
132 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 0); in nvvm_wmma_m16n16k16()
135 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 1); in nvvm_wmma_m16n16k16()
138 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 0); in nvvm_wmma_m16n16k16()
141 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 1); in nvvm_wmma_m16n16k16()
144 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 0); in nvvm_wmma_m16n16k16()
147 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 1); in nvvm_wmma_m16n16k16()
151 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 0); in nvvm_wmma_m16n16k16()
154 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 1); in nvvm_wmma_m16n16k16()
157 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 0); in nvvm_wmma_m16n16k16()
160 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 1); in nvvm_wmma_m16n16k16()
163 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 0); in nvvm_wmma_m16n16k16()
166 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 1); in nvvm_wmma_m16n16k16()
169 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 0); in nvvm_wmma_m16n16k16()
172 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 1); in nvvm_wmma_m16n16k16()
177 __device__ void nvvm_wmma_m32n8k16(int *src, int *dst, in nvvm_wmma_m32n8k16() argument
182 __hmma_m32n8k16_ld_a(dst, src, ldm, 0); in nvvm_wmma_m32n8k16()
185 __hmma_m32n8k16_ld_a(dst, src+1, ldm, 1); in nvvm_wmma_m32n8k16()
189 __hmma_m32n8k16_ld_b(dst, src, ldm, 0); in nvvm_wmma_m32n8k16()
192 __hmma_m32n8k16_ld_b(dst, src+2, ldm, 1); in nvvm_wmma_m32n8k16()
196 __hmma_m32n8k16_ld_c_f16(dst, src, ldm, 0); in nvvm_wmma_m32n8k16()
199 __hmma_m32n8k16_ld_c_f16(dst, src, ldm, 1); in nvvm_wmma_m32n8k16()
210 __hmma_m32n8k16_st_c_f16(dst, src, ldm, 0); in nvvm_wmma_m32n8k16()
213 __hmma_m32n8k16_st_c_f16(dst, src, ldm, 1); in nvvm_wmma_m32n8k16()
224 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 0); in nvvm_wmma_m32n8k16()
227 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 1); in nvvm_wmma_m32n8k16()
230 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 0); in nvvm_wmma_m32n8k16()
233 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 1); in nvvm_wmma_m32n8k16()
236 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 0); in nvvm_wmma_m32n8k16()
239 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 1); in nvvm_wmma_m32n8k16()
242 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 0); in nvvm_wmma_m32n8k16()
245 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 1); in nvvm_wmma_m32n8k16()
249 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 0); in nvvm_wmma_m32n8k16()
252 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 1); in nvvm_wmma_m32n8k16()
255 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 0); in nvvm_wmma_m32n8k16()
258 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 1); in nvvm_wmma_m32n8k16()
261 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 0); in nvvm_wmma_m32n8k16()
264 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 1); in nvvm_wmma_m32n8k16()
267 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 0); in nvvm_wmma_m32n8k16()
270 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 1); in nvvm_wmma_m32n8k16()
274 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 0, 0); in nvvm_wmma_m32n8k16()
277 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 0, 1); in nvvm_wmma_m32n8k16()
280 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 1, 0); in nvvm_wmma_m32n8k16()
283 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 1, 1); in nvvm_wmma_m32n8k16()
286 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 2, 0); in nvvm_wmma_m32n8k16()
289 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 2, 1); in nvvm_wmma_m32n8k16()
292 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 3, 0); in nvvm_wmma_m32n8k16()
295 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 3, 1); in nvvm_wmma_m32n8k16()
299 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 0, 0); in nvvm_wmma_m32n8k16()
302 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 0, 1); in nvvm_wmma_m32n8k16()
305 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 1, 0); in nvvm_wmma_m32n8k16()
308 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 1, 1); in nvvm_wmma_m32n8k16()
311 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 2, 0); in nvvm_wmma_m32n8k16()
314 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 2, 1); in nvvm_wmma_m32n8k16()
317 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 3, 0); in nvvm_wmma_m32n8k16()
320 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 3, 1); in nvvm_wmma_m32n8k16()
327 __hmma_m8n32k16_ld_a(dst, src, ldm, 0); in nvvm_wmma_m32n8k16()
330 __hmma_m8n32k16_ld_a(dst, src+1, ldm, 1); in nvvm_wmma_m32n8k16()
334 __hmma_m8n32k16_ld_b(dst, src, ldm, 0); in nvvm_wmma_m32n8k16()
337 __hmma_m8n32k16_ld_b(dst, src+2, ldm, 1); in nvvm_wmma_m32n8k16()
341 __hmma_m8n32k16_ld_c_f16(dst, src, ldm, 0); in nvvm_wmma_m32n8k16()
344 __hmma_m8n32k16_ld_c_f16(dst, src, ldm, 1); in nvvm_wmma_m32n8k16()
355 __hmma_m8n32k16_st_c_f16(dst, src, ldm, 0); in nvvm_wmma_m32n8k16()
358 __hmma_m8n32k16_st_c_f16(dst, src, ldm, 1); in nvvm_wmma_m32n8k16()
369 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 0); in nvvm_wmma_m32n8k16()
372 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 1); in nvvm_wmma_m32n8k16()
375 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 0); in nvvm_wmma_m32n8k16()
378 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 1); in nvvm_wmma_m32n8k16()
381 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 0); in nvvm_wmma_m32n8k16()
384 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 1); in nvvm_wmma_m32n8k16()
387 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 0); in nvvm_wmma_m32n8k16()
390 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 1); in nvvm_wmma_m32n8k16()
394 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 0); in nvvm_wmma_m32n8k16()
397 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 1); in nvvm_wmma_m32n8k16()
400 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 0); in nvvm_wmma_m32n8k16()
403 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 1); in nvvm_wmma_m32n8k16()
406 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 0); in nvvm_wmma_m32n8k16()
409 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 1); in nvvm_wmma_m32n8k16()
412 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 0); in nvvm_wmma_m32n8k16()
415 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 1); in nvvm_wmma_m32n8k16()
419 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 0, 0); in nvvm_wmma_m32n8k16()
422 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 0, 1); in nvvm_wmma_m32n8k16()
425 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 1, 0); in nvvm_wmma_m32n8k16()
428 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 1, 1); in nvvm_wmma_m32n8k16()
431 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 2, 0); in nvvm_wmma_m32n8k16()
434 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 2, 1); in nvvm_wmma_m32n8k16()
437 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 3, 0); in nvvm_wmma_m32n8k16()
440 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 3, 1); in nvvm_wmma_m32n8k16()
444 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 0); in nvvm_wmma_m32n8k16()
447 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 1); in nvvm_wmma_m32n8k16()
450 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 1, 0); in nvvm_wmma_m32n8k16()
453 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 1, 1); in nvvm_wmma_m32n8k16()
456 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 2, 0); in nvvm_wmma_m32n8k16()
459 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 2, 1); in nvvm_wmma_m32n8k16()
462 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 3, 0); in nvvm_wmma_m32n8k16()
465 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 3, 1); in nvvm_wmma_m32n8k16()