Home
last modified time | relevance | path

Searched refs:vindex_n (Results 1 – 8 of 8) sorted by relevance

/dports/math/openblas/OpenBLAS-0.3.18/kernel/x86_64/
H A Ddgemm_small_kernel_tt_skylakex.c42 _mm512_i64scatter_pd(&C[(j + N*8)*ldc + i + M], vindex_n, result##M##N, 8);
44 _mm512_mask_i64scatter_pd(&C[(j + N*8)*ldc + i + M], mask, vindex_n, result##M##N, 8);
53 __m512d tmp##M##N = _mm512_i64gather_pd(vindex_n, &C[(j + N*8)*ldc + i + M], 8); \
55 _mm512_i64scatter_pd(&C[(j + N*8)*ldc + i + M], vindex_n, result##M##N, 8);
57 …__m512d tmp##M##N = _mm512_mask_i64gather_pd(_mm512_setzero_pd(), mask, vindex_n, &C[(j + N*8)*ldc…
59 _mm512_mask_i64scatter_pd(&C[(j + N*8)*ldc + i + M], mask, vindex_n, result##M##N, 8);
292 __m512i vindex_n = _mm512_loadu_si512(index_n); in CNAME() local
H A Dsgemm_small_kernel_tt_skylakex.c42 _mm512_i32scatter_ps(&C[(j + N*16)*ldc + i + M], vindex_n, result##M##N, 4);
44 _mm512_mask_i32scatter_ps(&C[(j + N*16)*ldc + i + M], mask, vindex_n, result##M##N, 4);
53 __m512 tmp##M##N = _mm512_i32gather_ps(vindex_n, &C[(j + N*16)*ldc + i + M], 4); \
55 _mm512_i32scatter_ps(&C[(j + N*16)*ldc + i + M], vindex_n, result##M##N, 4);
57 …__m512 tmp##M##N = _mm512_mask_i32gather_ps(_mm512_setzero_ps(), mask, vindex_n, &C[(j + N*16)*ldc…
59 _mm512_mask_i32scatter_ps(&C[(j + N*16)*ldc + i + M], mask, vindex_n, result##M##N, 4);
314 __m512i vindex_n = _mm512_loadu_si512(index_n); in CNAME() local
H A Ddgemm_small_kernel_nt_skylakex.c48 _mm512_i64scatter_pd(&C[(j + N*8)*ldc + i + M], vindex_n, result##M##N, 8);
50 _mm512_mask_i64scatter_pd(&C[(j + N*8)*ldc + i + M], mask, vindex_n, result##M##N, 8)
61 __m512d tmp##M##N = _mm512_i64gather_pd(vindex_n, &C[(j + N*8)*ldc + i + M], 8); \
63 _mm512_i64scatter_pd(&C[(j + N*8)*ldc + i + M], vindex_n, result##M##N, 8);
65 …__m512d tmp##M##N = _mm512_mask_i64gather_pd(_mm512_setzero_pd(), mask, vindex_n, &C[(j + N*8)*ldc…
67 _mm512_mask_i64scatter_pd(&C[(j + N*8)*ldc + i + M], mask, vindex_n, result##M##N, 8);
388 __m512i vindex_n = _mm512_loadu_si512(index_n); in CNAME() local
H A Dsgemm_small_kernel_nt_skylakex.c48 _mm512_i32scatter_ps(&C[(j + N*16)*ldc + i + M], vindex_n, result##M##N, 4);
50 _mm512_mask_i32scatter_ps(&C[(j + N*16)*ldc + i + M], mask, vindex_n, result##M##N, 4)
61 __m512 tmp##M##N = _mm512_i32gather_ps(vindex_n, &C[(j + N*16)*ldc + i + M], 4); \
63 _mm512_i32scatter_ps(&C[(j + N*16)*ldc + i + M], vindex_n, result##M##N, 4);
65 …__m512 tmp##M##N = _mm512_mask_i32gather_ps(_mm512_setzero_ps(), mask, vindex_n, &C[(j + N*16)*ldc…
67 _mm512_mask_i32scatter_ps(&C[(j + N*16)*ldc + i + M], mask, vindex_n, result##M##N, 4);
388 __m512i vindex_n = _mm512_loadu_si512(index_n); in CNAME() local
H A Dsgemm_small_kernel_tn_skylakex.c59 #define STORE_N4(M, s0) _mm_i32scatter_ps(&C[j*ldc + i + M], vindex_n, s0, 4);
67 s0 = _mm_fmadd_ps(_mm_i32gather_ps(&C[j*ldc + i + M], vindex_n, 4), beta_128, s0); \
68 _mm_i32scatter_ps(&C[j*ldc + i + M], vindex_n, s0, 4);
100 __m128i vindex_n = _mm_set_epi32(ldc*3, ldc*2, ldc, 0); in CNAME() local
H A Ddgemm_small_kernel_tn_skylakex.c58 #define STORE_N4(M, s0) _mm256_i64scatter_pd(&C[j*ldc + i + M], vindex_n, s0, 8);
66 s0 = _mm256_fmadd_pd(_mm256_i64gather_pd(&C[j*ldc + i + M], vindex_n, 8), beta_256, s0); \
67 _mm256_i64scatter_pd(&C[j*ldc + i + M], vindex_n, s0, 8);
98 __m256i vindex_n = _mm256_set_epi64x(ldc*3, ldc*2, ldc, 0); in CNAME() local
H A Ddgemm_small_kernel_nn_skylakex.c78 _mm256_i64scatter_pd(&C[j*ldc + i + M], vindex_n, s0, 8); \
89 s1 = _mm256_i64gather_pd(&C[j*ldc + i + M], vindex_n, 8); \
91 _mm256_i64scatter_pd(&C[j*ldc + i + M], vindex_n, s0, 8); \
370 __m256i vindex_n = _mm256_set_epi64x(ldc*3, ldc*2, ldc*1, 0); in CNAME() local
H A Dsgemm_small_kernel_nn_skylakex.c79 _mm_i32scatter_ps(&C[j*ldc + i + M], vindex_n, s0, 4); \
90 s1 = _mm_i32gather_ps(&C[j*ldc + i + M], vindex_n, 4); \
92 _mm_i32scatter_ps(&C[j*ldc + i + M], vindex_n, s0, 4); \
398 __m128i vindex_n = _mm_set_epi32(ldc*3, ldc*2, ldc, 0); in CNAME() local