/dports/math/py-numpy/numpy-1.20.3/numpy/core/src/common/simd/avx512/ |
H A D | memory.h | 160 assert(nlane > 0); in npyv_load_till_s32() 162 const __mmask16 mask = nlane > 31 ? -1 : (1 << nlane) - 1; in npyv_load_till_s32() 169 const __mmask16 mask = nlane > 31 ? -1 : (1 << nlane) - 1; in npyv_load_tillz_s32() 177 const __mmask8 mask = nlane > 31 ? -1 : (1 << nlane) - 1; in npyv_load_till_s64() 184 const __mmask8 mask = nlane > 15 ? -1 : (1 << nlane) - 1; in npyv_load_tillz_s64() 201 const __mmask16 mask = nlane > 31 ? -1 : (1 << nlane) - 1; in npyv_loadn_till_s32() 218 const __mmask8 mask = nlane > 31 ? -1 : (1 << nlane) - 1; in npyv_loadn_till_s64() 232 const __mmask16 mask = nlane > 31 ? -1 : (1 << nlane) - 1; in npyv_store_till_s32() 239 const __mmask8 mask = nlane > 15 ? -1 : (1 << nlane) - 1; in npyv_store_till_s64() 254 const __mmask16 mask = nlane > 31 ? -1 : (1 << nlane) - 1; in npyv_storen_till_s32() [all …]
|
/dports/math/py-numpy/numpy-1.20.3/numpy/core/src/common/simd/avx2/ |
H A D | memory.h | 152 assert(nlane > 0); in npyv_load_till_s32() 155 __m256i vnlane = _mm256_set1_epi32(nlane > 8 ? 8 : (int)nlane); in npyv_load_till_s32() 165 __m256i vnlane = _mm256_set1_epi32(nlane > 8 ? 8 : (int)nlane); in npyv_load_tillz_s32() 175 __m256i vnlane = _mm256_set1_epi64x(nlane > 4 ? 4 : (int)nlane); in npyv_load_till_s64() 185 __m256i vnlane = _mm256_set1_epi64x(nlane > 4 ? 4 : (int)nlane); in npyv_load_tillz_s64() 201 __m256i vnlane = _mm256_set1_epi32(nlane > 8 ? 8 : (int)nlane); in npyv_loadn_till_s32() 217 __m256i vnlane = _mm256_set1_epi64x(nlane > 4 ? 4 : (int)nlane); in npyv_loadn_till_s64() 233 __m256i vnlane = _mm256_set1_epi32(nlane > 8 ? 8 : (int)nlane); in npyv_store_till_s32() 242 __m256i vnlane = _mm256_set1_epi64x(nlane > 8 ? 8 : (int)nlane); in npyv_store_till_s64() 255 switch(nlane) { in npyv_storen_till_s32() [all …]
|
/dports/math/py-numpy/numpy-1.20.3/numpy/core/src/common/simd/sse/ |
H A D | memory.h | 155 if (nlane > 3) { in npyv_load_till_s32() 169 switch(nlane) { in npyv_load_till_s32() 202 switch(nlane) { in npyv_load_tillz_s32() 223 if (nlane <= 2) { in npyv_load_till_s64() 231 if (nlane == 1) { in npyv_load_till_s64() 244 if (nlane == 1) { in npyv_load_tillz_s64() 258 if (nlane > 3) { in npyv_loadn_till_s32() 271 switch(nlane) { in npyv_loadn_till_s32() 304 switch(nlane) { in npyv_loadn_tillz_s32() 368 switch(nlane) { in npyv_store_till_s32() [all …]
|
/dports/math/py-numpy/numpy-1.20.3/numpy/core/src/common/simd/vsx/ |
H A D | memory.h | 150 assert(nlane > 0); in npyv_load_till_s32() 152 switch(nlane) { in npyv_load_till_s32() 174 assert(nlane > 0); in npyv_load_till_s64() 175 if (nlane == 1) { in npyv_load_till_s64() 190 assert(nlane > 0); in npyv_loadn_till_s32() 192 switch(nlane) { in npyv_loadn_till_s32() 214 if (nlane == 1) { in npyv_loadn_till_s64() 229 switch(nlane) { in npyv_store_till_s32() 248 if (nlane == 1) { in npyv_store_till_s64() 261 switch(nlane) { in npyv_storen_till_s32() [all …]
|
/dports/math/py-numpy/numpy-1.20.3/numpy/core/src/common/simd/neon/ |
H A D | memory.h | 140 assert(nlane > 0); in npyv_load_till_s32() 141 switch(nlane) { in npyv_load_till_s32() 161 assert(nlane > 0); in npyv_load_till_s64() 162 if (nlane == 1) { in npyv_load_till_s64() 178 assert(nlane > 0); in npyv_loadn_till_s32() 180 switch(nlane) { in npyv_loadn_till_s32() 200 if (nlane == 1) { in npyv_loadn_till_s64() 216 switch(nlane) { in npyv_store_till_s32() 235 if (nlane == 1) { in npyv_store_till_s64() 248 switch(nlane) { in npyv_storen_till_s32() [all …]
|
/dports/science/simint/simint-v0.8/simint/vectorization/ |
H A D | intrinsics_scalar.h | 104 double mask_load(int nlane, double * memaddr) in mask_load() argument 106 if(nlane == 1) in mask_load()
|
H A D | intrinsics_sse.h | 155 __m128d mask_load(int nlane, double * memaddr) in mask_load() argument 158 for(int n = nlane; n < SIMINT_SIMD_LEN; n++) in mask_load()
|
H A D | intrinsics_avx512.h | 177 __m512d mask_load(int nlane, double * memaddr) in mask_load() argument 180 for(int n = nlane; n < SIMINT_SIMD_LEN; n++) in mask_load()
|
H A D | intrinsics_avx.h | 211 __m256d mask_load(int nlane, double * memaddr) in mask_load() argument 214 for(int n = nlane; n < SIMINT_SIMD_LEN; n++) in mask_load()
|
/dports/science/simint/simint-v0.8/simint/ostei/gen/ |
H A D | ostei_s_s_s_s.c | 91 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_s_s_s_s() local 161 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_s_s_s_s()
|
H A D | ostei_s_s_p_s.c | 92 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_s_s_p_s() local 171 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_s_s_p_s()
|
H A D | ostei_s_s_s_p.c | 92 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_s_s_s_p() local 171 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_s_s_s_p()
|
H A D | ostei_p_s_s_s.c | 93 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_p_s_s_s() local 170 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_p_s_s_s()
|
H A D | ostei_s_p_s_s.c | 93 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_s_p_s_s() local 170 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_s_p_s_s()
|
H A D | ostei_s_s_s_d.c | 93 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_s_s_s_d() local 172 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_s_s_s_d()
|
H A D | ostei_d_s_s_s.c | 94 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_d_s_s_s() local 171 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_d_s_s_s()
|
H A D | ostei_s_s_d_s.c | 93 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_s_s_d_s() local 172 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_s_s_d_s()
|
H A D | ostei_s_d_s_s.c | 94 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_s_d_s_s() local 171 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_s_d_s_s()
|
H A D | ostei_p_s_s_p.c | 94 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_p_s_s_p() local 180 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_p_s_s_p()
|
H A D | ostei_p_s_p_s.c | 94 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_p_s_p_s() local 180 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_p_s_p_s()
|
H A D | ostei_s_p_s_p.c | 94 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_s_p_s_p() local 180 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_s_p_s_p()
|
H A D | ostei_s_p_p_s.c | 94 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_s_p_p_s() local 180 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_s_p_p_s()
|
H A D | ostei_f_s_s_s.c | 96 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_f_s_s_s() local 173 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_f_s_s_s()
|
H A D | ostei_p_p_s_s.c | 101 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_p_p_s_s() local 180 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_p_p_s_s()
|
H A D | ostei_s_f_s_s.c | 96 … const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j)); in ostei_s_f_s_s() local 173 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j); in ostei_s_f_s_s()
|