/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/ |
H A D | packing_x86.cpp | 179 __m256 _row4 = _mm256_loadu_ps(r4); in forward() local 183 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 188 _mm256_storeu_ps(outptr + 32, _row4); in forward() 250 __m256 _row4 = _mm256_loadu_ps(r0 + 32); in forward() local 254 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 259 _mm256_storeu_ps(outptr4, _row4); in forward() 434 __m256 _row4 = _mm256_loadu_ps(r4); in forward() local 438 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 443 _mm256_storeu_ps(outptr + 32, _row4); in forward() 502 __m256 _row4 = _mm256_loadu_ps(r0 + 32); in forward() local [all …]
|
H A D | flatten_x86.cpp | 112 __m256 _row4 = _mm256_loadu_ps(ptr + 32); in forward() local 117 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 123 _mm256_storeu_ps(outptr4, _row4); in forward() 227 __m256 _row4 = _mm256_loadu_ps(ptr + 32); in forward() local 232 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 238 _mm256_storeu_ps(outptr4, _row4); in forward()
|
H A D | reshape_x86.cpp | 181 __m256 _row4 = _mm256_loadu_ps(ptr4); in forward() local 186 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 192 _mm256_storeu_ps(outptr + 32, _row4); in forward() 352 __m256 _row4 = _mm256_loadu_ps(ptr4); in forward() local 357 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 363 _mm256_storeu_ps(outptr + 32, _row4); in forward()
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/ |
H A D | packing_x86.cpp | 180 __m256 _row4 = _mm256_loadu_ps(r4); in forward() local 184 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 189 _mm256_storeu_ps(outptr + 32, _row4); in forward() 251 __m256 _row4 = _mm256_loadu_ps(r0 + 32); in forward() local 255 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 260 _mm256_storeu_ps(outptr4, _row4); in forward() 435 __m256 _row4 = _mm256_loadu_ps(r4); in forward() local 439 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 444 _mm256_storeu_ps(outptr + 32, _row4); in forward() 503 __m256 _row4 = _mm256_loadu_ps(r0 + 32); in forward() local [all …]
|
H A D | flatten_x86.cpp | 116 __m256 _row4 = _mm256_loadu_ps(ptr + 32); in forward() local 121 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 127 _mm256_storeu_ps(outptr4, _row4); in forward() 231 __m256 _row4 = _mm256_loadu_ps(ptr + 32); in forward() local 236 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 242 _mm256_storeu_ps(outptr4, _row4); in forward()
|
H A D | reshape_x86.cpp | 180 __m256 _row4 = _mm256_loadu_ps(ptr4); in forward() local 185 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 191 _mm256_storeu_ps(outptr + 32, _row4); in forward() 351 __m256 _row4 = _mm256_loadu_ps(ptr4); in forward() local 356 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 362 _mm256_storeu_ps(outptr + 32, _row4); in forward()
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/ |
H A D | packing_x86.cpp | 180 __m256 _row4 = _mm256_loadu_ps(r4); in forward() local 184 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 189 _mm256_storeu_ps(outptr + 32, _row4); in forward() 251 __m256 _row4 = _mm256_loadu_ps(r0 + 32); in forward() local 255 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 260 _mm256_storeu_ps(outptr4, _row4); in forward() 435 __m256 _row4 = _mm256_loadu_ps(r4); in forward() local 439 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 444 _mm256_storeu_ps(outptr + 32, _row4); in forward() 503 __m256 _row4 = _mm256_loadu_ps(r0 + 32); in forward() local [all …]
|
H A D | flatten_x86.cpp | 116 __m256 _row4 = _mm256_loadu_ps(ptr + 32); in forward() local 121 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 127 _mm256_storeu_ps(outptr4, _row4); in forward() 231 __m256 _row4 = _mm256_loadu_ps(ptr + 32); in forward() local 236 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 242 _mm256_storeu_ps(outptr4, _row4); in forward()
|
H A D | reshape_x86.cpp | 180 __m256 _row4 = _mm256_loadu_ps(ptr4); in forward() local 185 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 191 _mm256_storeu_ps(outptr + 32, _row4); in forward() 351 __m256 _row4 = _mm256_loadu_ps(ptr4); in forward() local 356 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 362 _mm256_storeu_ps(outptr + 32, _row4); in forward()
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/ |
H A D | packing_x86.cpp | 180 __m256 _row4 = _mm256_loadu_ps(r4); in forward() local 184 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 189 _mm256_storeu_ps(outptr + 32, _row4); in forward() 251 __m256 _row4 = _mm256_loadu_ps(r0 + 32); in forward() local 255 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 260 _mm256_storeu_ps(outptr4, _row4); in forward() 435 __m256 _row4 = _mm256_loadu_ps(r4); in forward() local 439 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 444 _mm256_storeu_ps(outptr + 32, _row4); in forward() 503 __m256 _row4 = _mm256_loadu_ps(r0 + 32); in forward() local [all …]
|
H A D | reshape_x86.cpp | 180 __m256 _row4 = _mm256_loadu_ps(ptr4); in forward() local 185 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 191 _mm256_storeu_ps(outptr + 32, _row4); in forward() 351 __m256 _row4 = _mm256_loadu_ps(ptr4); in forward() local 356 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 362 _mm256_storeu_ps(outptr + 32, _row4); in forward()
|
H A D | flatten_x86.cpp | 116 __m256 _row4 = _mm256_loadu_ps(ptr + 32); in forward() local 121 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 127 _mm256_storeu_ps(outptr4, _row4); in forward() 231 __m256 _row4 = _mm256_loadu_ps(ptr + 32); in forward() local 236 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 242 _mm256_storeu_ps(outptr4, _row4); in forward()
|
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/ |
H A D | packing_x86.cpp | 181 __m256 _row4 = _mm256_loadu_ps(r4); in forward() local 185 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 190 _mm256_storeu_ps(outptr + 32, _row4); in forward() 252 __m256 _row4 = _mm256_loadu_ps(r0 + 32); in forward() local 256 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 261 _mm256_storeu_ps(outptr4, _row4); in forward() 439 __m256 _row4 = _mm256_loadu_ps(r4); in forward() local 443 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 448 _mm256_storeu_ps(outptr + 32, _row4); in forward() 507 __m256 _row4 = _mm256_loadu_ps(r0 + 32); in forward() local [all …]
|
H A D | flatten_x86.cpp | 117 __m256 _row4 = _mm256_loadu_ps(ptr + 32); in forward() local 122 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 128 _mm256_storeu_ps(outptr4, _row4); in forward() 232 __m256 _row4 = _mm256_loadu_ps(ptr + 32); in forward() local 237 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 243 _mm256_storeu_ps(outptr4, _row4); in forward()
|
H A D | reshape_x86.cpp | 180 __m256 _row4 = _mm256_loadu_ps(ptr4); in forward() local 185 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 191 _mm256_storeu_ps(outptr + 32, _row4); in forward() 390 __m256 _row4 = _mm256_loadu_ps(ptr4); in forward() local 395 transpose8_ps(_row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7); in forward() 401 _mm256_storeu_ps(outptr + 32, _row4); in forward()
|
/dports/biology/gmap/gmap-2020-09-12/src/ |
H A D | oligoindex_hr.c | 11033 _t2 = _mm_unpackhi_epi32(_row4,_row5); in store_9mers_fwd_simd_64() 11053 _t2 = _mm_unpacklo_epi32(_row4,_row5); in store_9mers_fwd_simd_64() 11149 _t2 = _mm256_unpackhi_epi32(_row4,_row5); in store_9mers_fwd_simd_128() 11168 _t2 = _mm256_unpacklo_epi32(_row4,_row5); in store_9mers_fwd_simd_128() 11282 _t2 = _mm512_unpackhi_epi32(_row4,_row5); in store_9mers_fwd_simd_256() 11301 _t2 = _mm512_unpacklo_epi32(_row4,_row5); in store_9mers_fwd_simd_256() 22473 _t2 = _mm_unpackhi_epi32(_row4,_row5); in store_9mers_rev_simd_64() 22493 _t2 = _mm_unpacklo_epi32(_row4,_row5); in store_9mers_rev_simd_64() 22588 _t2 = _mm256_unpackhi_epi32(_row4,_row5); in store_9mers_rev_simd_128() 22608 _t2 = _mm256_unpacklo_epi32(_row4,_row5); in store_9mers_rev_simd_128() [all …]
|