/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | deconvolution_4x4.h | 62 float* outptr2 = outptr1 + outw; in deconv4x4s1_neon() local 63 float* outptr3 = outptr2 + outw; in deconv4x4s1_neon() 143 outptr2 += 4; in deconv4x4s1_neon() 163 outptr2[0] += val * k2[0]; in deconv4x4s1_neon() 164 outptr2[1] += val * k2[1]; in deconv4x4s1_neon() 165 outptr2[2] += val * k2[2]; in deconv4x4s1_neon() 166 outptr2[3] += val * k2[3]; in deconv4x4s1_neon() 176 outptr2++; in deconv4x4s1_neon() 274 vst2q_f32(outptr2, _out2); in deconv4x4s2_neon() 295 outptr2 += 8; in deconv4x4s2_neon() [all …]
|
H A D | deconvolution_3x3.h | 206 outptr2 += 4; in deconv3x3s1_neon() 222 outptr2[0] += val * k2[0]; in deconv3x3s1_neon() 223 outptr2[1] += val * k2[1]; in deconv3x3s1_neon() 224 outptr2[2] += val * k2[2]; in deconv3x3s1_neon() 229 outptr2++; in deconv3x3s1_neon() 325 vst2q_f32(outptr2, _out2); in deconv3x3s2_neon() 334 outptr2 += 8; in deconv3x3s2_neon() 350 outptr2[0] += val * k2[0]; in deconv3x3s2_neon() 351 outptr2[1] += val * k2[1]; in deconv3x3s2_neon() 352 outptr2[2] += val * k2[2]; in deconv3x3s2_neon() [all …]
|
H A D | flatten_arm.cpp | 121 outptr2 += 4; in forward() 129 *outptr2++ = ptr[2]; in forward() 164 outptr2 += 4; in forward() 172 *outptr2++ = ptr[2]; in forward() 294 outptr2 += 4; in forward_bf16s_fp16s() 305 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() 342 outptr2 += 4; in forward_bf16s_fp16s() 350 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() 397 outptr2 += 4; in forward_bf16s_fp16s() 408 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() [all …]
|
H A D | deconvolution_4x4_fp16s.h | 60 __fp16* outptr2 = outptr1 + outw; in deconv4x4s2_fp16sa_neon() local 61 __fp16* outptr3 = outptr2 + outw; in deconv4x4s2_fp16sa_neon() 102 vst2_f16(outptr2, _out2); in deconv4x4s2_fp16sa_neon() 104 _out2 = vld2_f16(outptr2 + 2); in deconv4x4s2_fp16sa_neon() 107 vst2_f16(outptr2 + 2, _out2); in deconv4x4s2_fp16sa_neon() 123 outptr2 += 8; in deconv4x4s2_fp16sa_neon() 140 outptr2[0] += val * k2[0]; in deconv4x4s2_fp16sa_neon() 141 outptr2[1] += val * k2[1]; in deconv4x4s2_fp16sa_neon() 142 outptr2[2] += val * k2[2]; in deconv4x4s2_fp16sa_neon() 143 outptr2[3] += val * k2[3]; in deconv4x4s2_fp16sa_neon() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | deconvolution_4x4.h | 62 float* outptr2 = outptr1 + outw; in deconv4x4s1_neon() local 63 float* outptr3 = outptr2 + outw; in deconv4x4s1_neon() 143 outptr2 += 4; in deconv4x4s1_neon() 163 outptr2[0] += val * k2[0]; in deconv4x4s1_neon() 164 outptr2[1] += val * k2[1]; in deconv4x4s1_neon() 165 outptr2[2] += val * k2[2]; in deconv4x4s1_neon() 166 outptr2[3] += val * k2[3]; in deconv4x4s1_neon() 176 outptr2++; in deconv4x4s1_neon() 274 vst2q_f32(outptr2, _out2); in deconv4x4s2_neon() 295 outptr2 += 8; in deconv4x4s2_neon() [all …]
|
H A D | deconvolution_3x3.h | 206 outptr2 += 4; in deconv3x3s1_neon() 222 outptr2[0] += val * k2[0]; in deconv3x3s1_neon() 223 outptr2[1] += val * k2[1]; in deconv3x3s1_neon() 224 outptr2[2] += val * k2[2]; in deconv3x3s1_neon() 229 outptr2++; in deconv3x3s1_neon() 325 vst2q_f32(outptr2, _out2); in deconv3x3s2_neon() 334 outptr2 += 8; in deconv3x3s2_neon() 350 outptr2[0] += val * k2[0]; in deconv3x3s2_neon() 351 outptr2[1] += val * k2[1]; in deconv3x3s2_neon() 352 outptr2[2] += val * k2[2]; in deconv3x3s2_neon() [all …]
|
H A D | flatten_arm.cpp | 121 outptr2 += 4; in forward() 129 *outptr2++ = ptr[2]; in forward() 164 outptr2 += 4; in forward() 172 *outptr2++ = ptr[2]; in forward() 294 outptr2 += 4; in forward_bf16s_fp16s() 305 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() 342 outptr2 += 4; in forward_bf16s_fp16s() 350 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() 397 outptr2 += 4; in forward_bf16s_fp16s() 408 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() [all …]
|
H A D | deconvolution_4x4_fp16s.h | 60 __fp16* outptr2 = outptr1 + outw; in deconv4x4s2_fp16sa_neon() local 61 __fp16* outptr3 = outptr2 + outw; in deconv4x4s2_fp16sa_neon() 102 vst2_f16(outptr2, _out2); in deconv4x4s2_fp16sa_neon() 104 _out2 = vld2_f16(outptr2 + 2); in deconv4x4s2_fp16sa_neon() 107 vst2_f16(outptr2 + 2, _out2); in deconv4x4s2_fp16sa_neon() 123 outptr2 += 8; in deconv4x4s2_fp16sa_neon() 140 outptr2[0] += val * k2[0]; in deconv4x4s2_fp16sa_neon() 141 outptr2[1] += val * k2[1]; in deconv4x4s2_fp16sa_neon() 142 outptr2[2] += val * k2[2]; in deconv4x4s2_fp16sa_neon() 143 outptr2[3] += val * k2[3]; in deconv4x4s2_fp16sa_neon() [all …]
|
H A D | convolution_1x1_bf16s.h | 501 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 667 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 801 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 988 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 1140 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 1265 outptr2 += 8; in conv1x1s1_sgemm_bf16s_neon() 1383 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 1493 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 1570 outptr2 += 4; in conv1x1s1_sgemm_bf16s_neon() 1670 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | deconvolution_4x4.h | 62 float* outptr2 = outptr1 + outw; in deconv4x4s1_neon() local 63 float* outptr3 = outptr2 + outw; in deconv4x4s1_neon() 143 outptr2 += 4; in deconv4x4s1_neon() 163 outptr2[0] += val * k2[0]; in deconv4x4s1_neon() 164 outptr2[1] += val * k2[1]; in deconv4x4s1_neon() 165 outptr2[2] += val * k2[2]; in deconv4x4s1_neon() 166 outptr2[3] += val * k2[3]; in deconv4x4s1_neon() 176 outptr2++; in deconv4x4s1_neon() 274 vst2q_f32(outptr2, _out2); in deconv4x4s2_neon() 295 outptr2 += 8; in deconv4x4s2_neon() [all …]
|
H A D | deconvolution_3x3.h | 206 outptr2 += 4; in deconv3x3s1_neon() 222 outptr2[0] += val * k2[0]; in deconv3x3s1_neon() 223 outptr2[1] += val * k2[1]; in deconv3x3s1_neon() 224 outptr2[2] += val * k2[2]; in deconv3x3s1_neon() 229 outptr2++; in deconv3x3s1_neon() 325 vst2q_f32(outptr2, _out2); in deconv3x3s2_neon() 334 outptr2 += 8; in deconv3x3s2_neon() 350 outptr2[0] += val * k2[0]; in deconv3x3s2_neon() 351 outptr2[1] += val * k2[1]; in deconv3x3s2_neon() 352 outptr2[2] += val * k2[2]; in deconv3x3s2_neon() [all …]
|
H A D | flatten_arm.cpp | 121 outptr2 += 4; in forward() 129 *outptr2++ = ptr[2]; in forward() 164 outptr2 += 4; in forward() 172 *outptr2++ = ptr[2]; in forward() 294 outptr2 += 4; in forward_bf16s_fp16s() 305 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() 342 outptr2 += 4; in forward_bf16s_fp16s() 350 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() 397 outptr2 += 4; in forward_bf16s_fp16s() 408 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() [all …]
|
H A D | deconvolution_4x4_fp16s.h | 60 __fp16* outptr2 = outptr1 + outw; in deconv4x4s2_fp16sa_neon() local 61 __fp16* outptr3 = outptr2 + outw; in deconv4x4s2_fp16sa_neon() 102 vst2_f16(outptr2, _out2); in deconv4x4s2_fp16sa_neon() 104 _out2 = vld2_f16(outptr2 + 2); in deconv4x4s2_fp16sa_neon() 107 vst2_f16(outptr2 + 2, _out2); in deconv4x4s2_fp16sa_neon() 123 outptr2 += 8; in deconv4x4s2_fp16sa_neon() 140 outptr2[0] += val * k2[0]; in deconv4x4s2_fp16sa_neon() 141 outptr2[1] += val * k2[1]; in deconv4x4s2_fp16sa_neon() 142 outptr2[2] += val * k2[2]; in deconv4x4s2_fp16sa_neon() 143 outptr2[3] += val * k2[3]; in deconv4x4s2_fp16sa_neon() [all …]
|
H A D | convolution_1x1_bf16s.h | 501 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 667 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 801 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 988 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 1140 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 1265 outptr2 += 8; in conv1x1s1_sgemm_bf16s_neon() 1383 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 1493 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 1570 outptr2 += 4; in conv1x1s1_sgemm_bf16s_neon() 1670 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | deconvolution_4x4.h | 62 float* outptr2 = outptr1 + outw; in deconv4x4s1_neon() local 63 float* outptr3 = outptr2 + outw; in deconv4x4s1_neon() 143 outptr2 += 4; in deconv4x4s1_neon() 163 outptr2[0] += val * k2[0]; in deconv4x4s1_neon() 164 outptr2[1] += val * k2[1]; in deconv4x4s1_neon() 165 outptr2[2] += val * k2[2]; in deconv4x4s1_neon() 166 outptr2[3] += val * k2[3]; in deconv4x4s1_neon() 176 outptr2++; in deconv4x4s1_neon() 274 vst2q_f32(outptr2, _out2); in deconv4x4s2_neon() 295 outptr2 += 8; in deconv4x4s2_neon() [all …]
|
H A D | deconvolution_3x3.h | 206 outptr2 += 4; in deconv3x3s1_neon() 222 outptr2[0] += val * k2[0]; in deconv3x3s1_neon() 223 outptr2[1] += val * k2[1]; in deconv3x3s1_neon() 224 outptr2[2] += val * k2[2]; in deconv3x3s1_neon() 229 outptr2++; in deconv3x3s1_neon() 325 vst2q_f32(outptr2, _out2); in deconv3x3s2_neon() 334 outptr2 += 8; in deconv3x3s2_neon() 350 outptr2[0] += val * k2[0]; in deconv3x3s2_neon() 351 outptr2[1] += val * k2[1]; in deconv3x3s2_neon() 352 outptr2[2] += val * k2[2]; in deconv3x3s2_neon() [all …]
|
H A D | flatten_arm.cpp | 126 outptr2 += 4; in forward() 134 *outptr2++ = ptr[2]; in forward() 169 outptr2 += 4; in forward() 177 *outptr2++ = ptr[2]; in forward() 300 outptr2 += 4; in forward_bf16s_fp16s() 311 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() 348 outptr2 += 4; in forward_bf16s_fp16s() 356 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() 403 outptr2 += 4; in forward_bf16s_fp16s() 414 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() [all …]
|
H A D | deconvolution_4x4_fp16s.h | 60 __fp16* outptr2 = outptr1 + outw; in deconv4x4s2_fp16sa_neon() local 61 __fp16* outptr3 = outptr2 + outw; in deconv4x4s2_fp16sa_neon() 102 vst2_f16(outptr2, _out2); in deconv4x4s2_fp16sa_neon() 104 _out2 = vld2_f16(outptr2 + 2); in deconv4x4s2_fp16sa_neon() 107 vst2_f16(outptr2 + 2, _out2); in deconv4x4s2_fp16sa_neon() 123 outptr2 += 8; in deconv4x4s2_fp16sa_neon() 140 outptr2[0] += val * k2[0]; in deconv4x4s2_fp16sa_neon() 141 outptr2[1] += val * k2[1]; in deconv4x4s2_fp16sa_neon() 142 outptr2[2] += val * k2[2]; in deconv4x4s2_fp16sa_neon() 143 outptr2[3] += val * k2[3]; in deconv4x4s2_fp16sa_neon() [all …]
|
H A D | convolution_1x1_bf16s.h | 501 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 667 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 801 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 988 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 1140 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 1265 outptr2 += 8; in conv1x1s1_sgemm_bf16s_neon() 1383 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 1493 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() 1570 outptr2 += 4; in conv1x1s1_sgemm_bf16s_neon() 1670 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon() [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | deconvolution_4x4.h | 62 float* outptr2 = outptr1 + outw; in deconv4x4s1_neon() local 63 float* outptr3 = outptr2 + outw; in deconv4x4s1_neon() 143 outptr2 += 4; in deconv4x4s1_neon() 163 outptr2[0] += val * k2[0]; in deconv4x4s1_neon() 164 outptr2[1] += val * k2[1]; in deconv4x4s1_neon() 165 outptr2[2] += val * k2[2]; in deconv4x4s1_neon() 166 outptr2[3] += val * k2[3]; in deconv4x4s1_neon() 176 outptr2++; in deconv4x4s1_neon() 274 vst2q_f32(outptr2, _out2); in deconv4x4s2_neon() 295 outptr2 += 8; in deconv4x4s2_neon() [all …]
|
H A D | deconvolution_3x3.h | 206 outptr2 += 4; in deconv3x3s1_neon() 222 outptr2[0] += val * k2[0]; in deconv3x3s1_neon() 223 outptr2[1] += val * k2[1]; in deconv3x3s1_neon() 224 outptr2[2] += val * k2[2]; in deconv3x3s1_neon() 229 outptr2++; in deconv3x3s1_neon() 325 vst2q_f32(outptr2, _out2); in deconv3x3s2_neon() 334 outptr2 += 8; in deconv3x3s2_neon() 350 outptr2[0] += val * k2[0]; in deconv3x3s2_neon() 351 outptr2[1] += val * k2[1]; in deconv3x3s2_neon() 352 outptr2[2] += val * k2[2]; in deconv3x3s2_neon() [all …]
|
H A D | flatten_arm.cpp | 118 outptr2 += 4; in forward() 126 *outptr2++ = ptr[2]; in forward() 161 outptr2 += 4; in forward() 169 *outptr2++ = ptr[2]; in forward() 291 outptr2 += 4; in forward_bf16s_fp16s() 302 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() 339 outptr2 += 4; in forward_bf16s_fp16s() 347 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() 394 outptr2 += 4; in forward_bf16s_fp16s() 405 *outptr2++ = ptr[2]; in forward_bf16s_fp16s() [all …]
|
H A D | deconvolution_4x4_fp16s.h | 60 __fp16* outptr2 = outptr1 + outw; in deconv4x4s2_fp16sa_neon() local 61 __fp16* outptr3 = outptr2 + outw; in deconv4x4s2_fp16sa_neon() 102 vst2_f16(outptr2, _out2); in deconv4x4s2_fp16sa_neon() 104 _out2 = vld2_f16(outptr2 + 2); in deconv4x4s2_fp16sa_neon() 107 vst2_f16(outptr2 + 2, _out2); in deconv4x4s2_fp16sa_neon() 123 outptr2 += 8; in deconv4x4s2_fp16sa_neon() 140 outptr2[0] += val * k2[0]; in deconv4x4s2_fp16sa_neon() 141 outptr2[1] += val * k2[1]; in deconv4x4s2_fp16sa_neon() 142 outptr2[2] += val * k2[2]; in deconv4x4s2_fp16sa_neon() 143 outptr2[3] += val * k2[3]; in deconv4x4s2_fp16sa_neon() [all …]
|
H A D | convolution_1x1.h | 456 "2"(outptr2), in conv1x1s1_sgemm_neon() 593 "2"(outptr2), in conv1x1s1_sgemm_neon() 1119 outptr2 += 8; in conv1x1s1_sgemm_neon() 1390 outptr2 += 4; in conv1x1s1_sgemm_neon() 1590 outptr2++; in conv1x1s1_sgemm_neon() 2555 outptr2++; in conv1x1s1_neon() 2676 outptr2++; in conv1x1s1_neon() 2914 outptr2++; in conv1x1s1_neon() 3069 outptr2++; in conv1x1s1_neon() 3400 outptr2++; in conv1x1s1_neon() [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/ |
H A D | flatten_x86.cpp | 121 _mm256_storeu_ps(outptr2, _row2); in forward() 130 outptr2 += 8; in forward() 142 *outptr2++ = ptr[2]; in forward() 178 _mm_storeu_ps(outptr2, _row2); in forward() 184 outptr2 += 4; in forward() 191 *outptr2++ = ptr[2]; in forward() 245 outptr2 += 8; in forward() 257 *outptr2++ = ptr[2]; in forward() 293 _mm_storeu_ps(outptr2, _row2); in forward() 299 outptr2 += 4; in forward() [all …]
|