Home
last modified time | relevance | path

Searched refs:outptr2 (Results 1 – 25 of 435) sorted by relevance

12345678910>>...18

/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Ddeconvolution_4x4.h62 float* outptr2 = outptr1 + outw; in deconv4x4s1_neon() local
63 float* outptr3 = outptr2 + outw; in deconv4x4s1_neon()
143 outptr2 += 4; in deconv4x4s1_neon()
163 outptr2[0] += val * k2[0]; in deconv4x4s1_neon()
164 outptr2[1] += val * k2[1]; in deconv4x4s1_neon()
165 outptr2[2] += val * k2[2]; in deconv4x4s1_neon()
166 outptr2[3] += val * k2[3]; in deconv4x4s1_neon()
176 outptr2++; in deconv4x4s1_neon()
274 vst2q_f32(outptr2, _out2); in deconv4x4s2_neon()
295 outptr2 += 8; in deconv4x4s2_neon()
[all …]
H A Ddeconvolution_3x3.h206 outptr2 += 4; in deconv3x3s1_neon()
222 outptr2[0] += val * k2[0]; in deconv3x3s1_neon()
223 outptr2[1] += val * k2[1]; in deconv3x3s1_neon()
224 outptr2[2] += val * k2[2]; in deconv3x3s1_neon()
229 outptr2++; in deconv3x3s1_neon()
325 vst2q_f32(outptr2, _out2); in deconv3x3s2_neon()
334 outptr2 += 8; in deconv3x3s2_neon()
350 outptr2[0] += val * k2[0]; in deconv3x3s2_neon()
351 outptr2[1] += val * k2[1]; in deconv3x3s2_neon()
352 outptr2[2] += val * k2[2]; in deconv3x3s2_neon()
[all …]
H A Dflatten_arm.cpp121 outptr2 += 4; in forward()
129 *outptr2++ = ptr[2]; in forward()
164 outptr2 += 4; in forward()
172 *outptr2++ = ptr[2]; in forward()
294 outptr2 += 4; in forward_bf16s_fp16s()
305 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
342 outptr2 += 4; in forward_bf16s_fp16s()
350 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
397 outptr2 += 4; in forward_bf16s_fp16s()
408 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
[all …]
H A Ddeconvolution_4x4_fp16s.h60 __fp16* outptr2 = outptr1 + outw; in deconv4x4s2_fp16sa_neon() local
61 __fp16* outptr3 = outptr2 + outw; in deconv4x4s2_fp16sa_neon()
102 vst2_f16(outptr2, _out2); in deconv4x4s2_fp16sa_neon()
104 _out2 = vld2_f16(outptr2 + 2); in deconv4x4s2_fp16sa_neon()
107 vst2_f16(outptr2 + 2, _out2); in deconv4x4s2_fp16sa_neon()
123 outptr2 += 8; in deconv4x4s2_fp16sa_neon()
140 outptr2[0] += val * k2[0]; in deconv4x4s2_fp16sa_neon()
141 outptr2[1] += val * k2[1]; in deconv4x4s2_fp16sa_neon()
142 outptr2[2] += val * k2[2]; in deconv4x4s2_fp16sa_neon()
143 outptr2[3] += val * k2[3]; in deconv4x4s2_fp16sa_neon()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Ddeconvolution_4x4.h62 float* outptr2 = outptr1 + outw; in deconv4x4s1_neon() local
63 float* outptr3 = outptr2 + outw; in deconv4x4s1_neon()
143 outptr2 += 4; in deconv4x4s1_neon()
163 outptr2[0] += val * k2[0]; in deconv4x4s1_neon()
164 outptr2[1] += val * k2[1]; in deconv4x4s1_neon()
165 outptr2[2] += val * k2[2]; in deconv4x4s1_neon()
166 outptr2[3] += val * k2[3]; in deconv4x4s1_neon()
176 outptr2++; in deconv4x4s1_neon()
274 vst2q_f32(outptr2, _out2); in deconv4x4s2_neon()
295 outptr2 += 8; in deconv4x4s2_neon()
[all …]
H A Ddeconvolution_3x3.h206 outptr2 += 4; in deconv3x3s1_neon()
222 outptr2[0] += val * k2[0]; in deconv3x3s1_neon()
223 outptr2[1] += val * k2[1]; in deconv3x3s1_neon()
224 outptr2[2] += val * k2[2]; in deconv3x3s1_neon()
229 outptr2++; in deconv3x3s1_neon()
325 vst2q_f32(outptr2, _out2); in deconv3x3s2_neon()
334 outptr2 += 8; in deconv3x3s2_neon()
350 outptr2[0] += val * k2[0]; in deconv3x3s2_neon()
351 outptr2[1] += val * k2[1]; in deconv3x3s2_neon()
352 outptr2[2] += val * k2[2]; in deconv3x3s2_neon()
[all …]
H A Dflatten_arm.cpp121 outptr2 += 4; in forward()
129 *outptr2++ = ptr[2]; in forward()
164 outptr2 += 4; in forward()
172 *outptr2++ = ptr[2]; in forward()
294 outptr2 += 4; in forward_bf16s_fp16s()
305 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
342 outptr2 += 4; in forward_bf16s_fp16s()
350 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
397 outptr2 += 4; in forward_bf16s_fp16s()
408 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
[all …]
H A Ddeconvolution_4x4_fp16s.h60 __fp16* outptr2 = outptr1 + outw; in deconv4x4s2_fp16sa_neon() local
61 __fp16* outptr3 = outptr2 + outw; in deconv4x4s2_fp16sa_neon()
102 vst2_f16(outptr2, _out2); in deconv4x4s2_fp16sa_neon()
104 _out2 = vld2_f16(outptr2 + 2); in deconv4x4s2_fp16sa_neon()
107 vst2_f16(outptr2 + 2, _out2); in deconv4x4s2_fp16sa_neon()
123 outptr2 += 8; in deconv4x4s2_fp16sa_neon()
140 outptr2[0] += val * k2[0]; in deconv4x4s2_fp16sa_neon()
141 outptr2[1] += val * k2[1]; in deconv4x4s2_fp16sa_neon()
142 outptr2[2] += val * k2[2]; in deconv4x4s2_fp16sa_neon()
143 outptr2[3] += val * k2[3]; in deconv4x4s2_fp16sa_neon()
[all …]
H A Dconvolution_1x1_bf16s.h501 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
667 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
801 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
988 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
1140 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
1265 outptr2 += 8; in conv1x1s1_sgemm_bf16s_neon()
1383 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
1493 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
1570 outptr2 += 4; in conv1x1s1_sgemm_bf16s_neon()
1670 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Ddeconvolution_4x4.h62 float* outptr2 = outptr1 + outw; in deconv4x4s1_neon() local
63 float* outptr3 = outptr2 + outw; in deconv4x4s1_neon()
143 outptr2 += 4; in deconv4x4s1_neon()
163 outptr2[0] += val * k2[0]; in deconv4x4s1_neon()
164 outptr2[1] += val * k2[1]; in deconv4x4s1_neon()
165 outptr2[2] += val * k2[2]; in deconv4x4s1_neon()
166 outptr2[3] += val * k2[3]; in deconv4x4s1_neon()
176 outptr2++; in deconv4x4s1_neon()
274 vst2q_f32(outptr2, _out2); in deconv4x4s2_neon()
295 outptr2 += 8; in deconv4x4s2_neon()
[all …]
H A Ddeconvolution_3x3.h206 outptr2 += 4; in deconv3x3s1_neon()
222 outptr2[0] += val * k2[0]; in deconv3x3s1_neon()
223 outptr2[1] += val * k2[1]; in deconv3x3s1_neon()
224 outptr2[2] += val * k2[2]; in deconv3x3s1_neon()
229 outptr2++; in deconv3x3s1_neon()
325 vst2q_f32(outptr2, _out2); in deconv3x3s2_neon()
334 outptr2 += 8; in deconv3x3s2_neon()
350 outptr2[0] += val * k2[0]; in deconv3x3s2_neon()
351 outptr2[1] += val * k2[1]; in deconv3x3s2_neon()
352 outptr2[2] += val * k2[2]; in deconv3x3s2_neon()
[all …]
H A Dflatten_arm.cpp121 outptr2 += 4; in forward()
129 *outptr2++ = ptr[2]; in forward()
164 outptr2 += 4; in forward()
172 *outptr2++ = ptr[2]; in forward()
294 outptr2 += 4; in forward_bf16s_fp16s()
305 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
342 outptr2 += 4; in forward_bf16s_fp16s()
350 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
397 outptr2 += 4; in forward_bf16s_fp16s()
408 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
[all …]
H A Ddeconvolution_4x4_fp16s.h60 __fp16* outptr2 = outptr1 + outw; in deconv4x4s2_fp16sa_neon() local
61 __fp16* outptr3 = outptr2 + outw; in deconv4x4s2_fp16sa_neon()
102 vst2_f16(outptr2, _out2); in deconv4x4s2_fp16sa_neon()
104 _out2 = vld2_f16(outptr2 + 2); in deconv4x4s2_fp16sa_neon()
107 vst2_f16(outptr2 + 2, _out2); in deconv4x4s2_fp16sa_neon()
123 outptr2 += 8; in deconv4x4s2_fp16sa_neon()
140 outptr2[0] += val * k2[0]; in deconv4x4s2_fp16sa_neon()
141 outptr2[1] += val * k2[1]; in deconv4x4s2_fp16sa_neon()
142 outptr2[2] += val * k2[2]; in deconv4x4s2_fp16sa_neon()
143 outptr2[3] += val * k2[3]; in deconv4x4s2_fp16sa_neon()
[all …]
H A Dconvolution_1x1_bf16s.h501 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
667 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
801 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
988 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
1140 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
1265 outptr2 += 8; in conv1x1s1_sgemm_bf16s_neon()
1383 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
1493 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
1570 outptr2 += 4; in conv1x1s1_sgemm_bf16s_neon()
1670 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Ddeconvolution_4x4.h62 float* outptr2 = outptr1 + outw; in deconv4x4s1_neon() local
63 float* outptr3 = outptr2 + outw; in deconv4x4s1_neon()
143 outptr2 += 4; in deconv4x4s1_neon()
163 outptr2[0] += val * k2[0]; in deconv4x4s1_neon()
164 outptr2[1] += val * k2[1]; in deconv4x4s1_neon()
165 outptr2[2] += val * k2[2]; in deconv4x4s1_neon()
166 outptr2[3] += val * k2[3]; in deconv4x4s1_neon()
176 outptr2++; in deconv4x4s1_neon()
274 vst2q_f32(outptr2, _out2); in deconv4x4s2_neon()
295 outptr2 += 8; in deconv4x4s2_neon()
[all …]
H A Ddeconvolution_3x3.h206 outptr2 += 4; in deconv3x3s1_neon()
222 outptr2[0] += val * k2[0]; in deconv3x3s1_neon()
223 outptr2[1] += val * k2[1]; in deconv3x3s1_neon()
224 outptr2[2] += val * k2[2]; in deconv3x3s1_neon()
229 outptr2++; in deconv3x3s1_neon()
325 vst2q_f32(outptr2, _out2); in deconv3x3s2_neon()
334 outptr2 += 8; in deconv3x3s2_neon()
350 outptr2[0] += val * k2[0]; in deconv3x3s2_neon()
351 outptr2[1] += val * k2[1]; in deconv3x3s2_neon()
352 outptr2[2] += val * k2[2]; in deconv3x3s2_neon()
[all …]
H A Dflatten_arm.cpp126 outptr2 += 4; in forward()
134 *outptr2++ = ptr[2]; in forward()
169 outptr2 += 4; in forward()
177 *outptr2++ = ptr[2]; in forward()
300 outptr2 += 4; in forward_bf16s_fp16s()
311 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
348 outptr2 += 4; in forward_bf16s_fp16s()
356 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
403 outptr2 += 4; in forward_bf16s_fp16s()
414 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
[all …]
H A Ddeconvolution_4x4_fp16s.h60 __fp16* outptr2 = outptr1 + outw; in deconv4x4s2_fp16sa_neon() local
61 __fp16* outptr3 = outptr2 + outw; in deconv4x4s2_fp16sa_neon()
102 vst2_f16(outptr2, _out2); in deconv4x4s2_fp16sa_neon()
104 _out2 = vld2_f16(outptr2 + 2); in deconv4x4s2_fp16sa_neon()
107 vst2_f16(outptr2 + 2, _out2); in deconv4x4s2_fp16sa_neon()
123 outptr2 += 8; in deconv4x4s2_fp16sa_neon()
140 outptr2[0] += val * k2[0]; in deconv4x4s2_fp16sa_neon()
141 outptr2[1] += val * k2[1]; in deconv4x4s2_fp16sa_neon()
142 outptr2[2] += val * k2[2]; in deconv4x4s2_fp16sa_neon()
143 outptr2[3] += val * k2[3]; in deconv4x4s2_fp16sa_neon()
[all …]
H A Dconvolution_1x1_bf16s.h501 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
667 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
801 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
988 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
1140 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
1265 outptr2 += 8; in conv1x1s1_sgemm_bf16s_neon()
1383 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
1493 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
1570 outptr2 += 4; in conv1x1s1_sgemm_bf16s_neon()
1670 "2"(outptr2), in conv1x1s1_sgemm_bf16s_neon()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Ddeconvolution_4x4.h62 float* outptr2 = outptr1 + outw; in deconv4x4s1_neon() local
63 float* outptr3 = outptr2 + outw; in deconv4x4s1_neon()
143 outptr2 += 4; in deconv4x4s1_neon()
163 outptr2[0] += val * k2[0]; in deconv4x4s1_neon()
164 outptr2[1] += val * k2[1]; in deconv4x4s1_neon()
165 outptr2[2] += val * k2[2]; in deconv4x4s1_neon()
166 outptr2[3] += val * k2[3]; in deconv4x4s1_neon()
176 outptr2++; in deconv4x4s1_neon()
274 vst2q_f32(outptr2, _out2); in deconv4x4s2_neon()
295 outptr2 += 8; in deconv4x4s2_neon()
[all …]
H A Ddeconvolution_3x3.h206 outptr2 += 4; in deconv3x3s1_neon()
222 outptr2[0] += val * k2[0]; in deconv3x3s1_neon()
223 outptr2[1] += val * k2[1]; in deconv3x3s1_neon()
224 outptr2[2] += val * k2[2]; in deconv3x3s1_neon()
229 outptr2++; in deconv3x3s1_neon()
325 vst2q_f32(outptr2, _out2); in deconv3x3s2_neon()
334 outptr2 += 8; in deconv3x3s2_neon()
350 outptr2[0] += val * k2[0]; in deconv3x3s2_neon()
351 outptr2[1] += val * k2[1]; in deconv3x3s2_neon()
352 outptr2[2] += val * k2[2]; in deconv3x3s2_neon()
[all …]
H A Dflatten_arm.cpp118 outptr2 += 4; in forward()
126 *outptr2++ = ptr[2]; in forward()
161 outptr2 += 4; in forward()
169 *outptr2++ = ptr[2]; in forward()
291 outptr2 += 4; in forward_bf16s_fp16s()
302 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
339 outptr2 += 4; in forward_bf16s_fp16s()
347 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
394 outptr2 += 4; in forward_bf16s_fp16s()
405 *outptr2++ = ptr[2]; in forward_bf16s_fp16s()
[all …]
H A Ddeconvolution_4x4_fp16s.h60 __fp16* outptr2 = outptr1 + outw; in deconv4x4s2_fp16sa_neon() local
61 __fp16* outptr3 = outptr2 + outw; in deconv4x4s2_fp16sa_neon()
102 vst2_f16(outptr2, _out2); in deconv4x4s2_fp16sa_neon()
104 _out2 = vld2_f16(outptr2 + 2); in deconv4x4s2_fp16sa_neon()
107 vst2_f16(outptr2 + 2, _out2); in deconv4x4s2_fp16sa_neon()
123 outptr2 += 8; in deconv4x4s2_fp16sa_neon()
140 outptr2[0] += val * k2[0]; in deconv4x4s2_fp16sa_neon()
141 outptr2[1] += val * k2[1]; in deconv4x4s2_fp16sa_neon()
142 outptr2[2] += val * k2[2]; in deconv4x4s2_fp16sa_neon()
143 outptr2[3] += val * k2[3]; in deconv4x4s2_fp16sa_neon()
[all …]
H A Dconvolution_1x1.h456 "2"(outptr2), in conv1x1s1_sgemm_neon()
593 "2"(outptr2), in conv1x1s1_sgemm_neon()
1119 outptr2 += 8; in conv1x1s1_sgemm_neon()
1390 outptr2 += 4; in conv1x1s1_sgemm_neon()
1590 outptr2++; in conv1x1s1_sgemm_neon()
2555 outptr2++; in conv1x1s1_neon()
2676 outptr2++; in conv1x1s1_neon()
2914 outptr2++; in conv1x1s1_neon()
3069 outptr2++; in conv1x1s1_neon()
3400 outptr2++; in conv1x1s1_neon()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/
H A Dflatten_x86.cpp121 _mm256_storeu_ps(outptr2, _row2); in forward()
130 outptr2 += 8; in forward()
142 *outptr2++ = ptr[2]; in forward()
178 _mm_storeu_ps(outptr2, _row2); in forward()
184 outptr2 += 4; in forward()
191 *outptr2++ = ptr[2]; in forward()
245 outptr2 += 8; in forward()
257 *outptr2++ = ptr[2]; in forward()
293 _mm_storeu_ps(outptr2, _row2); in forward()
299 outptr2 += 4; in forward()
[all …]

12345678910>>...18