Home
last modified time | relevance | path

Searched refs:tailstep (Results 1 – 25 of 340) sorted by relevance

12345678910>>...14

/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_7x7_pack1to4_bf16s.h2776 r0 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2777 r1 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2778 r2 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2779 r3 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2780 r4 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2781 r5 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2782 r6 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5565 r0 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5566 r1 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5567 r2 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
[all …]
H A Dconvolution_7x7_pack1to8_fp16s.h24 const int tailstep = w - 2 * outw + w; in conv7x7s2_pack1to8_fp16sa_neon() local
1145 r0 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1146 r1 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1147 r2 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1148 r3 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1149 r4 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1150 r5 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1151 r6 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
H A Dconvolution_7x7_pack1to4.h24 const int tailstep = w - 2 * outw + w; in conv7x7s2_pack1to4_neon() local
2099 r0 += tailstep; in conv7x7s2_pack1to4_neon()
2100 r1 += tailstep; in conv7x7s2_pack1to4_neon()
2101 r2 += tailstep; in conv7x7s2_pack1to4_neon()
2102 r3 += tailstep; in conv7x7s2_pack1to4_neon()
2103 r4 += tailstep; in conv7x7s2_pack1to4_neon()
2104 r5 += tailstep; in conv7x7s2_pack1to4_neon()
2105 r6 += tailstep; in conv7x7s2_pack1to4_neon()
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_7x7_pack1to4_bf16s.h2776 r0 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2777 r1 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2778 r2 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2779 r3 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2780 r4 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2781 r5 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2782 r6 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5565 r0 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5566 r1 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5567 r2 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
[all …]
H A Dconvolution_7x7_pack1to8_fp16s.h24 const int tailstep = w - 2 * outw + w; in conv7x7s2_pack1to8_fp16sa_neon() local
1145 r0 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1146 r1 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1147 r2 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1148 r3 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1149 r4 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1150 r5 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1151 r6 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
H A Dconvolution_7x7_pack1to4.h24 const int tailstep = w - 2 * outw + w; in conv7x7s2_pack1to4_neon() local
2099 r0 += tailstep; in conv7x7s2_pack1to4_neon()
2100 r1 += tailstep; in conv7x7s2_pack1to4_neon()
2101 r2 += tailstep; in conv7x7s2_pack1to4_neon()
2102 r3 += tailstep; in conv7x7s2_pack1to4_neon()
2103 r4 += tailstep; in conv7x7s2_pack1to4_neon()
2104 r5 += tailstep; in conv7x7s2_pack1to4_neon()
2105 r6 += tailstep; in conv7x7s2_pack1to4_neon()
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_7x7_pack1to4_bf16s.h2776 r0 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2777 r1 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2778 r2 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2779 r3 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2780 r4 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2781 r5 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2782 r6 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5565 r0 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5566 r1 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5567 r2 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
[all …]
H A Dconvolution_7x7_pack1to8_fp16s.h24 const int tailstep = w - 2 * outw + w; in conv7x7s2_pack1to8_fp16sa_neon() local
1145 r0 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1146 r1 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1147 r2 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1148 r3 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1149 r4 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1150 r5 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1151 r6 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
H A Dconvolution_7x7_pack1to4.h24 const int tailstep = w - 2 * outw + w; in conv7x7s2_pack1to4_neon() local
2099 r0 += tailstep; in conv7x7s2_pack1to4_neon()
2100 r1 += tailstep; in conv7x7s2_pack1to4_neon()
2101 r2 += tailstep; in conv7x7s2_pack1to4_neon()
2102 r3 += tailstep; in conv7x7s2_pack1to4_neon()
2103 r4 += tailstep; in conv7x7s2_pack1to4_neon()
2104 r5 += tailstep; in conv7x7s2_pack1to4_neon()
2105 r6 += tailstep; in conv7x7s2_pack1to4_neon()
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_7x7_pack1to4_bf16s.h2776 r0 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2777 r1 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2778 r2 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2779 r3 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2780 r4 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2781 r5 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2782 r6 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5565 r0 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5566 r1 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5567 r2 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
[all …]
H A Dconvolution_7x7_pack1to8_fp16s.h24 const int tailstep = w - 2 * outw + w; in conv7x7s2_pack1to8_fp16sa_neon() local
1145 r0 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1146 r1 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1147 r2 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1148 r3 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1149 r4 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1150 r5 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1151 r6 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
H A Dconvolution_7x7_pack1to4.h24 const int tailstep = w - 2 * outw + w; in conv7x7s2_pack1to4_neon() local
2099 r0 += tailstep; in conv7x7s2_pack1to4_neon()
2100 r1 += tailstep; in conv7x7s2_pack1to4_neon()
2101 r2 += tailstep; in conv7x7s2_pack1to4_neon()
2102 r3 += tailstep; in conv7x7s2_pack1to4_neon()
2103 r4 += tailstep; in conv7x7s2_pack1to4_neon()
2104 r5 += tailstep; in conv7x7s2_pack1to4_neon()
2105 r6 += tailstep; in conv7x7s2_pack1to4_neon()
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_7x7_pack1to4_bf16s.h2776 r0 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2777 r1 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2778 r2 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2779 r3 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2780 r4 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2781 r5 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
2782 r6 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5565 r0 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5566 r1 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
5567 r2 += tailstep; in conv7x7s2_pack1to4_bf16s_neon()
[all …]
H A Dconvolution_7x7_pack1to8_fp16s.h24 const int tailstep = w - 2 * outw + w; in conv7x7s2_pack1to8_fp16sa_neon() local
1145 r0 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1146 r1 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1147 r2 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1148 r3 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1149 r4 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1150 r5 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
1151 r6 += tailstep; in conv7x7s2_pack1to8_fp16sa_neon()
H A Dconvolution_7x7_pack1to4.h24 const int tailstep = w - 2 * outw + w; in conv7x7s2_pack1to4_neon() local
2099 r0 += tailstep; in conv7x7s2_pack1to4_neon()
2100 r1 += tailstep; in conv7x7s2_pack1to4_neon()
2101 r2 += tailstep; in conv7x7s2_pack1to4_neon()
2102 r3 += tailstep; in conv7x7s2_pack1to4_neon()
2103 r4 += tailstep; in conv7x7s2_pack1to4_neon()
2104 r5 += tailstep; in conv7x7s2_pack1to4_neon()
2105 r6 += tailstep; in conv7x7s2_pack1to4_neon()
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/
H A Dconvolution_1x1_int8.h105 const int tailstep = w - 2 * outw + w; in conv1x1s2_int8_sse() local
154 r0 += tailstep; in conv1x1s2_int8_sse()
155 r1 += tailstep; in conv1x1s2_int8_sse()
156 r2 += tailstep; in conv1x1s2_int8_sse()
157 r3 += tailstep; in conv1x1s2_int8_sse()
158 r4 += tailstep; in conv1x1s2_int8_sse()
159 r5 += tailstep; in conv1x1s2_int8_sse()
160 r6 += tailstep; in conv1x1s2_int8_sse()
161 r7 += tailstep; in conv1x1s2_int8_sse()
188 r0 += tailstep; in conv1x1s2_int8_sse()
H A Dconvolutiondepthwise_3x3_int8.h88 const int tailstep = w - 2 * outw + w; in convdw3x3s2_int8_sse() local
136 r0 += tailstep; in convdw3x3s2_int8_sse()
137 r1 += tailstep; in convdw3x3s2_int8_sse()
138 r2 += tailstep; in convdw3x3s2_int8_sse()
218 const int tailstep = w - 2 * outw + w; in convdw3x3s2_int8_dequant_sse() local
269 r0 += tailstep; in convdw3x3s2_int8_dequant_sse()
270 r1 += tailstep; in convdw3x3s2_int8_dequant_sse()
271 r2 += tailstep; in convdw3x3s2_int8_dequant_sse()
400 r0 += tailstep; in convdw3x3s2_int8_requant_sse()
401 r1 += tailstep; in convdw3x3s2_int8_requant_sse()
[all …]
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/
H A Dconvolution_1x1_int8.h105 const int tailstep = w - 2 * outw + w; in conv1x1s2_int8_sse() local
154 r0 += tailstep; in conv1x1s2_int8_sse()
155 r1 += tailstep; in conv1x1s2_int8_sse()
156 r2 += tailstep; in conv1x1s2_int8_sse()
157 r3 += tailstep; in conv1x1s2_int8_sse()
158 r4 += tailstep; in conv1x1s2_int8_sse()
159 r5 += tailstep; in conv1x1s2_int8_sse()
160 r6 += tailstep; in conv1x1s2_int8_sse()
161 r7 += tailstep; in conv1x1s2_int8_sse()
188 r0 += tailstep; in conv1x1s2_int8_sse()
H A Dconvolutiondepthwise_3x3_int8.h88 const int tailstep = w - 2 * outw + w; in convdw3x3s2_int8_sse() local
136 r0 += tailstep; in convdw3x3s2_int8_sse()
137 r1 += tailstep; in convdw3x3s2_int8_sse()
138 r2 += tailstep; in convdw3x3s2_int8_sse()
218 const int tailstep = w - 2 * outw + w; in convdw3x3s2_int8_dequant_sse() local
269 r0 += tailstep; in convdw3x3s2_int8_dequant_sse()
270 r1 += tailstep; in convdw3x3s2_int8_dequant_sse()
271 r2 += tailstep; in convdw3x3s2_int8_dequant_sse()
400 r0 += tailstep; in convdw3x3s2_int8_requant_sse()
401 r1 += tailstep; in convdw3x3s2_int8_requant_sse()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/
H A Dconvolution_1x1_int8.h105 const int tailstep = w - 2 * outw + w; in conv1x1s2_int8_sse() local
154 r0 += tailstep; in conv1x1s2_int8_sse()
155 r1 += tailstep; in conv1x1s2_int8_sse()
156 r2 += tailstep; in conv1x1s2_int8_sse()
157 r3 += tailstep; in conv1x1s2_int8_sse()
158 r4 += tailstep; in conv1x1s2_int8_sse()
159 r5 += tailstep; in conv1x1s2_int8_sse()
160 r6 += tailstep; in conv1x1s2_int8_sse()
161 r7 += tailstep; in conv1x1s2_int8_sse()
188 r0 += tailstep; in conv1x1s2_int8_sse()
H A Dconvolutiondepthwise_3x3_int8.h88 const int tailstep = w - 2 * outw + w; in convdw3x3s2_int8_sse() local
136 r0 += tailstep; in convdw3x3s2_int8_sse()
137 r1 += tailstep; in convdw3x3s2_int8_sse()
138 r2 += tailstep; in convdw3x3s2_int8_sse()
218 const int tailstep = w - 2 * outw + w; in convdw3x3s2_int8_dequant_sse() local
269 r0 += tailstep; in convdw3x3s2_int8_dequant_sse()
270 r1 += tailstep; in convdw3x3s2_int8_dequant_sse()
271 r2 += tailstep; in convdw3x3s2_int8_dequant_sse()
400 r0 += tailstep; in convdw3x3s2_int8_requant_sse()
401 r1 += tailstep; in convdw3x3s2_int8_requant_sse()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/
H A Dconvolution_1x1_int8.h105 const int tailstep = w - 2 * outw + w; in conv1x1s2_int8_sse() local
154 r0 += tailstep; in conv1x1s2_int8_sse()
155 r1 += tailstep; in conv1x1s2_int8_sse()
156 r2 += tailstep; in conv1x1s2_int8_sse()
157 r3 += tailstep; in conv1x1s2_int8_sse()
158 r4 += tailstep; in conv1x1s2_int8_sse()
159 r5 += tailstep; in conv1x1s2_int8_sse()
160 r6 += tailstep; in conv1x1s2_int8_sse()
161 r7 += tailstep; in conv1x1s2_int8_sse()
188 r0 += tailstep; in conv1x1s2_int8_sse()
H A Dconvolutiondepthwise_3x3_int8.h88 const int tailstep = w - 2 * outw + w; in convdw3x3s2_int8_sse() local
136 r0 += tailstep; in convdw3x3s2_int8_sse()
137 r1 += tailstep; in convdw3x3s2_int8_sse()
138 r2 += tailstep; in convdw3x3s2_int8_sse()
218 const int tailstep = w - 2 * outw + w; in convdw3x3s2_int8_dequant_sse() local
269 r0 += tailstep; in convdw3x3s2_int8_dequant_sse()
270 r1 += tailstep; in convdw3x3s2_int8_dequant_sse()
271 r2 += tailstep; in convdw3x3s2_int8_dequant_sse()
400 r0 += tailstep; in convdw3x3s2_int8_requant_sse()
401 r1 += tailstep; in convdw3x3s2_int8_requant_sse()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/
H A Dconvolution_1x1_int8.h105 const int tailstep = w - 2 * outw + w; in conv1x1s2_int8_sse() local
154 r0 += tailstep; in conv1x1s2_int8_sse()
155 r1 += tailstep; in conv1x1s2_int8_sse()
156 r2 += tailstep; in conv1x1s2_int8_sse()
157 r3 += tailstep; in conv1x1s2_int8_sse()
158 r4 += tailstep; in conv1x1s2_int8_sse()
159 r5 += tailstep; in conv1x1s2_int8_sse()
160 r6 += tailstep; in conv1x1s2_int8_sse()
161 r7 += tailstep; in conv1x1s2_int8_sse()
188 r0 += tailstep; in conv1x1s2_int8_sse()
H A Dconvolutiondepthwise_3x3_int8.h96 const int tailstep = w - 2 * outw + w; in convdw3x3s2_int8_sse() local
144 r0 += tailstep; in convdw3x3s2_int8_sse()
145 r1 += tailstep; in convdw3x3s2_int8_sse()
146 r2 += tailstep; in convdw3x3s2_int8_sse()
226 const int tailstep = w - 2 * outw + w; in convdw3x3s2_int8_dequant_sse() local
277 r0 += tailstep; in convdw3x3s2_int8_dequant_sse()
278 r1 += tailstep; in convdw3x3s2_int8_dequant_sse()
279 r2 += tailstep; in convdw3x3s2_int8_dequant_sse()
408 r0 += tailstep; in convdw3x3s2_int8_requant_sse()
409 r1 += tailstep; in convdw3x3s2_int8_requant_sse()
[all …]

12345678910>>...14