/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | requantize_relu_pack8.h | 123 _bias0 = vmulq_f32(_bias0, _scale_out0); in requantize_relu_pack8_neon() 139 _v0 = vfmaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack8_neon() 141 _v2 = vfmaq_f32(_bias0, _v2, _scale0); in requantize_relu_pack8_neon() 143 _v4 = vfmaq_f32(_bias0, _v4, _scale0); in requantize_relu_pack8_neon() 145 _v6 = vfmaq_f32(_bias0, _v6, _scale0); in requantize_relu_pack8_neon() 165 _v0 = vfmaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack8_neon() 167 _v2 = vfmaq_f32(_bias0, _v2, _scale0); in requantize_relu_pack8_neon() 170 _v0 = vmlaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack8_neon() 172 _v2 = vmlaq_f32(_bias0, _v2, _scale0); in requantize_relu_pack8_neon() 187 _v0 = vfmaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack8_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack4.h | 215 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 315 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 395 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 509 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 610 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 688 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 761 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 950 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() 1085 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() 1166 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack4_bf16s.h | 287 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 433 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 538 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 690 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 823 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 919 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 1010 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 1016 float32x4_t _sum0 = _bias0; in convdw3x3s1_pack4_bf16s_neon() 1252 "w"(_bias0) // %17 in convdw3x3s2_pack4_bf16s_neon() 1360 "w"(_bias0) // %17 in convdw3x3s2_pack4_bf16s_neon() [all …]
|
H A D | requantize_leakyrelu_pack8.h | 124 _bias0 = vmulq_f32(_bias0, _scale_out0); in requantize_leakyrelu_pack8_neon() 141 _v0 = vfmaq_f32(_bias0, _v0, _scale0); in requantize_leakyrelu_pack8_neon() 143 _v2 = vfmaq_f32(_bias0, _v2, _scale0); in requantize_leakyrelu_pack8_neon() 145 _v4 = vfmaq_f32(_bias0, _v4, _scale0); in requantize_leakyrelu_pack8_neon() 147 _v6 = vfmaq_f32(_bias0, _v6, _scale0); in requantize_leakyrelu_pack8_neon() 167 _v0 = vfmaq_f32(_bias0, _v0, _scale0); in requantize_leakyrelu_pack8_neon() 169 _v2 = vfmaq_f32(_bias0, _v2, _scale0); in requantize_leakyrelu_pack8_neon() 172 _v0 = vmlaq_f32(_bias0, _v0, _scale0); in requantize_leakyrelu_pack8_neon() 174 _v2 = vmlaq_f32(_bias0, _v2, _scale0); in requantize_leakyrelu_pack8_neon() 189 _v0 = vfmaq_f32(_bias0, _v0, _scale0); in requantize_leakyrelu_pack8_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16s.h | 31 float16x8_t _bias0 = bias ? vld1q_f16(bias + g * 8) : vdupq_n_f16((__fp16)0.f); in convdw3x3s1_pack8_fp16sa_neon() local 211 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 306 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 385 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 490 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 557 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 614 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 643 float16x8_t _bias0 = bias ? vld1q_f16(bias + g * 8) : vdupq_n_f16((__fp16)0.f); in convdw3x3s2_pack8_fp16sa_neon() local 768 "w"(_bias0) // %17 in convdw3x3s2_pack8_fp16sa_neon() 843 "w"(_bias0) // %17 in convdw3x3s2_pack8_fp16sa_neon() [all …]
|
H A D | requantize_relu_pack4.h | 115 _bias0 = vmulq_f32(_bias0, _scale_out0); in requantize_relu_pack4_neon() 130 _v00 = vfmaq_f32(_bias0, _v00, _scale0); in requantize_relu_pack4_neon() 131 _v01 = vfmaq_f32(_bias0, _v01, _scale0); in requantize_relu_pack4_neon() 132 _v02 = vfmaq_f32(_bias0, _v02, _scale0); in requantize_relu_pack4_neon() 133 _v03 = vfmaq_f32(_bias0, _v03, _scale0); in requantize_relu_pack4_neon() 155 _v00 = vfmaq_f32(_bias0, _v00, _scale0); in requantize_relu_pack4_neon() 156 _v01 = vfmaq_f32(_bias0, _v01, _scale0); in requantize_relu_pack4_neon() 160 _v00 = vmlaq_f32(_bias0, _v00, _scale0); in requantize_relu_pack4_neon() 161 _v01 = vmlaq_f32(_bias0, _v01, _scale0); in requantize_relu_pack4_neon() 177 _v0 = vfmaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack4_neon() [all …]
|
H A D | requantize_leakyrelu_pack4.h | 116 _bias0 = vmulq_f32(_bias0, _scale_out0); in requantize_leakyrelu_pack4_neon() 132 _v00 = vfmaq_f32(_bias0, _v00, _scale0); in requantize_leakyrelu_pack4_neon() 133 _v01 = vfmaq_f32(_bias0, _v01, _scale0); in requantize_leakyrelu_pack4_neon() 134 _v02 = vfmaq_f32(_bias0, _v02, _scale0); in requantize_leakyrelu_pack4_neon() 135 _v03 = vfmaq_f32(_bias0, _v03, _scale0); in requantize_leakyrelu_pack4_neon() 157 _v00 = vfmaq_f32(_bias0, _v00, _scale0); in requantize_leakyrelu_pack4_neon() 158 _v01 = vfmaq_f32(_bias0, _v01, _scale0); in requantize_leakyrelu_pack4_neon() 162 _v00 = vmlaq_f32(_bias0, _v00, _scale0); in requantize_leakyrelu_pack4_neon() 163 _v01 = vmlaq_f32(_bias0, _v01, _scale0); in requantize_leakyrelu_pack4_neon() 179 _v0 = vfmaq_f32(_bias0, _v0, _scale0); in requantize_leakyrelu_pack4_neon() [all …]
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolutiondepthwise_3x3_pack4.h | 215 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 315 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 395 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 509 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 610 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 688 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 761 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 950 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() 1085 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() 1166 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack4_bf16s.h | 287 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 433 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 538 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 690 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 823 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 919 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 1010 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 1016 float32x4_t _sum0 = _bias0; in convdw3x3s1_pack4_bf16s_neon() 1252 "w"(_bias0) // %17 in convdw3x3s2_pack4_bf16s_neon() 1360 "w"(_bias0) // %17 in convdw3x3s2_pack4_bf16s_neon() [all …]
|
H A D | requantize_relu_pack8.h | 118 …float32x4_t _bias0 = bias_data_size == 1 ? vdupq_n_f32(bias_data[0]) : vld1q_f32((const float*)bia… in requantize_relu_pack8_neon() local 123 _bias0 = vmulq_f32(_bias0, _scale_out0); in requantize_relu_pack8_neon() 139 _v0 = vfmaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack8_neon() 141 _v2 = vfmaq_f32(_bias0, _v2, _scale0); in requantize_relu_pack8_neon() 143 _v4 = vfmaq_f32(_bias0, _v4, _scale0); in requantize_relu_pack8_neon() 145 _v6 = vfmaq_f32(_bias0, _v6, _scale0); in requantize_relu_pack8_neon() 165 _v0 = vfmaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack8_neon() 167 _v2 = vfmaq_f32(_bias0, _v2, _scale0); in requantize_relu_pack8_neon() 231 "w"(_bias0), // %6 in requantize_relu_pack8_neon() 241 _v0 = vmlaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack8_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16s.h | 31 float16x8_t _bias0 = bias ? vld1q_f16(bias + g * 8) : vdupq_n_f16((__fp16)0.f); in convdw3x3s1_pack8_fp16sa_neon() local 211 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 306 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 385 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 490 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 557 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 614 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 643 float16x8_t _bias0 = bias ? vld1q_f16(bias + g * 8) : vdupq_n_f16((__fp16)0.f); in convdw3x3s2_pack8_fp16sa_neon() local 768 "w"(_bias0) // %17 in convdw3x3s2_pack8_fp16sa_neon() 843 "w"(_bias0) // %17 in convdw3x3s2_pack8_fp16sa_neon() [all …]
|
H A D | requantize_relu_pack4.h | 110 …float32x4_t _bias0 = bias_data_size == 1 ? vdupq_n_f32(bias_data[0]) : vld1q_f32((const float*)bia… in requantize_relu_pack4_neon() local 115 _bias0 = vmulq_f32(_bias0, _scale_out0); in requantize_relu_pack4_neon() 130 _v00 = vfmaq_f32(_bias0, _v00, _scale0); in requantize_relu_pack4_neon() 131 _v01 = vfmaq_f32(_bias0, _v01, _scale0); in requantize_relu_pack4_neon() 132 _v02 = vfmaq_f32(_bias0, _v02, _scale0); in requantize_relu_pack4_neon() 133 _v03 = vfmaq_f32(_bias0, _v03, _scale0); in requantize_relu_pack4_neon() 155 _v00 = vfmaq_f32(_bias0, _v00, _scale0); in requantize_relu_pack4_neon() 156 _v01 = vfmaq_f32(_bias0, _v01, _scale0); in requantize_relu_pack4_neon() 225 "w"(_bias0), // %8 in requantize_relu_pack4_neon() 235 _v0 = vmlaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack4_neon() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolutiondepthwise_3x3_pack4.h | 215 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 315 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 395 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 509 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 610 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 688 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 761 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 950 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() 1085 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() 1166 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack4_bf16s.h | 287 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 433 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 538 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 690 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 823 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 919 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 1010 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 1016 float32x4_t _sum0 = _bias0; in convdw3x3s1_pack4_bf16s_neon() 1252 "w"(_bias0) // %17 in convdw3x3s2_pack4_bf16s_neon() 1360 "w"(_bias0) // %17 in convdw3x3s2_pack4_bf16s_neon() [all …]
|
H A D | requantize_relu_pack8.h | 118 …float32x4_t _bias0 = bias_data_size == 1 ? vdupq_n_f32(bias_data[0]) : vld1q_f32((const float*)bia… in requantize_relu_pack8_neon() local 123 _bias0 = vmulq_f32(_bias0, _scale_out0); in requantize_relu_pack8_neon() 139 _v0 = vfmaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack8_neon() 141 _v2 = vfmaq_f32(_bias0, _v2, _scale0); in requantize_relu_pack8_neon() 143 _v4 = vfmaq_f32(_bias0, _v4, _scale0); in requantize_relu_pack8_neon() 145 _v6 = vfmaq_f32(_bias0, _v6, _scale0); in requantize_relu_pack8_neon() 165 _v0 = vfmaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack8_neon() 167 _v2 = vfmaq_f32(_bias0, _v2, _scale0); in requantize_relu_pack8_neon() 231 "w"(_bias0), // %6 in requantize_relu_pack8_neon() 241 _v0 = vmlaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack8_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16s.h | 31 float16x8_t _bias0 = bias ? vld1q_f16(bias + g * 8) : vdupq_n_f16((__fp16)0.f); in convdw3x3s1_pack8_fp16sa_neon() local 211 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 306 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 385 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 490 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 557 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 614 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 643 float16x8_t _bias0 = bias ? vld1q_f16(bias + g * 8) : vdupq_n_f16((__fp16)0.f); in convdw3x3s2_pack8_fp16sa_neon() local 768 "w"(_bias0) // %17 in convdw3x3s2_pack8_fp16sa_neon() 843 "w"(_bias0) // %17 in convdw3x3s2_pack8_fp16sa_neon() [all …]
|
H A D | requantize_relu_pack4.h | 110 …float32x4_t _bias0 = bias_data_size == 1 ? vdupq_n_f32(bias_data[0]) : vld1q_f32((const float*)bia… in requantize_relu_pack4_neon() local 115 _bias0 = vmulq_f32(_bias0, _scale_out0); in requantize_relu_pack4_neon() 130 _v00 = vfmaq_f32(_bias0, _v00, _scale0); in requantize_relu_pack4_neon() 131 _v01 = vfmaq_f32(_bias0, _v01, _scale0); in requantize_relu_pack4_neon() 132 _v02 = vfmaq_f32(_bias0, _v02, _scale0); in requantize_relu_pack4_neon() 133 _v03 = vfmaq_f32(_bias0, _v03, _scale0); in requantize_relu_pack4_neon() 155 _v00 = vfmaq_f32(_bias0, _v00, _scale0); in requantize_relu_pack4_neon() 156 _v01 = vfmaq_f32(_bias0, _v01, _scale0); in requantize_relu_pack4_neon() 225 "w"(_bias0), // %8 in requantize_relu_pack4_neon() 235 _v0 = vmlaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack4_neon() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolutiondepthwise_3x3_pack4.h | 215 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 315 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 395 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 509 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 610 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 688 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 761 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 950 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() 1085 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() 1166 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack4_bf16s.h | 287 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 433 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 538 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 690 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 823 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 919 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 1010 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 1016 float32x4_t _sum0 = _bias0; in convdw3x3s1_pack4_bf16s_neon() 1252 "w"(_bias0) // %17 in convdw3x3s2_pack4_bf16s_neon() 1360 "w"(_bias0) // %17 in convdw3x3s2_pack4_bf16s_neon() [all …]
|
H A D | requantize_relu_pack8.h | 118 …float32x4_t _bias0 = bias_data_size == 1 ? vdupq_n_f32(bias_data[0]) : vld1q_f32((const float*)bia… in requantize_relu_pack8_neon() local 123 _bias0 = vmulq_f32(_bias0, _scale_out0); in requantize_relu_pack8_neon() 139 _v0 = vfmaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack8_neon() 141 _v2 = vfmaq_f32(_bias0, _v2, _scale0); in requantize_relu_pack8_neon() 143 _v4 = vfmaq_f32(_bias0, _v4, _scale0); in requantize_relu_pack8_neon() 145 _v6 = vfmaq_f32(_bias0, _v6, _scale0); in requantize_relu_pack8_neon() 165 _v0 = vfmaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack8_neon() 167 _v2 = vfmaq_f32(_bias0, _v2, _scale0); in requantize_relu_pack8_neon() 231 "w"(_bias0), // %6 in requantize_relu_pack8_neon() 241 _v0 = vmlaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack8_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16s.h | 31 float16x8_t _bias0 = bias ? vld1q_f16(bias + g * 8) : vdupq_n_f16((__fp16)0.f); in convdw3x3s1_pack8_fp16sa_neon() local 211 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 306 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 385 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 490 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 557 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 614 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 643 float16x8_t _bias0 = bias ? vld1q_f16(bias + g * 8) : vdupq_n_f16((__fp16)0.f); in convdw3x3s2_pack8_fp16sa_neon() local 768 "w"(_bias0) // %17 in convdw3x3s2_pack8_fp16sa_neon() 843 "w"(_bias0) // %17 in convdw3x3s2_pack8_fp16sa_neon() [all …]
|
H A D | requantize_relu_pack4.h | 110 …float32x4_t _bias0 = bias_data_size == 1 ? vdupq_n_f32(bias_data[0]) : vld1q_f32((const float*)bia… in requantize_relu_pack4_neon() local 115 _bias0 = vmulq_f32(_bias0, _scale_out0); in requantize_relu_pack4_neon() 130 _v00 = vfmaq_f32(_bias0, _v00, _scale0); in requantize_relu_pack4_neon() 131 _v01 = vfmaq_f32(_bias0, _v01, _scale0); in requantize_relu_pack4_neon() 132 _v02 = vfmaq_f32(_bias0, _v02, _scale0); in requantize_relu_pack4_neon() 133 _v03 = vfmaq_f32(_bias0, _v03, _scale0); in requantize_relu_pack4_neon() 155 _v00 = vfmaq_f32(_bias0, _v00, _scale0); in requantize_relu_pack4_neon() 156 _v01 = vfmaq_f32(_bias0, _v01, _scale0); in requantize_relu_pack4_neon() 225 "w"(_bias0), // %8 in requantize_relu_pack4_neon() 235 _v0 = vmlaq_f32(_bias0, _v0, _scale0); in requantize_relu_pack4_neon() [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolutiondepthwise_3x3_pack4.h | 215 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 315 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 395 "w"(_bias0) // %21 in convdw3x3s1_pack4_neon() 509 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 610 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 688 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 761 "w"(_bias0) // %17 in convdw3x3s1_pack4_neon() 950 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() 1085 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() 1166 "w"(_bias0) // %17 in convdw3x3s2_pack4_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack4_bf16s.h | 287 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 433 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 538 "w"(_bias0) // %21 in convdw3x3s1_pack4_bf16s_neon() 690 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 823 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 919 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 1010 "w"(_bias0) // %17 in convdw3x3s1_pack4_bf16s_neon() 1016 float32x4_t _sum0 = _bias0; in convdw3x3s1_pack4_bf16s_neon() 1252 "w"(_bias0) // %17 in convdw3x3s2_pack4_bf16s_neon() 1360 "w"(_bias0) // %17 in convdw3x3s2_pack4_bf16s_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16s.h | 31 float16x8_t _bias0 = bias ? vld1q_f16(bias + g * 8) : vdupq_n_f16((__fp16)0.f); in convdw3x3s1_pack8_fp16sa_neon() local 211 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 306 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 385 "w"(_bias0) // %21 in convdw3x3s1_pack8_fp16sa_neon() 490 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 557 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 614 "w"(_bias0) // %17 in convdw3x3s1_pack8_fp16sa_neon() 643 float16x8_t _bias0 = bias ? vld1q_f16(bias + g * 8) : vdupq_n_f16((__fp16)0.f); in convdw3x3s2_pack8_fp16sa_neon() local 768 "w"(_bias0) // %17 in convdw3x3s2_pack8_fp16sa_neon() 843 "w"(_bias0) // %17 in convdw3x3s2_pack8_fp16sa_neon() [all …]
|