/*
 *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11 #include <arm_neon.h>
12
13 #include "./vpx_dsp_rtcd.h"
14 #include "vpx/vpx_integer.h"
15
// Straight block copy for the high-bitdepth convolve path: no filtering is
// applied, so the filter/phase/step/bit-depth arguments exist only to match
// the common convolve prototype and are ignored.
//
// Assumes w is one of the libvpx block widths (4, 8, 16, 32, 64) and that h
// is a multiple of the unroll factor used per branch (2 for w <= 8, 4 for
// w >= 16) — TODO confirm against callers; this matches libvpx block sizes.
void vpx_highbd_convolve_copy_neon(const uint16_t *src, ptrdiff_t src_stride,
                                   uint16_t *dst, ptrdiff_t dst_stride,
                                   const InterpKernel *filter, int x0_q4,
                                   int x_step_q4, int y0_q4, int y_step_q4,
                                   int w, int h, int bd) {
  (void)filter;
  (void)x0_q4;
  (void)x_step_q4;
  (void)y0_q4;
  (void)y_step_q4;
  (void)bd;

  if (w < 8) {  // copy4: vld1_u16/vst1_u16 move 4 u16 lanes (64 bits) per row.
    do {
      vst1_u16(dst, vld1_u16(src));
      src += src_stride;
      dst += dst_stride;
      vst1_u16(dst, vld1_u16(src));
      src += src_stride;
      dst += dst_stride;
      h -= 2;
    } while (h > 0);
  } else if (w == 8) {  // copy8: one q register = 8 u16 lanes per row.
    do {
      vst1q_u16(dst, vld1q_u16(src));
      src += src_stride;
      dst += dst_stride;
      vst1q_u16(dst, vld1q_u16(src));
      src += src_stride;
      dst += dst_stride;
      h -= 2;
    } while (h > 0);
  } else if (w < 32) {  // copy16: vld2q/vst2q move 2x8 = 16 u16 lanes per row.
    // The de-interleave on load is undone by the interleave on store, so the
    // net effect is a plain 16-element copy.
    do {
      vst2q_u16(dst, vld2q_u16(src));
      src += src_stride;
      dst += dst_stride;
      vst2q_u16(dst, vld2q_u16(src));
      src += src_stride;
      dst += dst_stride;
      vst2q_u16(dst, vld2q_u16(src));
      src += src_stride;
      dst += dst_stride;
      vst2q_u16(dst, vld2q_u16(src));
      src += src_stride;
      dst += dst_stride;
      h -= 4;
    } while (h > 0);
  } else if (w == 32) {  // copy32: vld4q/vst4q move 4x8 = 32 u16 lanes per row.
    do {
      vst4q_u16(dst, vld4q_u16(src));
      src += src_stride;
      dst += dst_stride;
      vst4q_u16(dst, vld4q_u16(src));
      src += src_stride;
      dst += dst_stride;
      vst4q_u16(dst, vld4q_u16(src));
      src += src_stride;
      dst += dst_stride;
      vst4q_u16(dst, vld4q_u16(src));
      src += src_stride;
      dst += dst_stride;
      h -= 4;
    } while (h > 0);
  } else {  // copy64: two 32-lane transfers cover the 64-wide row.
    do {
      vst4q_u16(dst, vld4q_u16(src));
      vst4q_u16(dst + 32, vld4q_u16(src + 32));
      src += src_stride;
      dst += dst_stride;
      vst4q_u16(dst, vld4q_u16(src));
      vst4q_u16(dst + 32, vld4q_u16(src + 32));
      src += src_stride;
      dst += dst_stride;
      vst4q_u16(dst, vld4q_u16(src));
      vst4q_u16(dst + 32, vld4q_u16(src + 32));
      src += src_stride;
      dst += dst_stride;
      vst4q_u16(dst, vld4q_u16(src));
      vst4q_u16(dst + 32, vld4q_u16(src + 32));
      src += src_stride;
      dst += dst_stride;
      h -= 4;
    } while (h > 0);
  }
}
102