1 /*
2  * Copyright © 2018, VideoLAN and dav1d authors
3  * Copyright © 2018, Two Orioles, LLC
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "config.h"
29 
30 #include <string.h>
31 
32 #include "common/intops.h"
33 
34 #include "src/cdef_apply.h"
35 
36 
/*
 * Plane-selection bitmask for backup2x8(). The bits are independent and may
 * be OR-ed together to back up several planes in one call.
 */
enum Backup2x8Flags {
    BACKUP_2X8_Y  = 0x1, /* back up the Y plane (plane index 0) */
    BACKUP_2X8_UV = 0x2, /* back up the U and V planes (indices 1 and 2) */
};
41 
/*
 * Save the bottom two pixel rows of the current strip of each plane.
 *
 * dst    - per-plane destination buffers, each receiving two full rows
 * src    - per-plane pointers to the first row of the strip
 * stride - byte strides: [0] for plane 0, [1] shared by planes 1 and 2
 * layout - pixel layout; I400 has no chroma planes, I420 has a 4-row
 *          chroma strip, every other layout has an 8-row chroma strip
 *
 * Each copy spans two whole strides. With a negative stride, rows are laid
 * out backwards in memory, so the last row of the strip has the lowest
 * address: the copy then starts at that last row and is written starting one
 * stride "below" dst, so that dst itself still points at the first saved row.
 */
static void backup2lines(pixel *const dst[3], /*const*/ pixel *const src[3],
                         const ptrdiff_t stride[2],
                         const enum Dav1dPixelLayout layout)
{
    /* Plane 0: the strip is 8 rows tall, so rows 6 and 7 are kept. */
    const ptrdiff_t luma_stride = PXSTRIDE(stride[0]);
    if (luma_stride >= 0) {
        pixel_copy(dst[0], src[0] + 6 * luma_stride, 2 * luma_stride);
    } else {
        pixel_copy(dst[0] + luma_stride, src[0] + 7 * luma_stride,
                   -2 * luma_stride);
    }

    if (layout == DAV1D_PIXEL_LAYOUT_I400)
        return;

    /* Planes 1 and 2: same scheme, on a strip of 4 or 8 rows. */
    const ptrdiff_t chroma_stride = PXSTRIDE(stride[1]);
    const int chroma_rows = layout == DAV1D_PIXEL_LAYOUT_I420 ? 4 : 8;
    if (chroma_stride >= 0) {
        const int first_row = chroma_rows - 2;
        for (int pl = 1; pl <= 2; pl++)
            pixel_copy(dst[pl], src[pl] + first_row * chroma_stride,
                       2 * chroma_stride);
    } else {
        const int last_row = chroma_rows - 1;
        for (int pl = 1; pl <= 2; pl++)
            pixel_copy(dst[pl] + chroma_stride,
                       src[pl] + last_row * chroma_stride,
                       -2 * chroma_stride);
    }
}
65 
/*
 * Save a 2-pixel-wide column of each selected plane into dst.
 *
 * dst        - dst[plane][row][0..1] receives the two pixels of each row
 * src        - per-plane pointers to the top-left pixel of the current block
 * src_stride - byte strides: [0] for plane 0, [1] shared by planes 1 and 2
 * x_off      - horizontal offset in plane-0 pixels; the two pixels copied
 *              are the ones immediately to the left of this offset (for
 *              planes 1 and 2 the offset is first halved when the layout is
 *              horizontally subsampled, i.e. anything but I444)
 * layout     - pixel layout; I400 has no planes 1/2, I420 has 4 chroma rows
 *              per block instead of 8
 * flag       - which planes to back up (BACKUP_2X8_Y and/or BACKUP_2X8_UV)
 */
static void backup2x8(pixel dst[3][8][2],
                      /*const*/ pixel *const src[3],
                      const ptrdiff_t src_stride[2], int x_off,
                      const enum Dav1dPixelLayout layout,
                      const enum Backup2x8Flags flag)
{
    if (flag & BACKUP_2X8_Y) {
        const ptrdiff_t luma_stride = PXSTRIDE(src_stride[0]);
        const ptrdiff_t luma_col = x_off - 2;
        for (int row = 0; row < 8; row++)
            pixel_copy(dst[0][row], &src[0][row * luma_stride + luma_col], 2);
    }

    if (!(flag & BACKUP_2X8_UV) || layout == DAV1D_PIXEL_LAYOUT_I400)
        return;

    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
    const int chroma_rows = layout == DAV1D_PIXEL_LAYOUT_I420 ? 4 : 8;
    const ptrdiff_t chroma_stride = PXSTRIDE(src_stride[1]);
    const ptrdiff_t chroma_col = (x_off >> ss_hor) - 2;

    for (int row = 0; row < chroma_rows; row++) {
        const ptrdiff_t off = row * chroma_stride + chroma_col;
        for (int pl = 1; pl <= 2; pl++)
            pixel_copy(dst[pl][row], &src[pl][off], 2);
    }
}
91 
/*
 * Scale a filter strength according to the block variance.
 *
 * Returns 0 when var is 0. Otherwise the result is
 *     (strength * (4 + i) + 8) >> 4
 * where i is min(ulog2(var >> 6), 12), or 0 when var < 64. The multiplier
 * therefore ranges from 4/16 to 16/16, with rounding.
 */
static int adjust_strength(const int strength, const unsigned var) {
    if (var == 0)
        return 0;

    const unsigned scaled_var = var >> 6;
    int log_term = 0;
    if (scaled_var != 0)
        log_term = imin(ulog2(scaled_var), 12);

    return (strength * (log_term + 4) + 8) >> 4;
}
97 
bytefn(dav1d_cdef_brow)98 void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
99                              pixel *const p[3],
100                              const Av1Filter *const lflvl,
101                              const int by_start, const int by_end)
102 {
103     const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8;
104     const Dav1dDSPContext *const dsp = f->dsp;
105     enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0);
106     pixel *ptrs[3] = { p[0], p[1], p[2] };
107     const int sbsz = 16;
108     const int sb64w = f->sb128w << 1;
109     const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8;
110     const enum Dav1dPixelLayout layout = f->cur.p.layout;
111     const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
112     const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
113     const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
114 
115     for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) {
116         const int tf = f->lf.top_pre_cdef_toggle;
117         const int by_idx = by & 30;
118         if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM;
119 
120         if (edges & CDEF_HAVE_BOTTOM) // backup pre-filter data for next iteration
121             backup2lines(f->lf.cdef_line[!tf], ptrs, f->cur.stride, layout);
122 
123         ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]);
124         pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] };
125         edges &= ~CDEF_HAVE_LEFT;
126         edges |= CDEF_HAVE_RIGHT;
127         enum Backup2x8Flags prev_flag = 0;
128         for (int sbx = 0, last_skip = 1; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) {
129             const int sb128x = sbx >> 1;
130             const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
131             const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
132             if (cdef_idx == -1 ||
133                 (!f->frame_hdr->cdef.y_strength[cdef_idx] &&
134                  !f->frame_hdr->cdef.uv_strength[cdef_idx]))
135             {
136                 last_skip = 1;
137                 goto next_sb;
138             }
139 
140             const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx];
141             const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx];
142             const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1);
143 
144             const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8;
145             int y_sec_lvl = y_lvl & 3;
146             y_sec_lvl += y_sec_lvl == 3;
147             y_sec_lvl <<= bitdepth_min_8;
148 
149             const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8;
150             int uv_sec_lvl = uv_lvl & 3;
151             uv_sec_lvl += uv_sec_lvl == 3;
152             uv_sec_lvl <<= bitdepth_min_8;
153 
154             pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
155             for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
156                  bx += 2, edges |= CDEF_HAVE_LEFT)
157             {
158                 if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT;
159 
160                 // check if this 8x8 block had any coded coefficients; if not,
161                 // go to the next block
162                 const unsigned bx_mask = 3U << (bx & 14);
163                 const int bx_idx = (bx & 16) >> 4;
164                 if (!((lflvl[sb128x].noskip_mask[by_idx + 0][bx_idx] |
165                        lflvl[sb128x].noskip_mask[by_idx + 1][bx_idx]) & bx_mask))
166                 {
167                     last_skip = 1;
168                     goto next_b;
169                 }
170                 const int do_left = last_skip ? flag : (prev_flag ^ flag) & flag;
171                 prev_flag = flag;
172                 if (do_left && edges & CDEF_HAVE_LEFT) {
173                     // we didn't backup the prefilter data because it wasn't
174                     // there, so do it here instead
175                     backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left);
176                 }
177                 if (edges & CDEF_HAVE_RIGHT) {
178                     // backup pre-filter data for next iteration
179                     backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, flag);
180                 }
181 
182                 int dir;
183                 unsigned variance;
184                 if (y_pri_lvl || uv_pri_lvl)
185                     dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
186                                         &variance HIGHBD_CALL_SUFFIX);
187 
188                 if (y_pri_lvl) {
189                     const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance);
190                     if (adj_y_pri_lvl || y_sec_lvl)
191                         dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
192                                         &f->lf.cdef_line[tf][0][bx * 4],
193                                         adj_y_pri_lvl, y_sec_lvl, dir,
194                                         damping, edges HIGHBD_CALL_SUFFIX);
195                 } else if (y_sec_lvl)
196                     dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
197                                     &f->lf.cdef_line[tf][0][bx * 4],
198                                     0, y_sec_lvl, 0,
199                                     damping, edges HIGHBD_CALL_SUFFIX);
200                 if (uv_lvl) {
201                     assert(layout != DAV1D_PIXEL_LAYOUT_I400);
202                     const int uvdir = uv_pri_lvl ? layout == DAV1D_PIXEL_LAYOUT_I422 ?
203                         ((const uint8_t[]) { 7, 0, 2, 4, 5, 6, 6, 6 })[dir] : dir : 0;
204                     for (int pl = 1; pl <= 2; pl++) {
205                         dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1], lr_bak[bit][pl],
206                                              &f->lf.cdef_line[tf][pl][bx * 4 >> ss_hor],
207                                              uv_pri_lvl, uv_sec_lvl, uvdir,
208                                              damping - 1, edges HIGHBD_CALL_SUFFIX);
209                     }
210                 }
211 
212                 bit ^= 1;
213                 last_skip = 0;
214 
215             next_b:
216                 bptrs[0] += 8;
217                 bptrs[1] += 8 >> ss_hor;
218                 bptrs[2] += 8 >> ss_hor;
219             }
220 
221         next_sb:
222             iptrs[0] += sbsz * 4;
223             iptrs[1] += sbsz * 4 >> ss_hor;
224             iptrs[2] += sbsz * 4 >> ss_hor;
225         }
226 
227         ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
228         ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
229         ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
230         f->lf.top_pre_cdef_toggle ^= 1;
231     }
232 }
233