1 /*****************************************************************************
2  * Copyright (C) 2013-2020 MulticoreWare, Inc
3  *
4  * Authors: Steve Borho <steve@borho.org>
5  *          Min Chen <chenm003@163.com>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20  *
21  * This program is also available under a commercial proprietary license.
22  * For more information, contact us at license @ x265.com.
23  *****************************************************************************/
24 
25 
26 #include "common.h"
27 #include "yuv.h"
28 #include "shortyuv.h"
29 #include "picyuv.h"
30 #include "primitives.h"
31 
32 using namespace X265_NS;
33 
Yuv()34 Yuv::Yuv()
35 {
36     m_buf[0] = NULL;
37     m_buf[1] = NULL;
38     m_buf[2] = NULL;
39 }
40 
create(uint32_t size,int csp)41 bool Yuv::create(uint32_t size, int csp)
42 {
43     m_csp = csp;
44     m_hChromaShift = CHROMA_H_SHIFT(csp);
45     m_vChromaShift = CHROMA_V_SHIFT(csp);
46 
47     m_size  = size;
48     m_part = partitionFromSizes(size, size);
49 
50     for (int i = 0; i < 2; i++)
51         for (int j = 0; j < MAX_NUM_REF; j++)
52             for (int k = 0; k < INTEGRAL_PLANE_NUM; k++)
53                 m_integral[i][j][k] = NULL;
54 
55     if (csp == X265_CSP_I400)
56     {
57         CHECKED_MALLOC(m_buf[0], pixel, size * size + 8);
58         m_buf[1] = m_buf[2] = 0;
59         m_csize = 0;
60         return true;
61     }
62     else
63     {
64         m_csize = size >> m_hChromaShift;
65 
66         size_t sizeL = size * size;
67         size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift);
68 
69         X265_CHECK((sizeC & 15) == 0, "invalid size");
70 
71         // memory allocation (padded for SIMD reads)
72         CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8);
73         m_buf[1] = m_buf[0] + sizeL;
74         m_buf[2] = m_buf[0] + sizeL + sizeC;
75         return true;
76     }
77 
78 fail:
79     return false;
80 }
81 
destroy()82 void Yuv::destroy()
83 {
84     X265_FREE(m_buf[0]);
85 }
86 
copyToPicYuv(PicYuv & dstPic,uint32_t cuAddr,uint32_t absPartIdx) const87 void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const
88 {
89     pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
90     primitives.cu[m_part].copy_pp(dstY, dstPic.m_stride, m_buf[0], m_size);
91     if (m_csp != X265_CSP_I400)
92     {
93         pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
94         pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
95         primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstPic.m_strideC, m_buf[1], m_csize);
96         primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstPic.m_strideC, m_buf[2], m_csize);
97     }
98 }
99 
copyFromPicYuv(const PicYuv & srcPic,uint32_t cuAddr,uint32_t absPartIdx)100 void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx)
101 {
102     const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx);
103     primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcY, srcPic.m_stride);
104     if (m_csp != X265_CSP_I400)
105     {
106         const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
107         const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
108         primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcU, srcPic.m_strideC);
109         primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcV, srcPic.m_strideC);
110     }
111 }
112 
copyFromYuv(const Yuv & srcYuv)113 void Yuv::copyFromYuv(const Yuv& srcYuv)
114 {
115     X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n");
116 
117     primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size);
118     if (m_csp != X265_CSP_I400)
119     {
120         primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
121         primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
122     }
123 }
124 
125 /* This version is intended for use by ME, which required FENC_STRIDE for luma fenc pixels */
copyPUFromYuv(const Yuv & srcYuv,uint32_t absPartIdx,int partEnum,bool bChroma)126 void Yuv::copyPUFromYuv(const Yuv& srcYuv, uint32_t absPartIdx, int partEnum, bool bChroma)
127 {
128     X265_CHECK(m_size == FENC_STRIDE && m_size >= srcYuv.m_size, "PU buffer size mismatch\n");
129 
130     const pixel* srcY = srcYuv.m_buf[0] + getAddrOffset(absPartIdx, srcYuv.m_size);
131     primitives.pu[partEnum].copy_pp(m_buf[0], m_size, srcY, srcYuv.m_size);
132 
133     if (bChroma)
134     {
135         const pixel* srcU = srcYuv.m_buf[1] + srcYuv.getChromaAddrOffset(absPartIdx);
136         const pixel* srcV = srcYuv.m_buf[2] + srcYuv.getChromaAddrOffset(absPartIdx);
137         primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[1], m_csize, srcU, srcYuv.m_csize);
138         primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[2], m_csize, srcV, srcYuv.m_csize);
139     }
140 }
141 
copyToPartYuv(Yuv & dstYuv,uint32_t absPartIdx) const142 void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const
143 {
144     pixel* dstY = dstYuv.getLumaAddr(absPartIdx);
145     primitives.cu[m_part].copy_pp(dstY, dstYuv.m_size, m_buf[0], m_size);
146     if (m_csp != X265_CSP_I400)
147     {
148         pixel* dstU = dstYuv.getCbAddr(absPartIdx);
149         pixel* dstV = dstYuv.getCrAddr(absPartIdx);
150         primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstYuv.m_csize, m_buf[1], m_csize);
151         primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstYuv.m_csize, m_buf[2], m_csize);
152     }
153 }
154 
copyPartToYuv(Yuv & dstYuv,uint32_t absPartIdx) const155 void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const
156 {
157     pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size);
158     pixel* dstY = dstYuv.m_buf[0];
159     primitives.cu[dstYuv.m_part].copy_pp(dstY, dstYuv.m_size, srcY, m_size);
160     if (m_csp != X265_CSP_I400)
161     {
162         pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
163         pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
164         pixel* dstU = dstYuv.m_buf[1];
165         pixel* dstV = dstYuv.m_buf[2];
166         primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize);
167         primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize);
168     }
169 }
170 
addClip(const Yuv & srcYuv0,const ShortYuv & srcYuv1,uint32_t log2SizeL,int picCsp)171 void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL, int picCsp)
172 {
173     primitives.cu[log2SizeL - 2].add_ps[(m_size % 64 == 0) && (srcYuv0.m_size % 64 == 0) && (srcYuv1.m_size % 64 == 0)](m_buf[0],
174                                          m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
175     if (m_csp != X265_CSP_I400 && picCsp != X265_CSP_I400)
176     {
177         primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps[(m_csize % 64 == 0) && (srcYuv0.m_csize % 64 ==0) && (srcYuv1.m_csize % 64 == 0)](m_buf[1],
178                                                            m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
179         primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps[(m_csize % 64 == 0) && (srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0)](m_buf[2],
180                                                            m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
181     }
182     if (picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)
183     {
184         primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv0.m_csize);
185         primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv0.m_csize);
186     }
187 }
188 
addAvg(const ShortYuv & srcYuv0,const ShortYuv & srcYuv1,uint32_t absPartIdx,uint32_t width,uint32_t height,bool bLuma,bool bChroma)189 void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
190 {
191     int part = partitionFromSizes(width, height);
192 
193     if (bLuma)
194     {
195         const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx);
196         const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx);
197         pixel* dstY = getLumaAddr(absPartIdx);
198         primitives.pu[part].addAvg[(srcYuv0.m_size % 64 == 0) && (srcYuv1.m_size % 64 == 0) && (m_size % 64 == 0)](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
199     }
200     if (bChroma)
201     {
202         const int16_t* srcU0 = srcYuv0.getCbAddr(absPartIdx);
203         const int16_t* srcV0 = srcYuv0.getCrAddr(absPartIdx);
204         const int16_t* srcU1 = srcYuv1.getCbAddr(absPartIdx);
205         const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx);
206         pixel* dstU = getCbAddr(absPartIdx);
207         pixel* dstV = getCrAddr(absPartIdx);
208         primitives.chroma[m_csp].pu[part].addAvg[(srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0) && (m_csize % 64 == 0)](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
209         primitives.chroma[m_csp].pu[part].addAvg[(srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0) && (m_csize % 64 == 0)](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
210     }
211 }
212 
copyPartToPartLuma(Yuv & dstYuv,uint32_t absPartIdx,uint32_t log2Size) const213 void Yuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
214 {
215     const pixel* src = getLumaAddr(absPartIdx);
216     pixel* dst = dstYuv.getLumaAddr(absPartIdx);
217     primitives.cu[log2Size - 2].copy_pp(dst, dstYuv.m_size, src, m_size);
218 }
219 
copyPartToPartChroma(Yuv & dstYuv,uint32_t absPartIdx,uint32_t log2SizeL) const220 void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
221 {
222     const pixel* srcU = getCbAddr(absPartIdx);
223     const pixel* srcV = getCrAddr(absPartIdx);
224     pixel* dstU = dstYuv.getCbAddr(absPartIdx);
225     pixel* dstV = dstYuv.getCrAddr(absPartIdx);
226     primitives.chroma[m_csp].cu[log2SizeL - 2].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize);
227     primitives.chroma[m_csp].cu[log2SizeL - 2].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize);
228 }
229