1 /*****************************************************************************
2 * Copyright (C) 2013-2020 MulticoreWare, Inc
3 *
4 * Authors: Steve Borho <steve@borho.org>
5 * Min Chen <chenm003@163.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20 *
21 * This program is also available under a commercial proprietary license.
22 * For more information, contact us at license @ x265.com.
23 *****************************************************************************/
24
25
26 #include "common.h"
27 #include "yuv.h"
28 #include "shortyuv.h"
29 #include "picyuv.h"
30 #include "primitives.h"
31
32 using namespace X265_NS;
33
Yuv()34 Yuv::Yuv()
35 {
36 m_buf[0] = NULL;
37 m_buf[1] = NULL;
38 m_buf[2] = NULL;
39 }
40
create(uint32_t size,int csp)41 bool Yuv::create(uint32_t size, int csp)
42 {
43 m_csp = csp;
44 m_hChromaShift = CHROMA_H_SHIFT(csp);
45 m_vChromaShift = CHROMA_V_SHIFT(csp);
46
47 m_size = size;
48 m_part = partitionFromSizes(size, size);
49
50 for (int i = 0; i < 2; i++)
51 for (int j = 0; j < MAX_NUM_REF; j++)
52 for (int k = 0; k < INTEGRAL_PLANE_NUM; k++)
53 m_integral[i][j][k] = NULL;
54
55 if (csp == X265_CSP_I400)
56 {
57 CHECKED_MALLOC(m_buf[0], pixel, size * size + 8);
58 m_buf[1] = m_buf[2] = 0;
59 m_csize = 0;
60 return true;
61 }
62 else
63 {
64 m_csize = size >> m_hChromaShift;
65
66 size_t sizeL = size * size;
67 size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift);
68
69 X265_CHECK((sizeC & 15) == 0, "invalid size");
70
71 // memory allocation (padded for SIMD reads)
72 CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8);
73 m_buf[1] = m_buf[0] + sizeL;
74 m_buf[2] = m_buf[0] + sizeL + sizeC;
75 return true;
76 }
77
78 fail:
79 return false;
80 }
81
destroy()82 void Yuv::destroy()
83 {
84 X265_FREE(m_buf[0]);
85 }
86
copyToPicYuv(PicYuv & dstPic,uint32_t cuAddr,uint32_t absPartIdx) const87 void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const
88 {
89 pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
90 primitives.cu[m_part].copy_pp(dstY, dstPic.m_stride, m_buf[0], m_size);
91 if (m_csp != X265_CSP_I400)
92 {
93 pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
94 pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
95 primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstPic.m_strideC, m_buf[1], m_csize);
96 primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstPic.m_strideC, m_buf[2], m_csize);
97 }
98 }
99
copyFromPicYuv(const PicYuv & srcPic,uint32_t cuAddr,uint32_t absPartIdx)100 void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx)
101 {
102 const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx);
103 primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcY, srcPic.m_stride);
104 if (m_csp != X265_CSP_I400)
105 {
106 const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
107 const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
108 primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcU, srcPic.m_strideC);
109 primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcV, srcPic.m_strideC);
110 }
111 }
112
copyFromYuv(const Yuv & srcYuv)113 void Yuv::copyFromYuv(const Yuv& srcYuv)
114 {
115 X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n");
116
117 primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size);
118 if (m_csp != X265_CSP_I400)
119 {
120 primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
121 primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
122 }
123 }
124
125 /* This version is intended for use by ME, which required FENC_STRIDE for luma fenc pixels */
copyPUFromYuv(const Yuv & srcYuv,uint32_t absPartIdx,int partEnum,bool bChroma)126 void Yuv::copyPUFromYuv(const Yuv& srcYuv, uint32_t absPartIdx, int partEnum, bool bChroma)
127 {
128 X265_CHECK(m_size == FENC_STRIDE && m_size >= srcYuv.m_size, "PU buffer size mismatch\n");
129
130 const pixel* srcY = srcYuv.m_buf[0] + getAddrOffset(absPartIdx, srcYuv.m_size);
131 primitives.pu[partEnum].copy_pp(m_buf[0], m_size, srcY, srcYuv.m_size);
132
133 if (bChroma)
134 {
135 const pixel* srcU = srcYuv.m_buf[1] + srcYuv.getChromaAddrOffset(absPartIdx);
136 const pixel* srcV = srcYuv.m_buf[2] + srcYuv.getChromaAddrOffset(absPartIdx);
137 primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[1], m_csize, srcU, srcYuv.m_csize);
138 primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[2], m_csize, srcV, srcYuv.m_csize);
139 }
140 }
141
copyToPartYuv(Yuv & dstYuv,uint32_t absPartIdx) const142 void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const
143 {
144 pixel* dstY = dstYuv.getLumaAddr(absPartIdx);
145 primitives.cu[m_part].copy_pp(dstY, dstYuv.m_size, m_buf[0], m_size);
146 if (m_csp != X265_CSP_I400)
147 {
148 pixel* dstU = dstYuv.getCbAddr(absPartIdx);
149 pixel* dstV = dstYuv.getCrAddr(absPartIdx);
150 primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstYuv.m_csize, m_buf[1], m_csize);
151 primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstYuv.m_csize, m_buf[2], m_csize);
152 }
153 }
154
copyPartToYuv(Yuv & dstYuv,uint32_t absPartIdx) const155 void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const
156 {
157 pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size);
158 pixel* dstY = dstYuv.m_buf[0];
159 primitives.cu[dstYuv.m_part].copy_pp(dstY, dstYuv.m_size, srcY, m_size);
160 if (m_csp != X265_CSP_I400)
161 {
162 pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
163 pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
164 pixel* dstU = dstYuv.m_buf[1];
165 pixel* dstV = dstYuv.m_buf[2];
166 primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize);
167 primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize);
168 }
169 }
170
addClip(const Yuv & srcYuv0,const ShortYuv & srcYuv1,uint32_t log2SizeL,int picCsp)171 void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL, int picCsp)
172 {
173 primitives.cu[log2SizeL - 2].add_ps[(m_size % 64 == 0) && (srcYuv0.m_size % 64 == 0) && (srcYuv1.m_size % 64 == 0)](m_buf[0],
174 m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
175 if (m_csp != X265_CSP_I400 && picCsp != X265_CSP_I400)
176 {
177 primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps[(m_csize % 64 == 0) && (srcYuv0.m_csize % 64 ==0) && (srcYuv1.m_csize % 64 == 0)](m_buf[1],
178 m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
179 primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps[(m_csize % 64 == 0) && (srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0)](m_buf[2],
180 m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
181 }
182 if (picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)
183 {
184 primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv0.m_csize);
185 primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv0.m_csize);
186 }
187 }
188
addAvg(const ShortYuv & srcYuv0,const ShortYuv & srcYuv1,uint32_t absPartIdx,uint32_t width,uint32_t height,bool bLuma,bool bChroma)189 void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
190 {
191 int part = partitionFromSizes(width, height);
192
193 if (bLuma)
194 {
195 const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx);
196 const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx);
197 pixel* dstY = getLumaAddr(absPartIdx);
198 primitives.pu[part].addAvg[(srcYuv0.m_size % 64 == 0) && (srcYuv1.m_size % 64 == 0) && (m_size % 64 == 0)](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
199 }
200 if (bChroma)
201 {
202 const int16_t* srcU0 = srcYuv0.getCbAddr(absPartIdx);
203 const int16_t* srcV0 = srcYuv0.getCrAddr(absPartIdx);
204 const int16_t* srcU1 = srcYuv1.getCbAddr(absPartIdx);
205 const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx);
206 pixel* dstU = getCbAddr(absPartIdx);
207 pixel* dstV = getCrAddr(absPartIdx);
208 primitives.chroma[m_csp].pu[part].addAvg[(srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0) && (m_csize % 64 == 0)](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
209 primitives.chroma[m_csp].pu[part].addAvg[(srcYuv0.m_csize % 64 == 0) && (srcYuv1.m_csize % 64 == 0) && (m_csize % 64 == 0)](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
210 }
211 }
212
copyPartToPartLuma(Yuv & dstYuv,uint32_t absPartIdx,uint32_t log2Size) const213 void Yuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
214 {
215 const pixel* src = getLumaAddr(absPartIdx);
216 pixel* dst = dstYuv.getLumaAddr(absPartIdx);
217 primitives.cu[log2Size - 2].copy_pp(dst, dstYuv.m_size, src, m_size);
218 }
219
copyPartToPartChroma(Yuv & dstYuv,uint32_t absPartIdx,uint32_t log2SizeL) const220 void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
221 {
222 const pixel* srcU = getCbAddr(absPartIdx);
223 const pixel* srcV = getCrAddr(absPartIdx);
224 pixel* dstU = dstYuv.getCbAddr(absPartIdx);
225 pixel* dstV = dstYuv.getCrAddr(absPartIdx);
226 primitives.chroma[m_csp].cu[log2SizeL - 2].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize);
227 primitives.chroma[m_csp].cu[log2SizeL - 2].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize);
228 }
229