1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
5
6 #include "lib/jxl/dec_xyb.h"
7
8 #include <string.h>
9
10 #undef HWY_TARGET_INCLUDE
11 #define HWY_TARGET_INCLUDE "lib/jxl/dec_xyb.cc"
12 #include <hwy/foreach_target.h>
13 #include <hwy/highway.h>
14
15 #include "lib/jxl/base/compiler_specific.h"
16 #include "lib/jxl/base/profiler.h"
17 #include "lib/jxl/base/status.h"
18 #include "lib/jxl/dec_group_border.h"
19 #include "lib/jxl/dec_xyb-inl.h"
20 #include "lib/jxl/fields.h"
21 #include "lib/jxl/image.h"
22 #include "lib/jxl/opsin_params.h"
23 #include "lib/jxl/quantizer.h"
24 #include "lib/jxl/sanitizers.h"
25 HWY_BEFORE_NAMESPACE();
26 namespace jxl {
27 namespace HWY_NAMESPACE {
28
29 // These templates are not found via ADL.
30 using hwy::HWY_NAMESPACE::Broadcast;
31
OpsinToLinearInplace(Image3F * JXL_RESTRICT inout,ThreadPool * pool,const OpsinParams & opsin_params)32 void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
33 const OpsinParams& opsin_params) {
34 PROFILER_FUNC;
35 JXL_CHECK_IMAGE_INITIALIZED(*inout, Rect(*inout));
36
37 const size_t xsize = inout->xsize(); // not padded
38 JXL_CHECK(RunOnPool(
39 pool, 0, inout->ysize(), ThreadPool::NoInit,
40 [&](const uint32_t task, size_t /* thread */) {
41 const size_t y = task;
42
43 // Faster than adding via ByteOffset at end of loop.
44 float* JXL_RESTRICT row0 = inout->PlaneRow(0, y);
45 float* JXL_RESTRICT row1 = inout->PlaneRow(1, y);
46 float* JXL_RESTRICT row2 = inout->PlaneRow(2, y);
47
48 const HWY_FULL(float) d;
49
50 for (size_t x = 0; x < xsize; x += Lanes(d)) {
51 const auto in_opsin_x = Load(d, row0 + x);
52 const auto in_opsin_y = Load(d, row1 + x);
53 const auto in_opsin_b = Load(d, row2 + x);
54 auto linear_r = Undefined(d);
55 auto linear_g = Undefined(d);
56 auto linear_b = Undefined(d);
57 XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params,
58 &linear_r, &linear_g, &linear_b);
59
60 Store(linear_r, d, row0 + x);
61 Store(linear_g, d, row1 + x);
62 Store(linear_b, d, row2 + x);
63 }
64 },
65 "OpsinToLinear"));
66 }
67
68 // Same, but not in-place.
OpsinToLinear(const Image3F & opsin,const Rect & rect,ThreadPool * pool,Image3F * JXL_RESTRICT linear,const OpsinParams & opsin_params)69 void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
70 Image3F* JXL_RESTRICT linear,
71 const OpsinParams& opsin_params) {
72 PROFILER_FUNC;
73
74 JXL_ASSERT(SameSize(rect, *linear));
75 JXL_CHECK_IMAGE_INITIALIZED(opsin, rect);
76
77 JXL_CHECK(RunOnPool(
78 pool, 0, static_cast<int>(rect.ysize()), ThreadPool::NoInit,
79 [&](const uint32_t task, size_t /*thread*/) {
80 const size_t y = static_cast<size_t>(task);
81
82 // Faster than adding via ByteOffset at end of loop.
83 const float* JXL_RESTRICT row_opsin_0 = rect.ConstPlaneRow(opsin, 0, y);
84 const float* JXL_RESTRICT row_opsin_1 = rect.ConstPlaneRow(opsin, 1, y);
85 const float* JXL_RESTRICT row_opsin_2 = rect.ConstPlaneRow(opsin, 2, y);
86 float* JXL_RESTRICT row_linear_0 = linear->PlaneRow(0, y);
87 float* JXL_RESTRICT row_linear_1 = linear->PlaneRow(1, y);
88 float* JXL_RESTRICT row_linear_2 = linear->PlaneRow(2, y);
89
90 const HWY_FULL(float) d;
91
92 for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) {
93 const auto in_opsin_x = Load(d, row_opsin_0 + x);
94 const auto in_opsin_y = Load(d, row_opsin_1 + x);
95 const auto in_opsin_b = Load(d, row_opsin_2 + x);
96 auto linear_r = Undefined(d);
97 auto linear_g = Undefined(d);
98 auto linear_b = Undefined(d);
99 XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params,
100 &linear_r, &linear_g, &linear_b);
101
102 Store(linear_r, d, row_linear_0 + x);
103 Store(linear_g, d, row_linear_1 + x);
104 Store(linear_b, d, row_linear_2 + x);
105 }
106 },
107 "OpsinToLinear(Rect)"));
108 JXL_CHECK_IMAGE_INITIALIZED(*linear, rect);
109 }
110
111 // Transform YCbCr to RGB.
112 // Could be performed in-place (i.e. Y, Cb and Cr could alias R, B and B).
YcbcrToRgb(const Image3F & ycbcr,Image3F * rgb,const Rect & rect)113 void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) {
114 JXL_CHECK_IMAGE_INITIALIZED(ycbcr, rect);
115 const HWY_CAPPED(float, GroupBorderAssigner::kPaddingXRound) df;
116 const size_t S = Lanes(df); // Step.
117
118 const size_t xsize = rect.xsize();
119 const size_t ysize = rect.ysize();
120 if ((xsize == 0) || (ysize == 0)) return;
121
122 // Full-range BT.601 as defined by JFIF Clause 7:
123 // https://www.itu.int/rec/T-REC-T.871-201105-I/en
124 const auto c128 = Set(df, 128.0f / 255);
125 const auto crcr = Set(df, 1.402f);
126 const auto cgcb = Set(df, -0.114f * 1.772f / 0.587f);
127 const auto cgcr = Set(df, -0.299f * 1.402f / 0.587f);
128 const auto cbcb = Set(df, 1.772f);
129
130 for (size_t y = 0; y < ysize; y++) {
131 const float* y_row = rect.ConstPlaneRow(ycbcr, 1, y);
132 const float* cb_row = rect.ConstPlaneRow(ycbcr, 0, y);
133 const float* cr_row = rect.ConstPlaneRow(ycbcr, 2, y);
134 float* r_row = rect.PlaneRow(rgb, 0, y);
135 float* g_row = rect.PlaneRow(rgb, 1, y);
136 float* b_row = rect.PlaneRow(rgb, 2, y);
137 for (size_t x = 0; x < xsize; x += S) {
138 const auto y_vec = Load(df, y_row + x) + c128;
139 const auto cb_vec = Load(df, cb_row + x);
140 const auto cr_vec = Load(df, cr_row + x);
141 const auto r_vec = crcr * cr_vec + y_vec;
142 const auto g_vec = cgcr * cr_vec + cgcb * cb_vec + y_vec;
143 const auto b_vec = cbcb * cb_vec + y_vec;
144 Store(r_vec, df, r_row + x);
145 Store(g_vec, df, g_row + x);
146 Store(b_vec, df, b_row + x);
147 }
148 }
149 JXL_CHECK_IMAGE_INITIALIZED(*rgb, rect);
150 }
151
152 // NOLINTNEXTLINE(google-readability-namespace-comments)
153 } // namespace HWY_NAMESPACE
154 } // namespace jxl
155 HWY_AFTER_NAMESPACE();
156
157 #if HWY_ONCE
158 namespace jxl {
159
160 HWY_EXPORT(OpsinToLinearInplace);
OpsinToLinearInplace(Image3F * JXL_RESTRICT inout,ThreadPool * pool,const OpsinParams & opsin_params)161 void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
162 const OpsinParams& opsin_params) {
163 return HWY_DYNAMIC_DISPATCH(OpsinToLinearInplace)(inout, pool, opsin_params);
164 }
165
166 HWY_EXPORT(OpsinToLinear);
OpsinToLinear(const Image3F & opsin,const Rect & rect,ThreadPool * pool,Image3F * JXL_RESTRICT linear,const OpsinParams & opsin_params)167 void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
168 Image3F* JXL_RESTRICT linear,
169 const OpsinParams& opsin_params) {
170 return HWY_DYNAMIC_DISPATCH(OpsinToLinear)(opsin, rect, pool, linear,
171 opsin_params);
172 }
173
174 HWY_EXPORT(YcbcrToRgb);
YcbcrToRgb(const Image3F & ycbcr,Image3F * rgb,const Rect & rect)175 void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) {
176 return HWY_DYNAMIC_DISPATCH(YcbcrToRgb)(ycbcr, rgb, rect);
177 }
178
179 HWY_EXPORT(HasFastXYBTosRGB8);
HasFastXYBTosRGB8()180 bool HasFastXYBTosRGB8() { return HWY_DYNAMIC_DISPATCH(HasFastXYBTosRGB8)(); }
181
182 HWY_EXPORT(FastXYBTosRGB8);
FastXYBTosRGB8(const float * input[4],uint8_t * output,bool is_rgba,size_t xsize)183 void FastXYBTosRGB8(const float* input[4], uint8_t* output, bool is_rgba,
184 size_t xsize) {
185 return HWY_DYNAMIC_DISPATCH(FastXYBTosRGB8)(input, output, is_rgba, xsize);
186 }
187
Init(float intensity_target)188 void OpsinParams::Init(float intensity_target) {
189 InitSIMDInverseMatrix(GetOpsinAbsorbanceInverseMatrix(), inverse_opsin_matrix,
190 intensity_target);
191 memcpy(opsin_biases, kNegOpsinAbsorbanceBiasRGB,
192 sizeof(kNegOpsinAbsorbanceBiasRGB));
193 memcpy(quant_biases, kDefaultQuantBias, sizeof(kDefaultQuantBias));
194 for (size_t c = 0; c < 4; c++) {
195 opsin_biases_cbrt[c] = cbrtf(opsin_biases[c]);
196 }
197 }
198
Set(const CodecMetadata & metadata,const ColorEncoding & default_enc)199 Status OutputEncodingInfo::Set(const CodecMetadata& metadata,
200 const ColorEncoding& default_enc) {
201 const auto& im = metadata.transform_data.opsin_inverse_matrix;
202 float inverse_matrix[9];
203 memcpy(inverse_matrix, im.inverse_matrix, sizeof(inverse_matrix));
204 intensity_target = metadata.m.IntensityTarget();
205 if (metadata.m.xyb_encoded) {
206 const auto& orig_color_encoding = metadata.m.color_encoding;
207 color_encoding = default_enc;
208 // Figure out if we can output to this color encoding.
209 do {
210 if (!orig_color_encoding.HaveFields()) break;
211 // TODO(veluca): keep in sync with dec_reconstruct.cc
212 if (!orig_color_encoding.tf.IsPQ() && !orig_color_encoding.tf.IsSRGB() &&
213 !orig_color_encoding.tf.IsGamma() &&
214 !orig_color_encoding.tf.IsLinear() &&
215 !orig_color_encoding.tf.IsHLG() && !orig_color_encoding.tf.IsDCI() &&
216 !orig_color_encoding.tf.Is709()) {
217 break;
218 }
219 if (orig_color_encoding.tf.IsGamma()) {
220 inverse_gamma = orig_color_encoding.tf.GetGamma();
221 }
222 if (orig_color_encoding.tf.IsDCI()) {
223 inverse_gamma = 1.0f / 2.6f;
224 }
225 if (orig_color_encoding.IsGray() &&
226 orig_color_encoding.white_point != WhitePoint::kD65) {
227 // TODO(veluca): figure out what should happen here.
228 break;
229 }
230
231 if ((orig_color_encoding.primaries != Primaries::kSRGB ||
232 orig_color_encoding.white_point != WhitePoint::kD65) &&
233 !orig_color_encoding.IsGray()) {
234 all_default_opsin = false;
235 float srgb_to_xyzd50[9];
236 const auto& srgb = ColorEncoding::SRGB(/*is_gray=*/false);
237 JXL_CHECK(PrimariesToXYZD50(
238 srgb.GetPrimaries().r.x, srgb.GetPrimaries().r.y,
239 srgb.GetPrimaries().g.x, srgb.GetPrimaries().g.y,
240 srgb.GetPrimaries().b.x, srgb.GetPrimaries().b.y,
241 srgb.GetWhitePoint().x, srgb.GetWhitePoint().y, srgb_to_xyzd50));
242 float original_to_xyz[3][3];
243 JXL_RETURN_IF_ERROR(PrimariesToXYZ(
244 orig_color_encoding.GetPrimaries().r.x,
245 orig_color_encoding.GetPrimaries().r.y,
246 orig_color_encoding.GetPrimaries().g.x,
247 orig_color_encoding.GetPrimaries().g.y,
248 orig_color_encoding.GetPrimaries().b.x,
249 orig_color_encoding.GetPrimaries().b.y,
250 orig_color_encoding.GetWhitePoint().x,
251 orig_color_encoding.GetWhitePoint().y, &original_to_xyz[0][0]));
252 memcpy(luminances, original_to_xyz[1], sizeof luminances);
253 float adapt_to_d50[9];
254 JXL_RETURN_IF_ERROR(AdaptToXYZD50(orig_color_encoding.GetWhitePoint().x,
255 orig_color_encoding.GetWhitePoint().y,
256 adapt_to_d50));
257 float xyzd50_to_original[9];
258 MatMul(adapt_to_d50, &original_to_xyz[0][0], 3, 3, 3,
259 xyzd50_to_original);
260 JXL_RETURN_IF_ERROR(Inv3x3Matrix(xyzd50_to_original));
261 float srgb_to_original[9];
262 MatMul(xyzd50_to_original, srgb_to_xyzd50, 3, 3, 3, srgb_to_original);
263 MatMul(srgb_to_original, im.inverse_matrix, 3, 3, 3, inverse_matrix);
264 }
265 color_encoding = orig_color_encoding;
266 color_encoding_is_original = true;
267 if (color_encoding.tf.IsPQ()) {
268 intensity_target = 10000;
269 }
270 } while (false);
271 } else {
272 color_encoding = metadata.m.color_encoding;
273 }
274 if (std::abs(intensity_target - 255.0) > 0.1f || !im.all_default) {
275 all_default_opsin = false;
276 }
277 InitSIMDInverseMatrix(inverse_matrix, opsin_params.inverse_opsin_matrix,
278 intensity_target);
279 std::copy(std::begin(im.opsin_biases), std::end(im.opsin_biases),
280 opsin_params.opsin_biases);
281 for (int i = 0; i < 3; ++i) {
282 opsin_params.opsin_biases_cbrt[i] = cbrtf(opsin_params.opsin_biases[i]);
283 }
284 opsin_params.opsin_biases_cbrt[3] = opsin_params.opsin_biases[3] = 1;
285 std::copy(std::begin(im.quant_biases), std::end(im.quant_biases),
286 opsin_params.quant_biases);
287 return true;
288 }
289
290 } // namespace jxl
291 #endif // HWY_ONCE
292