1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
5 
6 #include "lib/jxl/dec_xyb.h"
7 
8 #include <string.h>
9 
10 #undef HWY_TARGET_INCLUDE
11 #define HWY_TARGET_INCLUDE "lib/jxl/dec_xyb.cc"
12 #include <hwy/foreach_target.h>
13 #include <hwy/highway.h>
14 
15 #include "lib/jxl/base/compiler_specific.h"
16 #include "lib/jxl/base/profiler.h"
17 #include "lib/jxl/base/status.h"
18 #include "lib/jxl/dec_group_border.h"
19 #include "lib/jxl/dec_xyb-inl.h"
20 #include "lib/jxl/fields.h"
21 #include "lib/jxl/image.h"
22 #include "lib/jxl/opsin_params.h"
23 #include "lib/jxl/quantizer.h"
24 #include "lib/jxl/sanitizers.h"
25 HWY_BEFORE_NAMESPACE();
26 namespace jxl {
27 namespace HWY_NAMESPACE {
28 
29 // These templates are not found via ADL.
30 using hwy::HWY_NAMESPACE::Broadcast;
31 
OpsinToLinearInplace(Image3F * JXL_RESTRICT inout,ThreadPool * pool,const OpsinParams & opsin_params)32 void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
33                           const OpsinParams& opsin_params) {
34   PROFILER_FUNC;
35   JXL_CHECK_IMAGE_INITIALIZED(*inout, Rect(*inout));
36 
37   const size_t xsize = inout->xsize();  // not padded
38   JXL_CHECK(RunOnPool(
39       pool, 0, inout->ysize(), ThreadPool::NoInit,
40       [&](const uint32_t task, size_t /* thread */) {
41         const size_t y = task;
42 
43         // Faster than adding via ByteOffset at end of loop.
44         float* JXL_RESTRICT row0 = inout->PlaneRow(0, y);
45         float* JXL_RESTRICT row1 = inout->PlaneRow(1, y);
46         float* JXL_RESTRICT row2 = inout->PlaneRow(2, y);
47 
48         const HWY_FULL(float) d;
49 
50         for (size_t x = 0; x < xsize; x += Lanes(d)) {
51           const auto in_opsin_x = Load(d, row0 + x);
52           const auto in_opsin_y = Load(d, row1 + x);
53           const auto in_opsin_b = Load(d, row2 + x);
54           auto linear_r = Undefined(d);
55           auto linear_g = Undefined(d);
56           auto linear_b = Undefined(d);
57           XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params,
58                    &linear_r, &linear_g, &linear_b);
59 
60           Store(linear_r, d, row0 + x);
61           Store(linear_g, d, row1 + x);
62           Store(linear_b, d, row2 + x);
63         }
64       },
65       "OpsinToLinear"));
66 }
67 
68 // Same, but not in-place.
OpsinToLinear(const Image3F & opsin,const Rect & rect,ThreadPool * pool,Image3F * JXL_RESTRICT linear,const OpsinParams & opsin_params)69 void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
70                    Image3F* JXL_RESTRICT linear,
71                    const OpsinParams& opsin_params) {
72   PROFILER_FUNC;
73 
74   JXL_ASSERT(SameSize(rect, *linear));
75   JXL_CHECK_IMAGE_INITIALIZED(opsin, rect);
76 
77   JXL_CHECK(RunOnPool(
78       pool, 0, static_cast<int>(rect.ysize()), ThreadPool::NoInit,
79       [&](const uint32_t task, size_t /*thread*/) {
80         const size_t y = static_cast<size_t>(task);
81 
82         // Faster than adding via ByteOffset at end of loop.
83         const float* JXL_RESTRICT row_opsin_0 = rect.ConstPlaneRow(opsin, 0, y);
84         const float* JXL_RESTRICT row_opsin_1 = rect.ConstPlaneRow(opsin, 1, y);
85         const float* JXL_RESTRICT row_opsin_2 = rect.ConstPlaneRow(opsin, 2, y);
86         float* JXL_RESTRICT row_linear_0 = linear->PlaneRow(0, y);
87         float* JXL_RESTRICT row_linear_1 = linear->PlaneRow(1, y);
88         float* JXL_RESTRICT row_linear_2 = linear->PlaneRow(2, y);
89 
90         const HWY_FULL(float) d;
91 
92         for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) {
93           const auto in_opsin_x = Load(d, row_opsin_0 + x);
94           const auto in_opsin_y = Load(d, row_opsin_1 + x);
95           const auto in_opsin_b = Load(d, row_opsin_2 + x);
96           auto linear_r = Undefined(d);
97           auto linear_g = Undefined(d);
98           auto linear_b = Undefined(d);
99           XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params,
100                    &linear_r, &linear_g, &linear_b);
101 
102           Store(linear_r, d, row_linear_0 + x);
103           Store(linear_g, d, row_linear_1 + x);
104           Store(linear_b, d, row_linear_2 + x);
105         }
106       },
107       "OpsinToLinear(Rect)"));
108   JXL_CHECK_IMAGE_INITIALIZED(*linear, rect);
109 }
110 
111 // Transform YCbCr to RGB.
112 // Could be performed in-place (i.e. Y, Cb and Cr could alias R, B and B).
YcbcrToRgb(const Image3F & ycbcr,Image3F * rgb,const Rect & rect)113 void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) {
114   JXL_CHECK_IMAGE_INITIALIZED(ycbcr, rect);
115   const HWY_CAPPED(float, GroupBorderAssigner::kPaddingXRound) df;
116   const size_t S = Lanes(df);  // Step.
117 
118   const size_t xsize = rect.xsize();
119   const size_t ysize = rect.ysize();
120   if ((xsize == 0) || (ysize == 0)) return;
121 
122   // Full-range BT.601 as defined by JFIF Clause 7:
123   // https://www.itu.int/rec/T-REC-T.871-201105-I/en
124   const auto c128 = Set(df, 128.0f / 255);
125   const auto crcr = Set(df, 1.402f);
126   const auto cgcb = Set(df, -0.114f * 1.772f / 0.587f);
127   const auto cgcr = Set(df, -0.299f * 1.402f / 0.587f);
128   const auto cbcb = Set(df, 1.772f);
129 
130   for (size_t y = 0; y < ysize; y++) {
131     const float* y_row = rect.ConstPlaneRow(ycbcr, 1, y);
132     const float* cb_row = rect.ConstPlaneRow(ycbcr, 0, y);
133     const float* cr_row = rect.ConstPlaneRow(ycbcr, 2, y);
134     float* r_row = rect.PlaneRow(rgb, 0, y);
135     float* g_row = rect.PlaneRow(rgb, 1, y);
136     float* b_row = rect.PlaneRow(rgb, 2, y);
137     for (size_t x = 0; x < xsize; x += S) {
138       const auto y_vec = Load(df, y_row + x) + c128;
139       const auto cb_vec = Load(df, cb_row + x);
140       const auto cr_vec = Load(df, cr_row + x);
141       const auto r_vec = crcr * cr_vec + y_vec;
142       const auto g_vec = cgcr * cr_vec + cgcb * cb_vec + y_vec;
143       const auto b_vec = cbcb * cb_vec + y_vec;
144       Store(r_vec, df, r_row + x);
145       Store(g_vec, df, g_row + x);
146       Store(b_vec, df, b_row + x);
147     }
148   }
149   JXL_CHECK_IMAGE_INITIALIZED(*rgb, rect);
150 }
151 
152 // NOLINTNEXTLINE(google-readability-namespace-comments)
153 }  // namespace HWY_NAMESPACE
154 }  // namespace jxl
155 HWY_AFTER_NAMESPACE();
156 
157 #if HWY_ONCE
158 namespace jxl {
159 
160 HWY_EXPORT(OpsinToLinearInplace);
OpsinToLinearInplace(Image3F * JXL_RESTRICT inout,ThreadPool * pool,const OpsinParams & opsin_params)161 void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
162                           const OpsinParams& opsin_params) {
163   return HWY_DYNAMIC_DISPATCH(OpsinToLinearInplace)(inout, pool, opsin_params);
164 }
165 
166 HWY_EXPORT(OpsinToLinear);
OpsinToLinear(const Image3F & opsin,const Rect & rect,ThreadPool * pool,Image3F * JXL_RESTRICT linear,const OpsinParams & opsin_params)167 void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
168                    Image3F* JXL_RESTRICT linear,
169                    const OpsinParams& opsin_params) {
170   return HWY_DYNAMIC_DISPATCH(OpsinToLinear)(opsin, rect, pool, linear,
171                                              opsin_params);
172 }
173 
174 HWY_EXPORT(YcbcrToRgb);
YcbcrToRgb(const Image3F & ycbcr,Image3F * rgb,const Rect & rect)175 void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) {
176   return HWY_DYNAMIC_DISPATCH(YcbcrToRgb)(ycbcr, rgb, rect);
177 }
178 
179 HWY_EXPORT(HasFastXYBTosRGB8);
HasFastXYBTosRGB8()180 bool HasFastXYBTosRGB8() { return HWY_DYNAMIC_DISPATCH(HasFastXYBTosRGB8)(); }
181 
182 HWY_EXPORT(FastXYBTosRGB8);
FastXYBTosRGB8(const float * input[4],uint8_t * output,bool is_rgba,size_t xsize)183 void FastXYBTosRGB8(const float* input[4], uint8_t* output, bool is_rgba,
184                     size_t xsize) {
185   return HWY_DYNAMIC_DISPATCH(FastXYBTosRGB8)(input, output, is_rgba, xsize);
186 }
187 
Init(float intensity_target)188 void OpsinParams::Init(float intensity_target) {
189   InitSIMDInverseMatrix(GetOpsinAbsorbanceInverseMatrix(), inverse_opsin_matrix,
190                         intensity_target);
191   memcpy(opsin_biases, kNegOpsinAbsorbanceBiasRGB,
192          sizeof(kNegOpsinAbsorbanceBiasRGB));
193   memcpy(quant_biases, kDefaultQuantBias, sizeof(kDefaultQuantBias));
194   for (size_t c = 0; c < 4; c++) {
195     opsin_biases_cbrt[c] = cbrtf(opsin_biases[c]);
196   }
197 }
198 
Set(const CodecMetadata & metadata,const ColorEncoding & default_enc)199 Status OutputEncodingInfo::Set(const CodecMetadata& metadata,
200                                const ColorEncoding& default_enc) {
201   const auto& im = metadata.transform_data.opsin_inverse_matrix;
202   float inverse_matrix[9];
203   memcpy(inverse_matrix, im.inverse_matrix, sizeof(inverse_matrix));
204   intensity_target = metadata.m.IntensityTarget();
205   if (metadata.m.xyb_encoded) {
206     const auto& orig_color_encoding = metadata.m.color_encoding;
207     color_encoding = default_enc;
208     // Figure out if we can output to this color encoding.
209     do {
210       if (!orig_color_encoding.HaveFields()) break;
211       // TODO(veluca): keep in sync with dec_reconstruct.cc
212       if (!orig_color_encoding.tf.IsPQ() && !orig_color_encoding.tf.IsSRGB() &&
213           !orig_color_encoding.tf.IsGamma() &&
214           !orig_color_encoding.tf.IsLinear() &&
215           !orig_color_encoding.tf.IsHLG() && !orig_color_encoding.tf.IsDCI() &&
216           !orig_color_encoding.tf.Is709()) {
217         break;
218       }
219       if (orig_color_encoding.tf.IsGamma()) {
220         inverse_gamma = orig_color_encoding.tf.GetGamma();
221       }
222       if (orig_color_encoding.tf.IsDCI()) {
223         inverse_gamma = 1.0f / 2.6f;
224       }
225       if (orig_color_encoding.IsGray() &&
226           orig_color_encoding.white_point != WhitePoint::kD65) {
227         // TODO(veluca): figure out what should happen here.
228         break;
229       }
230 
231       if ((orig_color_encoding.primaries != Primaries::kSRGB ||
232            orig_color_encoding.white_point != WhitePoint::kD65) &&
233           !orig_color_encoding.IsGray()) {
234         all_default_opsin = false;
235         float srgb_to_xyzd50[9];
236         const auto& srgb = ColorEncoding::SRGB(/*is_gray=*/false);
237         JXL_CHECK(PrimariesToXYZD50(
238             srgb.GetPrimaries().r.x, srgb.GetPrimaries().r.y,
239             srgb.GetPrimaries().g.x, srgb.GetPrimaries().g.y,
240             srgb.GetPrimaries().b.x, srgb.GetPrimaries().b.y,
241             srgb.GetWhitePoint().x, srgb.GetWhitePoint().y, srgb_to_xyzd50));
242         float original_to_xyz[3][3];
243         JXL_RETURN_IF_ERROR(PrimariesToXYZ(
244             orig_color_encoding.GetPrimaries().r.x,
245             orig_color_encoding.GetPrimaries().r.y,
246             orig_color_encoding.GetPrimaries().g.x,
247             orig_color_encoding.GetPrimaries().g.y,
248             orig_color_encoding.GetPrimaries().b.x,
249             orig_color_encoding.GetPrimaries().b.y,
250             orig_color_encoding.GetWhitePoint().x,
251             orig_color_encoding.GetWhitePoint().y, &original_to_xyz[0][0]));
252         memcpy(luminances, original_to_xyz[1], sizeof luminances);
253         float adapt_to_d50[9];
254         JXL_RETURN_IF_ERROR(AdaptToXYZD50(orig_color_encoding.GetWhitePoint().x,
255                                           orig_color_encoding.GetWhitePoint().y,
256                                           adapt_to_d50));
257         float xyzd50_to_original[9];
258         MatMul(adapt_to_d50, &original_to_xyz[0][0], 3, 3, 3,
259                xyzd50_to_original);
260         JXL_RETURN_IF_ERROR(Inv3x3Matrix(xyzd50_to_original));
261         float srgb_to_original[9];
262         MatMul(xyzd50_to_original, srgb_to_xyzd50, 3, 3, 3, srgb_to_original);
263         MatMul(srgb_to_original, im.inverse_matrix, 3, 3, 3, inverse_matrix);
264       }
265       color_encoding = orig_color_encoding;
266       color_encoding_is_original = true;
267       if (color_encoding.tf.IsPQ()) {
268         intensity_target = 10000;
269       }
270     } while (false);
271   } else {
272     color_encoding = metadata.m.color_encoding;
273   }
274   if (std::abs(intensity_target - 255.0) > 0.1f || !im.all_default) {
275     all_default_opsin = false;
276   }
277   InitSIMDInverseMatrix(inverse_matrix, opsin_params.inverse_opsin_matrix,
278                         intensity_target);
279   std::copy(std::begin(im.opsin_biases), std::end(im.opsin_biases),
280             opsin_params.opsin_biases);
281   for (int i = 0; i < 3; ++i) {
282     opsin_params.opsin_biases_cbrt[i] = cbrtf(opsin_params.opsin_biases[i]);
283   }
284   opsin_params.opsin_biases_cbrt[3] = opsin_params.opsin_biases[3] = 1;
285   std::copy(std::begin(im.quant_biases), std::end(im.quant_biases),
286             opsin_params.quant_biases);
287   return true;
288 }
289 
290 }  // namespace jxl
291 #endif  // HWY_ONCE
292