1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
5
6 #include "lib/jxl/dec_reconstruct.h"
7
8 #include <atomic>
9 #include <utility>
10
11 #include "lib/jxl/filters.h"
12 #include "lib/jxl/image_ops.h"
13
14 #undef HWY_TARGET_INCLUDE
15 #define HWY_TARGET_INCLUDE "lib/jxl/dec_reconstruct.cc"
16 #include <hwy/foreach_target.h>
17 #include <hwy/highway.h>
18
19 #include "lib/jxl/aux_out.h"
20 #include "lib/jxl/base/data_parallel.h"
21 #include "lib/jxl/base/profiler.h"
22 #include "lib/jxl/blending.h"
23 #include "lib/jxl/color_management.h"
24 #include "lib/jxl/common.h"
25 #include "lib/jxl/dec_noise.h"
26 #include "lib/jxl/dec_upsample.h"
27 #include "lib/jxl/dec_xyb-inl.h"
28 #include "lib/jxl/dec_xyb.h"
29 #include "lib/jxl/epf.h"
30 #include "lib/jxl/fast_math-inl.h"
31 #include "lib/jxl/frame_header.h"
32 #include "lib/jxl/loop_filter.h"
33 #include "lib/jxl/passes_state.h"
34 #include "lib/jxl/transfer_functions-inl.h"
35 HWY_BEFORE_NAMESPACE();
36 namespace jxl {
37 namespace HWY_NAMESPACE {
38
// Converts the opsin (XYB) pixels inside `rect` of `idct` in place to the
// target color space described by `output_encoding_info`, applying the
// matching transfer function: linear, sRGB, PQ, HLG, BT.709, or a pure
// gamma/DCI power curve. Aborts on any other target encoding (programming
// error). Returns true on success.
Status UndoXYBInPlace(Image3F* idct, const Rect& rect,
                      const OutputEncodingInfo& output_encoding_info) {
  PROFILER_ZONE("UndoXYB");

  // The size of `rect` might not be a multiple of Lanes(d), but is guaranteed
  // to be a multiple of kBlockDim or at the margin of the image.
  for (size_t y = 0; y < rect.ysize(); y++) {
    float* JXL_RESTRICT row0 = rect.PlaneRow(idct, 0, y);
    float* JXL_RESTRICT row1 = rect.PlaneRow(idct, 1, y);
    float* JXL_RESTRICT row2 = rect.PlaneRow(idct, 2, y);

    // Cap the vector width at the horizontal padding granularity so that a
    // full vector load at the end of a row stays inside the padded buffer.
    const HWY_CAPPED(float, GroupBorderAssigner::kPaddingXRound) d;

    if (output_encoding_info.color_encoding.tf.IsLinear()) {
      // Linear output: XYB -> linear RGB, no transfer function applied.
      for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) {
        const auto in_opsin_x = Load(d, row0 + x);
        const auto in_opsin_y = Load(d, row1 + x);
        const auto in_opsin_b = Load(d, row2 + x);
        // NOTE(review): fence presumably keeps the compiler from interleaving
        // the loads with the conversion below — confirm before removing.
        JXL_COMPILER_FENCE;
        auto linear_r = Undefined(d);
        auto linear_g = Undefined(d);
        auto linear_b = Undefined(d);
        XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b,
                 output_encoding_info.opsin_params, &linear_r, &linear_g,
                 &linear_b);

        Store(linear_r, d, row0 + x);
        Store(linear_g, d, row1 + x);
        Store(linear_b, d, row2 + x);
      }
    } else if (output_encoding_info.color_encoding.tf.IsSRGB()) {
      // sRGB output: XYB -> linear RGB, then the sRGB transfer function.
      for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) {
        const auto in_opsin_x = Load(d, row0 + x);
        const auto in_opsin_y = Load(d, row1 + x);
        const auto in_opsin_b = Load(d, row2 + x);
        JXL_COMPILER_FENCE;
        auto linear_r = Undefined(d);
        auto linear_g = Undefined(d);
        auto linear_b = Undefined(d);
        XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b,
                 output_encoding_info.opsin_params, &linear_r, &linear_g,
                 &linear_b);

#if JXL_HIGH_PRECISION
        // Exact sRGB encoding.
        Store(TF_SRGB().EncodedFromDisplay(d, linear_r), d, row0 + x);
        Store(TF_SRGB().EncodedFromDisplay(d, linear_g), d, row1 + x);
        Store(TF_SRGB().EncodedFromDisplay(d, linear_b), d, row2 + x);
#else
        // Faster approximate sRGB encoding.
        Store(FastLinearToSRGB(d, linear_r), d, row0 + x);
        Store(FastLinearToSRGB(d, linear_g), d, row1 + x);
        Store(FastLinearToSRGB(d, linear_b), d, row2 + x);
#endif
      }
    } else if (output_encoding_info.color_encoding.tf.IsPQ()) {
      // HDR PQ (SMPTE ST 2084) output.
      for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) {
        const auto in_opsin_x = Load(d, row0 + x);
        const auto in_opsin_y = Load(d, row1 + x);
        const auto in_opsin_b = Load(d, row2 + x);
        JXL_COMPILER_FENCE;
        auto linear_r = Undefined(d);
        auto linear_g = Undefined(d);
        auto linear_b = Undefined(d);
        XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b,
                 output_encoding_info.opsin_params, &linear_r, &linear_g,
                 &linear_b);
        Store(TF_PQ().EncodedFromDisplay(d, linear_r), d, row0 + x);
        Store(TF_PQ().EncodedFromDisplay(d, linear_g), d, row1 + x);
        Store(TF_PQ().EncodedFromDisplay(d, linear_b), d, row2 + x);
      }
    } else if (output_encoding_info.color_encoding.tf.IsHLG()) {
      // HDR HLG (hybrid log-gamma) output.
      for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) {
        const auto in_opsin_x = Load(d, row0 + x);
        const auto in_opsin_y = Load(d, row1 + x);
        const auto in_opsin_b = Load(d, row2 + x);
        JXL_COMPILER_FENCE;
        auto linear_r = Undefined(d);
        auto linear_g = Undefined(d);
        auto linear_b = Undefined(d);
        XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b,
                 output_encoding_info.opsin_params, &linear_r, &linear_g,
                 &linear_b);
        Store(TF_HLG().EncodedFromDisplay(d, linear_r), d, row0 + x);
        Store(TF_HLG().EncodedFromDisplay(d, linear_g), d, row1 + x);
        Store(TF_HLG().EncodedFromDisplay(d, linear_b), d, row2 + x);
      }
    } else if (output_encoding_info.color_encoding.tf.Is709()) {
      // BT.709 output.
      for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) {
        const auto in_opsin_x = Load(d, row0 + x);
        const auto in_opsin_y = Load(d, row1 + x);
        const auto in_opsin_b = Load(d, row2 + x);
        JXL_COMPILER_FENCE;
        auto linear_r = Undefined(d);
        auto linear_g = Undefined(d);
        auto linear_b = Undefined(d);
        XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b,
                 output_encoding_info.opsin_params, &linear_r, &linear_g,
                 &linear_b);
        Store(TF_709().EncodedFromDisplay(d, linear_r), d, row0 + x);
        Store(TF_709().EncodedFromDisplay(d, linear_g), d, row1 + x);
        Store(TF_709().EncodedFromDisplay(d, linear_b), d, row2 + x);
      }
    } else if (output_encoding_info.color_encoding.tf.IsGamma() ||
               output_encoding_info.color_encoding.tf.IsDCI()) {
      // Pure power-law (gamma or DCI) output. Values at or below 1e-5 are
      // mapped to zero, presumably to avoid FastPowf on tiny/negative inputs
      // — TODO confirm.
      auto gamma_tf = [&](hwy::HWY_NAMESPACE::Vec<decltype(d)> v) {
        return IfThenZeroElse(
            v <= Set(d, 1e-5f),
            FastPowf(d, v, Set(d, output_encoding_info.inverse_gamma)));
      };
      for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) {
#if MEMORY_SANITIZER
        // Zero out lanes beyond rect.xsize() so MSAN does not flag reads of
        // uninitialized padding.
        const auto mask = Iota(d, x) < Set(d, rect.xsize());
        const auto in_opsin_x = IfThenElseZero(mask, Load(d, row0 + x));
        const auto in_opsin_y = IfThenElseZero(mask, Load(d, row1 + x));
        const auto in_opsin_b = IfThenElseZero(mask, Load(d, row2 + x));
#else
        const auto in_opsin_x = Load(d, row0 + x);
        const auto in_opsin_y = Load(d, row1 + x);
        const auto in_opsin_b = Load(d, row2 + x);
#endif
        JXL_COMPILER_FENCE;
        auto linear_r = Undefined(d);
        auto linear_g = Undefined(d);
        auto linear_b = Undefined(d);
        XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b,
                 output_encoding_info.opsin_params, &linear_r, &linear_g,
                 &linear_b);
        Store(gamma_tf(linear_r), d, row0 + x);
        Store(gamma_tf(linear_g), d, row1 + x);
        Store(gamma_tf(linear_b), d, row2 + x);
      }
    } else {
      // This is a programming error.
      JXL_ABORT("Invalid target encoding");
    }
  }
  return true;
}
176
// Interleaves the four u8 vectors r, g, b, a into `buf` as packed RGB (3
// bytes/pixel) or RGBA (4 bytes/pixel, when `alpha` is true).
// `n` is the number of pixels to actually write; `extra` is the number of
// pixels of writable space available at `buf` (extra >= n), used by the NEON
// path to decide whether a full-width interleaved store is safe.
template <typename D, typename V>
void StoreRGBA(D d, V r, V g, V b, V a, bool alpha, size_t n, size_t extra,
               uint8_t* buf) {
#if HWY_TARGET == HWY_SCALAR
  // Scalar target: one pixel per call; n/extra are irrelevant here.
  buf[0] = r.raw;
  buf[1] = g.raw;
  buf[2] = b.raw;
  if (alpha) {
    buf[3] = a.raw;
  }
#elif HWY_TARGET == HWY_NEON
  // NEON: use hardware interleaved stores (8 pixels at a time).
  if (alpha) {
    uint8x8x4_t data = {r.raw, g.raw, b.raw, a.raw};
    if (extra >= 8) {
      vst4_u8(buf, data);
    } else {
      // Tail: store to a scratch buffer and copy only the valid pixels so we
      // never write past the end of `buf`.
      uint8_t tmp[8 * 4];
      vst4_u8(tmp, data);
      memcpy(buf, tmp, n * 4);
    }
  } else {
    uint8x8x3_t data = {r.raw, g.raw, b.raw};
    if (extra >= 8) {
      vst3_u8(buf, data);
    } else {
      uint8_t tmp[8 * 3];
      vst3_u8(tmp, data);
      memcpy(buf, tmp, n * 3);
    }
  }
#else
  // Generic fallback: spill each vector to memory and scatter byte-by-byte.
  // TODO(veluca): implement this for x86.
  size_t mul = alpha ? 4 : 3;
  HWY_ALIGN uint8_t bytes[16];
  Store(r, d, bytes);
  for (size_t i = 0; i < n; i++) {
    buf[mul * i] = bytes[i];
  }
  Store(g, d, bytes);
  for (size_t i = 0; i < n; i++) {
    buf[mul * i + 1] = bytes[i];
  }
  Store(b, d, bytes);
  for (size_t i = 0; i < n; i++) {
    buf[mul * i + 2] = bytes[i];
  }
  if (alpha) {
    Store(a, d, bytes);
    for (size_t i = 0; i < n; i++) {
      buf[4 * i + 3] = bytes[i];
    }
  }
#endif
}
231
// Outputs floating point image to RGBA 8-bit buffer. Does not support alpha
// channel in the input, but outputs opaque alpha channel for the case where the
// output buffer to write to is in the 4-byte per pixel RGBA format.
//
// Pixels are clamped to [0, 1], scaled by 255, rounded to nearest, and written
// to `output_buf` at the offset described by `output_buf_rect` and `stride`
// (bytes per output row). `alpha_in`/`alpha_rect` supply the alpha plane when
// present; otherwise alpha is set to 255.
void FloatToRGBA8(const Image3F& input, const Rect& input_rect, bool is_rgba,
                  const ImageF* alpha_in, const Rect& alpha_rect,
                  const Rect& output_buf_rect, uint8_t* JXL_RESTRICT output_buf,
                  size_t stride) {
  // Bytes per output pixel.
  size_t bytes = is_rgba ? 4 : 3;
  for (size_t y = 0; y < output_buf_rect.ysize(); y++) {
    const float* JXL_RESTRICT row_in_r = input_rect.ConstPlaneRow(input, 0, y);
    const float* JXL_RESTRICT row_in_g = input_rect.ConstPlaneRow(input, 1, y);
    const float* JXL_RESTRICT row_in_b = input_rect.ConstPlaneRow(input, 2, y);
    const float* JXL_RESTRICT row_in_a =
        alpha_in ? alpha_rect.ConstRow(*alpha_in, y) : nullptr;
    // Byte offset of the first output pixel of this row.
    size_t base_ptr =
        (y + output_buf_rect.y0()) * stride + bytes * output_buf_rect.x0();
    using D = HWY_CAPPED(float, 4);
    const D d;
    D::Rebind<uint32_t> du;
    auto zero = Zero(d);
    auto one = Set(d, 1.0f);
    auto mul = Set(d, 255.0f);
#if MEMORY_SANITIZER
    // Avoid use-of-uninitialized-value for loads past the end of the image's
    // initialized data.
    auto safe_load = [&](const float* ptr, size_t x) {
      uint32_t kMask[8] = {~0u, ~0u, ~0u, ~0u, 0, 0, 0, 0};
      size_t n = std::min<size_t>(Lanes(d), output_buf_rect.xsize() - x);
      auto mask = BitCast(d, LoadU(du, kMask + Lanes(d) - n));
      return Load(d, ptr + x) & mask;
    };
#else
    auto safe_load = [](const float* ptr, size_t x) {
      return Load(D(), ptr + x);
    };
#endif
    for (size_t x = 0; x < output_buf_rect.xsize(); x += Lanes(d)) {
      // Clamp to [0, 1] and scale to [0, 255].
      auto rf = Clamp(zero, safe_load(row_in_r, x), one) * mul;
      auto gf = Clamp(zero, safe_load(row_in_g, x), one) * mul;
      auto bf = Clamp(zero, safe_load(row_in_b, x), one) * mul;
      // Missing alpha plane -> fully opaque.
      auto af = row_in_a ? Clamp(zero, safe_load(row_in_a, x), one) * mul
                         : Set(d, 255.0f);
      // Round to nearest and narrow u32 -> u8.
      auto r8 = U8FromU32(BitCast(du, NearestInt(rf)));
      auto g8 = U8FromU32(BitCast(du, NearestInt(gf)));
      auto b8 = U8FromU32(BitCast(du, NearestInt(bf)));
      auto a8 = U8FromU32(BitCast(du, NearestInt(af)));
      // Remaining pixels in this row; < Lanes(d) only on the final iteration.
      size_t n = output_buf_rect.xsize() - x;
      if (JXL_LIKELY(n >= Lanes(d))) {
        StoreRGBA(D::Rebind<uint8_t>(), r8, g8, b8, a8, is_rgba, Lanes(d), n,
                  output_buf + base_ptr + bytes * x);
      } else {
        StoreRGBA(D::Rebind<uint8_t>(), r8, g8, b8, a8, is_rgba, n, n,
                  output_buf + base_ptr + bytes * x);
      }
    }
  }
}
289
290 // NOLINTNEXTLINE(google-readability-namespace-comments)
291 } // namespace HWY_NAMESPACE
292 } // namespace jxl
293 HWY_AFTER_NAMESPACE();
294
295 #if HWY_ONCE
296 namespace jxl {
297
298 HWY_EXPORT(UndoXYBInPlace);
299 HWY_EXPORT(FloatToRGBA8);
300
UndoXYB(const Image3F & src,Image3F * dst,const OutputEncodingInfo & output_info,ThreadPool * pool)301 void UndoXYB(const Image3F& src, Image3F* dst,
302 const OutputEncodingInfo& output_info, ThreadPool* pool) {
303 CopyImageTo(src, dst);
304 pool->Run(0, src.ysize(), ThreadPool::SkipInit(), [&](int y, int /*thread*/) {
305 JXL_CHECK(HWY_DYNAMIC_DISPATCH(UndoXYBInPlace)(dst, Rect(*dst).Line(y),
306 output_info));
307 });
308 }
309
310 namespace {
ScaleRectForEC(Rect in,const FrameHeader & frame_header,size_t ec)311 Rect ScaleRectForEC(Rect in, const FrameHeader& frame_header, size_t ec) {
312 auto s = [&](size_t x) {
313 return DivCeil(x * frame_header.upsampling,
314 frame_header.extra_channel_upsampling[ec]);
315 };
316 return Rect(s(in.x0()), s(in.y0()), s(in.xsize()), s(in.ysize()));
317 }
318
// Implements EnsurePaddingInPlace, but allows processing data one row at a
// time: callers run Init() once, then Process()/Process3() for each row in
// the [*y0, *y1) range reported by Init(). Each processed row gets its left
// and right borders filled with mirrored image pixels.
class EnsurePaddingInPlaceRowByRow {
  // Common setup shared by the public Init() overloads. Computes the
  // horizontal fill ranges [x0_, x1_) and [x2_, x3_), picks a mirroring
  // strategy, and reports through *y0/*y1 the rect-relative row range the
  // caller must process.
  void Init(const Rect& rect, const Rect& image_rect, size_t image_xsize,
            size_t image_ysize, size_t xpadding, size_t ypadding, ssize_t* y0,
            ssize_t* y1) {
    // coordinates relative to rect.
    JXL_DASSERT(SameSize(rect, image_rect));
    // Extend the row range by up to `ypadding` rows on each side, clamped so
    // it never leaves the image.
    *y0 = -std::min(image_rect.y0(), ypadding);
    *y1 = rect.ysize() + std::min(ypadding, image_ysize - image_rect.ysize() -
                                                image_rect.y0());
    if (image_rect.x0() >= xpadding &&
        image_rect.x0() + image_rect.xsize() + xpadding <= image_xsize) {
      // Nothing to do: the rect plus padding lies fully inside the image.
      strategy_ = kSkip;
    } else if (image_xsize >= 2 * xpadding) {
      // Image is wide enough that a one-step mirror never leaves the row.
      strategy_ = kFast;
    } else {
      // Very narrow image: borders may be wider than the image itself.
      strategy_ = kSlow;
    }
    y0_ = rect.y0();
    JXL_DASSERT(rect.x0() >= xpadding);
    x0_ = x1_ = rect.x0() - xpadding;
    // If close to the left border - do mirroring.
    if (image_rect.x0() < xpadding) x1_ = rect.x0() - image_rect.x0();
    x2_ = x3_ = rect.x0() + rect.xsize() + xpadding;
    // If close to the right border - do mirroring.
    if (image_rect.x0() + image_rect.xsize() + xpadding > image_xsize) {
      x2_ = rect.x0() + image_xsize - image_rect.x0();
    }
    JXL_DASSERT(image_xsize == (x2_ - x1_) ||
                (x1_ - x0_ <= x2_ - x1_ && x3_ - x2_ <= x2_ - x1_));
  }

 public:
  // Sets up padding of all three planes of `img`.
  void Init(Image3F* img, const Rect& rect, const Rect& image_rect,
            size_t image_xsize, size_t image_ysize, size_t xpadding,
            size_t ypadding, ssize_t* y0, ssize_t* y1) {
    Init(rect, image_rect, image_xsize, image_ysize, xpadding, ypadding, y0,
         y1);
    img3_ = img;
    JXL_DASSERT(x3_ <= img->xsize());
  }
  // Sets up padding of a single-plane image.
  void Init(ImageF* img, const Rect& rect, const Rect& image_rect,
            size_t image_xsize, size_t image_ysize, size_t xpadding,
            size_t ypadding, ssize_t* y0, ssize_t* y1) {
    Init(rect, image_rect, image_xsize, image_ysize, xpadding, ypadding, y0,
         y1);
    img_ = img;
    JXL_DASSERT(x3_ <= img->xsize());
  }
  // To be called when row `y` of the input is available, for all the values in
  // [*y0, *y1). Pads all three planes of the Image3F passed to Init().
  void Process3(ssize_t y) {
    JXL_DASSERT(img3_);
    for (size_t c = 0; c < 3; c++) {
      // Reuse the single-plane path for each channel.
      img_ = &img3_->Plane(c);
      Process(y);
    }
  }
  // Pads row `y` (relative to the rect given to Init()) of the single-plane
  // image according to the strategy chosen by Init().
  void Process(ssize_t y) {
    JXL_DASSERT(img_);
    switch (strategy_) {
      case kSkip:
        break;
      case kFast: {
        // Image is wide enough that a single Mirror() step is sufficient.
        float* JXL_RESTRICT row = img_->Row(y + y0_);
        // Mirror around x1_ to fill the left border [x0_, x1_).
        for (size_t x = x0_; x < x1_; x++) {
          row[x] = row[2 * x1_ - x - 1];
        }
        // Mirror around x2_ to fill the right border [x2_, x3_).
        for (size_t x = x2_; x < x3_; x++) {
          row[x] = row[2 * x2_ - x - 1];
        }
        break;
      }
      case kSlow: {
        // Slow case for small images: Mirror() handles repeated reflection
        // when the border is wider than the valid span [x1_, x2_).
        float* JXL_RESTRICT row = img_->Row(y + y0_) + x1_;
        for (ssize_t x = x0_ - x1_; x < 0; x++) {
          *(row + x) = row[Mirror(x, x2_ - x1_)];
        }
        for (size_t x = x2_ - x1_; x < x3_ - x1_; x++) {
          *(row + x) = row[Mirror(x, x2_ - x1_)];
        }
        break;
      }
    }
  }

 private:
  // Initialized to silence spurious compiler warnings.
  Image3F* img3_ = nullptr;
  ImageF* img_ = nullptr;
  // Will fill [x0_, x1_) and [x2_, x3_) on every row.
  // The [x1_, x2_) range contains valid image pixels. We guarantee that either
  // x1_ - x0_ <= x2_ - x1_, (and similarly for x2_, x3_), or that the [x1_,
  // x2_) contains a full horizontal line of the original image.
  size_t x0_ = 0, x1_ = 0, x2_ = 0, x3_ = 0;
  // First image row covered by the rect; Process(y) pads image row y + y0_.
  size_t y0_ = 0;
  // kSlow: use calls to Mirror(), for the case where the border might be larger
  // than the image.
  // kFast: directly use the result of Mirror() when it can be computed in a
  // single iteration.
  // kSkip: do nothing.
  enum Strategy { kFast, kSlow, kSkip };
  Strategy strategy_ = kSkip;
};
427 } // namespace
428
EnsurePaddingInPlace(Image3F * img,const Rect & rect,const Rect & image_rect,size_t image_xsize,size_t image_ysize,size_t xpadding,size_t ypadding)429 void EnsurePaddingInPlace(Image3F* img, const Rect& rect,
430 const Rect& image_rect, size_t image_xsize,
431 size_t image_ysize, size_t xpadding,
432 size_t ypadding) {
433 ssize_t y0, y1;
434 EnsurePaddingInPlaceRowByRow impl;
435 impl.Init(img, rect, image_rect, image_xsize, image_ysize, xpadding, ypadding,
436 &y0, &y1);
437 for (ssize_t y = y0; y < y1; y++) {
438 impl.Process3(y);
439 }
440 }
441
FinalizeImageRect(Image3F * input_image,const Rect & input_rect,const std::vector<std::pair<ImageF *,Rect>> & extra_channels,PassesDecoderState * dec_state,size_t thread,ImageBundle * JXL_RESTRICT output_image,const Rect & frame_rect)442 Status FinalizeImageRect(
443 Image3F* input_image, const Rect& input_rect,
444 const std::vector<std::pair<ImageF*, Rect>>& extra_channels,
445 PassesDecoderState* dec_state, size_t thread,
446 ImageBundle* JXL_RESTRICT output_image, const Rect& frame_rect) {
447 const ImageFeatures& image_features = dec_state->shared->image_features;
448 const FrameHeader& frame_header = dec_state->shared->frame_header;
449 const ImageMetadata& metadata = frame_header.nonserialized_metadata->m;
450 const LoopFilter& lf = frame_header.loop_filter;
451 const FrameDimensions& frame_dim = dec_state->shared->frame_dim;
452 JXL_DASSERT(frame_rect.xsize() <= kApplyImageFeaturesTileDim);
453 JXL_DASSERT(frame_rect.ysize() <= kApplyImageFeaturesTileDim);
454 JXL_DASSERT(input_rect.xsize() == frame_rect.xsize());
455 JXL_DASSERT(input_rect.ysize() == frame_rect.ysize());
456 JXL_DASSERT(frame_rect.x0() % GroupBorderAssigner::kPaddingXRound == 0);
457 JXL_DASSERT(frame_rect.xsize() % GroupBorderAssigner::kPaddingXRound == 0 ||
458 frame_rect.xsize() + frame_rect.x0() == frame_dim.xsize ||
459 frame_rect.xsize() + frame_rect.x0() == frame_dim.xsize_padded);
460
461 // +----------------------------- STEP 1 ------------------------------+
462 // | Compute the rects on which patches and splines will be applied. |
463 // | In case we are applying upsampling, we need to apply patches on a |
464 // | slightly larger image. |
465 // +-------------------------------------------------------------------+
466
467 // If we are applying upsampling, we need 2 more pixels around the actual rect
468 // for border. Thus, we also need to apply patches and splines to those
469 // pixels. We compute here
470 // - The portion of image that corresponds to the area we are applying IF.
471 // (rect_for_if)
472 // - The rect where that pixel data is stored in upsampling_input_storage.
473 // (rect_for_if_storage)
474 // - The rect where the pixel data that we need to upsample is stored.
475 // (rect_for_upsampling)
476 // - The source rect for the pixel data in `input_image`. It is assumed that,
477 // if `frame_rect` is not on an image border, `input_image:input_rect` has
478 // enough border available. (rect_for_if_input)
479
480 Image3F* output_color =
481 dec_state->rgb_output == nullptr && dec_state->pixel_callback == nullptr
482 ? output_image->color()
483 : nullptr;
484
485 Image3F* storage_for_if = output_color;
486 Rect rect_for_if = frame_rect;
487 Rect rect_for_if_storage = frame_rect;
488 Rect rect_for_upsampling = frame_rect;
489 Rect rect_for_if_input = input_rect;
490 size_t extra_rows_t = 0;
491 size_t extra_rows_b = 0;
492 if (frame_header.upsampling != 1) {
493 size_t ifbx0 = 0;
494 size_t ifbx1 = 0;
495 size_t ifby0 = 0;
496 size_t ifby1 = 0;
497 if (frame_rect.x0() >= 2) {
498 JXL_DASSERT(input_rect.x0() >= 2);
499 ifbx0 = 2;
500 }
501 if (frame_rect.y0() >= 2) {
502 JXL_DASSERT(input_rect.y0() >= 2);
503 extra_rows_t = ifby0 = 2;
504 }
505 for (size_t extra : {1, 2}) {
506 if (frame_rect.x0() + frame_rect.xsize() + extra <=
507 dec_state->shared->frame_dim.xsize_padded) {
508 JXL_DASSERT(input_rect.x0() + input_rect.xsize() + extra <=
509 input_image->xsize());
510 ifbx1 = extra;
511 }
512 if (frame_rect.y0() + frame_rect.ysize() + extra <=
513 dec_state->shared->frame_dim.ysize_padded) {
514 JXL_DASSERT(input_rect.y0() + input_rect.ysize() + extra <=
515 input_image->ysize());
516 extra_rows_b = ifby1 = extra;
517 }
518 }
519 rect_for_if = Rect(frame_rect.x0() - ifbx0, frame_rect.y0() - ifby0,
520 frame_rect.xsize() + ifbx0 + ifbx1,
521 frame_rect.ysize() + ifby0 + ifby1);
522 // Storage for pixel data does not necessarily start at (0, 0) as we need to
523 // have the left border of upsampling_rect aligned to a multiple of
524 // GroupBorderAssigner::kPaddingXRound.
525 rect_for_if_storage =
526 Rect(kBlockDim + RoundUpTo(ifbx0, GroupBorderAssigner::kPaddingXRound) -
527 ifbx0,
528 kBlockDim, rect_for_if.xsize(), rect_for_if.ysize());
529 rect_for_upsampling =
530 Rect(kBlockDim + RoundUpTo(ifbx0, GroupBorderAssigner::kPaddingXRound),
531 kBlockDim + ifby0, frame_rect.xsize(), frame_rect.ysize());
532 rect_for_if_input =
533 Rect(input_rect.x0() - ifbx0, input_rect.y0() - ifby0,
534 rect_for_if_storage.xsize(), rect_for_if_storage.ysize());
535 storage_for_if = &dec_state->upsampling_input_storage[thread];
536 }
537
538 // Variables for upsampling and filtering.
539 Rect upsampled_frame_rect(frame_rect.x0() * frame_header.upsampling,
540 frame_rect.y0() * frame_header.upsampling,
541 frame_rect.xsize() * frame_header.upsampling,
542 frame_rect.ysize() * frame_header.upsampling);
543 Rect full_frame_rect(0, 0, frame_dim.xsize_upsampled,
544 frame_dim.ysize_upsampled);
545 upsampled_frame_rect = upsampled_frame_rect.Crop(full_frame_rect);
546 EnsurePaddingInPlaceRowByRow ensure_padding_upsampling;
547 ssize_t ensure_padding_upsampling_y0 = 0;
548 ssize_t ensure_padding_upsampling_y1 = 0;
549
550 EnsurePaddingInPlaceRowByRow ensure_padding_filter;
551 FilterPipeline* fp = nullptr;
552 ssize_t ensure_padding_filter_y0 = 0;
553 ssize_t ensure_padding_filter_y1 = 0;
554 Rect image_padded_rect;
555 if (lf.epf_iters != 0 || lf.gab) {
556 fp = &dec_state->filter_pipelines[thread];
557 size_t xextra =
558 rect_for_if_input.x0() % GroupBorderAssigner::kPaddingXRound;
559 image_padded_rect = Rect(rect_for_if.x0() - xextra, rect_for_if.y0(),
560 rect_for_if.xsize() + xextra, rect_for_if.ysize());
561 }
562
563 // +----------------------------- STEP 2 ------------------------------+
564 // | Change rects and buffer to not use `output_image` if direct |
565 // | output to rgb8 is requested. |
566 // +-------------------------------------------------------------------+
567 Image3F* output_pixel_data_storage = output_color;
568 Rect upsampled_frame_rect_for_storage = upsampled_frame_rect;
569 if (dec_state->rgb_output || dec_state->pixel_callback) {
570 size_t log2_upsampling = CeilLog2Nonzero(frame_header.upsampling);
571 if (storage_for_if == output_color) {
572 storage_for_if =
573 &dec_state->output_pixel_data_storage[log2_upsampling][thread];
574 rect_for_if_storage =
575 Rect(0, 0, rect_for_if_storage.xsize(), rect_for_if_storage.ysize());
576 }
577 output_pixel_data_storage =
578 &dec_state->output_pixel_data_storage[log2_upsampling][thread];
579 upsampled_frame_rect_for_storage =
580 Rect(0, 0, upsampled_frame_rect.xsize(), upsampled_frame_rect.ysize());
581 if (frame_header.upsampling == 1 && fp == nullptr) {
582 upsampled_frame_rect_for_storage = rect_for_if_storage =
583 rect_for_if_input;
584 output_pixel_data_storage = storage_for_if = input_image;
585 }
586 }
587 // Set up alpha channel.
588 const size_t ec =
589 metadata.Find(ExtraChannel::kAlpha) - metadata.extra_channel_info.data();
590 const ImageF* alpha = nullptr;
591 Rect alpha_rect = upsampled_frame_rect;
592 if (ec < metadata.extra_channel_info.size()) {
593 JXL_ASSERT(ec < extra_channels.size());
594 if (frame_header.extra_channel_upsampling[ec] == 1) {
595 alpha = extra_channels[ec].first;
596 alpha_rect = extra_channels[ec].second;
597 } else {
598 alpha = &output_image->extra_channels()[ec];
599 alpha_rect = upsampled_frame_rect;
600 }
601 }
602
603 // +----------------------------- STEP 3 ------------------------------+
604 // | Set up upsampling and upsample extra channels. |
605 // +-------------------------------------------------------------------+
606 Upsampler* color_upsampler = nullptr;
607 if (frame_header.upsampling != 1) {
608 color_upsampler =
609 &dec_state->upsamplers[CeilLog2Nonzero(frame_header.upsampling) - 1];
610 ensure_padding_upsampling.Init(
611 storage_for_if, rect_for_upsampling, frame_rect, frame_dim.xsize_padded,
612 frame_dim.ysize_padded, 2, 2, &ensure_padding_upsampling_y0,
613 &ensure_padding_upsampling_y1);
614 }
615
616 std::vector<std::pair<ImageF*, Rect>> extra_channels_for_patches;
617 std::vector<EnsurePaddingInPlaceRowByRow> ec_padding;
618
619 bool late_ec_upsample = frame_header.upsampling != 1;
620 for (auto ecups : frame_header.extra_channel_upsampling) {
621 if (ecups != frame_header.upsampling) {
622 // If patches are applied, either frame_header.upsampling == 1 or
623 // late_ec_upsample is true.
624 late_ec_upsample = false;
625 }
626 }
627
628 ssize_t ensure_padding_upsampling_ec_y0 = 0;
629 ssize_t ensure_padding_upsampling_ec_y1 = 0;
630
631 // TODO(veluca) do not upsample extra channels to a full-image-sized buffer if
632 // we are not outputting to an ImageBundle.
633 if (!late_ec_upsample) {
634 // Upsample extra channels first if not all channels have the same
635 // upsampling factor.
636 for (size_t ec = 0; ec < extra_channels.size(); ec++) {
637 size_t ecups = frame_header.extra_channel_upsampling[ec];
638 if (ecups == 1) {
639 extra_channels_for_patches.push_back(extra_channels[ec]);
640 continue;
641 }
642 ssize_t ensure_padding_y0, ensure_padding_y1;
643 EnsurePaddingInPlaceRowByRow ensure_padding;
644 Rect ec_image_rect = ScaleRectForEC(frame_rect, frame_header, ec);
645 size_t ecxs = DivCeil(frame_dim.xsize_upsampled,
646 frame_header.extra_channel_upsampling[ec]);
647 size_t ecys = DivCeil(frame_dim.ysize_upsampled,
648 frame_header.extra_channel_upsampling[ec]);
649 ensure_padding.Init(extra_channels[ec].first, extra_channels[ec].second,
650 ec_image_rect, ecxs, ecys, 2, 2, &ensure_padding_y0,
651 &ensure_padding_y1);
652 for (ssize_t y = ensure_padding_y0; y < ensure_padding_y1; y++) {
653 ensure_padding.Process(y);
654 }
655 Upsampler& upsampler =
656 dec_state->upsamplers[CeilLog2Nonzero(
657 frame_header.extra_channel_upsampling[ec]) -
658 1];
659 upsampler.UpsampleRect(
660 *extra_channels[ec].first, extra_channels[ec].second,
661 &output_image->extra_channels()[ec], upsampled_frame_rect,
662 static_cast<ssize_t>(ec_image_rect.y0()) -
663 static_cast<ssize_t>(extra_channels[ec].second.y0()),
664 ecys);
665 extra_channels_for_patches.emplace_back(
666 &output_image->extra_channels()[ec], upsampled_frame_rect);
667 }
668 } else {
669 // Upsample extra channels last if color channels are upsampled and all the
670 // extra channels have the same upsampling as them.
671 ec_padding.resize(extra_channels.size());
672 for (size_t ec = 0; ec < extra_channels.size(); ec++) {
673 // Add a border to the extra channel rect for when patches are applied.
674 // This ensures that the correct row is accessed (y values for patches are
675 // relative to rect_for_if, not to input_rect).
676 // As the rect is extended by 0 or 2 pixels, and the patches input has,
677 // accordingly, the same padding, this is safe.
678 Rect r(extra_channels[ec].second.x0() + rect_for_upsampling.x0() -
679 rect_for_if_storage.x0(),
680 extra_channels[ec].second.y0() + rect_for_upsampling.y0() -
681 rect_for_if_storage.y0(),
682 extra_channels[ec].second.xsize() + rect_for_if_storage.xsize() -
683 rect_for_upsampling.xsize(),
684 extra_channels[ec].second.ysize() + rect_for_if_storage.ysize() -
685 rect_for_upsampling.ysize());
686 extra_channels_for_patches.emplace_back(extra_channels[ec].first, r);
687 ec_padding[ec].Init(extra_channels[ec].first, extra_channels[ec].second,
688 frame_rect, frame_dim.xsize, frame_dim.ysize, 2, 2,
689 &ensure_padding_upsampling_ec_y0,
690 &ensure_padding_upsampling_ec_y1);
691 }
692 }
693
694 // Initialized to a valid non-null ptr to avoid UB if arithmetic is done with
695 // the pointer value (which would then not be used).
696 std::vector<float*> ec_ptrs_for_patches(extra_channels.size(),
697 input_image->PlaneRow(0, 0));
698
699 // +----------------------------- STEP 4 ------------------------------+
700 // | Set up the filter pipeline. |
701 // +-------------------------------------------------------------------+
702 if (fp) {
703 // If `rect_for_if_input` does not start at a multiple of
704 // GroupBorderAssigner::kPaddingXRound, we extend the rect we run EPF on by
705 // one full padding length to ensure sigma is handled correctly. We also
706 // extend the output and image rects accordingly. To do this, we need 2x the
707 // border.
708 size_t xextra =
709 rect_for_if_input.x0() % GroupBorderAssigner::kPaddingXRound;
710 Rect filter_input_padded_rect(
711 rect_for_if_input.x0() - xextra, rect_for_if_input.y0(),
712 rect_for_if_input.xsize() + xextra, rect_for_if_input.ysize());
713 ensure_padding_filter.Init(
714 input_image, rect_for_if_input, rect_for_if, frame_dim.xsize_padded,
715 frame_dim.ysize_padded, lf.Padding(), lf.Padding(),
716 &ensure_padding_filter_y0, &ensure_padding_filter_y1);
717 Rect filter_output_padded_rect(
718 rect_for_if_storage.x0() - xextra, rect_for_if_storage.y0(),
719 rect_for_if_storage.xsize() + xextra, rect_for_if_storage.ysize());
720 fp = PrepareFilterPipeline(dec_state, image_padded_rect, *input_image,
721 filter_input_padded_rect, frame_dim.ysize_padded,
722 thread, storage_for_if,
723 filter_output_padded_rect);
724 }
725
726 // +----------------------------- STEP 5 ------------------------------+
727 // | Run the prepared pipeline of operations. |
728 // +-------------------------------------------------------------------+
729
730 // y values are relative to rect_for_if.
731 // Automatic mirroring in fp->ApplyFiltersRow() implies that we should ensure
732 // that padding for the first lines of the image is already present before
733 // calling ApplyFiltersRow() with "virtual" rows.
734 // Here we rely on the fact that virtual rows at the beginning of the image
735 // are only present if input_rect.y0() == 0.
736 ssize_t first_ensure_padding_y = ensure_padding_filter_y0;
737 if (frame_rect.y0() == 0) {
738 JXL_DASSERT(ensure_padding_filter_y0 == 0);
739 first_ensure_padding_y =
740 std::min<ssize_t>(lf.Padding(), ensure_padding_filter_y1);
741 for (ssize_t y = 0; y < first_ensure_padding_y; y++) {
742 ensure_padding_filter.Process3(y);
743 }
744 }
745
746 for (ssize_t y = -lf.Padding();
747 y < static_cast<ssize_t>(lf.Padding() + rect_for_if.ysize()); y++) {
748 if (fp) {
749 if (y >= first_ensure_padding_y && y < ensure_padding_filter_y1) {
750 ensure_padding_filter.Process3(y);
751 }
752 fp->ApplyFiltersRow(lf, dec_state->filter_weights, image_padded_rect, y);
753 } else if (output_pixel_data_storage != input_image) {
754 for (size_t c = 0; c < 3; c++) {
755 memcpy(rect_for_if_storage.PlaneRow(storage_for_if, c, y),
756 rect_for_if_input.ConstPlaneRow(*input_image, c, y),
757 rect_for_if_input.xsize() * sizeof(float));
758 }
759 }
760 if (y < static_cast<ssize_t>(lf.Padding())) continue;
761 // At this point, row `y - lf.Padding()` of `rect_for_if` has been produced
762 // by the filters.
763 ssize_t available_y = y - lf.Padding();
764 if (frame_header.upsampling == 1) {
765 for (size_t i = 0; i < extra_channels.size(); i++) {
766 ec_ptrs_for_patches[i] = extra_channels_for_patches[i].second.Row(
767 extra_channels_for_patches[i].first, available_y);
768 }
769 }
770 JXL_RETURN_IF_ERROR(image_features.patches.AddTo(
771 storage_for_if, rect_for_if_storage.Line(available_y),
772 ec_ptrs_for_patches.data(), rect_for_if.Line(available_y)));
773 JXL_RETURN_IF_ERROR(image_features.splines.AddTo(
774 storage_for_if, rect_for_if_storage.Line(available_y),
775 rect_for_if.Line(available_y), dec_state->shared->cmap));
776 size_t num_ys = 1;
777 if (frame_header.upsampling != 1) {
778 // Upsampling `y` values are relative to `rect_for_upsampling`, not to
779 // `rect_for_if`.
780 ssize_t shifted_y = available_y - extra_rows_t;
781 if (shifted_y >= ensure_padding_upsampling_y0 &&
782 shifted_y < ensure_padding_upsampling_y1) {
783 ensure_padding_upsampling.Process3(shifted_y);
784 }
785 if (late_ec_upsample && shifted_y >= ensure_padding_upsampling_ec_y0 &&
786 shifted_y < ensure_padding_upsampling_ec_y1) {
787 for (size_t ec = 0; ec < extra_channels.size(); ec++) {
788 ec_padding[ec].Process(shifted_y);
789 }
790 }
791 // Upsampling will access two rows of border, so the first upsampling
792 // output will be available after shifted_y is at least 2, *unless* image
793 // height is <= 2.
794 if (shifted_y < 2 &&
795 shifted_y + 1 != static_cast<ssize_t>(frame_dim.ysize_padded)) {
796 continue;
797 }
798 // Value relative to upsampled_frame_rect.
799 size_t input_y = std::max<ssize_t>(shifted_y - 2, 0);
800 size_t upsampled_available_y = frame_header.upsampling * input_y;
801 size_t num_input_rows = 1;
802 // If we are going to mirror the last output rows, then we already have 3
803 // input lines ready. This happens iff we did not extend rect_for_if on
804 // the bottom *and* we are at the last `y` value.
805 if (extra_rows_b != 2 &&
806 static_cast<size_t>(y) + 1 == lf.Padding() + rect_for_if.ysize()) {
807 num_input_rows = 3;
808 }
809 num_input_rows = std::min(num_input_rows, frame_dim.ysize_padded);
810 num_ys = num_input_rows * frame_header.upsampling;
811
812 if (static_cast<size_t>(upsampled_available_y) >=
813 upsampled_frame_rect.ysize()) {
814 continue;
815 }
816
817 if (upsampled_available_y + num_ys >= upsampled_frame_rect.ysize()) {
818 num_ys = upsampled_frame_rect.ysize() - upsampled_available_y;
819 }
820
821 Rect upsample_input_rect = rect_for_upsampling.Lines(input_y, 1);
822 color_upsampler->UpsampleRect(
823 *storage_for_if, upsample_input_rect, output_pixel_data_storage,
824 upsampled_frame_rect_for_storage.Lines(upsampled_available_y, num_ys),
825 static_cast<ssize_t>(frame_rect.y0()) -
826 static_cast<ssize_t>(rect_for_upsampling.y0()),
827 frame_dim.ysize_padded);
828 if (late_ec_upsample) {
829 for (size_t ec = 0; ec < extra_channels.size(); ec++) {
830 color_upsampler->UpsampleRect(
831 *extra_channels[ec].first,
832 extra_channels[ec].second.Lines(input_y, num_input_rows),
833 &output_image->extra_channels()[ec],
834 upsampled_frame_rect.Lines(upsampled_available_y, num_ys),
835 static_cast<ssize_t>(frame_rect.y0()) -
836 static_cast<ssize_t>(extra_channels[ec].second.y0()),
837 frame_dim.ysize);
838 }
839 }
840 available_y = upsampled_available_y;
841 }
842
843 if (static_cast<size_t>(available_y) >= upsampled_frame_rect.ysize()) {
844 continue;
845 }
846
847 // The image data is now unconditionally in
848 // `output_image_storage:upsampled_frame_rect_for_storage`.
849 if (frame_header.flags & FrameHeader::kNoise) {
850 PROFILER_ZONE("AddNoise");
851 AddNoise(image_features.noise_params,
852 upsampled_frame_rect.Lines(available_y, num_ys),
853 dec_state->noise,
854 upsampled_frame_rect_for_storage.Lines(available_y, num_ys),
855 dec_state->shared_storage.cmap, output_pixel_data_storage);
856 }
857
858 if (dec_state->pre_color_transform_frame.xsize() != 0) {
859 for (size_t c = 0; c < 3; c++) {
860 for (size_t y = available_y; y < available_y + num_ys; y++) {
861 float* JXL_RESTRICT row_out = upsampled_frame_rect.PlaneRow(
862 &dec_state->pre_color_transform_frame, c, y);
863 const float* JXL_RESTRICT row_in =
864 upsampled_frame_rect_for_storage.ConstPlaneRow(
865 *output_pixel_data_storage, c, y);
866 memcpy(row_out, row_in,
867 upsampled_frame_rect.xsize() * sizeof(*row_in));
868 }
869 }
870 }
871
872 // We skip the color transform entirely if save_before_color_transform and
873 // the frame is not supposed to be displayed.
874
875 if (dec_state->fast_xyb_srgb8_conversion) {
876 FastXYBTosRGB8(
877 *output_pixel_data_storage,
878 upsampled_frame_rect_for_storage.Lines(available_y, num_ys),
879 upsampled_frame_rect.Lines(available_y, num_ys)
880 .Crop(Rect(0, 0, frame_dim.xsize, frame_dim.ysize)),
881 alpha, alpha_rect.Lines(available_y, num_ys),
882 dec_state->rgb_output_is_rgba, dec_state->rgb_output, frame_dim.xsize,
883 dec_state->rgb_stride);
884 } else {
885 if (frame_header.needs_color_transform()) {
886 if (frame_header.color_transform == ColorTransform::kXYB) {
887 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(UndoXYBInPlace)(
888 output_pixel_data_storage,
889 upsampled_frame_rect_for_storage.Lines(available_y, num_ys),
890 dec_state->output_encoding_info));
891 } else if (frame_header.color_transform == ColorTransform::kYCbCr) {
892 YcbcrToRgb(
893 *output_pixel_data_storage, output_pixel_data_storage,
894 upsampled_frame_rect_for_storage.Lines(available_y, num_ys));
895 }
896 }
897
898 // TODO(veluca): all blending should happen here.
899
900 if (dec_state->rgb_output != nullptr) {
901 HWY_DYNAMIC_DISPATCH(FloatToRGBA8)
902 (*output_pixel_data_storage,
903 upsampled_frame_rect_for_storage.Lines(available_y, num_ys),
904 dec_state->rgb_output_is_rgba, alpha,
905 alpha_rect.Lines(available_y, num_ys),
906 upsampled_frame_rect.Lines(available_y, num_ys)
907 .Crop(Rect(0, 0, frame_dim.xsize, frame_dim.ysize)),
908 dec_state->rgb_output, dec_state->rgb_stride);
909 }
910 if (dec_state->pixel_callback != nullptr) {
911 Rect alpha_line_rect = alpha_rect.Lines(available_y, num_ys);
912 Rect color_input_line_rect =
913 upsampled_frame_rect_for_storage.Lines(available_y, num_ys);
914 Rect image_line_rect =
915 upsampled_frame_rect.Lines(available_y, num_ys)
916 .Crop(Rect(0, 0, frame_dim.xsize, frame_dim.ysize));
917 const float* line_buffers[4];
918 for (size_t iy = 0; iy < image_line_rect.ysize(); iy++) {
919 for (size_t c = 0; c < 3; c++) {
920 line_buffers[c] = color_input_line_rect.ConstPlaneRow(
921 *output_pixel_data_storage, c, iy);
922 }
923 if (alpha) {
924 line_buffers[3] = alpha_line_rect.ConstRow(*alpha, iy);
925 } else {
926 line_buffers[3] = dec_state->opaque_alpha.data();
927 }
928 std::vector<float>& interleaved =
929 dec_state->pixel_callback_rows[thread];
930 size_t j = 0;
931 for (size_t i = 0; i < image_line_rect.xsize(); i++) {
932 interleaved[j++] = line_buffers[0][i];
933 interleaved[j++] = line_buffers[1][i];
934 interleaved[j++] = line_buffers[2][i];
935 if (dec_state->rgb_output_is_rgba) {
936 interleaved[j++] = line_buffers[3][i];
937 }
938 }
939 dec_state->pixel_callback(interleaved.data(), image_line_rect.x0(),
940 image_line_rect.y0() + iy,
941 image_line_rect.xsize());
942 }
943 }
944 }
945 }
946
947 return true;
948 }
949
FinalizeFrameDecoding(ImageBundle * decoded,PassesDecoderState * dec_state,ThreadPool * pool,bool force_fir,bool skip_blending)950 Status FinalizeFrameDecoding(ImageBundle* decoded,
951 PassesDecoderState* dec_state, ThreadPool* pool,
952 bool force_fir, bool skip_blending) {
953 const LoopFilter& lf = dec_state->shared->frame_header.loop_filter;
954 const FrameHeader& frame_header = dec_state->shared->frame_header;
955 const FrameDimensions& frame_dim = dec_state->shared->frame_dim;
956
957 const Image3F* finalize_image_rect_input = &dec_state->decoded;
958 Image3F chroma_upsampled_image;
959 // If we used chroma subsampling, we upsample chroma now and run
960 // ApplyImageFeatures after.
961 // TODO(veluca): this should part of the FinalizeImageRect() pipeline.
962 if (!frame_header.chroma_subsampling.Is444()) {
963 chroma_upsampled_image = CopyImage(dec_state->decoded);
964 finalize_image_rect_input = &chroma_upsampled_image;
965 for (size_t c = 0; c < 3; c++) {
966 ImageF& plane = const_cast<ImageF&>(chroma_upsampled_image.Plane(c));
967 plane.ShrinkTo(DivCeil(frame_dim.xsize_padded,
968 1 << frame_header.chroma_subsampling.HShift(c)),
969 DivCeil(frame_dim.ysize_padded,
970 1 << frame_header.chroma_subsampling.VShift(c)));
971 for (size_t i = 0; i < frame_header.chroma_subsampling.HShift(c); i++) {
972 plane.InitializePaddingForUnalignedAccesses();
973 const size_t output_xsize =
974 DivCeil(frame_dim.xsize_padded,
975 1 << (frame_header.chroma_subsampling.HShift(c) - i - 1));
976 plane = UpsampleH2(plane, output_xsize, pool);
977 }
978 for (size_t i = 0; i < frame_header.chroma_subsampling.VShift(c); i++) {
979 plane.InitializePaddingForUnalignedAccesses();
980 plane = UpsampleV2(plane, pool);
981 }
982 plane.ShrinkTo(frame_dim.xsize_padded, frame_dim.ysize_padded);
983 JXL_DASSERT(plane.PixelsPerRow() == dec_state->decoded.PixelsPerRow());
984 }
985 }
986 // FinalizeImageRect was not yet run, or we are forcing a run.
987 if (!dec_state->EagerFinalizeImageRect() || force_fir) {
988 if (lf.epf_iters > 0 && frame_header.encoding == FrameEncoding::kModular) {
989 FillImage(kInvSigmaNum / lf.epf_sigma_for_modular,
990 &dec_state->filter_weights.sigma);
991 }
992 std::vector<Rect> rects_to_process;
993 for (size_t y = 0; y < frame_dim.ysize_padded; y += kGroupDim) {
994 for (size_t x = 0; x < frame_dim.xsize_padded; x += kGroupDim) {
995 Rect rect(x, y, kGroupDim, kGroupDim, frame_dim.xsize_padded,
996 frame_dim.ysize_padded);
997 if (rect.xsize() == 0 || rect.ysize() == 0) continue;
998 rects_to_process.push_back(rect);
999 }
1000 }
1001 const auto allocate_storage = [&](size_t num_threads) {
1002 dec_state->EnsureStorage(num_threads);
1003 return true;
1004 };
1005
1006 {
1007 std::vector<ImageF> ecs;
1008 const ImageMetadata& metadata = frame_header.nonserialized_metadata->m;
1009 for (size_t i = 0; i < metadata.num_extra_channels; i++) {
1010 if (frame_header.extra_channel_upsampling[i] == 1) {
1011 ecs.push_back(std::move(dec_state->extra_channels[i]));
1012 } else {
1013 ecs.emplace_back(frame_dim.xsize_upsampled_padded,
1014 frame_dim.ysize_upsampled_padded);
1015 }
1016 }
1017 decoded->SetExtraChannels(std::move(ecs));
1018 }
1019
1020 std::atomic<bool> apply_features_ok{true};
1021 auto run_apply_features = [&](size_t rect_id, size_t thread) {
1022 size_t xstart = kBlockDim + dec_state->group_border_assigner.PaddingX(
1023 dec_state->FinalizeRectPadding());
1024 size_t ystart = dec_state->FinalizeRectPadding();
1025 Rect group_data_rect(xstart, ystart, rects_to_process[rect_id].xsize(),
1026 rects_to_process[rect_id].ysize());
1027 CopyImageToWithPadding(rects_to_process[rect_id],
1028 *finalize_image_rect_input,
1029 dec_state->FinalizeRectPadding(), group_data_rect,
1030 &dec_state->group_data[thread]);
1031 std::vector<std::pair<ImageF*, Rect>> ec_rects;
1032 ec_rects.reserve(decoded->extra_channels().size());
1033 for (size_t i = 0; i < decoded->extra_channels().size(); i++) {
1034 Rect r = ScaleRectForEC(rects_to_process[rect_id], frame_header, i);
1035 if (frame_header.extra_channel_upsampling[i] != 1) {
1036 Rect ec_input_rect(kBlockDim, 2, r.xsize(), r.ysize());
1037 auto eti =
1038 &dec_state
1039 ->ec_temp_images[thread * decoded->extra_channels().size() +
1040 i];
1041 CopyImageToWithPadding(r, dec_state->extra_channels[i],
1042 /*padding=*/2, ec_input_rect, eti);
1043 ec_rects.emplace_back(eti, ec_input_rect);
1044 } else {
1045 ec_rects.emplace_back(&decoded->extra_channels()[i], r);
1046 }
1047 }
1048 if (!FinalizeImageRect(&dec_state->group_data[thread], group_data_rect,
1049 ec_rects, dec_state, thread, decoded,
1050 rects_to_process[rect_id])) {
1051 apply_features_ok = false;
1052 }
1053 };
1054
1055 RunOnPool(pool, 0, rects_to_process.size(), allocate_storage,
1056 run_apply_features, "ApplyFeatures");
1057
1058 if (!apply_features_ok) {
1059 return JXL_FAILURE("FinalizeImageRect failed");
1060 }
1061 }
1062
1063 const size_t xsize = frame_dim.xsize_upsampled;
1064 const size_t ysize = frame_dim.ysize_upsampled;
1065
1066 decoded->ShrinkTo(xsize, ysize);
1067 if (dec_state->pre_color_transform_frame.xsize() != 0) {
1068 dec_state->pre_color_transform_frame.ShrinkTo(xsize, ysize);
1069 }
1070
1071 if (!skip_blending && ImageBlender::NeedsBlending(dec_state)) {
1072 if (dec_state->pre_color_transform_frame.xsize() != 0) {
1073 // Extra channels are going to be modified. Make a copy.
1074 dec_state->pre_color_transform_ec.clear();
1075 for (const auto& ec : decoded->extra_channels()) {
1076 dec_state->pre_color_transform_ec.emplace_back(CopyImage(ec));
1077 }
1078 }
1079 ImageBlender blender;
1080 ImageBundle foreground = std::move(*decoded);
1081 JXL_RETURN_IF_ERROR(blender.PrepareBlending(
1082 dec_state, foreground.origin, foreground.xsize(), foreground.ysize(),
1083 foreground.c_current(), /*output=*/decoded));
1084
1085 std::vector<Rect> rects_to_process;
1086 for (size_t y = 0; y < frame_dim.ysize; y += kGroupDim) {
1087 for (size_t x = 0; x < frame_dim.xsize; x += kGroupDim) {
1088 Rect rect(x, y, kGroupDim, kGroupDim, frame_dim.xsize, frame_dim.ysize);
1089 if (rect.xsize() == 0 || rect.ysize() == 0) continue;
1090 rects_to_process.push_back(rect);
1091 }
1092 }
1093
1094 std::atomic<bool> blending_ok{true};
1095 JXL_RETURN_IF_ERROR(RunOnPool(
1096 pool, 0, rects_to_process.size(), ThreadPool::SkipInit(),
1097 [&](size_t i, size_t /*thread*/) {
1098 const Rect& rect = rects_to_process[i];
1099 auto rect_blender = blender.PrepareRect(
1100 rect, *foreground.color(), foreground.extra_channels(), rect);
1101 for (size_t y = 0; y < rect.ysize(); ++y) {
1102 if (!rect_blender.DoBlending(y)) {
1103 blending_ok = false;
1104 return;
1105 }
1106 }
1107 },
1108 "Blend"));
1109 JXL_RETURN_IF_ERROR(blending_ok.load());
1110 }
1111
1112 return true;
1113 }
1114
1115 } // namespace jxl
1116 #endif // HWY_ONCE
1117