1 #include "Halide.h"
2 #include "halide_trace_config.h"
3 #include <stdint.h>
4 
5 namespace {
6 
7 using std::vector;
8 
9 using namespace Halide;
10 using namespace Halide::ConciseCasts;
11 
12 // Shared variables
13 Var x, y, c, yi, yo, yii, xi;
14 
15 // Average two positive values rounding up
avg(Expr a,Expr b)16 Expr avg(Expr a, Expr b) {
17     Type wider = a.type().with_bits(a.type().bits() * 2);
18     return cast(a.type(), (cast(wider, a) + b + 1) / 2);
19 }
20 
blur121(Expr a,Expr b,Expr c)21 Expr blur121(Expr a, Expr b, Expr c) {
22     return avg(avg(a, c), b);
23 }
24 
interleave_x(Func a,Func b)25 Func interleave_x(Func a, Func b) {
26     Func out;
27     out(x, y) = select((x % 2) == 0, a(x / 2, y), b(x / 2, y));
28     return out;
29 }
30 
interleave_y(Func a,Func b)31 Func interleave_y(Func a, Func b) {
32     Func out;
33     out(x, y) = select((y % 2) == 0, a(x, y / 2), b(x, y / 2));
34     return out;
35 }
36 
37 class Demosaic : public Halide::Generator<Demosaic> {
38 public:
39     GeneratorParam<LoopLevel> intermed_compute_at{"intermed_compute_at", LoopLevel::inlined()};
40     GeneratorParam<LoopLevel> intermed_store_at{"intermed_store_at", LoopLevel::inlined()};
41     GeneratorParam<LoopLevel> output_compute_at{"output_compute_at", LoopLevel::inlined()};
42 
43     // Inputs and outputs
44     Input<Func> deinterleaved{"deinterleaved", Int(16), 3};
45     Output<Func> output{"output", Int(16), 3};
46 
47     // Defines outputs using inputs
generate()48     void generate() {
49         // These are the values we already know from the input
50         // x_y = the value of channel x at a site in the input of channel y
51         // gb refers to green sites in the blue rows
52         // gr refers to green sites in the red rows
53 
54         // Give more convenient names to the four channels we know
55         Func r_r, g_gr, g_gb, b_b;
56 
57         g_gr(x, y) = deinterleaved(x, y, 0);
58         r_r(x, y) = deinterleaved(x, y, 1);
59         b_b(x, y) = deinterleaved(x, y, 2);
60         g_gb(x, y) = deinterleaved(x, y, 3);
61 
62         // These are the ones we need to interpolate
63         Func b_r, g_r, b_gr, r_gr, b_gb, r_gb, r_b, g_b;
64 
65         // First calculate green at the red and blue sites
66 
67         // Try interpolating vertically and horizontally. Also compute
68         // differences vertically and horizontally. Use interpolation in
69         // whichever direction had the smallest difference.
70         Expr gv_r = avg(g_gb(x, y - 1), g_gb(x, y));
71         Expr gvd_r = absd(g_gb(x, y - 1), g_gb(x, y));
72         Expr gh_r = avg(g_gr(x + 1, y), g_gr(x, y));
73         Expr ghd_r = absd(g_gr(x + 1, y), g_gr(x, y));
74 
75         g_r(x, y) = select(ghd_r < gvd_r, gh_r, gv_r);
76 
77         Expr gv_b = avg(g_gr(x, y + 1), g_gr(x, y));
78         Expr gvd_b = absd(g_gr(x, y + 1), g_gr(x, y));
79         Expr gh_b = avg(g_gb(x - 1, y), g_gb(x, y));
80         Expr ghd_b = absd(g_gb(x - 1, y), g_gb(x, y));
81 
82         g_b(x, y) = select(ghd_b < gvd_b, gh_b, gv_b);
83 
84         // Next interpolate red at gr by first interpolating, then
85         // correcting using the error green would have had if we had
86         // interpolated it in the same way (i.e. add the second derivative
87         // of the green channel at the same place).
88         Expr correction;
89         correction = g_gr(x, y) - avg(g_r(x, y), g_r(x - 1, y));
90         r_gr(x, y) = correction + avg(r_r(x - 1, y), r_r(x, y));
91 
92         // Do the same for other reds and blues at green sites
93         correction = g_gr(x, y) - avg(g_b(x, y), g_b(x, y - 1));
94         b_gr(x, y) = correction + avg(b_b(x, y), b_b(x, y - 1));
95 
96         correction = g_gb(x, y) - avg(g_r(x, y), g_r(x, y + 1));
97         r_gb(x, y) = correction + avg(r_r(x, y), r_r(x, y + 1));
98 
99         correction = g_gb(x, y) - avg(g_b(x, y), g_b(x + 1, y));
100         b_gb(x, y) = correction + avg(b_b(x, y), b_b(x + 1, y));
101 
102         // Now interpolate diagonally to get red at blue and blue at
103         // red. Hold onto your hats; this gets really fancy. We do the
104         // same thing as for interpolating green where we try both
105         // directions (in this case the positive and negative diagonals),
106         // and use the one with the lowest absolute difference. But we
107         // also use the same trick as interpolating red and blue at green
108         // sites - we correct our interpolations using the second
109         // derivative of green at the same sites.
110 
111         correction = g_b(x, y) - avg(g_r(x, y), g_r(x - 1, y + 1));
112         Expr rp_b = correction + avg(r_r(x, y), r_r(x - 1, y + 1));
113         Expr rpd_b = absd(r_r(x, y), r_r(x - 1, y + 1));
114 
115         correction = g_b(x, y) - avg(g_r(x - 1, y), g_r(x, y + 1));
116         Expr rn_b = correction + avg(r_r(x - 1, y), r_r(x, y + 1));
117         Expr rnd_b = absd(r_r(x - 1, y), r_r(x, y + 1));
118 
119         r_b(x, y) = select(rpd_b < rnd_b, rp_b, rn_b);
120 
121         // Same thing for blue at red
122         correction = g_r(x, y) - avg(g_b(x, y), g_b(x + 1, y - 1));
123         Expr bp_r = correction + avg(b_b(x, y), b_b(x + 1, y - 1));
124         Expr bpd_r = absd(b_b(x, y), b_b(x + 1, y - 1));
125 
126         correction = g_r(x, y) - avg(g_b(x + 1, y), g_b(x, y - 1));
127         Expr bn_r = correction + avg(b_b(x + 1, y), b_b(x, y - 1));
128         Expr bnd_r = absd(b_b(x + 1, y), b_b(x, y - 1));
129 
130         b_r(x, y) = select(bpd_r < bnd_r, bp_r, bn_r);
131 
132         // Resulting color channels
133         Func r, g, b;
134 
135         // Interleave the resulting channels
136         r = interleave_y(interleave_x(r_gr, r_r),
137                          interleave_x(r_b, r_gb));
138         g = interleave_y(interleave_x(g_gr, g_r),
139                          interleave_x(g_b, g_gb));
140         b = interleave_y(interleave_x(b_gr, b_r),
141                          interleave_x(b_b, b_gb));
142 
143         output(x, y, c) = mux(c, {r(x, y), g(x, y), b(x, y)});
144 
145         // These are the stencil stages we want to schedule
146         // separately. Everything else we'll just inline.
147         intermediates.push_back(g_r);
148         intermediates.push_back(g_b);
149     }
150 
schedule()151     void schedule() {
152         Pipeline p(output);
153 
154         if (auto_schedule) {
155             // blank
156         } else if (get_target().has_gpu_feature()) {
157             Var xi, yi;
158             for (Func f : intermediates) {
159                 f.compute_at(intermed_compute_at).gpu_threads(x, y);
160             }
161             output.compute_at(output_compute_at)
162                 .unroll(x, 2)
163                 .gpu_threads(x, y)
164                 .reorder(c, x, y)
165                 .unroll(c);
166         } else {
167             int vec = get_target().natural_vector_size(UInt(16));
168             bool use_hexagon = get_target().features_any_of({Target::HVX_64, Target::HVX_128});
169             if (get_target().has_feature(Target::HVX_64)) {
170                 vec = 32;
171             } else if (get_target().has_feature(Target::HVX_128)) {
172                 vec = 64;
173             }
174             for (Func f : intermediates) {
175                 f.compute_at(intermed_compute_at)
176                     .store_at(intermed_store_at)
177                     .vectorize(x, 2 * vec, TailStrategy::RoundUp)
178                     .fold_storage(y, 4);
179             }
180             intermediates[1].compute_with(
181                 intermediates[0], x,
182                 {{x, LoopAlignStrategy::AlignStart}, {y, LoopAlignStrategy::AlignStart}});
183             output.compute_at(output_compute_at)
184                 .vectorize(x)
185                 .unroll(y)
186                 .reorder(c, x, y)
187                 .unroll(c);
188             if (use_hexagon) {
189                 output.hexagon();
190                 for (Func f : intermediates) {
191                     f.align_storage(x, vec);
192                 }
193             }
194         }
195 
196         /* Optional tags to specify layout for HalideTraceViz */
197         Halide::Trace::FuncConfig cfg;
198         cfg.pos = {860, 340 - 220};
199         cfg.max = 1024;
200         for (Func f : intermediates) {
201             std::string label = f.name();
202             std::replace(label.begin(), label.end(), '_', '@');
203             cfg.pos.y += 220;
204             cfg.labels = {{label}};
205             f.add_trace_tag(cfg.to_trace_tag());
206         }
207     }
208 
209 private:
210     // Intermediate stencil stages to schedule
211     vector<Func> intermediates;
212 };
213 
214 class CameraPipe : public Halide::Generator<CameraPipe> {
215 public:
216     // Parameterized output type, because LLVM PTX (GPU) backend does not
217     // currently allow 8-bit computations
218     GeneratorParam<Type> result_type{"result_type", UInt(8)};
219 
220     Input<Buffer<uint16_t>> input{"input", 2};
221     Input<Buffer<float>> matrix_3200{"matrix_3200", 2};
222     Input<Buffer<float>> matrix_7000{"matrix_7000", 2};
223     Input<float> color_temp{"color_temp"};
224     Input<float> gamma{"gamma"};
225     Input<float> contrast{"contrast"};
226     Input<float> sharpen_strength{"sharpen_strength"};
227     Input<int> blackLevel{"blackLevel"};
228     Input<int> whiteLevel{"whiteLevel"};
229 
230     Output<Buffer<uint8_t>> processed{"processed", 3};
231 
232     void generate();
233 
234 private:
235     Func hot_pixel_suppression(Func input);
236     Func deinterleave(Func raw);
237     Func apply_curve(Func input);
238     Func color_correct(Func input);
239     Func sharpen(Func input);
240 };
241 
hot_pixel_suppression(Func input)242 Func CameraPipe::hot_pixel_suppression(Func input) {
243 
244     Expr a = max(input(x - 2, y), input(x + 2, y),
245                  input(x, y - 2), input(x, y + 2));
246 
247     Func denoised;
248     denoised(x, y) = clamp(input(x, y), 0, a);
249 
250     return denoised;
251 }
252 
deinterleave(Func raw)253 Func CameraPipe::deinterleave(Func raw) {
254     // Deinterleave the color channels
255     Func deinterleaved("deinterleaved");
256 
257     deinterleaved(x, y, c) = mux(c,
258                                  {raw(2 * x, 2 * y),
259                                   raw(2 * x + 1, 2 * y),
260                                   raw(2 * x, 2 * y + 1),
261                                   raw(2 * x + 1, 2 * y + 1)});
262     return deinterleaved;
263 }
264 
color_correct(Func input)265 Func CameraPipe::color_correct(Func input) {
266     // Get a color matrix by linearly interpolating between two
267     // calibrated matrices using inverse kelvin.
268     Expr kelvin = color_temp;
269 
270     Func matrix;
271     Expr alpha = (1.0f / kelvin - 1.0f / 3200) / (1.0f / 7000 - 1.0f / 3200);
272     Expr val = (matrix_3200(x, y) * alpha + matrix_7000(x, y) * (1 - alpha));
273     matrix(x, y) = cast<int16_t>(val * 256.0f);  // Q8.8 fixed point
274 
275     if (!auto_schedule) {
276         matrix.compute_root();
277         if (get_target().has_gpu_feature()) {
278             matrix.gpu_single_thread();
279         }
280     }
281 
282     Func corrected;
283     Expr ir = cast<int32_t>(input(x, y, 0));
284     Expr ig = cast<int32_t>(input(x, y, 1));
285     Expr ib = cast<int32_t>(input(x, y, 2));
286 
287     Expr r = matrix(3, 0) + matrix(0, 0) * ir + matrix(1, 0) * ig + matrix(2, 0) * ib;
288     Expr g = matrix(3, 1) + matrix(0, 1) * ir + matrix(1, 1) * ig + matrix(2, 1) * ib;
289     Expr b = matrix(3, 2) + matrix(0, 2) * ir + matrix(1, 2) * ig + matrix(2, 2) * ib;
290 
291     r = cast<int16_t>(r / 256);
292     g = cast<int16_t>(g / 256);
293     b = cast<int16_t>(b / 256);
294     corrected(x, y, c) = mux(c, {r, g, b});
295 
296     return corrected;
297 }
298 
apply_curve(Func input)299 Func CameraPipe::apply_curve(Func input) {
300     // copied from FCam
301     Func curve("curve");
302 
303     Expr minRaw = 0 + blackLevel;
304     Expr maxRaw = whiteLevel;
305 
306     // How much to upsample the LUT by when sampling it.
307     int lutResample = 1;
308     if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
309         // On HVX, LUT lookups are much faster if they are to LUTs not
310         // greater than 256 elements, so we reduce the tonemap to 256
311         // elements and use linear interpolation to upsample it.
312         lutResample = 8;
313     }
314 
315     minRaw /= lutResample;
316     maxRaw /= lutResample;
317 
318     Expr invRange = 1.0f / (maxRaw - minRaw);
319     Expr b = 2.0f - pow(2.0f, contrast / 100.0f);
320     Expr a = 2.0f - 2.0f * b;
321 
322     // Get a linear luminance in the range 0-1
323     Expr xf = clamp(cast<float>(x - minRaw) * invRange, 0.0f, 1.0f);
324     // Gamma correct it
325     Expr g = pow(xf, 1.0f / gamma);
326     // Apply a piecewise quadratic contrast curve
327     Expr z = select(g > 0.5f,
328                     1.0f - (a * (1.0f - g) * (1.0f - g) + b * (1.0f - g)),
329                     a * g * g + b * g);
330 
331     // Convert to 8 bit and save
332     Expr val = cast(result_type, clamp(z * 255.0f + 0.5f, 0.0f, 255.0f));
333     // makeLUT add guard band outside of (minRaw, maxRaw]:
334     curve(x) = select(x <= minRaw, 0, select(x > maxRaw, 255, val));
335 
336     if (!auto_schedule) {
337         // It's a LUT, compute it once ahead of time.
338         curve.compute_root();
339         if (get_target().has_gpu_feature()) {
340             Var xi;
341             curve.gpu_tile(x, xi, 32);
342         }
343     }
344 
345     /* Optional tags to specify layout for HalideTraceViz */
346     {
347         Halide::Trace::FuncConfig cfg;
348         cfg.labels = {{"tone curve"}};
349         cfg.pos = {580, 1000};
350         curve.add_trace_tag(cfg.to_trace_tag());
351     }
352 
353     Func curved;
354 
355     if (lutResample == 1) {
356         // Use clamp to restrict size of LUT as allocated by compute_root
357         curved(x, y, c) = curve(clamp(input(x, y, c), 0, 1023));
358     } else {
359         // Use linear interpolation to sample the LUT.
360         Expr in = input(x, y, c);
361         Expr u0 = in / lutResample;
362         Expr u = in % lutResample;
363         Expr y0 = curve(clamp(u0, 0, 127));
364         Expr y1 = curve(clamp(u0 + 1, 0, 127));
365         curved(x, y, c) = cast<uint8_t>((cast<uint16_t>(y0) * lutResample + (y1 - y0) * u) / lutResample);
366     }
367 
368     return curved;
369 }
370 
sharpen(Func input)371 Func CameraPipe::sharpen(Func input) {
372     // Convert the sharpening strength to 2.5 fixed point. This allows sharpening in the range [0, 4].
373     Func sharpen_strength_x32("sharpen_strength_x32");
374     sharpen_strength_x32() = u8_sat(sharpen_strength * 32);
375     if (!auto_schedule) {
376         sharpen_strength_x32.compute_root();
377         if (get_target().has_gpu_feature()) {
378             sharpen_strength_x32.gpu_single_thread();
379         }
380     }
381 
382     /* Optional tags to specify layout for HalideTraceViz */
383     {
384         Halide::Trace::FuncConfig cfg;
385         cfg.labels = {{"sharpen strength"}};
386         cfg.pos = {10, 1000};
387         sharpen_strength_x32.add_trace_tag(cfg.to_trace_tag());
388     }
389 
390     // Make an unsharp mask by blurring in y, then in x.
391     Func unsharp_y("unsharp_y");
392     unsharp_y(x, y, c) = blur121(input(x, y - 1, c), input(x, y, c), input(x, y + 1, c));
393 
394     Func unsharp("unsharp");
395     unsharp(x, y, c) = blur121(unsharp_y(x - 1, y, c), unsharp_y(x, y, c), unsharp_y(x + 1, y, c));
396 
397     Func mask("mask");
398     mask(x, y, c) = cast<int16_t>(input(x, y, c)) - cast<int16_t>(unsharp(x, y, c));
399 
400     // Weight the mask with the sharpening strength, and add it to the
401     // input to get the sharpened result.
402     Func sharpened("sharpened");
403     sharpened(x, y, c) = u8_sat(input(x, y, c) + (mask(x, y, c) * sharpen_strength_x32()) / 32);
404 
405     return sharpened;
406 }
407 
generate()408 void CameraPipe::generate() {
409     // shift things inwards to give us enough padding on the
410     // boundaries so that we don't need to check bounds. We're going
411     // to make a 2560x1920 output image, just like the FCam pipe, so
412     // shift by 16, 12. We also convert it to be signed, so we can deal
413     // with values that fall below 0 during processing.
414     Func shifted;
415     shifted(x, y) = cast<int16_t>(input(x + 16, y + 12));
416 
417     Func denoised = hot_pixel_suppression(shifted);
418 
419     Func deinterleaved = deinterleave(denoised);
420 
421     auto demosaiced = create<Demosaic>();
422     demosaiced->apply(deinterleaved);
423 
424     Func corrected = color_correct(demosaiced->output);
425 
426     Func curved = apply_curve(corrected);
427 
428     processed(x, y, c) = sharpen(curved)(x, y, c);
429 
430     /* ESTIMATES */
431     // (This can be useful in conjunction with RunGen and benchmarks as well
432     // as auto-schedule, so we do it in all cases.)
433     input.set_estimates({{0, 2592}, {0, 1968}});
434     matrix_3200.set_estimates({{0, 4}, {0, 3}});
435     matrix_7000.set_estimates({{0, 4}, {0, 3}});
436     color_temp.set_estimate(3700);
437     gamma.set_estimate(2.0);
438     contrast.set_estimate(50);
439     sharpen_strength.set_estimate(1.0);
440     blackLevel.set_estimate(25);
441     whiteLevel.set_estimate(1023);
442     processed.set_estimates({{0, 2592}, {0, 1968}, {0, 3}});
443 
444     // Schedule
445     if (auto_schedule) {
446         // nothing
447     } else if (get_target().has_gpu_feature()) {
448 
449         // We can generate slightly better code if we know the output is even-sized
450         if (!auto_schedule) {
451             // TODO: The autoscheduler really ought to be able to
452             // accommodate bounds on the output Func.
453             Expr out_width = processed.width();
454             Expr out_height = processed.height();
455             processed.bound(c, 0, 3)
456                 .bound(x, 0, (out_width / 2) * 2)
457                 .bound(y, 0, (out_height / 2) * 2);
458         }
459 
460         Var xi, yi, xii, xio;
461 
462         /* These tile factors obtain 1391us on a gtx 980. */
463         int tile_x = 28;
464         int tile_y = 12;
465 
466         if (get_target().has_feature(Target::D3D12Compute)) {
467             // D3D12 SM 5.1 can only utilize a limited amount of
468             // shared memory, so we use a slightly smaller
469             // tile size.
470             tile_x = 20;
471             tile_y = 12;
472         }
473 
474         processed.compute_root()
475             .reorder(c, x, y)
476             .unroll(x, 2)
477             .gpu_tile(x, y, xi, yi, tile_x, tile_y);
478 
479         curved.compute_at(processed, x)
480             .unroll(x, 2)
481             .gpu_threads(x, y);
482 
483         corrected.compute_at(processed, x)
484             .unroll(x, 2)
485             .gpu_threads(x, y);
486 
487         demosaiced->output_compute_at.set({processed, x});
488         demosaiced->intermed_compute_at.set({processed, x});
489 
490         denoised.compute_at(processed, x)
491             .tile(x, y, xi, yi, 2, 2)
492             .unroll(xi)
493             .unroll(yi)
494             .gpu_threads(x, y);
495 
496         deinterleaved.compute_at(processed, x)
497             .unroll(x, 2)
498             .gpu_threads(x, y)
499             .reorder(c, x, y)
500             .unroll(c);
501 
502     } else {
503 
504         Expr out_width = processed.width();
505         Expr out_height = processed.height();
506 
507         // In HVX 128, we need 2 threads to saturate HVX with work,
508         //and in HVX 64 we need 4 threads, and on other devices,
509         // we might need many threads.
510         Expr strip_size;
511         if (get_target().has_feature(Target::HVX_128)) {
512             strip_size = processed.dim(1).extent() / 2;
513         } else if (get_target().has_feature(Target::HVX_64)) {
514             strip_size = processed.dim(1).extent() / 4;
515         } else {
516             strip_size = 32;
517         }
518         strip_size = (strip_size / 2) * 2;
519 
520         int vec = get_target().natural_vector_size(UInt(16));
521         if (get_target().has_feature(Target::HVX_64)) {
522             vec = 32;
523         } else if (get_target().has_feature(Target::HVX_128)) {
524             vec = 64;
525         }
526 
527         processed
528             .compute_root()
529             .reorder(c, x, y)
530             .split(y, yi, yii, 2, TailStrategy::RoundUp)
531             .split(yi, yo, yi, strip_size / 2)
532             .vectorize(x, 2 * vec, TailStrategy::RoundUp)
533             .unroll(c)
534             .parallel(yo);
535 
536         denoised
537             .compute_at(processed, yi)
538             .store_at(processed, yo)
539             .prefetch(input, y, 2)
540             .fold_storage(y, 16)
541             .tile(x, y, x, y, xi, yi, 2 * vec, 2)
542             .vectorize(xi)
543             .unroll(yi);
544 
545         deinterleaved
546             .compute_at(processed, yi)
547             .store_at(processed, yo)
548             .fold_storage(y, 8)
549             .reorder(c, x, y)
550             .vectorize(x, 2 * vec, TailStrategy::RoundUp)
551             .unroll(c);
552 
553         curved
554             .compute_at(processed, yi)
555             .store_at(processed, yo)
556             .reorder(c, x, y)
557             .tile(x, y, x, y, xi, yi, 2 * vec, 2, TailStrategy::RoundUp)
558             .vectorize(xi)
559             .unroll(yi)
560             .unroll(c);
561 
562         corrected
563             .compute_at(curved, x)
564             .reorder(c, x, y)
565             .vectorize(x)
566             .unroll(c);
567 
568         demosaiced->intermed_compute_at.set({processed, yi});
569         demosaiced->intermed_store_at.set({processed, yo});
570         demosaiced->output_compute_at.set({curved, x});
571 
572         if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
573             processed.hexagon();
574             denoised.align_storage(x, vec);
575             deinterleaved.align_storage(x, vec);
576             corrected.align_storage(x, vec);
577         }
578 
579         // We can generate slightly better code if we know the splits divide the extent.
580         processed
581             .bound(c, 0, 3)
582             .bound(x, 0, ((out_width) / (2 * vec)) * (2 * vec))
583             .bound(y, 0, (out_height / strip_size) * strip_size);
584 
585         /* Optional tags to specify layout for HalideTraceViz */
586         {
587             Halide::Trace::FuncConfig cfg;
588             cfg.max = 1024;
589             cfg.pos = {10, 348};
590             cfg.labels = {{"input"}};
591             input.add_trace_tag(cfg.to_trace_tag());
592 
593             cfg.pos = {305, 360};
594             cfg.labels = {{"denoised"}};
595             denoised.add_trace_tag(cfg.to_trace_tag());
596 
597             cfg.pos = {580, 120};
598             const int y_offset = 220;
599             cfg.strides = {{1, 0}, {0, 1}, {0, y_offset}};
600             cfg.labels = {
601                 {"gr", {0, 0 * y_offset}},
602                 {"r", {0, 1 * y_offset}},
603                 {"b", {0, 2 * y_offset}},
604                 {"gb", {0, 3 * y_offset}},
605             };
606             deinterleaved.add_trace_tag(cfg.to_trace_tag());
607 
608             cfg.color_dim = 2;
609             cfg.strides = {{1, 0}, {0, 1}, {0, 0}};
610             cfg.pos = {1140, 360};
611             cfg.labels = {{"demosaiced"}};
612             processed.add_trace_tag(cfg.to_trace_tag());
613 
614             cfg.pos = {1400, 360};
615             cfg.labels = {{"color-corrected"}};
616             corrected.add_trace_tag(cfg.to_trace_tag());
617 
618             cfg.max = 256;
619             cfg.pos = {1660, 360};
620             cfg.labels = {{"gamma-corrected"}};
621             curved.add_trace_tag(cfg.to_trace_tag());
622         }
623     }
624 };
625 
626 }  // namespace
627 
628 HALIDE_REGISTER_GENERATOR(CameraPipe, camera_pipe)
629