// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include <stdint.h>

#include <algorithm>
#include <cmath>   // std::sqrt
#include <limits>  // std::numeric_limits
#include <vector>

#include <hwy/aligned_allocator.h>  // hwy::AllocateAligned

#include "lib/jxl/ans_params.h"
#include "lib/jxl/aux_out.h"
#include "lib/jxl/aux_out_fwd.h"
#include "lib/jxl/base/padded_bytes.h"
#include "lib/jxl/base/profiler.h"
#include "lib/jxl/base/span.h"
#include "lib/jxl/coeff_order.h"
#include "lib/jxl/coeff_order_fwd.h"
#include "lib/jxl/dec_ans.h"
#include "lib/jxl/dec_bit_reader.h"
#include "lib/jxl/enc_ans.h"
#include "lib/jxl/enc_bit_writer.h"
#include "lib/jxl/entropy_coder.h"
#include "lib/jxl/lehmer_code.h"
#include "lib/jxl/modular/encoding/encoding.h"
#include "lib/jxl/modular/modular_image.h"

namespace jxl {

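// Returns a bitfield of coefficient-order indices (as given by kStrategyOrder)
// that occur in `rect` and for which a custom, encoder-chosen order may be
// worth signalling.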
uint32_t ComputeUsedOrders(const SpeedTier speed,
                           const AcStrategyImage& ac_strategy,
                           const Rect& rect) {
  // Use default orders for small images.
  if (ac_strategy.xsize() < 5 && ac_strategy.ysize() < 5) return 0;

  // Only uses DCT8 = 0, so bitfield = 1.
  if (speed == SpeedTier::kFalcon) return 1;

  uint32_t ret = 0;
  size_t xsize_blocks = rect.xsize();
  size_t ysize_blocks = rect.ysize();
  // TODO(veluca): precompute when doing DCT.
  for (size_t by = 0; by < ysize_blocks; ++by) {
    AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
    for (size_t bx = 0; bx < xsize_blocks; ++bx) {
      int ord = kStrategyOrder[acs_row[bx].RawStrategy()];
      // Do not customize coefficient orders for blocks bigger than 32x32.
      if (ord > 6) {
        continue;
      }
      ret |= 1u << ord;
    }
  }
  return ret;
}

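// For every order selected in `used_orders`, computes a coefficient order that
// sorts positions by how often their coefficient is zero in `acs`; bits of
// orders that turn out identical to the default order are cleared from
// `used_orders`.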
void ComputeCoeffOrder(SpeedTier speed, const ACImage& acs,
                       const AcStrategyImage& ac_strategy,
                       const FrameDimensions& frame_dim, uint32_t& used_orders,
                       coeff_order_t* JXL_RESTRICT order) {
  std::vector<int32_t> num_zeros(kCoeffOrderMaxSize);
  // If compressing at high speed and only using 8x8 DCTs, only consider a
  // subset of blocks.
  double block_fraction = 1.0f;
  // TODO(veluca): figure out why sampling blocks if non-8x8s are used makes
  // encoding significantly less dense.
  if (speed >= SpeedTier::kSquirrel && used_orders == 1) {
    block_fraction = 0.5f;
  }
  // No need to compute number of zero coefficients if all orders are the
  // default.
  if (used_orders != 0) {
    uint64_t threshold =
        (std::numeric_limits<uint64_t>::max() >> 32) * block_fraction;
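    // Each candidate block is kept with probability ~block_fraction by
    // comparing the top 32 bits of a xorshift128+ draw against `threshold`.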
    uint64_t s[2] = {0x94D049BB133111EBull, 0xBF58476D1CE4E5B9ull};
    // Xorshift128+ adapted from xorshift128+-inl.h
    auto use_sample = [&]() {
      auto s1 = s[0];
      const auto s0 = s[1];
      const auto bits = s1 + s0;  // b, c
      s[0] = s0;
      s1 ^= s1 << 23;
      s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
      s[1] = s1;
      return (bits >> 32) <= threshold;
    };

    // Count number of zero coefficients, separately for each DCT band.
    // TODO(veluca): precompute when doing DCT.
    for (size_t group_index = 0; group_index < frame_dim.num_groups;
         group_index++) {
      const size_t gx = group_index % frame_dim.xsize_groups;
      const size_t gy = group_index / frame_dim.xsize_groups;
      const Rect rect(gx * kGroupDimInBlocks, gy * kGroupDimInBlocks,
                      kGroupDimInBlocks, kGroupDimInBlocks,
                      frame_dim.xsize_blocks, frame_dim.ysize_blocks);
      ConstACPtr rows[3];
      ACType type = acs.Type();
      for (size_t c = 0; c < 3; c++) {
        rows[c] = acs.PlaneRow(c, group_index, 0);
      }
      size_t ac_offset = 0;

      // TODO(veluca): SIMDfy.
      for (size_t by = 0; by < rect.ysize(); ++by) {
        AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
        for (size_t bx = 0; bx < rect.xsize(); ++bx) {
          AcStrategy acs = acs_row[bx];
          if (!acs.IsFirstBlock()) continue;
          if (!use_sample()) continue;
          size_t size = kDCTBlockSize << acs.log2_covered_blocks();
          for (size_t c = 0; c < 3; ++c) {
            const size_t order_offset =
                CoeffOrderOffset(kStrategyOrder[acs.RawStrategy()], c);
            if (type == ACType::k16) {
              for (size_t k = 0; k < size; k++) {
                bool is_zero = rows[c].ptr16[ac_offset + k] == 0;
                num_zeros[order_offset + k] += is_zero ? 1 : 0;
              }
            } else {
              for (size_t k = 0; k < size; k++) {
                bool is_zero = rows[c].ptr32[ac_offset + k] == 0;
                num_zeros[order_offset + k] += is_zero ? 1 : 0;
              }
            }
            // Ensure LLFs are first in the order.
            size_t cx = acs.covered_blocks_x();
            size_t cy = acs.covered_blocks_y();
            CoefficientLayout(&cy, &cx);
            for (size_t iy = 0; iy < cy; iy++) {
              for (size_t ix = 0; ix < cx; ix++) {
                num_zeros[order_offset + iy * kBlockDim * cx + ix] = -1;
              }
            }
          }
          ac_offset += size;
        }
      }
    }
  }
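  // For each entry of kStrategyOrder, derive an order in which coefficients
  // that were zero less often come first. Zero counts are quantized (scaled by
  // 1/sqrt(size)) and the sort is stable, so near-ties keep the natural order.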
  struct PosAndCount {
    uint32_t pos;
    uint32_t count;
  };
  auto mem = hwy::AllocateAligned<PosAndCount>(AcStrategy::kMaxCoeffArea);

  uint16_t computed = 0;
  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
    uint8_t ord = kStrategyOrder[o];
    if (computed & (1 << ord)) continue;
    computed |= 1 << ord;
    AcStrategy acs = AcStrategy::FromRawStrategy(o);
    size_t sz = kDCTBlockSize * acs.covered_blocks_x() * acs.covered_blocks_y();
    // Ensure natural coefficient order is not permuted if the order is
    // not transmitted.
    if ((1 << ord) & ~used_orders) {
      for (size_t c = 0; c < 3; c++) {
        size_t offset = CoeffOrderOffset(ord, c);
        JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
        SetDefaultOrder(AcStrategy::FromRawStrategy(o), &order[offset]);
      }
      continue;
    }
    const coeff_order_t* natural_coeff_order = acs.NaturalCoeffOrder();

    bool is_nondefault = false;
    for (uint8_t c = 0; c < 3; c++) {
      // Apply zig-zag order.
      PosAndCount* pos_and_val = mem.get();
      size_t offset = CoeffOrderOffset(ord, c);
      JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
      float inv_sqrt_sz = 1.0f / std::sqrt(sz);
      for (size_t i = 0; i < sz; ++i) {
        size_t pos = natural_coeff_order[i];
        pos_and_val[i].pos = pos;
        // We don't care for the exact number -> quantize number of zeros,
        // to get less permuted order.
        pos_and_val[i].count = num_zeros[offset + pos] * inv_sqrt_sz + 0.1f;
      }

      // Stable-sort -> elements with same number of zeros will preserve their
      // order.
      auto comparator = [](const PosAndCount& a, const PosAndCount& b) -> bool {
        return a.count < b.count;
      };
      std::stable_sort(pos_and_val, pos_and_val + sz, comparator);

      // Grab indices.
      for (size_t i = 0; i < sz; ++i) {
        order[offset + i] = pos_and_val[i].pos;
        is_nondefault |= natural_coeff_order[i] != pos_and_val[i].pos;
      }
    }
    if (!is_nondefault) {
      used_orders &= ~(1 << ord);
    }
  }
}

namespace {

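// Tokenizes the Lehmer code of `order`: trailing zero entries (past `skip`)
// are trimmed, the remaining length is emitted first, and each Lehmer value is
// coded in a context that depends on the previous value.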
void TokenizePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
                         size_t size, std::vector<Token>* tokens) {
  std::vector<LehmerT> lehmer(size);
  std::vector<uint32_t> temp(size + 1);
  ComputeLehmerCode(order, temp.data(), size, lehmer.data());
  size_t end = size;
  while (end > skip && lehmer[end - 1] == 0) {
    --end;
  }
  tokens->emplace_back(CoeffOrderContext(size), end - skip);
  uint32_t last = 0;
  for (size_t i = skip; i < end; ++i) {
    tokens->emplace_back(CoeffOrderContext(last), lehmer[i]);
    last = lehmer[i];
  }
}

}  // namespace

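// Entropy-codes a single permutation: builds and writes the histograms, then
// the tokenized Lehmer code.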
void EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
                       size_t size, BitWriter* writer, int layer,
                       AuxOut* aux_out) {
  std::vector<std::vector<Token>> tokens(1);
  TokenizePermutation(order, skip, size, &tokens[0]);
  std::vector<uint8_t> context_map;
  EntropyEncodingData codes;
  BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, tokens,
                           &codes, &context_map, writer, layer, aux_out);
  WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out);
}

namespace {
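// Re-expresses `order` in terms of natural-order indices (the default order
// then becomes the identity permutation) and tokenizes it; the first `llf`
// entries, which hold the lowest-frequency coefficients, stay implicit.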
void EncodeCoeffOrder(const coeff_order_t* JXL_RESTRICT order, AcStrategy acs,
                      std::vector<Token>* tokens, coeff_order_t* order_zigzag) {
  const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
  const size_t size = kDCTBlockSize * llf;
  const coeff_order_t* natural_coeff_order_lut = acs.NaturalCoeffOrderLut();
  for (size_t i = 0; i < size; ++i) {
    order_zigzag[i] = natural_coeff_order_lut[order[i]];
  }
  TokenizePermutation(order_zigzag, llf, size, tokens);
}
}  // namespace

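// Writes every coefficient order whose bit is set in `used_orders`, tokenizing
// one permutation per (order, channel) pair; nothing is written when
// `used_orders` is zero.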
void EncodeCoeffOrders(uint16_t used_orders,
                       const coeff_order_t* JXL_RESTRICT order,
                       BitWriter* writer, size_t layer,
                       AuxOut* JXL_RESTRICT aux_out) {
  auto mem = hwy::AllocateAligned<coeff_order_t>(AcStrategy::kMaxCoeffArea);
  uint16_t computed = 0;
  std::vector<std::vector<Token>> tokens(1);
  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
    uint8_t ord = kStrategyOrder[o];
    if (computed & (1 << ord)) continue;
    computed |= 1 << ord;
    if ((used_orders & (1 << ord)) == 0) continue;
    AcStrategy acs = AcStrategy::FromRawStrategy(o);
    for (size_t c = 0; c < 3; c++) {
      EncodeCoeffOrder(&order[CoeffOrderOffset(ord, c)], acs, &tokens[0],
                       mem.get());
    }
  }
  // Do not write anything if no order is used.
  if (used_orders != 0) {
    std::vector<uint8_t> context_map;
    EntropyEncodingData codes;
    BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, tokens,
                             &codes, &context_map, writer, layer, aux_out);
    WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out);
  }
}

}  // namespace jxl