1 // Copyright 2017 Google Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Fast & simple JPEG encoder. Internal header. 16 // 17 // Author: Skal (pascal.massimino@gmail.com) 18 19 #ifndef SJPEG_JPEGI_H_ 20 #define SJPEG_JPEGI_H_ 21 22 #include "sjpeg.h" 23 #include "bit_writer.h" 24 25 #ifndef NULL 26 #define NULL 0 27 #endif 28 29 #if defined(__SSE2__) 30 #define SJPEG_USE_SSE2 31 #endif 32 33 #if defined(__ARM_NEON__) || defined(__aarch64__) 34 #define SJPEG_USE_NEON 35 #endif 36 37 #if defined(SJPEG_NEED_ASM_HEADERS) 38 #if defined(SJPEG_USE_SSE2) 39 #include <emmintrin.h> 40 #endif 41 42 #if defined(SJPEG_USE_NEON) 43 #include <arm_neon.h> 44 #endif 45 #endif // SJPEG_NEED_ASM_HEADERS 46 47 #include <assert.h> 48 49 //////////////////////////////////////////////////////////////////////////////// 50 51 namespace sjpeg { 52 53 extern bool SupportsSSE2(); 54 extern bool SupportsNEON(); 55 56 // Constants below are marker codes defined in JPEG spec 57 // ISO/IEC 10918-1 : 1993(E) Table B.1 58 // See also: http://www.w3.org/Graphics/JPEG/itu-t81.pdf 59 60 #define M_SOF0 0xffc0 61 #define M_SOF1 0xffc1 62 #define M_DHT 0xffc4 63 #define M_SOI 0xffd8 64 #define M_EOI 0xffd9 65 #define M_SOS 0xffda 66 #define M_DQT 0xffdb 67 68 // Forward 8x8 Fourier transforms, in-place. 
// 'coeffs' is both the input and the output of the transform.
// NOTE(review): 'num_blocks' presumably counts consecutive 64-coefficient
// blocks stored in 'coeffs' -- confirm against the implementations.
typedef void (*FdctFunc)(int16_t* coeffs, int num_blocks);
// Returns the transform implementation to use.
FdctFunc GetFdct();

// these are the default luma/chroma matrices (JPEG spec section K.1)
extern const uint8_t kDefaultMatrices[2][64];
extern const uint8_t kZigzag[64];

// scoring tables in score_7.cc
extern const int kRGBSize;
extern const uint8_t kSharpnessScore[];

// internal riskiness scoring functions:
extern double DCTRiskinessScore(const int16_t yuv[3 * 8],
                                int16_t scores[8 * 8]);
extern double BlockRiskinessScore(const uint8_t* rgb, int stride,
                                  int16_t scores[8 * 8]);
extern int YUVToRiskIdx(int16_t y, int16_t u, int16_t v);

///////////////////////////////////////////////////////////////////////////////
// RGB->YUV conversion

// convert 16x16 RGB block into YUV420, or 8x8 RGB block into YUV444
typedef void (*RGBToYUVBlockFunc)(const uint8_t* src, int src_stride,
                                  int16_t* blocks);
// Returns the block converter matching the requested chroma format.
extern RGBToYUVBlockFunc GetBlockFunc(bool use_444);

// convert a row of RGB samples to YUV444
// NOTE(review): the type name and the uint16_t output suggest that 'dst'
// receives one index per pixel rather than YUV samples -- confirm.
typedef void (*RGBToIndexRowFunc)(const uint8_t* src, int width,
                                  uint16_t* dst);
extern RGBToIndexRowFunc GetRowFunc();

// Enhanced slower RGB->YUV conversion:
//  y_plane[] has dimension W x H, whereas u_plane[] and v_plane[] have
//  dimension (W + 1)/2 x (H + 1)/2.
103 void ApplySharpYUVConversion(const uint8_t* const rgb, 104 int W, int H, int stride, 105 uint8_t* y_plane, 106 uint8_t* u_plane, uint8_t* v_plane); 107 108 /////////////////////////////////////////////////////////////////////////////// 109 // some useful helper functions around quant matrices 110 111 extern float GetQFactor(float q); // convert quality factor -> scale factor 112 extern void CopyQuantMatrix(const uint8_t in[64], uint8_t out[64]); 113 extern void SetQuantMatrix(const uint8_t in[64], float q_factor, 114 uint8_t out[64]); 115 extern void SetMinQuantMatrix(const uint8_t* const m, uint8_t out[64], 116 int tolerance); 117 118 //////////////////////////////////////////////////////////////////////////////// 119 // main structs 120 121 // Huffman tables 122 struct HuffmanTable { 123 uint8_t bits_[16]; // number of symbols per bit count 124 const uint8_t* syms_; // symbol map, in increasing bit length 125 uint8_t nb_syms_; // cached value of sum(bits_[]) 126 }; 127 128 // quantizer matrices 129 struct Quantizer { 130 uint8_t quant_[64]; // direct quantizer matrix 131 uint8_t min_quant_[64]; // min quantizer value allowed 132 uint16_t iquant_[64]; // precalc'd reciprocal for divisor 133 uint16_t qthresh_[64]; // minimal absolute value that produce non-zero coeff 134 uint16_t bias_[64]; // bias, for coring 135 const uint32_t* codes_; // codes for bit-cost calculation 136 }; 137 138 // compact Run/Level storage, separate from DCTCoeffs infos 139 // Run/Level Information is not yet entropy-coded, but just stored 140 struct RunLevel { 141 int16_t run_; 142 uint16_t level_; // 4bits for length, 12bits for mantissa 143 }; 144 145 // short infos about the block of quantized coefficients 146 struct DCTCoeffs { 147 int16_t last_; // last position (inclusive) of non-zero coeff 148 int16_t nb_coeffs_; // total number of non-zero AC coeffs 149 uint16_t dc_code_; // DC code (4bits for length, 12bits for suffix) 150 int8_t idx_; // component idx 151 int8_t bias_; // 
perceptual bias 152 }; 153 154 // Histogram of transform coefficients, for adaptive quant matrices 155 // * HSHIFT controls the trade-off between storage size for counts[] 156 // and precision: the fdct doesn't descale and returns coefficients as 157 // signed 16bit value. We are only interested in the absolute values 158 // of coefficients that are less than MAX_HISTO_DCT_COEFF, which are our 159 // best contributors. 160 // Still, storing histogram up to MAX_HISTO_DCT_COEFF can be costly, so 161 // we further aggregate the statistics in bins of size 1 << HSHIFT to save 162 // space. 163 // * HLAMBDA roughly measures how much you are willing to trade in distortion 164 // for a 1-bit gain in filesize. 165 // * QDELTA_MIN / QDELTA_MAX control how much we allow wandering around the 166 // initial point. This helps reducing the CPU cost, as long as keeping the 167 // optimization around the initial desired quality-factor (HLAMBDA also 168 // serve this purpose). 169 enum { HSHIFT = 2, // size of bins is (1 << HSHIFT) 170 HHALF = 1 << (HSHIFT - 1), 171 MAX_HISTO_DCT_COEFF = (1 << 7), // max coefficient, descaled by HSHIFT 172 HLAMBDA = 0x80, 173 // Limits on range of alternate quantizers explored around 174 // the initial value. (see details in AnalyseHisto()) 175 QDELTA_MIN = -12, QDELTA_MAX = 12, 176 QSIZE = QDELTA_MAX + 1 - QDELTA_MIN, 177 }; 178 179 struct Histo { 180 // Reserve one extra entry for counting all coeffs greater than 181 // MAX_HISTO_DCT_COEFF. Result isn't used, but it makes the loop easier. 182 int counts_[64][MAX_HISTO_DCT_COEFF + 1]; 183 }; 184 185 //////////////////////////////////////////////////////////////////////////////// 186 187 struct Encoder { 188 public: 189 Encoder(int W, int H, int step, const uint8_t* rgb, ByteSink* sink); 190 virtual ~Encoder(); OkEncoder191 bool Ok() const { return ok_; } 192 193 // setters 194 void SetQuality(float q); 195 void SetCompressionMethod(int method); 196 197 // all-in-one init from EncoderParam. 
198 bool InitFromParam(const EncoderParam& param); 199 200 // Main call. Return false in case of parameter error (setting empty output). 201 bool Encode(); 202 203 // these are colorspace-dependant. 204 virtual void InitComponents() = 0; 205 // return MCU samples at macroblock position (mb_x, mb_y) 206 // clipped is true if the MCU is clipped and needs replication 207 virtual void GetSamples(int mb_x, int mb_y, bool clipped, 208 int16_t* out_blocks) = 0; 209 210 private: 211 // setters 212 void SetQuantMatrices(const uint8_t m[2][64]); 213 void SetMinQuantMatrices(const uint8_t m[2][64], int tolerance); 214 void SetDefaultMinQuantMatrices(); 215 216 void SetQuantizationBias(int bias, bool use_adaptive); 217 void SetQuantizationDeltas(int qdelta_luma, int qdelta_chroma); 218 219 typedef enum { ICC, EXIF, XMP, MARKERS } MetadataType; 220 void SetMetadata(const std::string& data, MetadataType type); 221 222 private: 223 bool CheckBuffers(); // returns false in case of memory alloc error 224 225 void WriteAPP0(); 226 bool WriteAPPMarkers(const std::string& data); 227 bool WriteEXIF(const std::string& data); 228 bool WriteICCP(const std::string& data); 229 bool WriteXMP(const std::string& data); 230 void WriteDQT(); 231 void WriteSOF(); 232 void WriteDHT(); 233 void WriteSOS(); 234 void WriteEOI(); 235 236 void ResetDCs(); 237 238 // collect transformed coeffs (unquantized) only 239 void CollectCoeffs(); 240 241 // 2-pass Huffman optimizing scan 242 void ResetEntropyStats(); 243 void AddEntropyStats(const DCTCoeffs* const coeffs, 244 const RunLevel* const run_levels); 245 void CompileEntropyStats(); 246 void StoreOptimalHuffmanTables(size_t nb_mbs, const DCTCoeffs* coeffs); 247 248 void SinglePassScan(); // finalizing scan 249 void SinglePassScanOptimized(); // optimize the Huffman table + finalize scan 250 251 // quantize and compute run/levels from already stored coeffs 252 void StoreRunLevels(DCTCoeffs* coeffs); 253 // just write already stored run_levels & coeffs: 
254 void FinalPassScan(size_t nb_mbs, const DCTCoeffs* coeffs); 255 256 // dichotomy loop 257 void LoopScan(); 258 259 // Histogram pass 260 void CollectHistograms(); 261 262 void BuildHuffmanCodes(const HuffmanTable* const tab, 263 uint32_t* const codes); 264 265 typedef int (*QuantizeBlockFunc)(const int16_t in[64], int idx, 266 const Quantizer* const Q, 267 DCTCoeffs* const out, RunLevel* const rl); 268 static QuantizeBlockFunc quantize_block_; 269 static QuantizeBlockFunc GetQuantizeBlockFunc(); 270 271 static int TrellisQuantizeBlock(const int16_t in[64], int idx, 272 const Quantizer* const Q, 273 DCTCoeffs* const out, 274 RunLevel* const rl); 275 276 typedef uint32_t (*QuantizeErrorFunc)(const int16_t in[64], 277 const Quantizer* const Q); 278 static QuantizeErrorFunc quantize_error_; 279 static QuantizeErrorFunc GetQuantizeErrorFunc(); 280 281 void CodeBlock(const DCTCoeffs* const coeffs, const RunLevel* const rl); 282 // returns DC code (4bits for length, 12bits for suffix), updates DC_predictor 283 static uint16_t GenerateDCDiffCode(int DC, int* const DC_predictor); 284 285 static void FinalizeQuantMatrix(Quantizer* const q, int bias); 286 void SetCostCodes(int idx); 287 void InitCodes(bool only_ac); 288 289 size_t HeaderSize() const; 290 void BlocksSize(int nb_mbs, const DCTCoeffs* coeffs, 291 const RunLevel* rl, sjpeg::BitCounter* const bc) const; 292 float ComputeSize(const DCTCoeffs* coeffs); 293 float ComputePSNR() const; 294 295 protected: 296 bool SetError(); // sets ok_ to true 297 298 // format-specific parameters, set by virtual InitComponents() 299 enum { MAX_COMP = 3 }; 300 int nb_comps_; 301 int quant_idx_[MAX_COMP]; // indices for quantization matrices 302 int nb_blocks_[MAX_COMP]; // number of 8x8 blocks per components 303 uint8_t block_dims_[MAX_COMP]; // component dimensions (8-pixels units) 304 int block_w_, block_h_; // maximum mcu width / height 305 int mcu_blocks_; // total blocks in mcu (= sum of nb_blocks_[]) 306 307 // data 
accessible to sub-classes implementing alternate input format 308 int W_, H_, step_; // width, height, stride 309 int mb_w_, mb_h_; // width / height in units of mcu 310 const uint8_t* const rgb_; // samples 311 312 // Replicate an RGB source sub_w x sub_h block, expanding it to w x h size. 313 const uint8_t* GetReplicatedSamples(const uint8_t* rgb, // block source 314 int rgb_step, // stride in source 315 int sub_w, int sub_h, // sub-block size 316 int w, int h); // size of mcu 317 // Replicate an YUV sub-block similarly. 318 const uint8_t* GetReplicatedYUVSamples(const uint8_t* in, int step, 319 int sub_w, int sub_h, int w, int h); 320 // set blocks that are totally outside of the picture to an average value 321 void AverageExtraLuma(int sub_w, int sub_h, int16_t* out); 322 uint8_t replicated_buffer_[3 * 16 * 16]; // tmp buffer for replication 323 324 sjpeg::RGBToYUVBlockFunc get_yuv_block_; 325 static sjpeg::RGBToYUVBlockFunc get_yuv444_block_; SetYUVFormatEncoder326 void SetYUVFormat(bool use_444) { 327 get_yuv_block_ = sjpeg::GetBlockFunc(use_444); 328 } 329 bool adaptive_bias_; // if true, use per-block perceptual bias modulation 330 331 // Memory management AllocEncoder332 template<class T> T* Alloc(size_t num) { 333 assert(memory_hook_ != nullptr); 334 T* const ptr = reinterpret_cast<T*>(memory_hook_->Alloc(sizeof(T) * num)); 335 if (ptr == nullptr) SetError(); 336 return ptr; 337 } FreeEncoder338 template<class T> void Free(T* const ptr) { 339 memory_hook_->Free(reinterpret_cast<void*>(ptr)); 340 } 341 342 private: 343 bool ok_; // set to false if a new[] fails 344 sjpeg::BitWriter bw_; // output buffer 345 346 std::string iccp_, xmp_, exif_, app_markers_; // metadata 347 348 // compression tools. See sjpeg.h for description of methods. 
349 bool optimize_size_; // Huffman-optimize the codes (method 0, 3) 350 bool use_adaptive_quant_; // modulate the quant matrix (method 3-8) 351 bool use_extra_memory_; // save the unquantized coeffs (method 3, 4) 352 bool reuse_run_levels_; // save quantized run/levels (method 1, 4, 5) 353 bool use_trellis_; // use trellis-quantization (method 7, 8) 354 355 int q_bias_; // [0..255]: rounding bias for quant. of AC coeffs. 356 Quantizer quants_[2]; // quant matrices 357 int DCs_[3]; // DC predictors 358 359 // DCT coefficients storage, aligned 360 static const size_t ALIGN_CST = 15; 361 uint8_t* in_blocks_base_; // base memory for blocks 362 int16_t* in_blocks_; // aligned pointer to in_blocks_base_ 363 bool have_coeffs_; // true if the Fourier coefficients are stored 364 bool AllocateBlocks(size_t num_blocks); // returns false in case of error 365 void DesallocateBlocks(); 366 367 // these are for regular compression methods 0 or 2. 368 RunLevel base_run_levels_[64]; 369 370 // this is the extra memory for compression method 1 371 RunLevel* all_run_levels_; 372 size_t nb_run_levels_, max_run_levels_; 373 374 // Huffman_tables_ indices: 375 // 0: luma dc, 1: chroma dc, 2: luma ac, 3: chroma ac 376 const HuffmanTable *Huffman_tables_[4]; 377 uint32_t ac_codes_[2][256]; 378 uint32_t dc_codes_[2][12]; 379 380 // histograms for dynamic codes. Could be temporaries. 381 uint32_t freq_ac_[2][256 + 1]; // frequency distribution for AC coeffs 382 uint32_t freq_dc_[2][12 + 1]; // frequency distribution for DC coeffs 383 uint8_t opt_syms_ac_[2][256]; // optimal table for AC symbols 384 uint8_t opt_syms_dc_[2][12]; // optimal table for DC symbols 385 HuffmanTable opt_tables_ac_[2]; 386 HuffmanTable opt_tables_dc_[2]; 387 388 // Limits on how much we will decrease the bitrate in the luminance 389 // and chrominance channels (respectively). 
390 int qdelta_max_luma_; 391 int qdelta_max_chroma_; 392 393 // Histogram handling 394 395 // This function aggregates each 63 unquantized AC coefficients into an 396 // histogram for further analysis. 397 typedef void (*StoreHistoFunc)(const int16_t in[64], Histo* const histos, 398 int nb_blocks); 399 static StoreHistoFunc store_histo_; 400 static StoreHistoFunc GetStoreHistoFunc(); // select between the above. 401 402 // Provided the AC histograms have been stored with StoreHisto(), this 403 // function will analyze impact of varying the quantization scales around 404 // initial values, trading distortion for bit-rate in a controlled way. 405 void AnalyseHisto(); 406 void ResetHisto(); // initialize histos_[] 407 Histo histos_[2]; 408 409 // multi-pass parameters 410 int passes_; 411 SearchHook default_hook_; 412 SearchHook* search_hook_; 413 414 // lower memory management 415 MemoryManager* memory_hook_; 416 417 static const float kHistoWeight[QSIZE]; 418 419 static void (*fDCT_)(int16_t* in, int num_blocks); 420 static void InitializeStaticPointers(); 421 }; 422 423 //////////////////////////////////////////////////////////////////////////////// 424 425 } // namespace sjpeg 426 427 #endif // SJPEG_JPEGI_H_ 428