1 /*****************************************************************************
2  * This file is part of Kvazaar HEVC encoder.
3  *
4  * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without modification,
8  * are permitted provided that the following conditions are met:
9  *
10  * * Redistributions of source code must retain the above copyright notice, this
11  *   list of conditions and the following disclaimer.
12  *
13  * * Redistributions in binary form must reproduce the above copyright notice, this
14  *   list of conditions and the following disclaimer in the documentation and/or
15  *   other materials provided with the distribution.
16  *
17  * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26  * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
28  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  ****************************************************************************/
32 
33 #include "encoderstate.h"
34 
35 #include <math.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 
40 #include "cabac.h"
41 #include "context.h"
42 #include "encode_coding_tree.h"
43 #include "encoder_state-bitstream.h"
44 #include "filter.h"
45 #include "image.h"
46 #include "rate_control.h"
47 #include "sao.h"
48 #include "search.h"
49 #include "tables.h"
50 #include "threadqueue.h"
51 
52 #include "strategies/strategies-picture.h"
53 
54 
/**
 * \brief Pair each child state with the matching child of the previous
 *        frame's encoder state.
 *
 * Walks state->children until an entry with a null encoder_control is
 * reached. Every child gets its previous_encoder_state pointed at the child
 * with the same index under state->previous_encoder_state, and the pairing
 * is then repeated recursively for that child's own children.
 *
 * \param state   encoder state whose children are linked
 * \return        always 1
 */
int kvz_encoder_state_match_children_of_previous_frame(encoder_state_t * const state) {
  for (int idx = 0; state->children[idx].encoder_control; ++idx) {
    encoder_state_t * const child = &state->children[idx];
    encoder_state_t * const prev_child = &state->previous_encoder_state->children[idx];

    // The previous encoder state must have a child at the same index.
    assert(prev_child->encoder_control);

    child->previous_encoder_state = prev_child;
    kvz_encoder_state_match_children_of_previous_frame(child);
  }
  return 1;
}
65 
66 /**
67  * \brief Save edge pixels before SAO to buffers.
68  *
69  * Copies pixels at the edges of the area that will be filtered with SAO to
70  * the given buffers. If deblocking is enabled, the pixels must have been
71  * deblocked before this.
72  *
73  * The saved pixels will be needed later when doing SAO for the neighboring
74  * areas.
75  */
encoder_state_recdata_before_sao_to_bufs(encoder_state_t * const state,const lcu_order_element_t * const lcu,yuv_t * const hor_buf,yuv_t * const ver_buf)76 static void encoder_state_recdata_before_sao_to_bufs(
77     encoder_state_t * const state,
78     const lcu_order_element_t * const lcu,
79     yuv_t * const hor_buf,
80     yuv_t * const ver_buf)
81 {
82   videoframe_t* const frame = state->tile->frame;
83 
84   if (hor_buf && lcu->below) {
85     // Copy the bottommost row that will be filtered with SAO to the
86     // horizontal buffer.
87     vector2d_t pos = {
88       .x = lcu->position_px.x,
89       .y = lcu->position_px.y + LCU_WIDTH - SAO_DELAY_PX - 1,
90     };
91     // Copy all pixels that have been deblocked.
92     int length = lcu->size.x - DEBLOCK_DELAY_PX;
93 
94     if (!lcu->right) {
95       // If there is no LCU to the right, the last pixels will be
96       // filtered too.
97       length += DEBLOCK_DELAY_PX;
98     }
99 
100     if (lcu->left) {
101       // The rightmost pixels of the CTU to the left will also be filtered.
102       pos.x -= DEBLOCK_DELAY_PX;
103       length += DEBLOCK_DELAY_PX;
104     }
105 
106     const unsigned from_index = pos.x + pos.y * frame->rec->stride;
107     // NOTE: The horizontal buffer is indexed by
108     //    x_px + y_lcu * frame->width
109     // where x_px is in pixels and y_lcu in number of LCUs.
110     const unsigned to_index = pos.x + lcu->position.y * frame->width;
111 
112     kvz_pixels_blit(&frame->rec->y[from_index],
113                     &hor_buf->y[to_index],
114                     length, 1,
115                     frame->rec->stride,
116                     frame->width);
117 
118     if (state->encoder_control->chroma_format != KVZ_CSP_400) {
119       const unsigned from_index_c = (pos.x / 2) + (pos.y / 2) * frame->rec->stride / 2;
120       const unsigned to_index_c = (pos.x / 2) + lcu->position.y * frame->width / 2;
121 
122       kvz_pixels_blit(&frame->rec->u[from_index_c],
123                       &hor_buf->u[to_index_c],
124                       length / 2, 1,
125                       frame->rec->stride / 2,
126                       frame->width / 2);
127       kvz_pixels_blit(&frame->rec->v[from_index_c],
128                       &hor_buf->v[to_index_c],
129                       length / 2, 1,
130                       frame->rec->stride / 2,
131                       frame->width / 2);
132     }
133   }
134 
135   if (ver_buf && lcu->right) {
136     // Copy the rightmost column that will be filtered with SAO to the
137     // vertical buffer.
138     vector2d_t pos = {
139       .x = lcu->position_px.x + LCU_WIDTH - SAO_DELAY_PX - 1,
140       .y = lcu->position_px.y,
141     };
142     int length = lcu->size.y - DEBLOCK_DELAY_PX;
143 
144     if (!lcu->below) {
145       // If there is no LCU below, the last pixels will be filtered too.
146       length += DEBLOCK_DELAY_PX;
147     }
148 
149     if (lcu->above) {
150       // The bottommost pixels of the CTU above will also be filtered.
151       pos.y -= DEBLOCK_DELAY_PX;
152       length += DEBLOCK_DELAY_PX;
153     }
154 
155     const unsigned from_index = pos.x + pos.y * frame->rec->stride;
156     // NOTE: The vertical buffer is indexed by
157     //    x_lcu * frame->height + y_px
158     // where x_lcu is in number of LCUs and y_px in pixels.
159     const unsigned to_index = lcu->position.x * frame->height + pos.y;
160 
161     kvz_pixels_blit(&frame->rec->y[from_index],
162                     &ver_buf->y[to_index],
163                     1, length,
164                     frame->rec->stride, 1);
165 
166     if (state->encoder_control->chroma_format != KVZ_CSP_400) {
167       const unsigned from_index_c = (pos.x / 2) + (pos.y / 2) * frame->rec->stride / 2;
168       const unsigned to_index_c = lcu->position.x * frame->height / 2 + pos.y / 2;
169 
170       kvz_pixels_blit(&frame->rec->u[from_index_c],
171                       &ver_buf->u[to_index_c],
172                       1, length / 2,
173                       frame->rec->stride / 2, 1);
174       kvz_pixels_blit(&frame->rec->v[from_index_c],
175                       &ver_buf->v[to_index_c],
176                       1, length / 2,
177                       frame->rec->stride / 2, 1);
178     }
179   }
180 }
181 
/**
 * \brief Copy the last row and last column of an LCU to the given buffers.
 *
 * Reads the reconstructed pixels of the LCU from frame->rec. The bottom row
 * goes to hor_buf, which is indexed by x_px + y_lcu * frame->width, and the
 * rightmost column goes to ver_buf, which is indexed by
 * y_px + x_lcu * frame->height. Chroma is copied at half resolution unless
 * the chroma format is KVZ_CSP_400.
 *
 * \param state     encoder state, provides the frame and the chroma format
 * \param lcu       LCU whose edges are copied
 * \param hor_buf   destination of the bottom row, or NULL to skip it
 * \param ver_buf   destination of the right column, or NULL to skip it
 */
static void encoder_state_recdata_to_bufs(encoder_state_t * const state,
                                          const lcu_order_element_t * const lcu,
                                          yuv_t * const hor_buf,
                                          yuv_t * const ver_buf)
{
  videoframe_t* const frame = state->tile->frame;

  if (hor_buf) {
    // Bottom row of this LCU.
    const int first_x = lcu->position_px.x;
    const int last_y  = lcu->position_px.y + lcu->size.y - 1;
    const int row_lcu = lcu->position.y;

    unsigned src_luma = last_y * frame->rec->stride + first_x;
    unsigned dst_luma = lcu->position_px.x + row_lcu * frame->width;

    kvz_pixels_blit(&frame->rec->y[src_luma],
                    &hor_buf->y[dst_luma],
                    lcu->size.x, 1,
                    frame->rec->stride, frame->width);

    if (state->encoder_control->chroma_format != KVZ_CSP_400) {
      unsigned src_chroma = (last_y / 2) * frame->rec->stride / 2 + (first_x / 2);
      unsigned dst_chroma = lcu->position_px.x / 2 + row_lcu * frame->width / 2;

      kvz_pixels_blit(&frame->rec->u[src_chroma],
                      &hor_buf->u[dst_chroma],
                      lcu->size.x / 2, 1,
                      frame->rec->stride / 2, frame->width / 2);
      kvz_pixels_blit(&frame->rec->v[src_chroma],
                      &hor_buf->v[dst_chroma],
                      lcu->size.x / 2, 1,
                      frame->rec->stride / 2, frame->width / 2);
    }
  }

  if (ver_buf) {
    // Rightmost column of this LCU.
    const int last_x  = lcu->position_px.x + lcu->size.x - 1;
    const int first_y = lcu->position_px.y;
    const int col_lcu = lcu->position.x;

    kvz_pixels_blit(&frame->rec->y[first_y * frame->rec->stride + last_x],
                    &ver_buf->y[lcu->position_px.y + col_lcu * frame->height],
                    1, lcu->size.y,
                    frame->rec->stride, 1);

    if (state->encoder_control->chroma_format != KVZ_CSP_400) {
      unsigned src_chroma = (first_y / 2) * frame->rec->stride / 2 + (last_x / 2);
      unsigned dst_chroma = lcu->position_px.y / 2 + col_lcu * frame->height / 2;

      kvz_pixels_blit(&frame->rec->u[src_chroma],
                      &ver_buf->u[dst_chroma],
                      1, lcu->size.y / 2,
                      frame->rec->stride / 2, 1);
      kvz_pixels_blit(&frame->rec->v[src_chroma],
                      &ver_buf->v[dst_chroma],
                      1, lcu->size.y / 2,
                      frame->rec->stride / 2, 1);
    }
  }

}
244 
/**
 * \brief Do SAO reconstruction for all available pixels.
 *
 * Does SAO reconstruction for all pixels that are available after the
 * given LCU has been deblocked. This means the following pixels:
 *  - bottom-right block of SAO_DELAY_PX times SAO_DELAY_PX in the LCU to
 *    the left and up
 *  - the rightmost SAO_DELAY_PX pixels of the LCU to the left (excluding
 *    the bottommost pixels)
 *  - the bottommost SAO_DELAY_PX pixels of the LCU above (excluding the
 *    rightmost pixels)
 *  - all pixels inside the LCU, excluding the rightmost SAO_DELAY_PX and
 *    bottommost SAO_DELAY_PX
 *
 * When there is no LCU to the right or below, the rightmost or bottommost
 * pixels of this LCU are filtered here as well.
 *
 * The unfiltered input pixels are gathered into local buffers from
 * frame->rec and from the tile's hor_buf_before_sao / ver_buf_before_sao,
 * and kvz_sao_reconstruct is then called once per sub-area.
 *
 * \param state   encoder state, provides the tile, frame and chroma format
 * \param lcu     LCU that has just been deblocked
 */
static void encoder_sao_reconstruct(const encoder_state_t *const state,
                                    const lcu_order_element_t *const lcu)
{
  videoframe_t *const frame = state->tile->frame;


  // Temporary buffers for SAO input pixels. The buffers cover the pixels
  // inside the LCU (LCU_WIDTH x LCU_WIDTH), SAO_DELAY_PX wide bands to the
  // left and above the LCU, and one pixel border on the left and top
  // sides. We add two extra pixels to the buffers because the AVX2 SAO
  // reconstruction reads up to two extra bytes when using edge SAO in the
  // horizontal direction.
  // NOTE: These macros are defined inside the function body and are not
  // undefined afterwards.
#define SAO_BUF_WIDTH   (1 + SAO_DELAY_PX   + LCU_WIDTH)
#define SAO_BUF_WIDTH_C (1 + SAO_DELAY_PX/2 + LCU_WIDTH_C)
  kvz_pixel sao_buf_y_array[SAO_BUF_WIDTH   * SAO_BUF_WIDTH   + 2];
  kvz_pixel sao_buf_u_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C + 2];
  kvz_pixel sao_buf_v_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C + 2];

  // Pointers to the top-left pixel of the LCU in the buffers. They point
  // (SAO_DELAY_PX + 1) rows and columns into the arrays, so the negative
  // indices used below for the bands and borders stay inside the arrays.
  kvz_pixel *const sao_buf_y = &sao_buf_y_array[(SAO_DELAY_PX + 1) * (SAO_BUF_WIDTH + 1)];
  kvz_pixel *const sao_buf_u = &sao_buf_u_array[(SAO_DELAY_PX/2 + 1) * (SAO_BUF_WIDTH_C + 1)];
  kvz_pixel *const sao_buf_v = &sao_buf_v_array[(SAO_DELAY_PX/2 + 1) * (SAO_BUF_WIDTH_C + 1)];

  // Horizontal boundaries of the areas to filter, relative to the left
  // edge of the LCU: [0]..[1] lies in the LCU to the left and [1]..[2] in
  // this LCU.
  const int x_offsets[3] = {
    // If there is an lcu to the left, we need to filter its rightmost
    // pixels.
    lcu->left ? -SAO_DELAY_PX : 0,
    0,
    // If there is an lcu to the right, the rightmost pixels of this LCU
    // are filtered when filtering that LCU. Otherwise we filter them now.
    lcu->size.x - (lcu->right ? SAO_DELAY_PX : 0),
  };

  // Vertical boundaries of the areas to filter, relative to the top edge
  // of the LCU: [0]..[1] lies in the LCU above and [1]..[2] in this LCU.
  const int y_offsets[3] = {
    // If there is an lcu above, we need to filter its bottommost pixels.
    lcu->above ? -SAO_DELAY_PX : 0,
    0,
    // If there is an lcu below, the bottommost pixels of this LCU are
    // filtered when filtering that LCU. Otherwise we filter them now.
    lcu->size.y - (lcu->below ? SAO_DELAY_PX : 0),
  };

  // Number of pixels around the block that need to be copied to the
  // buffers.
  const int border_left  = lcu->left  ? 1 : 0;
  const int border_right = lcu->right ? 1 : 0;
  const int border_above = lcu->above ? 1 : 0;
  const int border_below = lcu->below ? 1 : 0;

  // Index of the pixel at the intersection of the top and left borders.
  // The index is relative to sao_buf_y / sao_buf_u / sao_buf_v and is zero
  // or negative.
  const int border_index = (x_offsets[0] - border_left) +
                           (y_offsets[0] - border_above) * SAO_BUF_WIDTH;
  const int border_index_c = (x_offsets[0]/2 - border_left) +
                             (y_offsets[0]/2 - border_above) * SAO_BUF_WIDTH_C;
  // Width and height of the whole area to filter.
  const int width  = x_offsets[2] - x_offsets[0];
  const int height = y_offsets[2] - y_offsets[0];

  // Copy bordering pixels from above and left to buffers.
  if (lcu->above) {
    // The top border row is read from the horizontal pre-SAO buffer at the
    // row index of the LCU above.
    const int from_index = (lcu->position_px.x + x_offsets[0] - border_left) +
                           (lcu->position.y - 1) * frame->width;
    kvz_pixels_blit(&state->tile->hor_buf_before_sao->y[from_index],
                    &sao_buf_y[border_index],
                    width + border_left + border_right,
                    1,
                    frame->width,
                    SAO_BUF_WIDTH);
    if (state->encoder_control->chroma_format != KVZ_CSP_400) {
      const int from_index_c = (lcu->position_px.x + x_offsets[0])/2 - border_left +
                               (lcu->position.y - 1) * frame->width/2;
      kvz_pixels_blit(&state->tile->hor_buf_before_sao->u[from_index_c],
                      &sao_buf_u[border_index_c],
                      width/2 + border_left + border_right,
                      1,
                      frame->width/2,
                      SAO_BUF_WIDTH_C);
      kvz_pixels_blit(&state->tile->hor_buf_before_sao->v[from_index_c],
                      &sao_buf_v[border_index_c],
                      width/2 + border_left + border_right,
                      1,
                      frame->width/2,
                      SAO_BUF_WIDTH_C);
    }
  }
  if (lcu->left) {
    // The left border column is read from the vertical pre-SAO buffer at
    // the column index of the LCU to the left.
    const int from_index = (lcu->position.x - 1) * frame->height +
                           (lcu->position_px.y + y_offsets[0] - border_above);
    kvz_pixels_blit(&state->tile->ver_buf_before_sao->y[from_index],
                    &sao_buf_y[border_index],
                    1,
                    height + border_above + border_below,
                    1,
                    SAO_BUF_WIDTH);
    if (state->encoder_control->chroma_format != KVZ_CSP_400) {
      const int from_index_c = (lcu->position.x - 1) * frame->height/2 +
                               (lcu->position_px.y + y_offsets[0])/2 - border_above;
      kvz_pixels_blit(&state->tile->ver_buf_before_sao->u[from_index_c],
                      &sao_buf_u[border_index_c],
                      1,
                      height/2 + border_above + border_below,
                      1,
                      SAO_BUF_WIDTH_C);
      kvz_pixels_blit(&state->tile->ver_buf_before_sao->v[from_index_c],
                      &sao_buf_v[border_index_c],
                      1,
                      height/2 + border_above + border_below,
                      1,
                      SAO_BUF_WIDTH_C);
    }
  }
  // Copy pixels that will be filtered and bordering pixels from right and
  // below. These come straight from the reconstructed frame.
  const int from_index = (lcu->position_px.x + x_offsets[0]) +
                         (lcu->position_px.y + y_offsets[0]) * frame->rec->stride;
  const int to_index = x_offsets[0] + y_offsets[0] * SAO_BUF_WIDTH;
  kvz_pixels_blit(&frame->rec->y[from_index],
                  &sao_buf_y[to_index],
                  width + border_right,
                  height + border_below,
                  frame->rec->stride,
                  SAO_BUF_WIDTH);
  if (state->encoder_control->chroma_format != KVZ_CSP_400) {
    const int from_index_c = (lcu->position_px.x + x_offsets[0])/2 +
                             (lcu->position_px.y + y_offsets[0])/2 * frame->rec->stride/2;
    const int to_index_c = x_offsets[0]/2 + y_offsets[0]/2 * SAO_BUF_WIDTH_C;
    kvz_pixels_blit(&frame->rec->u[from_index_c],
                    &sao_buf_u[to_index_c],
                    width/2 + border_right,
                    height/2 + border_below,
                    frame->rec->stride/2,
                    SAO_BUF_WIDTH_C);
    kvz_pixels_blit(&frame->rec->v[from_index_c],
                    &sao_buf_v[to_index_c],
                    width/2 + border_right,
                    height/2 + border_below,
                    frame->rec->stride/2,
                    SAO_BUF_WIDTH_C);
  }

  // We filter the pixels in four parts:
  //  1. Pixels that belong to the LCU above and to the left
  //  2. Pixels that belong to the LCU above
  //  3. Pixels that belong to the LCU to the left
  //  4. Pixels that belong to the current LCU
  // Each part uses the SAO parameters of the LCU it belongs to.
  for (int y_offset_index = 0; y_offset_index < 2; y_offset_index++) {
    for (int x_offset_index = 0; x_offset_index < 2; x_offset_index++) {
      const int x = x_offsets[x_offset_index];
      const int y = y_offsets[y_offset_index];
      // Size of this part only. These shadow the width and height of the
      // whole area declared above.
      const int width = x_offsets[x_offset_index + 1] - x;
      const int height = y_offsets[y_offset_index + 1] - y;

      // Parts in a missing neighbor are empty.
      if (width == 0 || height == 0) continue;

      // Find the LCU that contains the top-left pixel of this part.
      const int lcu_x = (lcu->position_px.x + x) >> LOG2_LCU_WIDTH;
      const int lcu_y = (lcu->position_px.y + y) >> LOG2_LCU_WIDTH;
      const int lcu_index = lcu_x + lcu_y * frame->width_in_lcu;
      const sao_info_t *sao_luma   = &frame->sao_luma[lcu_index];
      const sao_info_t *sao_chroma = &frame->sao_chroma[lcu_index];

      kvz_sao_reconstruct(state,
                          &sao_buf_y[x + y * SAO_BUF_WIDTH],
                          SAO_BUF_WIDTH,
                          lcu->position_px.x + x,
                          lcu->position_px.y + y,
                          width,
                          height,
                          sao_luma,
                          COLOR_Y);

      if (state->encoder_control->chroma_format != KVZ_CSP_400) {
        // Coordinates in chroma pixels.
        int x_c = x >> 1;
        int y_c = y >> 1;

        kvz_sao_reconstruct(state,
                            &sao_buf_u[x_c + y_c * SAO_BUF_WIDTH_C],
                            SAO_BUF_WIDTH_C,
                            lcu->position_px.x / 2 + x_c,
                            lcu->position_px.y / 2 + y_c,
                            width / 2,
                            height / 2,
                            sao_chroma,
                            COLOR_U);
        kvz_sao_reconstruct(state,
                            &sao_buf_v[x_c + y_c * SAO_BUF_WIDTH_C],
                            SAO_BUF_WIDTH_C,
                            lcu->position_px.x / 2 + x_c,
                            lcu->position_px.y / 2 + y_c,
                            width / 2,
                            height / 2,
                            sao_chroma,
                            COLOR_V);
      }
    }
  }
}
456 
/**
 * \brief Write the SAO parameters of one color component with CABAC.
 *
 * The type index is written for Y and for U only; nothing is written for
 * it when color_i is COLOR_V. When the SAO type is not SAO_TYPE_NONE, the
 * absolute offsets follow. Band offset additionally writes the signs of
 * the non-zero offsets and the band position. Edge offset writes the edge
 * class, again only for Y and U.
 *
 * \param state     encoder state whose CABAC coder is written to
 * \param sao       SAO parameters to encode
 * \param color_i   color component the parameters are written for
 */
static void encode_sao_color(encoder_state_t * const state, sao_info_t *sao,
                             color_t color_i)
{
  cabac_data_t * const cabac = &state->cabac;
  sao_eo_cat cat;
  // Offsets for V are read starting five entries after the first one.
  const int offset_index = (color_i == COLOR_V) ? 5 : 0;
  // Band position entry to use: 1 for V, 0 otherwise.
  const int band_index = (color_i == COLOR_V) ? 1 : 0;

  //FIXME: for now, we always have SAO for all channels, so no color is
  // skipped here.

  /// sao_type_idx_luma:   TR, cMax = 2, cRiceParam = 0, bins = {0, bypass}
  /// sao_type_idx_chroma: TR, cMax = 2, cRiceParam = 0, bins = {0, bypass}
  // Encode sao_type_idx for Y and U+V.
  if (color_i != COLOR_V) {
    cabac->cur_ctx = &(cabac->ctx.sao_type_idx_model);
    CABAC_BIN(cabac, sao->type != SAO_TYPE_NONE, "sao_type_idx");

    switch (sao->type) {
      case SAO_TYPE_BAND: {
        CABAC_BIN_EP(cabac, 0, "sao_type_idx_ep");
        break;
      }
      case SAO_TYPE_EDGE: {
        CABAC_BIN_EP(cabac, 1, "sao_type_idx_ep");
        break;
      }
      default:
        break;
    }
  }

  // Without SAO there are no offsets to code.
  if (sao->type == SAO_TYPE_NONE) return;

  /// sao_offset_abs[][][][]: TR, cMax = (1 << (Min(bitDepth, 10) - 5)) - 1,
  ///                         cRiceParam = 0, bins = {bypass x N}
  for (cat = SAO_EO_CAT1; cat <= SAO_EO_CAT4; ++cat) {
    const int magnitude = abs(sao->offsets[cat + offset_index]);
    kvz_cabac_write_unary_max_symbol_ep(cabac, magnitude, SAO_ABS_OFFSET_MAX);
  }

  if (sao->type != SAO_TYPE_BAND) {
    // The edge class is written with a bin count of 2, and only for Y
    // and U.
    if (color_i != COLOR_V) {
      CABAC_BINS_EP(cabac, sao->eo_class, 2, "sao_eo_class");
    }
    return;
  }

  // Band offset: signs of the non-zero offsets, then the band position.
  /// sao_offset_sign[][][][]: FL, cMax = 1, bins = {bypass}
  for (cat = SAO_EO_CAT1; cat <= SAO_EO_CAT4; ++cat) {
    const int offset = sao->offsets[cat + offset_index];
    // Positive sign is coded as 0.
    if (offset != 0) {
      CABAC_BIN_EP(cabac, offset < 0 ? 1 : 0, "sao_offset_sign");
    }
  }

  // TODO: sao_band_position
  /// sao_band_position[][][]: FL, cMax = 31 (5 bits), bins = {bypass x N}
  CABAC_BINS_EP(cabac, sao->band_position[band_index], 5, "sao_band_position");
}
508 
/**
 * \brief Write the SAO merge flags of a CTB with CABAC.
 *
 * sao_merge_left_flag is written when x_ctb is non-zero. sao_merge_up_flag
 * is written when y_ctb is non-zero and the merge left flag is not set.
 *
 * \param state   encoder state whose CABAC coder is written to
 * \param sao     SAO parameters holding the merge flags
 * \param x_ctb   x-coordinate of the CTB, in CTBs
 * \param y_ctb   y-coordinate of the CTB, in CTBs
 */
static void encode_sao_merge_flags(encoder_state_t * const state, sao_info_t *sao, unsigned x_ctb, unsigned y_ctb)
{
  cabac_data_t * const cabac = &state->cabac;

  // There is nothing to merge with on the left in the first column.
  if (x_ctb != 0) {
    cabac->cur_ctx = &(cabac->ctx.sao_merge_flag_model);
    CABAC_BIN(cabac, sao->merge_left_flag, "sao_merge_left_flag");
  }

  // The up flag is absent in the first row and when merging left.
  if (y_ctb == 0 || sao->merge_left_flag) return;

  cabac->cur_ctx = &(cabac->ctx.sao_merge_flag_model);
  CABAC_BIN(cabac, sao->merge_up_flag, "sao_merge_up_flag");
}
522 
523 
524 /**
525  * \brief Encode SAO information.
526  */
encode_sao(encoder_state_t * const state,unsigned x_lcu,uint16_t y_lcu,sao_info_t * sao_luma,sao_info_t * sao_chroma)527 static void encode_sao(encoder_state_t * const state,
528                        unsigned x_lcu, uint16_t y_lcu,
529                        sao_info_t *sao_luma, sao_info_t *sao_chroma)
530 {
531   // TODO: transmit merge flags outside sao_info
532   encode_sao_merge_flags(state, sao_luma, x_lcu, y_lcu);
533 
534   // If SAO is merged, nothing else needs to be coded.
535   if (!sao_luma->merge_left_flag && !sao_luma->merge_up_flag) {
536     encode_sao_color(state, sao_luma, COLOR_Y);
537     if (state->encoder_control->chroma_format != KVZ_CSP_400) {
538       encode_sao_color(state, sao_chroma, COLOR_U);
539       encode_sao_color(state, sao_chroma, COLOR_V);
540     }
541   }
542 }
543 
544 
545 /**
546  * \brief Sets the QP for each CU in state->tile->frame->cu_array.
547  *
548  * The QPs are used in deblocking and QP prediction.
549  *
550  * The QP delta for a quantization group is coded when the first CU with
551  * coded block flag set is encountered. Hence, for the purposes of
552  * deblocking and QP prediction, all CUs in before the first one that has
553  * cbf set use the QP predictor and all CUs after that use (QP predictor
554  * + QP delta).
555  *
556  * \param state           encoder state
557  * \param x               x-coordinate of the left edge of the root CU
558  * \param y               y-coordinate of the top edge of the root CU
559  * \param depth           depth in the CU quadtree
560  * \param last_qp         QP of the last CU in the last quantization group
561  * \param prev_qp         -1 if QP delta has not been coded in current QG,
562  *                        otherwise the QP of the current QG
563  */
set_cu_qps(encoder_state_t * state,int x,int y,int depth,int * last_qp,int * prev_qp)564 static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *last_qp, int *prev_qp)
565 {
566 
567   // Stop recursion if the CU is completely outside the frame.
568   if (x >= state->tile->frame->width || y >= state->tile->frame->height) return;
569 
570   cu_info_t *cu = kvz_cu_array_at(state->tile->frame->cu_array, x, y);
571   const int cu_width = LCU_WIDTH >> depth;
572 
573   if (depth <= state->encoder_control->max_qp_delta_depth) {
574     *prev_qp = -1;
575   }
576 
577   if (cu->depth > depth) {
578     // Recursively process sub-CUs.
579     const int d = cu_width >> 1;
580     set_cu_qps(state, x,     y,     depth + 1, last_qp, prev_qp);
581     set_cu_qps(state, x + d, y,     depth + 1, last_qp, prev_qp);
582     set_cu_qps(state, x,     y + d, depth + 1, last_qp, prev_qp);
583     set_cu_qps(state, x + d, y + d, depth + 1, last_qp, prev_qp);
584 
585   } else {
586     bool cbf_found = *prev_qp >= 0;
587 
588     if (cu->tr_depth > depth) {
589       // The CU is split into smaller transform units. Check whether coded
590       // block flag is set for any of the TUs.
591       const int tu_width = LCU_WIDTH >> cu->tr_depth;
592       for (int y_scu = y; !cbf_found && y_scu < y + cu_width; y_scu += tu_width) {
593         for (int x_scu = x; !cbf_found && x_scu < x + cu_width; x_scu += tu_width) {
594           cu_info_t *tu = kvz_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu);
595           if (cbf_is_set_any(tu->cbf, cu->depth)) {
596             cbf_found = true;
597           }
598         }
599       }
600     } else if (cbf_is_set_any(cu->cbf, cu->depth)) {
601       cbf_found = true;
602     }
603 
604     int8_t qp;
605     if (cbf_found) {
606       *prev_qp = qp = cu->qp;
607     } else {
608       qp = kvz_get_cu_ref_qp(state, x, y, *last_qp);
609     }
610 
611     // Set the correct QP for all state->tile->frame->cu_array elements in
612     // the area covered by the CU.
613     for (int y_scu = y; y_scu < y + cu_width; y_scu += SCU_WIDTH) {
614       for (int x_scu = x; x_scu < x + cu_width; x_scu += SCU_WIDTH) {
615         kvz_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu)->qp = qp;
616       }
617     }
618 
619     if (is_last_cu_in_qg(state, x, y, depth)) {
620       *last_qp = cu->qp;
621     }
622   }
623 }
624 
625 
encoder_state_worker_encode_lcu(void * opaque)626 static void encoder_state_worker_encode_lcu(void * opaque)
627 {
628   const lcu_order_element_t * const lcu = opaque;
629   encoder_state_t *state = lcu->encoder_state;
630   const encoder_control_t * const encoder = state->encoder_control;
631   videoframe_t* const frame = state->tile->frame;
632 
633   switch (encoder->cfg.rc_algorithm) {
634     case KVZ_NO_RC:
635     case KVZ_LAMBDA:
636       kvz_set_lcu_lambda_and_qp(state, lcu->position);
637       break;
638     case KVZ_OBA:
639       kvz_set_ctu_qp_lambda(state, lcu->position);
640       break;
641     default:
642       assert(0);
643   }
644 
645   lcu_coeff_t coeff;
646   state->coeff = &coeff;
647 
648   //This part doesn't write to bitstream, it's only search, deblock and sao
649   kvz_search_lcu(state, lcu->position_px.x, lcu->position_px.y, state->tile->hor_buf_search, state->tile->ver_buf_search);
650 
651   encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search);
652 
653   if (encoder->max_qp_delta_depth >= 0) {
654     int last_qp = state->last_qp;
655     int prev_qp = -1;
656     set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, &last_qp, &prev_qp);
657   }
658 
659   if (encoder->cfg.deblock_enable) {
660     kvz_filter_deblock_lcu(state, lcu->position_px.x, lcu->position_px.y);
661   }
662 
663   if (encoder->cfg.sao_type) {
664     // Save the post-deblocking but pre-SAO pixels of the LCU to a buffer
665     // so that they can be used in SAO reconstruction later.
666     encoder_state_recdata_before_sao_to_bufs(state,
667                                              lcu,
668                                              state->tile->hor_buf_before_sao,
669                                              state->tile->ver_buf_before_sao);
670     kvz_sao_search_lcu(state, lcu->position.x, lcu->position.y);
671     encoder_sao_reconstruct(state, lcu);
672   }
673 
674   //Now write data to bitstream (required to have a correct CABAC state)
675   const uint64_t existing_bits = kvz_bitstream_tell(&state->stream);
676 
677   //Encode SAO
678   if (encoder->cfg.sao_type) {
679     encode_sao(state, lcu->position.x, lcu->position.y, &frame->sao_luma[lcu->position.y * frame->width_in_lcu + lcu->position.x], &frame->sao_chroma[lcu->position.y * frame->width_in_lcu + lcu->position.x]);
680   }
681 
682   //Encode coding tree
683   kvz_encode_coding_tree(state, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, 0);
684 
685   // Coeffs are not needed anymore.
686   state->coeff = NULL;
687 
688   bool end_of_slice_segment_flag;
689   if (state->encoder_control->cfg.slices & KVZ_SLICES_WPP) {
690     // Slice segments end after each WPP row.
691     end_of_slice_segment_flag = lcu->last_column;
692   } else if (state->encoder_control->cfg.slices & KVZ_SLICES_TILES) {
693     // Slices end after each tile.
694     end_of_slice_segment_flag = lcu->last_column && lcu->last_row;
695   } else {
696     // Slice ends after the last row of the last tile.
697     int last_tile_id = -1 + encoder->cfg.tiles_width_count * encoder->cfg.tiles_height_count;
698     bool is_last_tile = state->tile->id == last_tile_id;
699     end_of_slice_segment_flag = is_last_tile && lcu->last_column && lcu->last_row;
700   }
701   kvz_cabac_encode_bin_trm(&state->cabac, end_of_slice_segment_flag);
702 
703   {
704     const bool end_of_tile = lcu->last_column && lcu->last_row;
705     const bool end_of_wpp_row = encoder->cfg.wpp && lcu->last_column;
706 
707 
708     if (end_of_tile || end_of_wpp_row) {
709       if (!end_of_slice_segment_flag) {
710         // end_of_sub_stream_one_bit
711         kvz_cabac_encode_bin_trm(&state->cabac, 1);
712       }
713 
714       // Finish the substream by writing out remaining state.
715       kvz_cabac_finish(&state->cabac);
716 
717       // Write a rbsp_trailing_bits or a byte_alignment. The first one is used
718       // for ending a slice_segment_layer_rbsp and the second one for ending
719       // a substream. They are identical and align the byte stream.
720       kvz_bitstream_put(state->cabac.stream, 1, 1);
721       kvz_bitstream_align_zero(state->cabac.stream);
722 
723       kvz_cabac_start(&state->cabac);
724 
725       kvz_crypto_delete(&state->crypto_hdl);
726     }
727   }
728 
729   pthread_mutex_lock(&state->frame->rc_lock);
730   const uint32_t bits = kvz_bitstream_tell(&state->stream) - existing_bits;
731   state->frame->cur_frame_bits_coded += bits;
732   // This variable is used differently by intra and inter frames and shouldn't
733   // be touched in intra frames here
734   state->frame->remaining_weight -= !state->frame->is_irap ?
735     kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->original_weight :
736     0;
737   pthread_mutex_unlock(&state->frame->rc_lock);
738   kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->bits = bits;
739 
740   uint8_t not_skip = false;
741   for(int y = 0; y < 64 && !not_skip; y+=8) {
742     for(int x = 0; x < 64 && !not_skip; x+=8) {
743       not_skip |= !kvz_cu_array_at_const(state->tile->frame->cu_array,
744         lcu->position_px.x + x,
745         lcu->position_px.y + y)->skipped;
746     }
747   }
748   kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->skipped = !not_skip;
749 
750   //Wavefronts need the context to be copied to the next row
751   if (state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW && lcu->index == 1) {
752     int j;
753     //Find next encoder (next row)
754     for (j=0; state->parent->children[j].encoder_control; ++j) {
755       if (state->parent->children[j].wfrow->lcu_offset_y == state->wfrow->lcu_offset_y + 1) {
756         //And copy context
757         kvz_context_copy(&state->parent->children[j], state);
758       }
759     }
760   }
761 }
762 
/**
 * \brief Encode all LCUs of a leaf encoder state.
 *
 * Depending on the state type the LCUs are either encoded one after another
 * in the calling thread, or (for a wavefront row that has at least one
 * sibling state) turned into one thread queue job per LCU. In the latter case
 * this function only creates the jobs, wires up their dependencies and
 * submits them.
 *
 * \param state   leaf encoder state; asserted to be a leaf that has at least
 *                one entry in lcu_order
 */
static void encoder_state_encode_leaf(encoder_state_t * const state)
{
  assert(state->is_leaf);
  assert(state->lcu_order_count > 0);

  const encoder_control_t *ctrl = state->encoder_control;
  const kvz_config *cfg = &ctrl->cfg;

  // Signaled slice QP may be different to frame QP with set-qp-in-cu enabled.
  // In that case 26 is used as the starting value instead of the frame QP.
  state->last_qp = ctrl->cfg.set_qp_in_cu ? 26 : state->frame->QP;

  // A crypto handle is created per leaf state only when some crypto feature
  // is enabled in the configuration.
  if (cfg->crypto_features) {
    state->crypto_hdl = kvz_crypto_create(cfg);
    state->crypto_prev_pos = 0;
  }

  // Select whether to encode the frame/tile in current thread or to define
  // wavefront jobs for other threads to handle. Jobs are only used when this
  // is a wavefront row and the parent has a second child.
  bool wavefront = state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW;
  bool use_parallel_encoding = (wavefront && state->parent->children[1].encoder_control);
  if (!use_parallel_encoding) {
    // Encode every LCU of this state in order, in the calling thread.

    for (int i = 0; i < state->lcu_order_count; ++i) {
      encoder_state_worker_encode_lcu(&state->lcu_order[i]);
    }
  } else {
    // Add each LCU in the wavefront row as its own job to the queue.

    // Select the encoder state whose LCU jobs the inter frame dependencies
    // should point to. NULL means no inter frame dependency is added.
    const encoder_state_t * ref_state = NULL;

    if (state->frame->slicetype == KVZ_SLICE_I) {
      // I-frames have no references.
      ref_state = NULL;
    } else if (cfg->gop_lowdelay &&
               cfg->gop_len > 0 &&
               state->previous_encoder_state != state)
    {
      // For LP-gop, depend on the state of the first reference.
      int ref_neg = cfg->gop[state->frame->gop_offset].ref_neg[0];
      if (ref_neg > cfg->owf) {
        // If frame is not within OWF range, it's already done.
        ref_state = NULL;
      } else {
        // Step back ref_neg states along the previous_encoder_state chain.
        ref_state = state->previous_encoder_state;
        while (ref_neg > 1) {
          ref_neg -= 1;
          ref_state = ref_state->previous_encoder_state;
        }
      }
    } else {
      // Otherwise, depend on the previous frame.
      ref_state = state->previous_encoder_state;
    }

    for (int i = 0; i < state->lcu_order_count; ++i) {
      const lcu_order_element_t * const lcu = &state->lcu_order[i];

      // Release whatever job was stored for this LCU before and create a
      // fresh one. `job` points into the wf_jobs array so that neighbouring
      // entries can be reached with relative indices below.
      kvz_threadqueue_free_job(&state->tile->wf_jobs[lcu->id]);
      state->tile->wf_jobs[lcu->id] = kvz_threadqueue_job_create(encoder_state_worker_encode_lcu, (void*)lcu);
      threadqueue_job_t **job = &state->tile->wf_jobs[lcu->id];

      // If job object was returned, add dependencies and allow it to run.
      if (job[0]) {
        // Add inter frame dependencies when encoding more than one frame at
        // once. Every LCU job gets a dependency on one LCU job of the
        // reference state, picked by the walk below.
        if (ref_state != NULL &&
            state->previous_encoder_state->tqj_recon_done &&
            state->frame->slicetype != KVZ_SLICE_I)
        {
          // We need to wait until the CTUs whose pixels we refer to are
          // done before we can start this CTU. Walk at most
          // max_inter_ref_lcu.down links down and max_inter_ref_lcu.right
          // links right, stopping early where the links end.
          // (The loop counters shadow the outer `i`; the outer one is not
          // modified.)
          const lcu_order_element_t *dep_lcu = lcu;
          for (int i = 0; dep_lcu->below && i < ctrl->max_inter_ref_lcu.down; i++) {
            dep_lcu = dep_lcu->below;
          }
          for (int i = 0; dep_lcu->right && i < ctrl->max_inter_ref_lcu.right; i++) {
            dep_lcu = dep_lcu->right;
          }
          kvz_threadqueue_job_dep_add(job[0], ref_state->tile->wf_jobs[dep_lcu->id]);

          //TODO: Preparation for the lock free implementation of the new rc
          // When the reference is a non-first I-frame and owf > 1, also
          // depend on the state preceding the reference.
          if (ref_state->frame->slicetype == KVZ_SLICE_I && ref_state->frame->num != 0 && state->encoder_control->cfg.owf > 1 && true) {
            kvz_threadqueue_job_dep_add(job[0], ref_state->previous_encoder_state->tile->wf_jobs[dep_lcu->id]);
          }

          // Very specific bug that happens when owf length is longer than the
          // gop length. Takes care of that by adding a dependency to the
          // state whose POC is exactly one GOP length before this frame.
          // NOTE(review): ref_state is declared outside the LCU loop, so the
          // rebinding done here stays in effect for all following LCUs of
          // this row - confirm that this is intended.
          if(!state->encoder_control->cfg.gop_lowdelay &&
             state->encoder_control->cfg.open_gop &&
             state->encoder_control->cfg.gop_len != 0 &&
             state->encoder_control->cfg.owf > state->encoder_control->cfg.gop_len &&
             ref_state->frame->slicetype == KVZ_SLICE_I &&
             ref_state->frame->num != 0){

            while (ref_state->frame->poc != state->frame->poc - state->encoder_control->cfg.gop_len){
              ref_state = ref_state->previous_encoder_state;
            }
            kvz_threadqueue_job_dep_add(job[0], ref_state->tile->wf_jobs[dep_lcu->id]);
          }
        }

        // Add local WPP dependency to the LCU on the left (the previous
        // entry of wf_jobs).
        if (lcu->left) {
          kvz_threadqueue_job_dep_add(job[0], job[-1]);
        }
        // Add local WPP dependency to the LCU on the top right, or to the
        // LCU directly above when the row above has no LCU further right.
        if (lcu->above) {
          if (lcu->above->right) {
            kvz_threadqueue_job_dep_add(job[0], job[-state->tile->frame->width_in_lcu + 1]);
          } else {
            kvz_threadqueue_job_dep_add(job[0], job[-state->tile->frame->width_in_lcu]);
          }
        }

        kvz_threadqueue_submit(state->encoder_control->threadqueue, state->tile->wf_jobs[lcu->id]);

        // The wavefront row is done when the last LCU in the row is done, so
        // a reference to that job is kept as the row's "reconstruction done"
        // job.
        if (i + 1 == state->lcu_order_count) {
          assert(!state->tqj_recon_done);
          state->tqj_recon_done =
            kvz_threadqueue_copy_ref(state->tile->wf_jobs[lcu->id]);
        }
      }
    }
  }
}
894 
895 static void encoder_state_encode(encoder_state_t * const main_state);
896 
encoder_state_worker_encode_children(void * opaque)897 static void encoder_state_worker_encode_children(void * opaque)
898 {
899   encoder_state_t *sub_state = opaque;
900   encoder_state_encode(sub_state);
901 
902   if (sub_state->is_leaf && sub_state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
903     // Set the last wavefront job of this row as the job that completes
904     // the bitstream for this wavefront row state.
905 
906     int wpp_row = sub_state->wfrow->lcu_offset_y;
907     int tile_width = sub_state->tile->frame->width_in_lcu;
908     int end_of_row = (wpp_row + 1) * tile_width - 1;
909     assert(!sub_state->tqj_bitstream_written);
910     if (sub_state->tile->wf_jobs[end_of_row]) {
911       sub_state->tqj_bitstream_written =
912         kvz_threadqueue_copy_ref(sub_state->tile->wf_jobs[end_of_row]);
913     }
914   }
915 }
916 
encoder_state_tree_is_a_chain(const encoder_state_t * const state)917 static int encoder_state_tree_is_a_chain(const encoder_state_t * const state) {
918   if (!state->children[0].encoder_control) return 1;
919   if (state->children[1].encoder_control) return 0;
920   return encoder_state_tree_is_a_chain(&state->children[0]);
921 }
922 
/**
 * \brief Encode an encoder state and everything below it in the state tree.
 *
 * A state without children is encoded as a leaf. Otherwise the children are
 * prepared (tiles get sub-views of the parent's pictures and CU array) and
 * then either launched as thread queue jobs or encoded one by one.
 *
 * \param main_state  state to encode
 */
static void encoder_state_encode(encoder_state_t * const main_state) {
  // No children: this is a leaf and is encoded directly.
  if (!main_state->children[0].encoder_control) {
    switch (main_state->type) {
      case ENCODER_STATE_TYPE_TILE:
      case ENCODER_STATE_TYPE_SLICE:
      case ENCODER_STATE_TYPE_WAVEFRONT_ROW:
        encoder_state_encode_leaf(main_state);
        break;
      default:
        fprintf(stderr, "Unsupported leaf type %c!\n", main_state->type);
        assert(0);
    }
    return;
  }

  // With only one child this node cannot be the last split in the tree.
  int is_last_split = (main_state->children[1].encoder_control != 0);

  for (int i = 0; main_state->children[i].encoder_control; ++i) {
    encoder_state_t * const child = &main_state->children[i];

    if (child->tile != main_state->tile) {
      // The child covers its own tile: replace its pictures and CU array
      // with sub-views of the parent's data.
      const int offset_x = child->tile->offset_x;
      const int offset_y = child->tile->offset_y;
      const int width = MIN(child->tile->frame->width_in_lcu * LCU_WIDTH,
                            main_state->tile->frame->width - offset_x);
      const int height = MIN(child->tile->frame->height_in_lcu * LCU_WIDTH,
                             main_state->tile->frame->height - offset_y);

      kvz_image_free(child->tile->frame->source);
      child->tile->frame->source = NULL;

      kvz_image_free(child->tile->frame->rec);
      child->tile->frame->rec = NULL;

      kvz_cu_array_free(&child->tile->frame->cu_array);

      child->tile->frame->source =
        kvz_image_make_subimage(main_state->tile->frame->source, offset_x, offset_y, width, height);
      child->tile->frame->rec =
        kvz_image_make_subimage(main_state->tile->frame->rec, offset_x, offset_y, width, height);
      child->tile->frame->cu_array =
        kvz_cu_subarray(main_state->tile->frame->cu_array,
                        offset_x,
                        offset_y,
                        child->tile->frame->width_in_lcu * LCU_WIDTH,
                        child->tile->frame->height_in_lcu * LCU_WIDTH);
    }

    // The last split requires every child to be a plain chain.
    if (is_last_split && !encoder_state_tree_is_a_chain(child)) {
      is_last_split = 0;
    }
  }

  if (!is_last_split) {
    // Not the last split point: encode the children one after another.
    for (int i = 0; main_state->children[i].encoder_control; ++i) {
      encoder_state_worker_encode_children(&(main_state->children[i]));
    }
    return;
  }

  for (int i = 0; main_state->children[i].encoder_control; ++i) {
    encoder_state_t * const child = &main_state->children[i];

    if (child->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
      // Wavefront rows have parallelism at LCU level, so we should not
      // launch multiple threads here!
      // FIXME: add an assert: we can only have wavefront children
      encoder_state_worker_encode_children(child);
      continue;
    }

    // Without wavefronts each child is encoded by its own job.
    kvz_threadqueue_free_job(&child->tqj_recon_done);
    child->tqj_recon_done =
      kvz_threadqueue_job_create(encoder_state_worker_encode_children, child);

    if (child->previous_encoder_state != child &&
        child->previous_encoder_state->tqj_recon_done &&
        !child->frame->is_irap)
    {
      // Add a dependency to each child in the previous frame. (Depending
      // only on the same tile when MVs are constrained to the tile was
      // disabled due to non-determinism.)
      for (int other = 0; main_state->children[other].encoder_control; ++other) {
        kvz_threadqueue_job_dep_add(child->tqj_recon_done,
                                    main_state->children[other].previous_encoder_state->tqj_recon_done);
      }
    }

    kvz_threadqueue_submit(main_state->encoder_control->threadqueue, child->tqj_recon_done);
  }
}
1026 
1027 
encoder_ref_insertion_sort(const encoder_state_t * const state,uint8_t reflist[16],uint8_t length,bool reverse)1028 static void encoder_ref_insertion_sort(const encoder_state_t *const state,
1029                                        uint8_t reflist[16],
1030                                        uint8_t length,
1031                                        bool reverse)
1032 {
1033 
1034   for (uint8_t i = 1; i < length; ++i) {
1035     const uint8_t cur_idx = reflist[i];
1036     const int32_t cur_poc = state->frame->ref->pocs[cur_idx];
1037     int8_t j = i;
1038     while ((j > 0 && !reverse && cur_poc > state->frame->ref->pocs[reflist[j - 1]]) ||
1039            (j > 0 &&  reverse && cur_poc < state->frame->ref->pocs[reflist[j - 1]]))
1040     {
1041       reflist[j] = reflist[j - 1];
1042       --j;
1043     }
1044     reflist[j] = cur_idx;
1045   }
1046 }
1047 
1048 /**
1049  * \brief Generate reference picture lists.
1050  *
1051  * \param state             main encoder state
1052  */
kvz_encoder_create_ref_lists(const encoder_state_t * const state)1053 void kvz_encoder_create_ref_lists(const encoder_state_t *const state)
1054 {
1055   const kvz_config *cfg = &state->encoder_control->cfg;
1056 
1057   FILL_ARRAY(state->frame->ref_LX_size, 0, 2);
1058 
1059   int num_negative = 0;
1060   int num_positive = 0;
1061 
1062   // Add positive references to L1 list
1063   for (int i = 0; i < state->frame->ref->used_size; i++) {
1064     if (state->frame->ref->pocs[i] > state->frame->poc) {
1065       state->frame->ref_LX[1][state->frame->ref_LX_size[1]] = i;
1066       state->frame->ref_LX_size[1] += 1;
1067       num_positive++;
1068     }
1069   }
1070 
1071   // Add negative references to L1 list when bipred is enabled and GOP is
1072   // either disabled or does not use picture reordering.
1073   bool l1_negative_refs =
1074     (cfg->bipred && (cfg->gop_len == 0 || cfg->gop_lowdelay));
1075 
1076   // Add negative references to L0 and L1 lists.
1077   for (int i = 0; i < state->frame->ref->used_size; i++) {
1078     if (state->frame->ref->pocs[i] < state->frame->poc) {
1079       state->frame->ref_LX[0][state->frame->ref_LX_size[0]] = i;
1080       state->frame->ref_LX_size[0] += 1;
1081       if (l1_negative_refs) {
1082         state->frame->ref_LX[1][state->frame->ref_LX_size[1]] = i;
1083         state->frame->ref_LX_size[1] += 1;
1084       }
1085       num_negative++;
1086     }
1087   }
1088 
1089   // Fill the rest with -1.
1090   for (int i = state->frame->ref_LX_size[0]; i < 16; i++) {
1091     state->frame->ref_LX[0][i] = 0xff;
1092   }
1093   for (int i = state->frame->ref_LX_size[1]; i < 16; i++) {
1094     state->frame->ref_LX[1][i] = 0xff;
1095   }
1096 
1097   // Sort reference lists.
1098   encoder_ref_insertion_sort(state, state->frame->ref_LX[0], num_negative, false);
1099   encoder_ref_insertion_sort(state, state->frame->ref_LX[1], num_positive, true);
1100   if (l1_negative_refs) {
1101     encoder_ref_insertion_sort(state, state->frame->ref_LX[1] + num_positive, num_negative, false);
1102   }
1103 }
1104 
1105 /**
1106  * \brief Remove any references that should no longer be used.
1107  */
encoder_state_remove_refs(encoder_state_t * state)1108 static void encoder_state_remove_refs(encoder_state_t *state) {
1109   const encoder_control_t * const encoder = state->encoder_control;
1110 
1111   int neg_refs = encoder->cfg.gop[state->frame->gop_offset].ref_neg_count;
1112   int pos_refs = encoder->cfg.gop[state->frame->gop_offset].ref_pos_count;
1113 
1114   unsigned target_ref_num;
1115   if (encoder->cfg.gop_len) {
1116     target_ref_num = neg_refs + pos_refs;
1117   } else {
1118     target_ref_num = encoder->cfg.ref_frames;
1119   }
1120 
1121   if (state->frame->pictype == KVZ_NAL_IDR_W_RADL ||
1122       state->frame->pictype == KVZ_NAL_IDR_N_LP)
1123   {
1124     target_ref_num = 0;
1125   }
1126 
1127   if (encoder->cfg.gop_len && target_ref_num > 0) {
1128     // With GOP in use, go through all the existing reference pictures and
1129     // remove any picture that is not referenced by the current picture.
1130 
1131     for (int ref = state->frame->ref->used_size - 1; ref >= 0; --ref) {
1132       bool is_referenced = false;
1133 
1134       int ref_poc = state->frame->ref->pocs[ref];
1135 
1136       for (int i = 0; i < neg_refs; i++) {
1137         int ref_relative_poc = -encoder->cfg.gop[state->frame->gop_offset].ref_neg[i];
1138         if (ref_poc == state->frame->poc + ref_relative_poc) {
1139           is_referenced = true;
1140           break;
1141         }
1142       }
1143 
1144       for (int i = 0; i < pos_refs; i++) {
1145         int ref_relative_poc = encoder->cfg.gop[state->frame->gop_offset].ref_pos[i];
1146         if (ref_poc == state->frame->poc + ref_relative_poc) {
1147           is_referenced = true;
1148           break;
1149         }
1150       }
1151 
1152       if (ref_poc < state->frame->irap_poc &&
1153           state->frame->irap_poc < state->frame->poc)
1154       {
1155         // Trailing frames cannot refer to leading frames.
1156         is_referenced = false;
1157       }
1158 
1159       if (encoder->cfg.intra_period > 0 &&
1160           ref_poc < state->frame->irap_poc - encoder->cfg.intra_period)
1161       {
1162         // No frame can refer past the two preceding IRAP frames.
1163         is_referenced = false;
1164       }
1165 
1166       if (!is_referenced) {
1167         // This reference is not referred to by this frame, it must be removed.
1168         kvz_image_list_rem(state->frame->ref, ref);
1169       }
1170     }
1171   } else {
1172     // Without GOP, remove the oldest picture.
1173     while (state->frame->ref->used_size > target_ref_num) {
1174       int8_t oldest_ref = state->frame->ref->used_size - 1;
1175       kvz_image_list_rem(state->frame->ref, oldest_ref);
1176     }
1177   }
1178 
1179   assert(state->frame->ref->used_size <= target_ref_num);
1180 }
1181 
/**
 * \brief Attach a source picture to the state and set up its reconstruction.
 *
 * The tile frame must not have a source or a reconstruction picture yet.
 *
 * \param state   encoder state receiving the picture
 * \param frame   source picture to encode
 */
static void encoder_set_source_picture(encoder_state_t * const state, kvz_picture* frame)
{
  assert(!state->tile->frame->source);
  assert(!state->tile->frame->rec);

  state->tile->frame->source = frame;

  if (!state->encoder_control->cfg.lossless) {
    // Lossy coding: a separate reconstruction picture of the same size that
    // carries the timestamps of the source.
    kvz_picture *rec = kvz_image_alloc(state->encoder_control->chroma_format,
                                       frame->width,
                                       frame->height);
    state->tile->frame->rec = rec;
    rec->dts = frame->dts;
    rec->pts = frame->pts;
  } else {
    // In lossless mode, the reconstruction is equal to the source frame.
    state->tile->frame->rec = kvz_image_copy_ref(frame);
  }

  kvz_videoframe_set_poc(state->tile->frame, state->frame->poc);
}
1199 
/**
 * \brief Reset a state and, recursively, all of its children for a new frame.
 *
 * Clears the bitstream, restarts CABAC and contexts on leaf states and
 * releases the jobs kept from the previous use of the state.
 *
 * \param state   root of the subtree to reset
 */
static void encoder_state_init_children(encoder_state_t * const state) {
  kvz_bitstream_clear(&state->stream);

  // Leaf states have cabac and context
  if (state->is_leaf) {
    kvz_cabac_start(&state->cabac);
    if (state->encoder_control->cfg.set_qp_in_cu) {
      // With set-qp-in-cu the contexts are initialized with QP 26.
      kvz_init_contexts(state, 26, state->frame->slicetype);
    } else {
      kvz_init_contexts(state, state->frame->QP, state->frame->slicetype);
    }
  }

  // Clear the jobs
  kvz_threadqueue_free_job(&state->tqj_bitstream_written);
  kvz_threadqueue_free_job(&state->tqj_recon_done);

  // TODO: Copying the constraint pointer from the parent
  // (state->constraint = state->parent->constraint when state->parent is not
  // NULL) is currently disabled; try to do it in the is_leaf branch.

  // The children array ends at the first entry without an encoder_control.
  for (encoder_state_t *child = state->children; child->encoder_control; ++child) {
    encoder_state_init_children(child);
  }
}
1223 
/**
 * \brief Scale the LCU weights of the frame by their sum.
 *
 * Does nothing for the first frame (frame number 0).
 *
 * \param state   encoder state whose frame->lcu_stats weights are scaled
 */
static void normalize_lcu_weights(encoder_state_t * const state)
{
  if (state->frame->num != 0) {
    const uint32_t lcu_count = state->encoder_control->in.width_in_lcu *
                               state->encoder_control->in.height_in_lcu;

    // Accumulate the total weight over all LCUs of the picture.
    double total = 0.0;
    uint32_t idx = 0;
    while (idx < lcu_count) {
      total += state->frame->lcu_stats[idx].weight;
      idx++;
    }

    // Divide every weight by the total.
    for (idx = 0; idx != lcu_count; idx++) {
      state->frame->lcu_stats[idx].weight /= total;
    }
  }
}
1239 
// Tell whether the LCU with raster index `id` is an edge LCU of a frame that
// is lcus_x by lcus_y LCUs large. An LCU counts as an edge LCU when it is in
// the last column and the frame width is not divisible by 64 (xdiv64 is
// false), or in the last row and the frame height is not divisible by 64
// (ydiv64 is false). Always false when both dimensions are 64 divisible.
static bool edge_lcu(int id, int lcus_x, int lcus_y, bool xdiv64, bool ydiv64)
{
  const bool partial_column = !xdiv64 && (id % lcus_x == lcus_x - 1);
  if (partial_column) {
    return true;
  }

  // Index of the first LCU in the bottom row is (lcus_y - 1) * lcus_x.
  return !ydiv64 && id >= (lcus_y - 1) * lcus_x;
}
1254 
/**
 * \brief Prepare the main encoder state for encoding a new frame.
 *
 * In order: attaches the source picture, allocates the CU array, computes
 * the variance adaptive quantization offsets (when enabled), selects the
 * previous state of the same GOP layer, derives POC, IRAP status and picture
 * type, prunes and rebuilds the reference lists, selects the slice type,
 * sets up rate control for the picture and finally resets all child states.
 *
 * \param state   main encoder state (asserted to be of type
 *                ENCODER_STATE_TYPE_MAIN)
 * \param frame   source picture of the new frame
 */
static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_picture* frame) {
  assert(state->type == ENCODER_STATE_TYPE_MAIN);

  const kvz_config * const cfg = &state->encoder_control->cfg;

  encoder_set_source_picture(state, frame);

  assert(!state->tile->frame->cu_array);
  state->tile->frame->cu_array = kvz_cu_array_alloc(
      state->tile->frame->width,
      state->tile->frame->height
  );

  // Variance adaptive quantization
  if (cfg->vaq) {
    const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
    double d = cfg->vaq * 0.1; // Empirically decided constant. Affects delta-QP strength

    // Calculate frame pixel variance
    // NOTE(review): c_len = len / 4 assumes each chroma plane holds a quarter
    // of the luma samples (4:2:0) - confirm for other chroma formats.
    uint32_t len = state->tile->frame->width * state->tile->frame->height;
    uint32_t c_len = len / 4;
    double frame_var = kvz_pixel_var(state->tile->frame->source->y, len);
    if (has_chroma) {
      frame_var += kvz_pixel_var(state->tile->frame->source->u, c_len);
      frame_var += kvz_pixel_var(state->tile->frame->source->v, c_len);
    }

    // Loop through LCUs
    // For each LCU calculate: D * (log(LCU pixel variance) - log(frame pixel variance))
    unsigned x_lim = state->tile->frame->width_in_lcu;
    unsigned y_lim = state->tile->frame->height_in_lcu;

    // Running LCU index; incremented once per inner iteration, so it follows
    // the row-by-row order of the two loops below.
    unsigned id = 0;
    for (int y = 0; y < y_lim; ++y) {
      for (int x = 0; x < x_lim; ++x) {
        kvz_pixel tmp[LCU_LUMA_SIZE];
        int pxl_x = x * LCU_WIDTH;
        int pxl_y = y * LCU_WIDTH;
        // Size of the part of this LCU that lies inside the frame.
        int x_max = MIN(pxl_x + LCU_WIDTH, frame->width) - pxl_x;
        int y_max = MIN(pxl_y + LCU_WIDTH, frame->height) - pxl_y;

        // Whether the frame dimensions are multiples of 64. These do not
        // depend on x or y; they are simply recomputed for every LCU.
        bool xdiv64 = false;
        bool ydiv64 = false;
        if (frame->width % 64 == 0) xdiv64 = true;
        if (frame->height % 64 == 0) ydiv64 = true;

        // Luma variance
        if (!edge_lcu(id, x_lim, y_lim, xdiv64, ydiv64)) {
          // Not an edge LCU: copy the block from the source picture as is.
          kvz_pixels_blit(&state->tile->frame->source->y[pxl_x + pxl_y * state->tile->frame->source->stride], tmp,
            x_max, y_max, state->tile->frame->source->stride, LCU_WIDTH);
        } else {
          // Extend edge pixels for edge lcus: coordinates outside the frame
          // are clipped to the last row/column. The inner x and y shadow the
          // LCU coordinates of the outer loops.
          for (int y = 0; y < LCU_WIDTH; y++) {
            for (int x = 0; x < LCU_WIDTH; x++) {
              int src_y = CLIP(0, frame->height - 1, pxl_y + y);
              int src_x = CLIP(0, frame->width - 1, pxl_x + x);
              tmp[y * LCU_WIDTH + x] = state->tile->frame->source->y[src_y * state->tile->frame->source->stride + src_x];
            }
          }
        }

        double lcu_var = kvz_pixel_var(tmp, LCU_LUMA_SIZE);

        if (has_chroma) {
          // Add chroma variance if not monochrome
          // NOTE(review): stride, LCU width and frame dimensions are all
          // halved here, i.e. 4:2:0 subsampling is assumed - confirm.
          int32_t c_stride = state->tile->frame->source->stride >> 1;
          kvz_pixel chromau_tmp[LCU_CHROMA_SIZE];
          kvz_pixel chromav_tmp[LCU_CHROMA_SIZE];
          int lcu_chroma_width = LCU_WIDTH >> 1;
          int c_pxl_x = x * lcu_chroma_width;
          int c_pxl_y = y * lcu_chroma_width;
          int c_x_max = MIN(c_pxl_x + lcu_chroma_width, frame->width >> 1) - c_pxl_x;
          int c_y_max = MIN(c_pxl_y + lcu_chroma_width, frame->height >> 1) - c_pxl_y;

          if (!edge_lcu(id, x_lim, y_lim, xdiv64, ydiv64)) {
            kvz_pixels_blit(&state->tile->frame->source->u[c_pxl_x + c_pxl_y * c_stride], chromau_tmp, c_x_max, c_y_max, c_stride, lcu_chroma_width);
            kvz_pixels_blit(&state->tile->frame->source->v[c_pxl_x + c_pxl_y * c_stride], chromav_tmp, c_x_max, c_y_max, c_stride, lcu_chroma_width);
          }
          else {
            // Same edge extension as for luma, on the chroma planes.
            for (int y = 0; y < lcu_chroma_width; y++) {
              for (int x = 0; x < lcu_chroma_width; x++) {
                int src_y = CLIP(0, (frame->height >> 1) - 1, c_pxl_y + y);
                int src_x = CLIP(0, (frame->width >> 1) - 1, c_pxl_x + x);
                chromau_tmp[y * lcu_chroma_width + x] = state->tile->frame->source->u[src_y * c_stride + src_x];
                chromav_tmp[y * lcu_chroma_width + x] = state->tile->frame->source->v[src_y * c_stride + src_x];
              }
            }
          }
          lcu_var += kvz_pixel_var(chromau_tmp, LCU_CHROMA_SIZE);
          lcu_var += kvz_pixel_var(chromav_tmp, LCU_CHROMA_SIZE);
        }

        // NOTE(review): if kvz_pixel_var() can return 0 (e.g. for a flat
        // block) log() yields -infinity here - confirm how consumers of
        // aq_offsets handle that.
        state->frame->aq_offsets[id] = d * (log(lcu_var) - log(frame_var));
        id++;
      }
    }
  }
  // Variance adaptive quantization - END

  // Use this flag to handle closed gop irap picture selection.
  // If set to true, irap is already set and we avoid
  // setting it based on the intra period
  bool is_closed_normal_gop = false;

  // Find the most recent state on the same GOP layer, looking back along the
  // previous_encoder_state chain. The countdown starts from
  // min(cfg.owf, frame number).
  encoder_state_t *previous = state->previous_encoder_state;
  int owf = MIN(state->encoder_control->cfg.owf, state->frame->num);

  const int layer = state->encoder_control->cfg.gop[state->frame->gop_offset].layer;

  while (--owf > 0 && layer != state->encoder_control->cfg.gop[previous->frame->gop_offset].layer) {
    previous = previous->previous_encoder_state;
  }

  // The countdown reached zero: use this state itself.
  // NOTE(review): when the countdown starts at 0 (frame number 0) it ends at
  // -1, so this fallback is not taken - confirm that is intended.
  if (owf == 0) previous = state;
  state->frame->previous_layer_state = previous;
  // Set POC.
  if (state->frame->num == 0) {
    state->frame->poc = 0;
  } else if (cfg->gop_len && !cfg->gop_lowdelay) {

    // Frame counter without the very first frame.
    int32_t framenum = state->frame->num - 1;
    // Handle closed GOP
    // Closed GOP structure has an extra IDR between the GOPs
    if (cfg->intra_period > 0 && !cfg->open_gop) {
      is_closed_normal_gop = true;
      if (framenum % (cfg->intra_period + 1) == cfg->intra_period) {
        // Insert IDR before each new GOP after intra period in closed GOP configuration
        state->frame->poc = 0;
      } else {
        // Calculate frame number again and use that for the POC
        framenum = framenum % (cfg->intra_period + 1);
        int32_t poc_offset = cfg->gop[state->frame->gop_offset].poc_offset;
        // Start of the GOP this frame belongs to, plus the offset of the
        // frame's GOP entry.
        state->frame->poc = framenum - framenum % cfg->gop_len + poc_offset;
        // This should not be an irap picture in closed GOP
        state->frame->is_irap = false;
      }
    } else { // Open GOP
      // Calculate POC according to the global frame counter and GOP structure
      int32_t poc_offset = cfg->gop[state->frame->gop_offset].poc_offset;
      state->frame->poc = framenum - framenum % cfg->gop_len + poc_offset;
    }

    // Pass the recomputed POC on to the tile frame.
    // NOTE(review): the other branches of this chain do not call
    // kvz_videoframe_set_poc() again - confirm that is intended.
    kvz_videoframe_set_poc(state->tile->frame, state->frame->poc);
  } else if (cfg->intra_period > 0) {
    // POC restarts from 0 every intra_period frames.
    state->frame->poc = state->frame->num % cfg->intra_period;
  } else {
    state->frame->poc = state->frame->num;
  }

  // Check whether the frame is a keyframe or not.
  if (state->frame->num == 0 || state->frame->poc == 0) {
    state->frame->is_irap = true;
  } else if(!is_closed_normal_gop) { // In closed-GOP IDR frames are poc==0 so skip this check
    state->frame->is_irap =
      cfg->intra_period > 0 &&
      (state->frame->poc % cfg->intra_period) == 0;
  }
  // irap_poc is only updated on IRAP frames; otherwise the stored value is
  // left as it is.
  if (state->frame->is_irap) {
    state->frame->irap_poc = state->frame->poc;
  }

  // Set pictype.
  if (state->frame->is_irap) {
    if (state->frame->num == 0 ||
        cfg->intra_period == 1 ||
        cfg->gop_len == 0 ||
        cfg->gop_lowdelay ||
        !cfg->open_gop) // Closed GOP uses IDR pictures
    {
      state->frame->pictype = KVZ_NAL_IDR_W_RADL;
    } else {
      state->frame->pictype = KVZ_NAL_CRA_NUT;
    }
  } else if (state->frame->poc < state->frame->irap_poc) {
    // POC below the stored IRAP POC.
    state->frame->pictype = KVZ_NAL_RASL_R;
  } else {
    state->frame->pictype = KVZ_NAL_TRAIL_R;
  }

  // Drop references this frame may not use, then build L0/L1 from the rest.
  encoder_state_remove_refs(state);
  kvz_encoder_create_ref_lists(state);

  // Set slicetype: I for IRAP frames, B when the L1 list is not empty,
  // P otherwise.
  if (state->frame->is_irap) {
    state->frame->slicetype = KVZ_SLICE_I;
  } else if (state->frame->ref_LX_size[1] > 0) {
    state->frame->slicetype = KVZ_SLICE_B;
  } else {
    state->frame->slicetype = KVZ_SLICE_P;
  }

  // LCU weights are only normalized when a target bitrate is set and the
  // frame number is larger than owf.
  if (cfg->target_bitrate > 0 && state->frame->num > cfg->owf) {
    normalize_lcu_weights(state);
  }
  state->frame->cur_frame_bits_coded = 0;

  // Picture level lambda/QP selection depends on the rate control algorithm.
  switch (state->encoder_control->cfg.rc_algorithm) {
    case KVZ_NO_RC:
    case KVZ_LAMBDA:
      kvz_set_picture_lambda_and_qp(state);
      break;
    case KVZ_OBA:
      kvz_estimate_pic_lambda(state);
      break;
    default:
      assert(0);
  }

  // Reset this state and all of its children for the new frame.
  encoder_state_init_children(state);
}
1465 
/**
 * Make a job depend on the pending jobs of an encoder state subtree.
 *
 * Recurses into every child first; the children array is treated as ending
 * at the first entry whose encoder_control is NULL. Afterwards the state's
 * own tqj_bitstream_written and tqj_recon_done jobs are added as
 * dependencies of the job, each only when it is set.
 *
 * \param state  root of the encoder state subtree to visit
 * \param job    job that receives the dependencies
 */
static void _encode_one_frame_add_bitstream_deps(const encoder_state_t * const state, threadqueue_job_t * const job)
{
  // Depth-first: collect the dependencies of the whole subtree below us.
  const encoder_state_t *child = state->children;
  while (child->encoder_control) {
    _encode_one_frame_add_bitstream_deps(child, job);
    ++child;
  }

  // Then the jobs owned by this state itself, if there are any.
  if (state->tqj_bitstream_written) {
    kvz_threadqueue_job_dep_add(job, state->tqj_bitstream_written);
  }
  if (state->tqj_recon_done) {
    kvz_threadqueue_job_dep_add(job, state->tqj_recon_done);
  }
}
1478 
1479 
/**
 * Encode one frame and queue the job that writes its bitstream.
 *
 * Initializes the state for the new frame, calls encoder_state_encode and
 * then creates and submits a job running
 * kvz_encoder_state_worker_write_bitstream. That job depends on the pending
 * jobs of the whole encoder state tree and, when the previous encoder state
 * is a different state with a bitstream job of its own, on that job too.
 *
 * \param state  encoder state used for this frame
 * \param frame  picture handed to encoder_state_init_new_frame
 */
void kvz_encode_one_frame(encoder_state_t * const state, kvz_picture* frame)
{
  encoder_state_init_new_frame(state, frame);
  encoder_state_encode(state);

  threadqueue_job_t * const write_job =
    kvz_threadqueue_job_create(kvz_encoder_state_worker_write_bitstream, state);

  _encode_one_frame_add_bitstream_deps(state, write_job);

  const encoder_state_t * const prev = state->previous_encoder_state;
  if (prev != state) {
    if (prev->tqj_bitstream_written) {
      // We need to depend on previous bitstream generation.
      kvz_threadqueue_job_dep_add(write_job, prev->tqj_bitstream_written);
    }
  }

  kvz_threadqueue_submit(state->encoder_control->threadqueue, write_job);

  // The state must not already be holding an earlier bitstream job.
  assert(!state->tqj_bitstream_written);
  state->tqj_bitstream_written = write_job;

  // kvz_encoder_prepare asserts on this flag before starting the next frame.
  state->frame->done = 0;
}
1499 
1500 
1501 /**
1502  * Prepare the encoder state for encoding the next frame.
1503  *
1504  * - Add the previous reconstructed picture as a reference, if needed.
1505  * - Free the previous reconstructed and source pictures.
1506  * - Create a new cu array, if needed.
1507  * - Update frame count and POC.
1508  */
kvz_encoder_prepare(encoder_state_t * state)1509 void kvz_encoder_prepare(encoder_state_t *state)
1510 {
1511   const encoder_control_t * const encoder = state->encoder_control;
1512 
1513   // The previous frame must be done before the next one is started.
1514   assert(state->frame->done);
1515 
1516   if (state->frame->num == -1) {
1517     // We're at the first frame, so don't care about all this stuff.
1518     state->frame->num = 0;
1519     state->frame->poc = 0;
1520     state->frame->irap_poc = 0;
1521     assert(!state->tile->frame->source);
1522     assert(!state->tile->frame->rec);
1523     assert(!state->tile->frame->cu_array);
1524     state->frame->prepared = 1;
1525 
1526     return;
1527   }
1528 
1529   // NOTE: prev_state is equal to state when OWF is zero
1530   encoder_state_t *prev_state = state->previous_encoder_state;
1531 
1532   if (state->previous_encoder_state != state) {
1533     kvz_cu_array_free(&state->tile->frame->cu_array);
1534     unsigned width  = state->tile->frame->width_in_lcu  * LCU_WIDTH;
1535     unsigned height = state->tile->frame->height_in_lcu * LCU_WIDTH;
1536     state->tile->frame->cu_array = kvz_cu_array_alloc(width, height);
1537 
1538     kvz_image_list_copy_contents(state->frame->ref, prev_state->frame->ref);
1539     kvz_encoder_create_ref_lists(state);
1540   }
1541 
1542   if (!encoder->cfg.gop_len ||
1543       !prev_state->frame->poc ||
1544       encoder->cfg.gop[prev_state->frame->gop_offset].is_ref) {
1545 
1546     // Store current list of POCs for use in TMVP derivation
1547     memcpy(prev_state->tile->frame->rec->ref_pocs, state->frame->ref->pocs, sizeof(int32_t)*state->frame->ref->used_size);
1548 
1549     // Add previous reconstructed picture as a reference
1550     kvz_image_list_add(state->frame->ref,
1551                    prev_state->tile->frame->rec,
1552                    prev_state->tile->frame->cu_array,
1553                    prev_state->frame->poc,
1554                    prev_state->frame->ref_LX);
1555     kvz_cu_array_free(&state->tile->frame->cu_array);
1556     unsigned height = state->tile->frame->height_in_lcu * LCU_WIDTH;
1557     unsigned width  = state->tile->frame->width_in_lcu  * LCU_WIDTH;
1558     state->tile->frame->cu_array = kvz_cu_array_alloc(width, height);
1559   }
1560 
1561   // Remove source and reconstructed picture.
1562   kvz_image_free(state->tile->frame->source);
1563   state->tile->frame->source = NULL;
1564 
1565   kvz_image_free(state->tile->frame->rec);
1566   state->tile->frame->rec = NULL;
1567 
1568   kvz_cu_array_free(&state->tile->frame->cu_array);
1569 
1570   // Update POC and frame count.
1571   state->frame->num = prev_state->frame->num + 1;
1572   state->frame->poc = prev_state->frame->poc + 1;
1573   state->frame->irap_poc = prev_state->frame->irap_poc;
1574 
1575   state->frame->prepared = 1;
1576 
1577 
1578 }
1579 
kvz_get_scan_order(int8_t cu_type,int intra_mode,int depth)1580 coeff_scan_order_t kvz_get_scan_order(int8_t cu_type, int intra_mode, int depth)
1581 {
1582   // Scan mode is diagonal, except for 4x4+8x8 luma and 4x4 chroma, where:
1583   // - angular 6-14 = vertical
1584   // - angular 22-30 = horizontal
1585   if (cu_type == CU_INTRA && depth >= 3) {
1586     if (intra_mode >= 6 && intra_mode <= 14) {
1587       return SCAN_VER;
1588     } else if (intra_mode >= 22 && intra_mode <= 30) {
1589       return SCAN_HOR;
1590     }
1591   }
1592 
1593   return SCAN_DIAG;
1594 }
1595 
kvz_get_lcu_stats(encoder_state_t * state,int lcu_x,int lcu_y)1596 lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y)
1597 {
1598   const int index = lcu_x + state->tile->lcu_offset_x +
1599                     (lcu_y + state->tile->lcu_offset_y) *
1600                     state->encoder_control->in.width_in_lcu;
1601   return &state->frame->lcu_stats[index];
1602 }
1603 
kvz_get_cu_ref_qp(const encoder_state_t * state,int x,int y,int last_qp)1604 int kvz_get_cu_ref_qp(const encoder_state_t *state, int x, int y, int last_qp)
1605 {
1606   const encoder_control_t *ctrl = state->encoder_control;
1607   const cu_array_t *cua = state->tile->frame->cu_array;
1608   // Quantization group width
1609   const int qg_width = LCU_WIDTH >> MIN(ctrl->max_qp_delta_depth, kvz_cu_array_at_const(cua, x, y)->depth);
1610 
1611   // Coordinates of the top-left corner of the quantization group
1612   const int x_qg = x & ~(qg_width - 1);
1613   const int y_qg = y & ~(qg_width - 1);
1614 
1615   int qp_pred_a = last_qp;
1616   if (x_qg % LCU_WIDTH > 0) {
1617     qp_pred_a = kvz_cu_array_at_const(cua, x_qg - 1, y_qg)->qp;
1618   }
1619 
1620   int qp_pred_b = last_qp;
1621   if (y_qg % LCU_WIDTH > 0) {
1622     qp_pred_b = kvz_cu_array_at_const(cua, x_qg, y_qg - 1)->qp;
1623   }
1624 
1625   return ((qp_pred_a + qp_pred_b + 1) >> 1);
1626 }
1627