1 /*****************************************************************************
2 * This file is part of Kvazaar HEVC encoder.
3 *
4 * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without modification,
8 * are permitted provided that the following conditions are met:
9 *
10 * * Redistributions of source code must retain the above copyright notice, this
11 * list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above copyright notice, this
14 * list of conditions and the following disclaimer in the documentation and/or
15 * other materials provided with the distribution.
16 *
17 * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
28 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 ****************************************************************************/
32
33 #include "encoderstate.h"
34
35 #include <math.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39
40 #include "cabac.h"
41 #include "context.h"
42 #include "encode_coding_tree.h"
43 #include "encoder_state-bitstream.h"
44 #include "filter.h"
45 #include "image.h"
46 #include "rate_control.h"
47 #include "sao.h"
48 #include "search.h"
49 #include "tables.h"
50 #include "threadqueue.h"
51
52 #include "strategies/strategies-picture.h"
53
54
/**
 * \brief Link each child state to its counterpart in the previous frame.
 *
 * Iterates over state->children until an entry with a null encoder_control
 * is reached. For every child, previous_encoder_state is pointed at the
 * child with the same index under state->previous_encoder_state, and the
 * same matching is then applied recursively to the child's own children.
 *
 * \param state  encoder state whose children are matched
 * \return       always 1
 */
int kvz_encoder_state_match_children_of_previous_frame(encoder_state_t * const state)
{
  for (int child_idx = 0; state->children[child_idx].encoder_control; child_idx++) {
    encoder_state_t * const child = &state->children[child_idx];
    encoder_state_t * const prev_child =
      &state->previous_encoder_state->children[child_idx];

    // The previous state is expected to have a child at the same index.
    assert(prev_child->encoder_control);

    child->previous_encoder_state = prev_child;
    kvz_encoder_state_match_children_of_previous_frame(child);
  }

  return 1;
}
65
66 /**
67 * \brief Save edge pixels before SAO to buffers.
68 *
69 * Copies pixels at the edges of the area that will be filtered with SAO to
70 * the given buffers. If deblocking is enabled, the pixels must have been
71 * deblocked before this.
72 *
73 * The saved pixels will be needed later when doing SAO for the neighboring
74 * areas.
75 */
encoder_state_recdata_before_sao_to_bufs(encoder_state_t * const state,const lcu_order_element_t * const lcu,yuv_t * const hor_buf,yuv_t * const ver_buf)76 static void encoder_state_recdata_before_sao_to_bufs(
77 encoder_state_t * const state,
78 const lcu_order_element_t * const lcu,
79 yuv_t * const hor_buf,
80 yuv_t * const ver_buf)
81 {
82 videoframe_t* const frame = state->tile->frame;
83
84 if (hor_buf && lcu->below) {
85 // Copy the bottommost row that will be filtered with SAO to the
86 // horizontal buffer.
87 vector2d_t pos = {
88 .x = lcu->position_px.x,
89 .y = lcu->position_px.y + LCU_WIDTH - SAO_DELAY_PX - 1,
90 };
91 // Copy all pixels that have been deblocked.
92 int length = lcu->size.x - DEBLOCK_DELAY_PX;
93
94 if (!lcu->right) {
95 // If there is no LCU to the right, the last pixels will be
96 // filtered too.
97 length += DEBLOCK_DELAY_PX;
98 }
99
100 if (lcu->left) {
101 // The rightmost pixels of the CTU to the left will also be filtered.
102 pos.x -= DEBLOCK_DELAY_PX;
103 length += DEBLOCK_DELAY_PX;
104 }
105
106 const unsigned from_index = pos.x + pos.y * frame->rec->stride;
107 // NOTE: The horizontal buffer is indexed by
108 // x_px + y_lcu * frame->width
109 // where x_px is in pixels and y_lcu in number of LCUs.
110 const unsigned to_index = pos.x + lcu->position.y * frame->width;
111
112 kvz_pixels_blit(&frame->rec->y[from_index],
113 &hor_buf->y[to_index],
114 length, 1,
115 frame->rec->stride,
116 frame->width);
117
118 if (state->encoder_control->chroma_format != KVZ_CSP_400) {
119 const unsigned from_index_c = (pos.x / 2) + (pos.y / 2) * frame->rec->stride / 2;
120 const unsigned to_index_c = (pos.x / 2) + lcu->position.y * frame->width / 2;
121
122 kvz_pixels_blit(&frame->rec->u[from_index_c],
123 &hor_buf->u[to_index_c],
124 length / 2, 1,
125 frame->rec->stride / 2,
126 frame->width / 2);
127 kvz_pixels_blit(&frame->rec->v[from_index_c],
128 &hor_buf->v[to_index_c],
129 length / 2, 1,
130 frame->rec->stride / 2,
131 frame->width / 2);
132 }
133 }
134
135 if (ver_buf && lcu->right) {
136 // Copy the rightmost column that will be filtered with SAO to the
137 // vertical buffer.
138 vector2d_t pos = {
139 .x = lcu->position_px.x + LCU_WIDTH - SAO_DELAY_PX - 1,
140 .y = lcu->position_px.y,
141 };
142 int length = lcu->size.y - DEBLOCK_DELAY_PX;
143
144 if (!lcu->below) {
145 // If there is no LCU below, the last pixels will be filtered too.
146 length += DEBLOCK_DELAY_PX;
147 }
148
149 if (lcu->above) {
150 // The bottommost pixels of the CTU above will also be filtered.
151 pos.y -= DEBLOCK_DELAY_PX;
152 length += DEBLOCK_DELAY_PX;
153 }
154
155 const unsigned from_index = pos.x + pos.y * frame->rec->stride;
156 // NOTE: The vertical buffer is indexed by
157 // x_lcu * frame->height + y_px
158 // where x_lcu is in number of LCUs and y_px in pixels.
159 const unsigned to_index = lcu->position.x * frame->height + pos.y;
160
161 kvz_pixels_blit(&frame->rec->y[from_index],
162 &ver_buf->y[to_index],
163 1, length,
164 frame->rec->stride, 1);
165
166 if (state->encoder_control->chroma_format != KVZ_CSP_400) {
167 const unsigned from_index_c = (pos.x / 2) + (pos.y / 2) * frame->rec->stride / 2;
168 const unsigned to_index_c = lcu->position.x * frame->height / 2 + pos.y / 2;
169
170 kvz_pixels_blit(&frame->rec->u[from_index_c],
171 &ver_buf->u[to_index_c],
172 1, length / 2,
173 frame->rec->stride / 2, 1);
174 kvz_pixels_blit(&frame->rec->v[from_index_c],
175 &ver_buf->v[to_index_c],
176 1, length / 2,
177 frame->rec->stride / 2, 1);
178 }
179 }
180 }
181
/**
 * \brief Copy the bottom row and right column of an LCU to buffers.
 *
 * Reads the reconstructed pixels of the frame (frame->rec) and stores the
 * last row of the LCU in hor_buf and the last column in ver_buf. The
 * horizontal buffer is written at x_px + y_lcu * frame->width and the
 * vertical buffer at y_px + x_lcu * frame->height. Chroma planes are copied
 * at half resolution unless the chroma format is KVZ_CSP_400.
 *
 * \param state    encoder state; provides the frame and the chroma format
 * \param lcu      LCU whose edges are copied
 * \param hor_buf  destination of the bottom row; skipped if NULL
 * \param ver_buf  destination of the right column; skipped if NULL
 */
static void encoder_state_recdata_to_bufs(encoder_state_t * const state,
                                          const lcu_order_element_t * const lcu,
                                          yuv_t * const hor_buf,
                                          yuv_t * const ver_buf)
{
  videoframe_t * const frame = state->tile->frame;

  if (hor_buf) {
    // First pixel of the bottom row of this LCU.
    vector2d_t row_start = { lcu->position_px.x, lcu->position_px.y + lcu->size.y - 1 };
    const int y_lcu = lcu->position.y;

    unsigned src_idx = row_start.y * frame->rec->stride + row_start.x;
    unsigned dst_idx = lcu->position_px.x + y_lcu * frame->width;

    kvz_pixels_blit(&frame->rec->y[src_idx],
                    &hor_buf->y[dst_idx],
                    lcu->size.x, 1,
                    frame->rec->stride, frame->width);

    if (state->encoder_control->chroma_format != KVZ_CSP_400) {
      unsigned src_idx_c = (row_start.y / 2) * frame->rec->stride / 2 + (row_start.x / 2);
      unsigned dst_idx_c = lcu->position_px.x / 2 + y_lcu * frame->width / 2;

      kvz_pixels_blit(&frame->rec->u[src_idx_c],
                      &hor_buf->u[dst_idx_c],
                      lcu->size.x / 2, 1,
                      frame->rec->stride / 2, frame->width / 2);
      kvz_pixels_blit(&frame->rec->v[src_idx_c],
                      &hor_buf->v[dst_idx_c],
                      lcu->size.x / 2, 1,
                      frame->rec->stride / 2, frame->width / 2);
    }
  }

  if (ver_buf) {
    // First pixel of the rightmost column of this LCU.
    const int x_lcu = lcu->position.x;
    vector2d_t col_start = { lcu->position_px.x + lcu->size.x - 1, lcu->position_px.y };

    kvz_pixels_blit(&frame->rec->y[col_start.y * frame->rec->stride + col_start.x],
                    &ver_buf->y[lcu->position_px.y + x_lcu * frame->height],
                    1, lcu->size.y,
                    frame->rec->stride, 1);

    if (state->encoder_control->chroma_format != KVZ_CSP_400) {
      unsigned src_idx_c = (col_start.y / 2) * frame->rec->stride / 2 + (col_start.x / 2);
      unsigned dst_idx_c = lcu->position_px.y / 2 + x_lcu * frame->height / 2;

      kvz_pixels_blit(&frame->rec->u[src_idx_c],
                      &ver_buf->u[dst_idx_c],
                      1, lcu->size.y / 2,
                      frame->rec->stride / 2, 1);
      kvz_pixels_blit(&frame->rec->v[src_idx_c],
                      &ver_buf->v[dst_idx_c],
                      1, lcu->size.y / 2,
                      frame->rec->stride / 2, 1);
    }
  }
}
244
245 /**
246 * \brief Do SAO reconstuction for all available pixels.
247 *
248 * Does SAO reconstruction for all pixels that are available after the
249 * given LCU has been deblocked. This means the following pixels:
250 * - bottom-right block of SAO_DELAY_PX times SAO_DELAY_PX in the lcu to
251 * the left and up
252 * - the rightmost SAO_DELAY_PX pixels of the LCU to the left (excluding
253 * the bottommost pixel)
254 * - the bottommost SAO_DELAY_PX pixels of the LCU above (excluding the
255 * rightmost pixels)
256 * - all pixels inside the LCU, excluding the rightmost SAO_DELAY_PX and
257 * bottommost SAO_DELAY_PX
258 */
encoder_sao_reconstruct(const encoder_state_t * const state,const lcu_order_element_t * const lcu)259 static void encoder_sao_reconstruct(const encoder_state_t *const state,
260 const lcu_order_element_t *const lcu)
261 {
262 videoframe_t *const frame = state->tile->frame;
263
264
265 // Temporary buffers for SAO input pixels. The buffers cover the pixels
266 // inside the LCU (LCU_WIDTH x LCU_WIDTH), SAO_DELAY_PX wide bands to the
267 // left and above the LCU, and one pixel border on the left and top
268 // sides. We add two extra pixels to the buffers because the AVX2 SAO
269 // reconstruction reads up to two extra bytes when using edge SAO in the
270 // horizontal direction.
271 #define SAO_BUF_WIDTH (1 + SAO_DELAY_PX + LCU_WIDTH)
272 #define SAO_BUF_WIDTH_C (1 + SAO_DELAY_PX/2 + LCU_WIDTH_C)
273 kvz_pixel sao_buf_y_array[SAO_BUF_WIDTH * SAO_BUF_WIDTH + 2];
274 kvz_pixel sao_buf_u_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C + 2];
275 kvz_pixel sao_buf_v_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C + 2];
276
277 // Pointers to the top-left pixel of the LCU in the buffers.
278 kvz_pixel *const sao_buf_y = &sao_buf_y_array[(SAO_DELAY_PX + 1) * (SAO_BUF_WIDTH + 1)];
279 kvz_pixel *const sao_buf_u = &sao_buf_u_array[(SAO_DELAY_PX/2 + 1) * (SAO_BUF_WIDTH_C + 1)];
280 kvz_pixel *const sao_buf_v = &sao_buf_v_array[(SAO_DELAY_PX/2 + 1) * (SAO_BUF_WIDTH_C + 1)];
281
282 const int x_offsets[3] = {
283 // If there is an lcu to the left, we need to filter its rightmost
284 // pixels.
285 lcu->left ? -SAO_DELAY_PX : 0,
286 0,
287 // If there is an lcu to the right, the rightmost pixels of this LCU
288 // are filtered when filtering that LCU. Otherwise we filter them now.
289 lcu->size.x - (lcu->right ? SAO_DELAY_PX : 0),
290 };
291
292 const int y_offsets[3] = {
293 // If there is an lcu above, we need to filter its bottommost pixels.
294 lcu->above ? -SAO_DELAY_PX : 0,
295 0,
296 // If there is an lcu below, the bottommost pixels of this LCU are
297 // filtered when filtering that LCU. Otherwise we filter them now.
298 lcu->size.y - (lcu->below ? SAO_DELAY_PX : 0),
299 };
300
301 // Number of pixels around the block that need to be copied to the
302 // buffers.
303 const int border_left = lcu->left ? 1 : 0;
304 const int border_right = lcu->right ? 1 : 0;
305 const int border_above = lcu->above ? 1 : 0;
306 const int border_below = lcu->below ? 1 : 0;
307
308 // Index of the pixel at the intersection of the top and left borders.
309 const int border_index = (x_offsets[0] - border_left) +
310 (y_offsets[0] - border_above) * SAO_BUF_WIDTH;
311 const int border_index_c = (x_offsets[0]/2 - border_left) +
312 (y_offsets[0]/2 - border_above) * SAO_BUF_WIDTH_C;
313 // Width and height of the whole area to filter.
314 const int width = x_offsets[2] - x_offsets[0];
315 const int height = y_offsets[2] - y_offsets[0];
316
317 // Copy bordering pixels from above and left to buffers.
318 if (lcu->above) {
319 const int from_index = (lcu->position_px.x + x_offsets[0] - border_left) +
320 (lcu->position.y - 1) * frame->width;
321 kvz_pixels_blit(&state->tile->hor_buf_before_sao->y[from_index],
322 &sao_buf_y[border_index],
323 width + border_left + border_right,
324 1,
325 frame->width,
326 SAO_BUF_WIDTH);
327 if (state->encoder_control->chroma_format != KVZ_CSP_400) {
328 const int from_index_c = (lcu->position_px.x + x_offsets[0])/2 - border_left +
329 (lcu->position.y - 1) * frame->width/2;
330 kvz_pixels_blit(&state->tile->hor_buf_before_sao->u[from_index_c],
331 &sao_buf_u[border_index_c],
332 width/2 + border_left + border_right,
333 1,
334 frame->width/2,
335 SAO_BUF_WIDTH_C);
336 kvz_pixels_blit(&state->tile->hor_buf_before_sao->v[from_index_c],
337 &sao_buf_v[border_index_c],
338 width/2 + border_left + border_right,
339 1,
340 frame->width/2,
341 SAO_BUF_WIDTH_C);
342 }
343 }
344 if (lcu->left) {
345 const int from_index = (lcu->position.x - 1) * frame->height +
346 (lcu->position_px.y + y_offsets[0] - border_above);
347 kvz_pixels_blit(&state->tile->ver_buf_before_sao->y[from_index],
348 &sao_buf_y[border_index],
349 1,
350 height + border_above + border_below,
351 1,
352 SAO_BUF_WIDTH);
353 if (state->encoder_control->chroma_format != KVZ_CSP_400) {
354 const int from_index_c = (lcu->position.x - 1) * frame->height/2 +
355 (lcu->position_px.y + y_offsets[0])/2 - border_above;
356 kvz_pixels_blit(&state->tile->ver_buf_before_sao->u[from_index_c],
357 &sao_buf_u[border_index_c],
358 1,
359 height/2 + border_above + border_below,
360 1,
361 SAO_BUF_WIDTH_C);
362 kvz_pixels_blit(&state->tile->ver_buf_before_sao->v[from_index_c],
363 &sao_buf_v[border_index_c],
364 1,
365 height/2 + border_above + border_below,
366 1,
367 SAO_BUF_WIDTH_C);
368 }
369 }
370 // Copy pixels that will be filtered and bordering pixels from right and
371 // below.
372 const int from_index = (lcu->position_px.x + x_offsets[0]) +
373 (lcu->position_px.y + y_offsets[0]) * frame->rec->stride;
374 const int to_index = x_offsets[0] + y_offsets[0] * SAO_BUF_WIDTH;
375 kvz_pixels_blit(&frame->rec->y[from_index],
376 &sao_buf_y[to_index],
377 width + border_right,
378 height + border_below,
379 frame->rec->stride,
380 SAO_BUF_WIDTH);
381 if (state->encoder_control->chroma_format != KVZ_CSP_400) {
382 const int from_index_c = (lcu->position_px.x + x_offsets[0])/2 +
383 (lcu->position_px.y + y_offsets[0])/2 * frame->rec->stride/2;
384 const int to_index_c = x_offsets[0]/2 + y_offsets[0]/2 * SAO_BUF_WIDTH_C;
385 kvz_pixels_blit(&frame->rec->u[from_index_c],
386 &sao_buf_u[to_index_c],
387 width/2 + border_right,
388 height/2 + border_below,
389 frame->rec->stride/2,
390 SAO_BUF_WIDTH_C);
391 kvz_pixels_blit(&frame->rec->v[from_index_c],
392 &sao_buf_v[to_index_c],
393 width/2 + border_right,
394 height/2 + border_below,
395 frame->rec->stride/2,
396 SAO_BUF_WIDTH_C);
397 }
398
399 // We filter the pixels in four parts:
400 // 1. Pixels that belong to the LCU above and to the left
401 // 2. Pixels that belong to the LCU above
402 // 3. Pixels that belong to the LCU to the left
403 // 4. Pixels that belong to the current LCU
404 for (int y_offset_index = 0; y_offset_index < 2; y_offset_index++) {
405 for (int x_offset_index = 0; x_offset_index < 2; x_offset_index++) {
406 const int x = x_offsets[x_offset_index];
407 const int y = y_offsets[y_offset_index];
408 const int width = x_offsets[x_offset_index + 1] - x;
409 const int height = y_offsets[y_offset_index + 1] - y;
410
411 if (width == 0 || height == 0) continue;
412
413 const int lcu_x = (lcu->position_px.x + x) >> LOG2_LCU_WIDTH;
414 const int lcu_y = (lcu->position_px.y + y) >> LOG2_LCU_WIDTH;
415 const int lcu_index = lcu_x + lcu_y * frame->width_in_lcu;
416 const sao_info_t *sao_luma = &frame->sao_luma[lcu_index];
417 const sao_info_t *sao_chroma = &frame->sao_chroma[lcu_index];
418
419 kvz_sao_reconstruct(state,
420 &sao_buf_y[x + y * SAO_BUF_WIDTH],
421 SAO_BUF_WIDTH,
422 lcu->position_px.x + x,
423 lcu->position_px.y + y,
424 width,
425 height,
426 sao_luma,
427 COLOR_Y);
428
429 if (state->encoder_control->chroma_format != KVZ_CSP_400) {
430 // Coordinates in chroma pixels.
431 int x_c = x >> 1;
432 int y_c = y >> 1;
433
434 kvz_sao_reconstruct(state,
435 &sao_buf_u[x_c + y_c * SAO_BUF_WIDTH_C],
436 SAO_BUF_WIDTH_C,
437 lcu->position_px.x / 2 + x_c,
438 lcu->position_px.y / 2 + y_c,
439 width / 2,
440 height / 2,
441 sao_chroma,
442 COLOR_U);
443 kvz_sao_reconstruct(state,
444 &sao_buf_v[x_c + y_c * SAO_BUF_WIDTH_C],
445 SAO_BUF_WIDTH_C,
446 lcu->position_px.x / 2 + x_c,
447 lcu->position_px.y / 2 + y_c,
448 width / 2,
449 height / 2,
450 sao_chroma,
451 COLOR_V);
452 }
453 }
454 }
455 }
456
/**
 * \brief Write the SAO parameters of one color component with CABAC.
 *
 * Writes, in this order: sao_type_idx (for Y and U only, V does not get
 * its own), the four absolute offsets, and then either the offset signs
 * and band position (band SAO) or the edge class (edge SAO, Y and U
 * only). Nothing is written after sao_type_idx when the type is
 * SAO_TYPE_NONE.
 *
 * \param state    encoder state whose CABAC coder is written to
 * \param sao      SAO parameters to encode
 * \param color_i  color component being encoded
 */
static void encode_sao_color(encoder_state_t * const state, sao_info_t *sao,
                             color_t color_i)
{
  cabac_data_t * const cabac = &state->cabac;
  // For V the offsets are read 5 entries further into sao->offsets.
  const int first_offset = (color_i == COLOR_V) ? 5 : 0;

  // FIXME: components without SAO should be skipped here. For now, SAO is
  // always present for all channels, so nothing is skipped.

  /// sao_type_idx_luma:   TR, cMax = 2, cRiceParam = 0, bins = {0, bypass}
  /// sao_type_idx_chroma: TR, cMax = 2, cRiceParam = 0, bins = {0, bypass}
  if (color_i != COLOR_V) {
    cabac->cur_ctx = &(cabac->ctx.sao_type_idx_model);
    CABAC_BIN(cabac, sao->type != SAO_TYPE_NONE, "sao_type_idx");
    if (sao->type == SAO_TYPE_BAND) {
      CABAC_BIN_EP(cabac, 0, "sao_type_idx_ep");
    } else if (sao->type == SAO_TYPE_EDGE) {
      CABAC_BIN_EP(cabac, 1, "sao_type_idx_ep");
    }
  }

  if (sao->type == SAO_TYPE_NONE) {
    return;
  }

  /// sao_offset_abs[][][][]: TR, cMax = (1 << (Min(bitDepth, 10) - 5)) - 1,
  ///                         cRiceParam = 0, bins = {bypass x N}
  for (sao_eo_cat cat = SAO_EO_CAT1; cat <= SAO_EO_CAT4; ++cat) {
    kvz_cabac_write_unary_max_symbol_ep(cabac, abs(sao->offsets[cat + first_offset]), SAO_ABS_OFFSET_MAX);
  }

  /// sao_eo_class_luma:   FL, cMax = 3, bins = {bypass x 3}
  /// sao_eo_class_chroma: FL, cMax = 3, bins = {bypass x 3}
  if (sao->type != SAO_TYPE_BAND) {
    if (color_i != COLOR_V) {
      CABAC_BINS_EP(cabac, sao->eo_class, 2, "sao_eo_class");
    }
    return;
  }

  /// sao_offset_sign[][][][]: FL, cMax = 1, bins = {bypass}
  for (sao_eo_cat cat = SAO_EO_CAT1; cat <= SAO_EO_CAT4; ++cat) {
    // A sign is written for non-zero offsets only; positive is coded as 0.
    if (sao->offsets[cat + first_offset] != 0) {
      CABAC_BIN_EP(cabac, sao->offsets[cat + first_offset] < 0 ? 1 : 0, "sao_offset_sign");
    }
  }

  /// sao_band_position[][][]: FL, cMax = 31, bins = {bypass x N}
  // TODO: sao_band_position
  // Written with 5 bits; V uses the second band position entry.
  CABAC_BINS_EP(cabac, sao->band_position[color_i == COLOR_V ? 1:0], 5, "sao_band_position");
}
508
/**
 * \brief Write the SAO merge flags of a CTB with CABAC.
 *
 * sao_merge_left_flag is written unless the CTB is in the first column.
 * sao_merge_up_flag is written unless the CTB is in the first row or
 * sao->merge_left_flag is set.
 *
 * \param state  encoder state whose CABAC coder is written to
 * \param sao    SAO parameters holding the merge flags
 * \param x_ctb  x-coordinate of the CTB, in CTBs
 * \param y_ctb  y-coordinate of the CTB, in CTBs
 */
static void encode_sao_merge_flags(encoder_state_t * const state, sao_info_t *sao, unsigned x_ctb, unsigned y_ctb)
{
  cabac_data_t * const cabac = &state->cabac;

  // There is nothing to merge with on the left in the first column.
  if (x_ctb != 0) {
    cabac->cur_ctx = &(cabac->ctx.sao_merge_flag_model);
    CABAC_BIN(cabac, sao->merge_left_flag, "sao_merge_left_flag");
  }

  // The up flag is not present in the first row or when merging left.
  if (y_ctb == 0 || sao->merge_left_flag) {
    return;
  }

  cabac->cur_ctx = &(cabac->ctx.sao_merge_flag_model);
  CABAC_BIN(cabac, sao->merge_up_flag, "sao_merge_up_flag");
}
522
523
524 /**
525 * \brief Encode SAO information.
526 */
encode_sao(encoder_state_t * const state,unsigned x_lcu,uint16_t y_lcu,sao_info_t * sao_luma,sao_info_t * sao_chroma)527 static void encode_sao(encoder_state_t * const state,
528 unsigned x_lcu, uint16_t y_lcu,
529 sao_info_t *sao_luma, sao_info_t *sao_chroma)
530 {
531 // TODO: transmit merge flags outside sao_info
532 encode_sao_merge_flags(state, sao_luma, x_lcu, y_lcu);
533
534 // If SAO is merged, nothing else needs to be coded.
535 if (!sao_luma->merge_left_flag && !sao_luma->merge_up_flag) {
536 encode_sao_color(state, sao_luma, COLOR_Y);
537 if (state->encoder_control->chroma_format != KVZ_CSP_400) {
538 encode_sao_color(state, sao_chroma, COLOR_U);
539 encode_sao_color(state, sao_chroma, COLOR_V);
540 }
541 }
542 }
543
544
545 /**
546 * \brief Sets the QP for each CU in state->tile->frame->cu_array.
547 *
548 * The QPs are used in deblocking and QP prediction.
549 *
550 * The QP delta for a quantization group is coded when the first CU with
551 * coded block flag set is encountered. Hence, for the purposes of
552 * deblocking and QP prediction, all CUs in before the first one that has
553 * cbf set use the QP predictor and all CUs after that use (QP predictor
554 * + QP delta).
555 *
556 * \param state encoder state
557 * \param x x-coordinate of the left edge of the root CU
558 * \param y y-coordinate of the top edge of the root CU
559 * \param depth depth in the CU quadtree
560 * \param last_qp QP of the last CU in the last quantization group
561 * \param prev_qp -1 if QP delta has not been coded in current QG,
562 * otherwise the QP of the current QG
563 */
set_cu_qps(encoder_state_t * state,int x,int y,int depth,int * last_qp,int * prev_qp)564 static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *last_qp, int *prev_qp)
565 {
566
567 // Stop recursion if the CU is completely outside the frame.
568 if (x >= state->tile->frame->width || y >= state->tile->frame->height) return;
569
570 cu_info_t *cu = kvz_cu_array_at(state->tile->frame->cu_array, x, y);
571 const int cu_width = LCU_WIDTH >> depth;
572
573 if (depth <= state->encoder_control->max_qp_delta_depth) {
574 *prev_qp = -1;
575 }
576
577 if (cu->depth > depth) {
578 // Recursively process sub-CUs.
579 const int d = cu_width >> 1;
580 set_cu_qps(state, x, y, depth + 1, last_qp, prev_qp);
581 set_cu_qps(state, x + d, y, depth + 1, last_qp, prev_qp);
582 set_cu_qps(state, x, y + d, depth + 1, last_qp, prev_qp);
583 set_cu_qps(state, x + d, y + d, depth + 1, last_qp, prev_qp);
584
585 } else {
586 bool cbf_found = *prev_qp >= 0;
587
588 if (cu->tr_depth > depth) {
589 // The CU is split into smaller transform units. Check whether coded
590 // block flag is set for any of the TUs.
591 const int tu_width = LCU_WIDTH >> cu->tr_depth;
592 for (int y_scu = y; !cbf_found && y_scu < y + cu_width; y_scu += tu_width) {
593 for (int x_scu = x; !cbf_found && x_scu < x + cu_width; x_scu += tu_width) {
594 cu_info_t *tu = kvz_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu);
595 if (cbf_is_set_any(tu->cbf, cu->depth)) {
596 cbf_found = true;
597 }
598 }
599 }
600 } else if (cbf_is_set_any(cu->cbf, cu->depth)) {
601 cbf_found = true;
602 }
603
604 int8_t qp;
605 if (cbf_found) {
606 *prev_qp = qp = cu->qp;
607 } else {
608 qp = kvz_get_cu_ref_qp(state, x, y, *last_qp);
609 }
610
611 // Set the correct QP for all state->tile->frame->cu_array elements in
612 // the area covered by the CU.
613 for (int y_scu = y; y_scu < y + cu_width; y_scu += SCU_WIDTH) {
614 for (int x_scu = x; x_scu < x + cu_width; x_scu += SCU_WIDTH) {
615 kvz_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu)->qp = qp;
616 }
617 }
618
619 if (is_last_cu_in_qg(state, x, y, depth)) {
620 *last_qp = cu->qp;
621 }
622 }
623 }
624
625
encoder_state_worker_encode_lcu(void * opaque)626 static void encoder_state_worker_encode_lcu(void * opaque)
627 {
628 const lcu_order_element_t * const lcu = opaque;
629 encoder_state_t *state = lcu->encoder_state;
630 const encoder_control_t * const encoder = state->encoder_control;
631 videoframe_t* const frame = state->tile->frame;
632
633 switch (encoder->cfg.rc_algorithm) {
634 case KVZ_NO_RC:
635 case KVZ_LAMBDA:
636 kvz_set_lcu_lambda_and_qp(state, lcu->position);
637 break;
638 case KVZ_OBA:
639 kvz_set_ctu_qp_lambda(state, lcu->position);
640 break;
641 default:
642 assert(0);
643 }
644
645 lcu_coeff_t coeff;
646 state->coeff = &coeff;
647
648 //This part doesn't write to bitstream, it's only search, deblock and sao
649 kvz_search_lcu(state, lcu->position_px.x, lcu->position_px.y, state->tile->hor_buf_search, state->tile->ver_buf_search);
650
651 encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search);
652
653 if (encoder->max_qp_delta_depth >= 0) {
654 int last_qp = state->last_qp;
655 int prev_qp = -1;
656 set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, &last_qp, &prev_qp);
657 }
658
659 if (encoder->cfg.deblock_enable) {
660 kvz_filter_deblock_lcu(state, lcu->position_px.x, lcu->position_px.y);
661 }
662
663 if (encoder->cfg.sao_type) {
664 // Save the post-deblocking but pre-SAO pixels of the LCU to a buffer
665 // so that they can be used in SAO reconstruction later.
666 encoder_state_recdata_before_sao_to_bufs(state,
667 lcu,
668 state->tile->hor_buf_before_sao,
669 state->tile->ver_buf_before_sao);
670 kvz_sao_search_lcu(state, lcu->position.x, lcu->position.y);
671 encoder_sao_reconstruct(state, lcu);
672 }
673
674 //Now write data to bitstream (required to have a correct CABAC state)
675 const uint64_t existing_bits = kvz_bitstream_tell(&state->stream);
676
677 //Encode SAO
678 if (encoder->cfg.sao_type) {
679 encode_sao(state, lcu->position.x, lcu->position.y, &frame->sao_luma[lcu->position.y * frame->width_in_lcu + lcu->position.x], &frame->sao_chroma[lcu->position.y * frame->width_in_lcu + lcu->position.x]);
680 }
681
682 //Encode coding tree
683 kvz_encode_coding_tree(state, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, 0);
684
685 // Coeffs are not needed anymore.
686 state->coeff = NULL;
687
688 bool end_of_slice_segment_flag;
689 if (state->encoder_control->cfg.slices & KVZ_SLICES_WPP) {
690 // Slice segments end after each WPP row.
691 end_of_slice_segment_flag = lcu->last_column;
692 } else if (state->encoder_control->cfg.slices & KVZ_SLICES_TILES) {
693 // Slices end after each tile.
694 end_of_slice_segment_flag = lcu->last_column && lcu->last_row;
695 } else {
696 // Slice ends after the last row of the last tile.
697 int last_tile_id = -1 + encoder->cfg.tiles_width_count * encoder->cfg.tiles_height_count;
698 bool is_last_tile = state->tile->id == last_tile_id;
699 end_of_slice_segment_flag = is_last_tile && lcu->last_column && lcu->last_row;
700 }
701 kvz_cabac_encode_bin_trm(&state->cabac, end_of_slice_segment_flag);
702
703 {
704 const bool end_of_tile = lcu->last_column && lcu->last_row;
705 const bool end_of_wpp_row = encoder->cfg.wpp && lcu->last_column;
706
707
708 if (end_of_tile || end_of_wpp_row) {
709 if (!end_of_slice_segment_flag) {
710 // end_of_sub_stream_one_bit
711 kvz_cabac_encode_bin_trm(&state->cabac, 1);
712 }
713
714 // Finish the substream by writing out remaining state.
715 kvz_cabac_finish(&state->cabac);
716
717 // Write a rbsp_trailing_bits or a byte_alignment. The first one is used
718 // for ending a slice_segment_layer_rbsp and the second one for ending
719 // a substream. They are identical and align the byte stream.
720 kvz_bitstream_put(state->cabac.stream, 1, 1);
721 kvz_bitstream_align_zero(state->cabac.stream);
722
723 kvz_cabac_start(&state->cabac);
724
725 kvz_crypto_delete(&state->crypto_hdl);
726 }
727 }
728
729 pthread_mutex_lock(&state->frame->rc_lock);
730 const uint32_t bits = kvz_bitstream_tell(&state->stream) - existing_bits;
731 state->frame->cur_frame_bits_coded += bits;
732 // This variable is used differently by intra and inter frames and shouldn't
733 // be touched in intra frames here
734 state->frame->remaining_weight -= !state->frame->is_irap ?
735 kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->original_weight :
736 0;
737 pthread_mutex_unlock(&state->frame->rc_lock);
738 kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->bits = bits;
739
740 uint8_t not_skip = false;
741 for(int y = 0; y < 64 && !not_skip; y+=8) {
742 for(int x = 0; x < 64 && !not_skip; x+=8) {
743 not_skip |= !kvz_cu_array_at_const(state->tile->frame->cu_array,
744 lcu->position_px.x + x,
745 lcu->position_px.y + y)->skipped;
746 }
747 }
748 kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->skipped = !not_skip;
749
750 //Wavefronts need the context to be copied to the next row
751 if (state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW && lcu->index == 1) {
752 int j;
753 //Find next encoder (next row)
754 for (j=0; state->parent->children[j].encoder_control; ++j) {
755 if (state->parent->children[j].wfrow->lcu_offset_y == state->wfrow->lcu_offset_y + 1) {
756 //And copy context
757 kvz_context_copy(&state->parent->children[j], state);
758 }
759 }
760 }
761 }
762
encoder_state_encode_leaf(encoder_state_t * const state)763 static void encoder_state_encode_leaf(encoder_state_t * const state)
764 {
765 assert(state->is_leaf);
766 assert(state->lcu_order_count > 0);
767
768 const encoder_control_t *ctrl = state->encoder_control;
769 const kvz_config *cfg = &ctrl->cfg;
770
771 // Signaled slice QP may be different to frame QP with set-qp-in-cu enabled.
772 state->last_qp = ctrl->cfg.set_qp_in_cu ? 26 : state->frame->QP;
773
774 if (cfg->crypto_features) {
775 state->crypto_hdl = kvz_crypto_create(cfg);
776 state->crypto_prev_pos = 0;
777 }
778
779 // Select whether to encode the frame/tile in current thread or to define
780 // wavefront jobs for other threads to handle.
781 bool wavefront = state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW;
782 bool use_parallel_encoding = (wavefront && state->parent->children[1].encoder_control);
783 if (!use_parallel_encoding) {
784 // Encode every LCU in order and perform SAO reconstruction after every
785 // frame is encoded. Deblocking and SAO search is done during LCU encoding.
786
787 for (int i = 0; i < state->lcu_order_count; ++i) {
788 encoder_state_worker_encode_lcu(&state->lcu_order[i]);
789 }
790 } else {
791 // Add each LCU in the wavefront row as it's own job to the queue.
792
793 // Select which frame dependancies should be set to.
794 const encoder_state_t * ref_state = NULL;
795
796 if (state->frame->slicetype == KVZ_SLICE_I) {
797 // I-frames have no references.
798 ref_state = NULL;
799 } else if (cfg->gop_lowdelay &&
800 cfg->gop_len > 0 &&
801 state->previous_encoder_state != state)
802 {
803 // For LP-gop, depend on the state of the first reference.
804 int ref_neg = cfg->gop[state->frame->gop_offset].ref_neg[0];
805 if (ref_neg > cfg->owf) {
806 // If frame is not within OWF range, it's already done.
807 ref_state = NULL;
808 } else {
809 ref_state = state->previous_encoder_state;
810 while (ref_neg > 1) {
811 ref_neg -= 1;
812 ref_state = ref_state->previous_encoder_state;
813 }
814 }
815 } else {
816 // Otherwise, depend on the previous frame.
817 ref_state = state->previous_encoder_state;
818 }
819
820 for (int i = 0; i < state->lcu_order_count; ++i) {
821 const lcu_order_element_t * const lcu = &state->lcu_order[i];
822
823 kvz_threadqueue_free_job(&state->tile->wf_jobs[lcu->id]);
824 state->tile->wf_jobs[lcu->id] = kvz_threadqueue_job_create(encoder_state_worker_encode_lcu, (void*)lcu);
825 threadqueue_job_t **job = &state->tile->wf_jobs[lcu->id];
826
827 // If job object was returned, add dependancies and allow it to run.
828 if (job[0]) {
829 // Add inter frame dependancies when ecoding more than one frame at
830 // once. The added dependancy is for the first LCU of each wavefront
831 // row to depend on the reconstruction status of the row below in the
832 // previous frame.
833 if (ref_state != NULL &&
834 state->previous_encoder_state->tqj_recon_done &&
835 state->frame->slicetype != KVZ_SLICE_I)
836 {
837 // We need to wait until the CTUs whose pixels we refer to are
838 // done before we can start this CTU.
839 const lcu_order_element_t *dep_lcu = lcu;
840 for (int i = 0; dep_lcu->below && i < ctrl->max_inter_ref_lcu.down; i++) {
841 dep_lcu = dep_lcu->below;
842 }
843 for (int i = 0; dep_lcu->right && i < ctrl->max_inter_ref_lcu.right; i++) {
844 dep_lcu = dep_lcu->right;
845 }
846 kvz_threadqueue_job_dep_add(job[0], ref_state->tile->wf_jobs[dep_lcu->id]);
847
848 //TODO: Preparation for the lock free implementation of the new rc
849 if (ref_state->frame->slicetype == KVZ_SLICE_I && ref_state->frame->num != 0 && state->encoder_control->cfg.owf > 1 && true) {
850 kvz_threadqueue_job_dep_add(job[0], ref_state->previous_encoder_state->tile->wf_jobs[dep_lcu->id]);
851 }
852
853 // Very spesific bug that happens when owf length is longer than the
854 // gop length. Takes care of that.
855 if(!state->encoder_control->cfg.gop_lowdelay &&
856 state->encoder_control->cfg.open_gop &&
857 state->encoder_control->cfg.gop_len != 0 &&
858 state->encoder_control->cfg.owf > state->encoder_control->cfg.gop_len &&
859 ref_state->frame->slicetype == KVZ_SLICE_I &&
860 ref_state->frame->num != 0){
861
862 while (ref_state->frame->poc != state->frame->poc - state->encoder_control->cfg.gop_len){
863 ref_state = ref_state->previous_encoder_state;
864 }
865 kvz_threadqueue_job_dep_add(job[0], ref_state->tile->wf_jobs[dep_lcu->id]);
866 }
867 }
868
869 // Add local WPP dependancy to the LCU on the left.
870 if (lcu->left) {
871 kvz_threadqueue_job_dep_add(job[0], job[-1]);
872 }
873 // Add local WPP dependancy to the LCU on the top right.
874 if (lcu->above) {
875 if (lcu->above->right) {
876 kvz_threadqueue_job_dep_add(job[0], job[-state->tile->frame->width_in_lcu + 1]);
877 } else {
878 kvz_threadqueue_job_dep_add(job[0], job[-state->tile->frame->width_in_lcu]);
879 }
880 }
881
882 kvz_threadqueue_submit(state->encoder_control->threadqueue, state->tile->wf_jobs[lcu->id]);
883
884 // The wavefront row is done when the last LCU in the row is done.
885 if (i + 1 == state->lcu_order_count) {
886 assert(!state->tqj_recon_done);
887 state->tqj_recon_done =
888 kvz_threadqueue_copy_ref(state->tile->wf_jobs[lcu->id]);
889 }
890 }
891 }
892 }
893 }
894
895 static void encoder_state_encode(encoder_state_t * const main_state);
896
encoder_state_worker_encode_children(void * opaque)897 static void encoder_state_worker_encode_children(void * opaque)
898 {
899 encoder_state_t *sub_state = opaque;
900 encoder_state_encode(sub_state);
901
902 if (sub_state->is_leaf && sub_state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
903 // Set the last wavefront job of this row as the job that completes
904 // the bitstream for this wavefront row state.
905
906 int wpp_row = sub_state->wfrow->lcu_offset_y;
907 int tile_width = sub_state->tile->frame->width_in_lcu;
908 int end_of_row = (wpp_row + 1) * tile_width - 1;
909 assert(!sub_state->tqj_bitstream_written);
910 if (sub_state->tile->wf_jobs[end_of_row]) {
911 sub_state->tqj_bitstream_written =
912 kvz_threadqueue_copy_ref(sub_state->tile->wf_jobs[end_of_row]);
913 }
914 }
915 }
916
encoder_state_tree_is_a_chain(const encoder_state_t * const state)917 static int encoder_state_tree_is_a_chain(const encoder_state_t * const state) {
918 if (!state->children[0].encoder_control) return 1;
919 if (state->children[1].encoder_control) return 0;
920 return encoder_state_tree_is_a_chain(&state->children[0]);
921 }
922
/**
 * \brief Encode a state, either directly (leaf) or through its children.
 *
 * For a state with children, every child that uses a different tile than the
 * parent first has its source, rec and cu_array replaced by sub-views of the
 * parent's. If the state has more than one child and every child subtree is
 * a chain, children that are not wavefront rows are submitted to the thread
 * queue as jobs; wavefront-row children, and all children in every other
 * case, are encoded with direct calls.
 *
 * \param main_state  state to encode
 */
static void encoder_state_encode(encoder_state_t * const main_state) {
  // No children: this is a leaf, encode it directly.
  if (!main_state->children[0].encoder_control) {
    switch (main_state->type) {
      case ENCODER_STATE_TYPE_TILE:
      case ENCODER_STATE_TYPE_SLICE:
      case ENCODER_STATE_TYPE_WAVEFRONT_ROW:
        encoder_state_encode_leaf(main_state);
        break;
      default:
        fprintf(stderr, "Unsupported leaf type %c!\n", main_state->type);
        assert(0);
    }
    return;
  }

  // A node with only one child cannot be the last split in the tree.
  int is_last_split = (main_state->children[1].encoder_control != 0);

  for (int i = 0; main_state->children[i].encoder_control; ++i) {
    encoder_state_t *child = &main_state->children[i];

    if (child->tile != main_state->tile) {
      const int offset_x = child->tile->offset_x;
      const int offset_y = child->tile->offset_y;
      const int width = MIN(child->tile->frame->width_in_lcu * LCU_WIDTH,
                            main_state->tile->frame->width - offset_x);
      const int height = MIN(child->tile->frame->height_in_lcu * LCU_WIDTH,
                             main_state->tile->frame->height - offset_y);

      // Release whatever the child tile was holding.
      kvz_image_free(child->tile->frame->source);
      child->tile->frame->source = NULL;

      kvz_image_free(child->tile->frame->rec);
      child->tile->frame->rec = NULL;

      kvz_cu_array_free(&child->tile->frame->cu_array);

      // Point the child tile at the matching region of the parent's data.
      child->tile->frame->source = kvz_image_make_subimage(
          main_state->tile->frame->source,
          offset_x, offset_y, width, height);
      child->tile->frame->rec = kvz_image_make_subimage(
          main_state->tile->frame->rec,
          offset_x, offset_y, width, height);
      child->tile->frame->cu_array = kvz_cu_subarray(
          main_state->tile->frame->cu_array,
          offset_x, offset_y,
          child->tile->frame->width_in_lcu * LCU_WIDTH,
          child->tile->frame->height_in_lcu * LCU_WIDTH);
    }

    // To be the last split, every child has to be a chain.
    is_last_split = is_last_split && encoder_state_tree_is_a_chain(child);
  }

  if (!is_last_split) {
    // Not the last split point: just recurse into each child in order.
    for (int i = 0; main_state->children[i].encoder_control; ++i) {
      encoder_state_worker_encode_children(&main_state->children[i]);
    }
    return;
  }

  for (int i = 0; main_state->children[i].encoder_control; ++i) {
    encoder_state_t *child = &main_state->children[i];

    if (child->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
      // Wavefront rows have parallelism at LCU level, so no job is created
      // for the row itself.
      // FIXME: add an assert: we can only have wavefront children
      encoder_state_worker_encode_children(child);
      continue;
    }

    // Without wavefronts, the children themselves are encoded in parallel.
    kvz_threadqueue_free_job(&child->tqj_recon_done);
    child->tqj_recon_done =
      kvz_threadqueue_job_create(encoder_state_worker_encode_children, child);

    if (child->previous_encoder_state != child &&
        child->previous_encoder_state->tqj_recon_done &&
        !child->frame->is_irap)
    {
      // Depend on every child of the previous frame. A variant that only
      // depended on the same tile when mv_constraint is
      // KVZ_MV_CONSTRAIN_FRAME_AND_TILE_MARGIN was disabled due to
      // non-determinism.
      for (int child_id = 0; main_state->children[child_id].encoder_control; ++child_id) {
        kvz_threadqueue_job_dep_add(
            child->tqj_recon_done,
            main_state->children[child_id].previous_encoder_state->tqj_recon_done);
      }
    }

    kvz_threadqueue_submit(main_state->encoder_control->threadqueue,
                           child->tqj_recon_done);
  }
}
1026
1027
encoder_ref_insertion_sort(const encoder_state_t * const state,uint8_t reflist[16],uint8_t length,bool reverse)1028 static void encoder_ref_insertion_sort(const encoder_state_t *const state,
1029 uint8_t reflist[16],
1030 uint8_t length,
1031 bool reverse)
1032 {
1033
1034 for (uint8_t i = 1; i < length; ++i) {
1035 const uint8_t cur_idx = reflist[i];
1036 const int32_t cur_poc = state->frame->ref->pocs[cur_idx];
1037 int8_t j = i;
1038 while ((j > 0 && !reverse && cur_poc > state->frame->ref->pocs[reflist[j - 1]]) ||
1039 (j > 0 && reverse && cur_poc < state->frame->ref->pocs[reflist[j - 1]]))
1040 {
1041 reflist[j] = reflist[j - 1];
1042 --j;
1043 }
1044 reflist[j] = cur_idx;
1045 }
1046 }
1047
1048 /**
1049 * \brief Generate reference picture lists.
1050 *
1051 * \param state main encoder state
1052 */
kvz_encoder_create_ref_lists(const encoder_state_t * const state)1053 void kvz_encoder_create_ref_lists(const encoder_state_t *const state)
1054 {
1055 const kvz_config *cfg = &state->encoder_control->cfg;
1056
1057 FILL_ARRAY(state->frame->ref_LX_size, 0, 2);
1058
1059 int num_negative = 0;
1060 int num_positive = 0;
1061
1062 // Add positive references to L1 list
1063 for (int i = 0; i < state->frame->ref->used_size; i++) {
1064 if (state->frame->ref->pocs[i] > state->frame->poc) {
1065 state->frame->ref_LX[1][state->frame->ref_LX_size[1]] = i;
1066 state->frame->ref_LX_size[1] += 1;
1067 num_positive++;
1068 }
1069 }
1070
1071 // Add negative references to L1 list when bipred is enabled and GOP is
1072 // either disabled or does not use picture reordering.
1073 bool l1_negative_refs =
1074 (cfg->bipred && (cfg->gop_len == 0 || cfg->gop_lowdelay));
1075
1076 // Add negative references to L0 and L1 lists.
1077 for (int i = 0; i < state->frame->ref->used_size; i++) {
1078 if (state->frame->ref->pocs[i] < state->frame->poc) {
1079 state->frame->ref_LX[0][state->frame->ref_LX_size[0]] = i;
1080 state->frame->ref_LX_size[0] += 1;
1081 if (l1_negative_refs) {
1082 state->frame->ref_LX[1][state->frame->ref_LX_size[1]] = i;
1083 state->frame->ref_LX_size[1] += 1;
1084 }
1085 num_negative++;
1086 }
1087 }
1088
1089 // Fill the rest with -1.
1090 for (int i = state->frame->ref_LX_size[0]; i < 16; i++) {
1091 state->frame->ref_LX[0][i] = 0xff;
1092 }
1093 for (int i = state->frame->ref_LX_size[1]; i < 16; i++) {
1094 state->frame->ref_LX[1][i] = 0xff;
1095 }
1096
1097 // Sort reference lists.
1098 encoder_ref_insertion_sort(state, state->frame->ref_LX[0], num_negative, false);
1099 encoder_ref_insertion_sort(state, state->frame->ref_LX[1], num_positive, true);
1100 if (l1_negative_refs) {
1101 encoder_ref_insertion_sort(state, state->frame->ref_LX[1] + num_positive, num_negative, false);
1102 }
1103 }
1104
1105 /**
1106 * \brief Remove any references that should no longer be used.
1107 */
encoder_state_remove_refs(encoder_state_t * state)1108 static void encoder_state_remove_refs(encoder_state_t *state) {
1109 const encoder_control_t * const encoder = state->encoder_control;
1110
1111 int neg_refs = encoder->cfg.gop[state->frame->gop_offset].ref_neg_count;
1112 int pos_refs = encoder->cfg.gop[state->frame->gop_offset].ref_pos_count;
1113
1114 unsigned target_ref_num;
1115 if (encoder->cfg.gop_len) {
1116 target_ref_num = neg_refs + pos_refs;
1117 } else {
1118 target_ref_num = encoder->cfg.ref_frames;
1119 }
1120
1121 if (state->frame->pictype == KVZ_NAL_IDR_W_RADL ||
1122 state->frame->pictype == KVZ_NAL_IDR_N_LP)
1123 {
1124 target_ref_num = 0;
1125 }
1126
1127 if (encoder->cfg.gop_len && target_ref_num > 0) {
1128 // With GOP in use, go through all the existing reference pictures and
1129 // remove any picture that is not referenced by the current picture.
1130
1131 for (int ref = state->frame->ref->used_size - 1; ref >= 0; --ref) {
1132 bool is_referenced = false;
1133
1134 int ref_poc = state->frame->ref->pocs[ref];
1135
1136 for (int i = 0; i < neg_refs; i++) {
1137 int ref_relative_poc = -encoder->cfg.gop[state->frame->gop_offset].ref_neg[i];
1138 if (ref_poc == state->frame->poc + ref_relative_poc) {
1139 is_referenced = true;
1140 break;
1141 }
1142 }
1143
1144 for (int i = 0; i < pos_refs; i++) {
1145 int ref_relative_poc = encoder->cfg.gop[state->frame->gop_offset].ref_pos[i];
1146 if (ref_poc == state->frame->poc + ref_relative_poc) {
1147 is_referenced = true;
1148 break;
1149 }
1150 }
1151
1152 if (ref_poc < state->frame->irap_poc &&
1153 state->frame->irap_poc < state->frame->poc)
1154 {
1155 // Trailing frames cannot refer to leading frames.
1156 is_referenced = false;
1157 }
1158
1159 if (encoder->cfg.intra_period > 0 &&
1160 ref_poc < state->frame->irap_poc - encoder->cfg.intra_period)
1161 {
1162 // No frame can refer past the two preceding IRAP frames.
1163 is_referenced = false;
1164 }
1165
1166 if (!is_referenced) {
1167 // This reference is not referred to by this frame, it must be removed.
1168 kvz_image_list_rem(state->frame->ref, ref);
1169 }
1170 }
1171 } else {
1172 // Without GOP, remove the oldest picture.
1173 while (state->frame->ref->used_size > target_ref_num) {
1174 int8_t oldest_ref = state->frame->ref->used_size - 1;
1175 kvz_image_list_rem(state->frame->ref, oldest_ref);
1176 }
1177 }
1178
1179 assert(state->frame->ref->used_size <= target_ref_num);
1180 }
1181
/**
 * \brief Attach a source picture to the state and set up its reconstruction.
 *
 * Both the source and the rec slot of the tile frame must be empty on entry.
 *
 * \param state  encoder state that receives the picture
 * \param frame  source picture; stored in state->tile->frame->source
 */
static void encoder_set_source_picture(encoder_state_t * const state, kvz_picture* frame)
{
  assert(!state->tile->frame->source);
  assert(!state->tile->frame->rec);

  state->tile->frame->source = frame;

  if (!state->encoder_control->cfg.lossless) {
    // Lossy coding needs a separate picture for the reconstruction; it
    // carries over the timestamps of the source.
    // NOTE(review): the result of kvz_image_alloc is dereferenced without a
    // check - confirm whether it can return NULL.
    state->tile->frame->rec = kvz_image_alloc(state->encoder_control->chroma_format,
                                              frame->width,
                                              frame->height);
    state->tile->frame->rec->dts = frame->dts;
    state->tile->frame->rec->pts = frame->pts;
  } else {
    // In lossless mode, the reconstruction is equal to the source frame.
    state->tile->frame->rec = kvz_image_copy_ref(frame);
  }

  kvz_videoframe_set_poc(state->tile->frame, state->frame->poc);
}
1199
/**
 * \brief Reset the per-frame coding state of a state and all its descendants.
 *
 * Clears the bitstream, restarts CABAC and its contexts on leaf states, and
 * releases the bitstream-written and recon-done job references.
 *
 * \param state  root of the subtree to reset
 */
static void encoder_state_init_children(encoder_state_t * const state) {
  kvz_bitstream_clear(&state->stream);

  // Only leaf states have cabac and context.
  if (state->is_leaf) {
    kvz_cabac_start(&state->cabac);
    kvz_init_contexts(state,
                      state->encoder_control->cfg.set_qp_in_cu ? 26 : state->frame->QP,
                      state->frame->slicetype);
  }

  // Drop the job references left over from the previous frame.
  kvz_threadqueue_free_job(&state->tqj_bitstream_written);
  kvz_threadqueue_free_job(&state->tqj_recon_done);

  // TODO: Copying the constraint pointer from the parent state is currently
  // disabled; try to do it in the is_leaf branch above.

  for (int child = 0; state->children[child].encoder_control; ++child) {
    encoder_state_init_children(&state->children[child]);
  }
}
1223
/**
 * \brief Scale the LCU weights of the frame so that they sum to one.
 *
 * Does nothing for the first frame (frame number 0). If the weights sum to
 * exactly zero they are left untouched, because dividing by the sum would
 * turn every weight into NaN.
 *
 * \param state  encoder state whose frame->lcu_stats[].weight is rescaled
 */
static void normalize_lcu_weights(encoder_state_t * const state)
{
  if (state->frame->num == 0) return;

  const uint32_t num_lcus = state->encoder_control->in.width_in_lcu *
                            state->encoder_control->in.height_in_lcu;

  double sum = 0.0;
  for (uint32_t i = 0; i < num_lcus; i++) {
    sum += state->frame->lcu_stats[i].weight;
  }

  // Nothing sensible to normalize by; avoid producing NaN/inf weights.
  if (sum == 0.0) return;

  for (uint32_t i = 0; i < num_lcus; i++) {
    state->frame->lcu_stats[i].weight /= sum;
  }
}
1239
/**
 * \brief Check whether an LCU lies on a partially filled frame edge.
 *
 * \param id      index of the LCU in raster order
 * \param lcus_x  number of LCUs per row
 * \param lcus_y  number of LCU rows
 * \param xdiv64  true if the frame width is divisible by 64
 * \param ydiv64  true if the frame height is divisible by 64
 * \return true if the LCU is in the last column while the width is not
 *         divisible by 64, or in the last row while the height is not
 *         divisible by 64; false otherwise
 */
static bool edge_lcu(int id, int lcus_x, int lcus_y, bool xdiv64, bool ydiv64)
{
  // Both dimensions divisible by 64: no LCU is cut by the frame border.
  if (xdiv64 && ydiv64) {
    return false;
  }

  const bool in_last_column = (id % lcus_x) == (lcus_x - 1);
  const bool in_last_row = id >= (lcus_y - 1) * lcus_x;

  return (in_last_column && !xdiv64) || (in_last_row && !ydiv64);
}
1254
/**
 * \brief Compute the variance adaptive quantization offset of every LCU.
 *
 * For each LCU the offset is D * (log(LCU pixel variance) - log(frame pixel
 * variance)), where D is cfg.vaq * 0.1. Chroma variance is added to both
 * terms unless the chroma format is KVZ_CSP_400. LCUs reported by edge_lcu()
 * are filled to full LCU size by repeating the nearest pixel inside the
 * frame.
 *
 * \param state  main encoder state; state->frame->aq_offsets is written
 * \param frame  source picture that provides the frame dimensions
 */
static void encoder_state_set_vaq_offsets(encoder_state_t * const state,
                                          const kvz_picture * const frame)
{
  const kvz_picture * const source = state->tile->frame->source;
  const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;

  // Empirically decided constant. Affects delta-QP strength.
  const double d = state->encoder_control->cfg.vaq * 0.1;

  // Calculate frame pixel variance.
  // NOTE(review): chroma planes are treated as a quarter of the luma size
  // (half width, half height) - confirm for chroma formats other than 4:2:0.
  const uint32_t len = state->tile->frame->width * state->tile->frame->height;
  const uint32_t c_len = len / 4;
  double frame_var = kvz_pixel_var(source->y, len);
  if (has_chroma) {
    frame_var += kvz_pixel_var(source->u, c_len);
    frame_var += kvz_pixel_var(source->v, c_len);
  }

  const int x_lim = state->tile->frame->width_in_lcu;
  const int y_lim = state->tile->frame->height_in_lcu;

  // These depend only on the frame size, so they are computed once.
  const bool xdiv64 = (frame->width % 64 == 0);
  const bool ydiv64 = (frame->height % 64 == 0);

  int id = 0;
  for (int lcu_y = 0; lcu_y < y_lim; ++lcu_y) {
    for (int lcu_x = 0; lcu_x < x_lim; ++lcu_x) {
      kvz_pixel tmp[LCU_LUMA_SIZE];
      const int pxl_x = lcu_x * LCU_WIDTH;
      const int pxl_y = lcu_y * LCU_WIDTH;
      const bool is_edge = edge_lcu(id, x_lim, y_lim, xdiv64, ydiv64);

      // Luma variance
      if (!is_edge) {
        const int x_max = MIN(pxl_x + LCU_WIDTH, frame->width) - pxl_x;
        const int y_max = MIN(pxl_y + LCU_WIDTH, frame->height) - pxl_y;
        kvz_pixels_blit(&source->y[pxl_x + pxl_y * source->stride], tmp,
                        x_max, y_max, source->stride, LCU_WIDTH);
      } else {
        // Extend edge pixels for edge lcus
        for (int row = 0; row < LCU_WIDTH; row++) {
          for (int col = 0; col < LCU_WIDTH; col++) {
            const int src_y = CLIP(0, frame->height - 1, pxl_y + row);
            const int src_x = CLIP(0, frame->width - 1, pxl_x + col);
            tmp[row * LCU_WIDTH + col] = source->y[src_y * source->stride + src_x];
          }
        }
      }

      double lcu_var = kvz_pixel_var(tmp, LCU_LUMA_SIZE);

      if (has_chroma) {
        // Add chroma variance if not monochrome
        const int32_t c_stride = source->stride >> 1;
        kvz_pixel chromau_tmp[LCU_CHROMA_SIZE];
        kvz_pixel chromav_tmp[LCU_CHROMA_SIZE];
        const int lcu_chroma_width = LCU_WIDTH >> 1;
        const int c_pxl_x = lcu_x * lcu_chroma_width;
        const int c_pxl_y = lcu_y * lcu_chroma_width;

        if (!is_edge) {
          const int c_x_max = MIN(c_pxl_x + lcu_chroma_width, frame->width >> 1) - c_pxl_x;
          const int c_y_max = MIN(c_pxl_y + lcu_chroma_width, frame->height >> 1) - c_pxl_y;
          kvz_pixels_blit(&source->u[c_pxl_x + c_pxl_y * c_stride], chromau_tmp,
                          c_x_max, c_y_max, c_stride, lcu_chroma_width);
          kvz_pixels_blit(&source->v[c_pxl_x + c_pxl_y * c_stride], chromav_tmp,
                          c_x_max, c_y_max, c_stride, lcu_chroma_width);
        } else {
          for (int row = 0; row < lcu_chroma_width; row++) {
            for (int col = 0; col < lcu_chroma_width; col++) {
              const int src_y = CLIP(0, (frame->height >> 1) - 1, c_pxl_y + row);
              const int src_x = CLIP(0, (frame->width >> 1) - 1, c_pxl_x + col);
              chromau_tmp[row * lcu_chroma_width + col] = source->u[src_y * c_stride + src_x];
              chromav_tmp[row * lcu_chroma_width + col] = source->v[src_y * c_stride + src_x];
            }
          }
        }
        lcu_var += kvz_pixel_var(chromau_tmp, LCU_CHROMA_SIZE);
        lcu_var += kvz_pixel_var(chromav_tmp, LCU_CHROMA_SIZE);
      }

      // NOTE(review): if kvz_pixel_var can return 0, log() yields -inf here;
      // confirm how consumers of aq_offsets handle that.
      state->frame->aq_offsets[id] = d * (log(lcu_var) - log(frame_var));
      id++;
    }
  }
}

/**
 * \brief Set up the main encoder state for encoding a new frame.
 *
 * In order: attaches the source picture, allocates the cu array, computes
 * the VAQ offsets when cfg.vaq is set, selects the previous state on the
 * same GOP layer, and then decides POC, IRAP status, picture type, reference
 * lists and slice type. Finally initializes rate control for the picture and
 * resets all child states.
 *
 * \param state  main encoder state (type must be ENCODER_STATE_TYPE_MAIN)
 * \param frame  source picture to encode
 */
static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_picture* frame) {
  assert(state->type == ENCODER_STATE_TYPE_MAIN);

  const kvz_config * const cfg = &state->encoder_control->cfg;

  encoder_set_source_picture(state, frame);

  assert(!state->tile->frame->cu_array);
  state->tile->frame->cu_array = kvz_cu_array_alloc(
      state->tile->frame->width,
      state->tile->frame->height
  );

  // Variance adaptive quantization
  if (cfg->vaq) {
    encoder_state_set_vaq_offsets(state, frame);
  }

  // Use this flag to handle closed gop irap picture selection.
  // If set to true, irap is already set and we avoid
  // setting it based on the intra period
  bool is_closed_normal_gop = false;

  // Walk back through at most owf - 1 earlier states looking for one on the
  // same GOP layer; when the countdown ends exactly at zero the state itself
  // is used instead.
  encoder_state_t *previous = state->previous_encoder_state;
  int owf = MIN(state->encoder_control->cfg.owf, state->frame->num);

  const int layer = state->encoder_control->cfg.gop[state->frame->gop_offset].layer;

  while (--owf > 0 && layer != state->encoder_control->cfg.gop[previous->frame->gop_offset].layer) {
    previous = previous->previous_encoder_state;
  }

  if (owf == 0) previous = state;
  state->frame->previous_layer_state = previous;

  // Set POC.
  if (state->frame->num == 0) {
    state->frame->poc = 0;
  } else if (cfg->gop_len && !cfg->gop_lowdelay) {

    int32_t framenum = state->frame->num - 1;
    // Handle closed GOP
    // Closed GOP structure has an extra IDR between the GOPs
    if (cfg->intra_period > 0 && !cfg->open_gop) {
      is_closed_normal_gop = true;
      if (framenum % (cfg->intra_period + 1) == cfg->intra_period) {
        // Insert IDR before each new GOP after intra period in closed GOP configuration
        state->frame->poc = 0;
      } else {
        // Calculate frame number again and use that for the POC
        framenum = framenum % (cfg->intra_period + 1);
        int32_t poc_offset = cfg->gop[state->frame->gop_offset].poc_offset;
        state->frame->poc = framenum - framenum % cfg->gop_len + poc_offset;
        // This should not be an irap picture in closed GOP
        state->frame->is_irap = false;
      }
    } else { // Open GOP
      // Calculate POC according to the global frame counter and GOP structure
      int32_t poc_offset = cfg->gop[state->frame->gop_offset].poc_offset;
      state->frame->poc = framenum - framenum % cfg->gop_len + poc_offset;
    }

    kvz_videoframe_set_poc(state->tile->frame, state->frame->poc);
  } else if (cfg->intra_period > 0) {
    state->frame->poc = state->frame->num % cfg->intra_period;
  } else {
    state->frame->poc = state->frame->num;
  }

  // Check whether the frame is a keyframe or not.
  if (state->frame->num == 0 || state->frame->poc == 0) {
    state->frame->is_irap = true;
  } else if (!is_closed_normal_gop) { // In closed-GOP IDR frames are poc==0 so skip this check
    state->frame->is_irap =
      cfg->intra_period > 0 &&
      (state->frame->poc % cfg->intra_period) == 0;
  }
  if (state->frame->is_irap) {
    state->frame->irap_poc = state->frame->poc;
  }

  // Set pictype.
  if (state->frame->is_irap) {
    if (state->frame->num == 0 ||
        cfg->intra_period == 1 ||
        cfg->gop_len == 0 ||
        cfg->gop_lowdelay ||
        !cfg->open_gop) // Closed GOP uses IDR pictures
    {
      state->frame->pictype = KVZ_NAL_IDR_W_RADL;
    } else {
      state->frame->pictype = KVZ_NAL_CRA_NUT;
    }
  } else if (state->frame->poc < state->frame->irap_poc) {
    state->frame->pictype = KVZ_NAL_RASL_R;
  } else {
    state->frame->pictype = KVZ_NAL_TRAIL_R;
  }

  encoder_state_remove_refs(state);
  kvz_encoder_create_ref_lists(state);

  // Set slicetype.
  if (state->frame->is_irap) {
    state->frame->slicetype = KVZ_SLICE_I;
  } else if (state->frame->ref_LX_size[1] > 0) {
    state->frame->slicetype = KVZ_SLICE_B;
  } else {
    state->frame->slicetype = KVZ_SLICE_P;
  }

  if (cfg->target_bitrate > 0 && state->frame->num > cfg->owf) {
    normalize_lcu_weights(state);
  }
  state->frame->cur_frame_bits_coded = 0;

  switch (state->encoder_control->cfg.rc_algorithm) {
    case KVZ_NO_RC:
    case KVZ_LAMBDA:
      kvz_set_picture_lambda_and_qp(state);
      break;
    case KVZ_OBA:
      kvz_estimate_pic_lambda(state);
      break;
    default:
      assert(0);
  }

  encoder_state_init_children(state);
}
1465
/**
 * \brief Make a job depend on every pending job in a state subtree.
 *
 * Children are visited before the state itself. For each state, the
 * bitstream-written job and the recon-done job are added as dependencies
 * when they are set.
 *
 * \param state  root of the subtree to collect dependencies from
 * \param job    job that receives the dependencies
 */
static void _encode_one_frame_add_bitstream_deps(const encoder_state_t * const state, threadqueue_job_t * const job) {
  for (int child = 0; state->children[child].encoder_control; ++child) {
    _encode_one_frame_add_bitstream_deps(&state->children[child], job);
  }

  threadqueue_job_t *bitstream_job = state->tqj_bitstream_written;
  if (bitstream_job) {
    kvz_threadqueue_job_dep_add(job, bitstream_job);
  }

  threadqueue_job_t *recon_job = state->tqj_recon_done;
  if (recon_job) {
    kvz_threadqueue_job_dep_add(job, recon_job);
  }
}
1478
1479
/**
 * \brief Start encoding one frame and queue the job that writes its bitstream.
 *
 * The bitstream job depends on every job in the state tree and, when a
 * distinct previous state has one, on that state's bitstream job. The job is
 * stored in state->tqj_bitstream_written and frame->done is cleared.
 *
 * \param state  main encoder state
 * \param frame  source picture to encode
 */
void kvz_encode_one_frame(encoder_state_t * const state, kvz_picture* frame)
{
  encoder_state_init_new_frame(state, frame);
  encoder_state_encode(state);

  threadqueue_job_t *write_job =
    kvz_threadqueue_job_create(kvz_encoder_state_worker_write_bitstream, state);

  _encode_one_frame_add_bitstream_deps(state, write_job);

  // The bitstream of the previous frame has to be generated first.
  encoder_state_t *prev_state = state->previous_encoder_state;
  if (prev_state != state && prev_state->tqj_bitstream_written) {
    kvz_threadqueue_job_dep_add(write_job, prev_state->tqj_bitstream_written);
  }

  kvz_threadqueue_submit(state->encoder_control->threadqueue, write_job);

  assert(!state->tqj_bitstream_written);
  state->tqj_bitstream_written = write_job;

  state->frame->done = 0;
}
1499
1500
1501 /**
1502 * Prepare the encoder state for encoding the next frame.
1503 *
1504 * - Add the previous reconstructed picture as a reference, if needed.
1505 * - Free the previous reconstructed and source pictures.
1506 * - Create a new cu array, if needed.
1507 * - Update frame count and POC.
1508 */
kvz_encoder_prepare(encoder_state_t * state)1509 void kvz_encoder_prepare(encoder_state_t *state)
1510 {
1511 const encoder_control_t * const encoder = state->encoder_control;
1512
1513 // The previous frame must be done before the next one is started.
1514 assert(state->frame->done);
1515
1516 if (state->frame->num == -1) {
1517 // We're at the first frame, so don't care about all this stuff.
1518 state->frame->num = 0;
1519 state->frame->poc = 0;
1520 state->frame->irap_poc = 0;
1521 assert(!state->tile->frame->source);
1522 assert(!state->tile->frame->rec);
1523 assert(!state->tile->frame->cu_array);
1524 state->frame->prepared = 1;
1525
1526 return;
1527 }
1528
1529 // NOTE: prev_state is equal to state when OWF is zero
1530 encoder_state_t *prev_state = state->previous_encoder_state;
1531
1532 if (state->previous_encoder_state != state) {
1533 kvz_cu_array_free(&state->tile->frame->cu_array);
1534 unsigned width = state->tile->frame->width_in_lcu * LCU_WIDTH;
1535 unsigned height = state->tile->frame->height_in_lcu * LCU_WIDTH;
1536 state->tile->frame->cu_array = kvz_cu_array_alloc(width, height);
1537
1538 kvz_image_list_copy_contents(state->frame->ref, prev_state->frame->ref);
1539 kvz_encoder_create_ref_lists(state);
1540 }
1541
1542 if (!encoder->cfg.gop_len ||
1543 !prev_state->frame->poc ||
1544 encoder->cfg.gop[prev_state->frame->gop_offset].is_ref) {
1545
1546 // Store current list of POCs for use in TMVP derivation
1547 memcpy(prev_state->tile->frame->rec->ref_pocs, state->frame->ref->pocs, sizeof(int32_t)*state->frame->ref->used_size);
1548
1549 // Add previous reconstructed picture as a reference
1550 kvz_image_list_add(state->frame->ref,
1551 prev_state->tile->frame->rec,
1552 prev_state->tile->frame->cu_array,
1553 prev_state->frame->poc,
1554 prev_state->frame->ref_LX);
1555 kvz_cu_array_free(&state->tile->frame->cu_array);
1556 unsigned height = state->tile->frame->height_in_lcu * LCU_WIDTH;
1557 unsigned width = state->tile->frame->width_in_lcu * LCU_WIDTH;
1558 state->tile->frame->cu_array = kvz_cu_array_alloc(width, height);
1559 }
1560
1561 // Remove source and reconstructed picture.
1562 kvz_image_free(state->tile->frame->source);
1563 state->tile->frame->source = NULL;
1564
1565 kvz_image_free(state->tile->frame->rec);
1566 state->tile->frame->rec = NULL;
1567
1568 kvz_cu_array_free(&state->tile->frame->cu_array);
1569
1570 // Update POC and frame count.
1571 state->frame->num = prev_state->frame->num + 1;
1572 state->frame->poc = prev_state->frame->poc + 1;
1573 state->frame->irap_poc = prev_state->frame->irap_poc;
1574
1575 state->frame->prepared = 1;
1576
1577
1578 }
1579
kvz_get_scan_order(int8_t cu_type,int intra_mode,int depth)1580 coeff_scan_order_t kvz_get_scan_order(int8_t cu_type, int intra_mode, int depth)
1581 {
1582 // Scan mode is diagonal, except for 4x4+8x8 luma and 4x4 chroma, where:
1583 // - angular 6-14 = vertical
1584 // - angular 22-30 = horizontal
1585 if (cu_type == CU_INTRA && depth >= 3) {
1586 if (intra_mode >= 6 && intra_mode <= 14) {
1587 return SCAN_VER;
1588 } else if (intra_mode >= 22 && intra_mode <= 30) {
1589 return SCAN_HOR;
1590 }
1591 }
1592
1593 return SCAN_DIAG;
1594 }
1595
kvz_get_lcu_stats(encoder_state_t * state,int lcu_x,int lcu_y)1596 lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y)
1597 {
1598 const int index = lcu_x + state->tile->lcu_offset_x +
1599 (lcu_y + state->tile->lcu_offset_y) *
1600 state->encoder_control->in.width_in_lcu;
1601 return &state->frame->lcu_stats[index];
1602 }
1603
kvz_get_cu_ref_qp(const encoder_state_t * state,int x,int y,int last_qp)1604 int kvz_get_cu_ref_qp(const encoder_state_t *state, int x, int y, int last_qp)
1605 {
1606 const encoder_control_t *ctrl = state->encoder_control;
1607 const cu_array_t *cua = state->tile->frame->cu_array;
1608 // Quantization group width
1609 const int qg_width = LCU_WIDTH >> MIN(ctrl->max_qp_delta_depth, kvz_cu_array_at_const(cua, x, y)->depth);
1610
1611 // Coordinates of the top-left corner of the quantization group
1612 const int x_qg = x & ~(qg_width - 1);
1613 const int y_qg = y & ~(qg_width - 1);
1614
1615 int qp_pred_a = last_qp;
1616 if (x_qg % LCU_WIDTH > 0) {
1617 qp_pred_a = kvz_cu_array_at_const(cua, x_qg - 1, y_qg)->qp;
1618 }
1619
1620 int qp_pred_b = last_qp;
1621 if (y_qg % LCU_WIDTH > 0) {
1622 qp_pred_b = kvz_cu_array_at_const(cua, x_qg, y_qg - 1)->qp;
1623 }
1624
1625 return ((qp_pred_a + qp_pred_b + 1) >> 1);
1626 }
1627