1 /* 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "vp9/encoder/vp9_encodeframe.h" 12 #include "vp9/encoder/vp9_encoder.h" 13 #include "vp9/encoder/vp9_ethread.h" 14 15 static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) { 16 int i, j, k, l, m, n; 17 18 for (i = 0; i < REFERENCE_MODES; i++) 19 td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i]; 20 21 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 22 td->rd_counts.filter_diff[i] += td_t->rd_counts.filter_diff[i]; 23 24 for (i = 0; i < TX_MODES; i++) 25 td->rd_counts.tx_select_diff[i] += td_t->rd_counts.tx_select_diff[i]; 26 27 for (i = 0; i < TX_SIZES; i++) 28 for (j = 0; j < PLANE_TYPES; j++) 29 for (k = 0; k < REF_TYPES; k++) 30 for (l = 0; l < COEF_BANDS; l++) 31 for (m = 0; m < COEFF_CONTEXTS; m++) 32 for (n = 0; n < ENTROPY_TOKENS; n++) 33 td->rd_counts.coef_counts[i][j][k][l][m][n] += 34 td_t->rd_counts.coef_counts[i][j][k][l][m][n]; 35 } 36 37 static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) { 38 VP9_COMP *const cpi = thread_data->cpi; 39 const VP9_COMMON *const cm = &cpi->common; 40 const int tile_cols = 1 << cm->log2_tile_cols; 41 const int tile_rows = 1 << cm->log2_tile_rows; 42 int t; 43 44 (void) unused; 45 46 for (t = thread_data->start; t < tile_rows * tile_cols; 47 t += cpi->num_workers) { 48 int tile_row = t / tile_cols; 49 int tile_col = t % tile_cols; 50 51 vp9_encode_tile(cpi, thread_data->td, tile_row, tile_col); 52 } 53 54 return 0; 55 } 56 57 void vp9_encode_tiles_mt(VP9_COMP *cpi) { 58 VP9_COMMON *const cm = &cpi->common; 59 const int tile_cols = 1 << cm->log2_tile_cols; 60 const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); 61 const int num_workers = MIN(cpi->oxcf.max_threads, tile_cols); 62 int i; 63 64 vp9_init_tile_data(cpi); 65 66 // Only run once to create threads and allocate thread data. 67 if (cpi->num_workers == 0) { 68 CHECK_MEM_ERROR(cm, cpi->workers, 69 vpx_malloc(num_workers * sizeof(*cpi->workers))); 70 71 CHECK_MEM_ERROR(cm, cpi->tile_thr_data, 72 vpx_calloc(num_workers, sizeof(*cpi->tile_thr_data))); 73 74 for (i = 0; i < num_workers; i++) { 75 VP9Worker *const worker = &cpi->workers[i]; 76 EncWorkerData *thread_data = &cpi->tile_thr_data[i]; 77 78 ++cpi->num_workers; 79 winterface->init(worker); 80 81 if (i < num_workers - 1) { 82 thread_data->cpi = cpi; 83 84 // Allocate thread data. 85 CHECK_MEM_ERROR(cm, thread_data->td, 86 vpx_memalign(32, sizeof(*thread_data->td))); 87 vp9_zero(*thread_data->td); 88 89 // Set up pc_tree. 90 thread_data->td->leaf_tree = NULL; 91 thread_data->td->pc_tree = NULL; 92 vp9_setup_pc_tree(cm, thread_data->td); 93 94 // Allocate frame counters in thread data. 95 CHECK_MEM_ERROR(cm, thread_data->td->counts, 96 vpx_calloc(1, sizeof(*thread_data->td->counts))); 97 98 // Create threads 99 if (!winterface->reset(worker)) 100 vpx_internal_error(&cm->error, VPX_CODEC_ERROR, 101 "Tile encoder thread creation failed"); 102 } else { 103 // Main thread acts as a worker and uses the thread data in cpi. 104 thread_data->cpi = cpi; 105 thread_data->td = &cpi->td; 106 } 107 108 winterface->sync(worker); 109 } 110 } 111 112 for (i = 0; i < num_workers; i++) { 113 VP9Worker *const worker = &cpi->workers[i]; 114 EncWorkerData *thread_data; 115 116 worker->hook = (VP9WorkerHook)enc_worker_hook; 117 worker->data1 = &cpi->tile_thr_data[i]; 118 worker->data2 = NULL; 119 thread_data = (EncWorkerData*)worker->data1; 120 121 // Before encoding a frame, copy the thread data from cpi. 122 if (thread_data->td != &cpi->td) { 123 thread_data->td->mb = cpi->td.mb; 124 thread_data->td->rd_counts = cpi->td.rd_counts; 125 } 126 if (thread_data->td->counts != &cpi->common.counts) { 127 memcpy(thread_data->td->counts, &cpi->common.counts, 128 sizeof(cpi->common.counts)); 129 } 130 131 // Handle use_nonrd_pick_mode case. 132 if (cpi->sf.use_nonrd_pick_mode) { 133 MACROBLOCK *const x = &thread_data->td->mb; 134 MACROBLOCKD *const xd = &x->e_mbd; 135 struct macroblock_plane *const p = x->plane; 136 struct macroblockd_plane *const pd = xd->plane; 137 PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none; 138 int j; 139 140 for (j = 0; j < MAX_MB_PLANE; ++j) { 141 p[j].coeff = ctx->coeff_pbuf[j][0]; 142 p[j].qcoeff = ctx->qcoeff_pbuf[j][0]; 143 pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0]; 144 p[j].eobs = ctx->eobs_pbuf[j][0]; 145 } 146 } 147 } 148 149 // Encode a frame 150 for (i = 0; i < num_workers; i++) { 151 VP9Worker *const worker = &cpi->workers[i]; 152 EncWorkerData *const thread_data = (EncWorkerData*)worker->data1; 153 154 // Set the starting tile for each thread. 155 thread_data->start = i; 156 157 if (i == num_workers - 1) 158 winterface->execute(worker); 159 else 160 winterface->launch(worker); 161 } 162 163 // Encoding ends. 164 for (i = 0; i < num_workers; i++) { 165 VP9Worker *const worker = &cpi->workers[i]; 166 winterface->sync(worker); 167 } 168 169 for (i = 0; i < num_workers; i++) { 170 VP9Worker *const worker = &cpi->workers[i]; 171 EncWorkerData *const thread_data = (EncWorkerData*)worker->data1; 172 173 // Accumulate counters. 174 if (i < num_workers - 1) { 175 vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0); 176 accumulate_rd_opt(&cpi->td, thread_data->td); 177 } 178 } 179 } 180