1 /*
2  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "vp9/encoder/vp9_encodeframe.h"
12 #include "vp9/encoder/vp9_encoder.h"
13 #include "vp9/encoder/vp9_ethread.h"
14 
15 static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
16   int i, j, k, l, m, n;
17 
18   for (i = 0; i < REFERENCE_MODES; i++)
19     td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i];
20 
21   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
22     td->rd_counts.filter_diff[i] += td_t->rd_counts.filter_diff[i];
23 
24   for (i = 0; i < TX_MODES; i++)
25     td->rd_counts.tx_select_diff[i] += td_t->rd_counts.tx_select_diff[i];
26 
27   for (i = 0; i < TX_SIZES; i++)
28     for (j = 0; j < PLANE_TYPES; j++)
29       for (k = 0; k < REF_TYPES; k++)
30         for (l = 0; l < COEF_BANDS; l++)
31           for (m = 0; m < COEFF_CONTEXTS; m++)
32             for (n = 0; n < ENTROPY_TOKENS; n++)
33               td->rd_counts.coef_counts[i][j][k][l][m][n] +=
34                   td_t->rd_counts.coef_counts[i][j][k][l][m][n];
35 }
36 
37 static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
38   VP9_COMP *const cpi = thread_data->cpi;
39   const VP9_COMMON *const cm = &cpi->common;
40   const int tile_cols = 1 << cm->log2_tile_cols;
41   const int tile_rows = 1 << cm->log2_tile_rows;
42   int t;
43 
44   (void) unused;
45 
46   for (t = thread_data->start; t < tile_rows * tile_cols;
47       t += cpi->num_workers) {
48     int tile_row = t / tile_cols;
49     int tile_col = t % tile_cols;
50 
51     vp9_encode_tile(cpi, thread_data->td, tile_row, tile_col);
52   }
53 
54   return 0;
55 }
56 
57 void vp9_encode_tiles_mt(VP9_COMP *cpi) {
58   VP9_COMMON *const cm = &cpi->common;
59   const int tile_cols = 1 << cm->log2_tile_cols;
60   const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
61   const int num_workers = MIN(cpi->oxcf.max_threads, tile_cols);
62   int i;
63 
64   vp9_init_tile_data(cpi);
65 
66   // Only run once to create threads and allocate thread data.
67   if (cpi->num_workers == 0) {
68     CHECK_MEM_ERROR(cm, cpi->workers,
69                     vpx_malloc(num_workers * sizeof(*cpi->workers)));
70 
71     CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
72                     vpx_calloc(num_workers, sizeof(*cpi->tile_thr_data)));
73 
74     for (i = 0; i < num_workers; i++) {
75       VP9Worker *const worker = &cpi->workers[i];
76       EncWorkerData *thread_data = &cpi->tile_thr_data[i];
77 
78       ++cpi->num_workers;
79       winterface->init(worker);
80 
81       if (i < num_workers - 1) {
82         thread_data->cpi = cpi;
83 
84         // Allocate thread data.
85         CHECK_MEM_ERROR(cm, thread_data->td,
86                         vpx_memalign(32, sizeof(*thread_data->td)));
87         vp9_zero(*thread_data->td);
88 
89         // Set up pc_tree.
90         thread_data->td->leaf_tree = NULL;
91         thread_data->td->pc_tree = NULL;
92         vp9_setup_pc_tree(cm, thread_data->td);
93 
94         // Allocate frame counters in thread data.
95         CHECK_MEM_ERROR(cm, thread_data->td->counts,
96                         vpx_calloc(1, sizeof(*thread_data->td->counts)));
97 
98         // Create threads
99         if (!winterface->reset(worker))
100           vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
101                              "Tile encoder thread creation failed");
102       } else {
103         // Main thread acts as a worker and uses the thread data in cpi.
104         thread_data->cpi = cpi;
105         thread_data->td = &cpi->td;
106       }
107 
108       winterface->sync(worker);
109     }
110   }
111 
112   for (i = 0; i < num_workers; i++) {
113     VP9Worker *const worker = &cpi->workers[i];
114     EncWorkerData *thread_data;
115 
116     worker->hook = (VP9WorkerHook)enc_worker_hook;
117     worker->data1 = &cpi->tile_thr_data[i];
118     worker->data2 = NULL;
119     thread_data = (EncWorkerData*)worker->data1;
120 
121     // Before encoding a frame, copy the thread data from cpi.
122     if (thread_data->td != &cpi->td) {
123       thread_data->td->mb = cpi->td.mb;
124       thread_data->td->rd_counts = cpi->td.rd_counts;
125     }
126     if (thread_data->td->counts != &cpi->common.counts) {
127       memcpy(thread_data->td->counts, &cpi->common.counts,
128              sizeof(cpi->common.counts));
129     }
130 
131     // Handle use_nonrd_pick_mode case.
132     if (cpi->sf.use_nonrd_pick_mode) {
133       MACROBLOCK *const x = &thread_data->td->mb;
134       MACROBLOCKD *const xd = &x->e_mbd;
135       struct macroblock_plane *const p = x->plane;
136       struct macroblockd_plane *const pd = xd->plane;
137       PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
138       int j;
139 
140       for (j = 0; j < MAX_MB_PLANE; ++j) {
141         p[j].coeff = ctx->coeff_pbuf[j][0];
142         p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
143         pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
144         p[j].eobs = ctx->eobs_pbuf[j][0];
145       }
146     }
147   }
148 
149   // Encode a frame
150   for (i = 0; i < num_workers; i++) {
151     VP9Worker *const worker = &cpi->workers[i];
152     EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;
153 
154     // Set the starting tile for each thread.
155     thread_data->start = i;
156 
157     if (i == num_workers - 1)
158       winterface->execute(worker);
159     else
160       winterface->launch(worker);
161   }
162 
163   // Encoding ends.
164   for (i = 0; i < num_workers; i++) {
165     VP9Worker *const worker = &cpi->workers[i];
166     winterface->sync(worker);
167   }
168 
169   for (i = 0; i < num_workers; i++) {
170     VP9Worker *const worker = &cpi->workers[i];
171     EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;
172 
173     // Accumulate counters.
174     if (i < num_workers - 1) {
175       vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
176       accumulate_rd_opt(&cpi->td, thread_data->td);
177     }
178   }
179 }
180