1 /*
2 * Copyright (c) 2017-2018, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_vdenc_hevc.cpp
24 //! \brief Defines base class for HEVC VDEnc encoder.
25 //!
26
27 #include "codechal_vdenc_hevc.h"
28
29 //!< \cond SKIP_DOXYGEN
30 const uint8_t CodechalVdencHevcState::m_estRateThreshP0[7] =
31 {
32 4, 8, 12, 16, 20, 24, 28
33 };
34
35 const uint8_t CodechalVdencHevcState::m_estRateThreshB0[7] =
36 {
37 4, 8, 12, 16, 20, 24, 28
38 };
39
40 const uint8_t CodechalVdencHevcState::m_estRateThreshI0[7] =
41 {
42 4, 8, 12, 16, 20, 24, 28
43 };
44
45 const int8_t CodechalVdencHevcState::m_instRateThreshP0[4] =
46 {
47 40, 60, 80, 120
48 };
49
50 const int8_t CodechalVdencHevcState::m_instRateThreshB0[4] =
51 {
52 35, 60, 80, 120
53 };
54
55 const int8_t CodechalVdencHevcState::m_instRateThreshI0[4] =
56 {
57 40, 60, 90, 115
58 };
59
60 const uint16_t CodechalVdencHevcState::m_startGAdjFrame[4] =
61 {
62 10, 50, 100, 150
63 };
64
65 const uint8_t CodechalVdencHevcState::m_startGAdjMult[5] =
66 {
67 1, 1, 3, 2, 1
68 };
69
70 const uint8_t CodechalVdencHevcState::m_startGAdjDiv[5] =
71 {
72 40, 5, 5, 3, 1
73 };
74
75 const uint8_t CodechalVdencHevcState::m_rateRatioThreshold[7] =
76 {
77 40, 75, 97, 103, 125, 160, 0
78 };
79
80 const uint8_t CodechalVdencHevcState::m_rateRatioThresholdQP[8] =
81 {
82 253, 254, 255, 0, 1, 2, 3, 0
83 };
84
85 const uint32_t CodechalVdencHevcState::m_hucModeCostsIFrame[] = {
86 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
87 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
88 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
89 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
90 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
91 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
92 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
93 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
94 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
95 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
96 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
97 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
98 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
99 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
100 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
101 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
102 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
103 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
104 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
105 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
106 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
107 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
108 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
109 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
110 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
111 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
112 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
113 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
114 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
115 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
116 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
117 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
118 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
119 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
120 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
121 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
122 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
123 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
124 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
125 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
126 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
127 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
128 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
129 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
130 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
131 0x00000000, 0x00000000, 0x00000000, 0x00000000
132 };
133
134 const uint32_t CodechalVdencHevcState::m_hucModeCostsPbFrame[] = {
135 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
136 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
137 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
138 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
139 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
140 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
141 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
142 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
143 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
144 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
145 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
146 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
147 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
148 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
149 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
150 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
151 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
152 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
153 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
154 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
155 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
156 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
157 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
158 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
159 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
160 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
161 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
162 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
163 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
164 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
165 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
166 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
167 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
168 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
169 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
170 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
171 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
172 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
173 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
174 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
175 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
176 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
177 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
178 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
179 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
180 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b
181 };
182
183 const uint16_t CodechalVdencHevcState::m_sadQpLambdaI[] = {
184 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0004, 0x0004,
185 0x0005, 0x0006, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000E, 0x0010, 0x0012, 0x0014, 0x0016, 0x0019, 0x001C,
186 0x001F, 0x0023, 0x0027, 0x002C, 0x0032, 0x0038, 0x003E, 0x0046, 0x004F, 0x0058, 0x0063, 0x006F, 0x007D, 0x008C, 0x009D, 0x00B1,
187 0x00C6, 0x00DF, 0x00FA, 0x0118
188 };
189
190 // new table for visual quality improvement
191 const uint16_t CodechalVdencHevcState::m_sadQpLambdaI_VQI[] = {
192 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0004, 0x0004,
193 0x0005, 0x0006, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000D, 0x000F, 0x0011, 0x0014, 0x0017, 0x001A, 0x001E, 0x0022,
194 0x0027, 0x002D, 0x0033, 0x003B, 0x0043, 0x004D, 0x0057, 0x0064, 0x0072, 0x0082, 0x0095, 0x00A7, 0x00BB, 0x00D2, 0x00EC, 0x0109,
195 0x0129, 0x014E, 0x0177, 0x01A5
196 };
197
198 const uint16_t CodechalVdencHevcState::m_sadQpLambdaP[] = {
199 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0004, 0x0004, 0x0005,
200 0x0005, 0x0006, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000D, 0x000E, 0x0010, 0x0012, 0x0014, 0x0017, 0x001A, 0x001D,
201 0x0021, 0x0024, 0x0029, 0x002E, 0x0034, 0x003A, 0x0041, 0x0049, 0x0052, 0x005C, 0x0067, 0x0074, 0x0082, 0x0092, 0x00A4, 0x00B8,
202 0x00CE, 0x00E8, 0x0104, 0x0124
203 };
204
205 const uint16_t CodechalVdencHevcState::m_rdQpLambdaI[] = {
206 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0003, 0x0004, 0x0005,
207 0x0006, 0x0008, 0x000A, 0x000C, 0x000F, 0x0013, 0x0018, 0x001E, 0x0026, 0x0030, 0x003D, 0x004D, 0x0061, 0x007A, 0x009A, 0x00C2,
208 0x00F4, 0x0133, 0x0183, 0x01E8, 0x0266, 0x0306, 0x03CF, 0x04CD, 0x060C, 0x079F, 0x099A, 0x0C18, 0x0F3D, 0x1333, 0x1831, 0x1E7A,
209 0x2666, 0x3062, 0x3CF5, 0x4CCD
210 };
211
212 const uint16_t CodechalVdencHevcState::m_rdQpLambdaP[] = {
213 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0004, 0x0005,
214 0x0007, 0x0008, 0x000A, 0x000D, 0x0011, 0x0015, 0x001A, 0x0021, 0x002A, 0x0034, 0x0042, 0x0053, 0x0069, 0x0084, 0x00A6, 0x00D2,
215 0x0108, 0x014D, 0x01A3, 0x0210, 0x029A, 0x0347, 0x0421, 0x0533, 0x068D, 0x0841, 0x0A66, 0x0D1A, 0x1082, 0x14CD, 0x1A35, 0x2105,
216 0x299A, 0x346A, 0x4209, 0x5333
217 };
218
219 // Originial CodechalVdencHevcState::m_penaltyForIntraNonDC32x32PredMode table
220 const uint8_t CodechalVdencHevcState::m_penaltyForIntraNonDC32x32PredMode[] = {
221 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
222 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
223 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
224 0x00, 0x00, 0x00, 0x00
225 };
226
227 // New table for visual quality improvement
228 const uint8_t CodechalVdencHevcState::m_penaltyForIntraNonDC32x32PredMode_VQI[] = {
229 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
230 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0C, 0x12, 0x19, 0x1f, 0x25, 0x2C, 0x32, 0x38,
231 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
232 0x3F, 0x3F, 0x3F, 0x3F
233 };
234 //! \endcond
235
GetMaxAllowedSlices(uint8_t levelIdc)236 uint32_t CodechalVdencHevcState::GetMaxAllowedSlices(uint8_t levelIdc)
237 {
238 uint32_t maxAllowedNumSlices = 0;
239
240 switch (levelIdc)
241 {
242 case 10:
243 case 20:
244 maxAllowedNumSlices = 16;
245 break;
246 case 21:
247 maxAllowedNumSlices = 20;
248 break;
249 case 30:
250 maxAllowedNumSlices = 30;
251 break;
252 case 31:
253 maxAllowedNumSlices = 40;
254 break;
255 case 40:
256 case 41:
257 maxAllowedNumSlices = 75;
258 break;
259 case 50:
260 case 51:
261 case 52:
262 maxAllowedNumSlices = 200;
263 break;
264 case 60:
265 case 61:
266 case 62:
267 maxAllowedNumSlices = 600;
268 break;
269 default:
270 maxAllowedNumSlices = 0;
271 break;
272 }
273
274 return maxAllowedNumSlices;
275 }
276
SetPakPassType()277 void CodechalVdencHevcState::SetPakPassType()
278 {
279 CODECHAL_ENCODE_FUNCTION_ENTER;
280
281 // default: VDEnc+PAK pass
282 m_pakOnlyPass = false;
283
284 // BRC
285 if (m_brcEnabled)
286 {
287 // BRC with SSC, BRC without SSC
288 // BRC fast 2nd pass needed, but weighted prediction/SSC 2nd pass not needed
289 // HuC will update PAK pass type to be VDEnc+PAK if WP/SSC 2nd pass is needed
290 if (GetCurrentPass() == 1)
291 {
292 m_pakOnlyPass = true;
293 }
294 }
295
296 // CQP, ACQP, BRC
297 if (m_hevcSeqParams->SAO_enabled_flag)
298 {
299 // SAO 2nd pass is always PAK only pass
300 if (m_b2NdSaoPassNeeded && (GetCurrentPass() == m_uc2NdSaoPass))
301 {
302 m_pakOnlyPass = true;
303 }
304 }
305
306 return;
307 }
308
ComputeVDEncInitQP(int32_t & initQPIP,int32_t & initQPB)309 void CodechalVdencHevcState::ComputeVDEncInitQP(int32_t& initQPIP, int32_t& initQPB)
310 {
311 CODECHAL_ENCODE_FUNCTION_ENTER;
312
313 const float x0 = 0, y0 = 1.19f, x1 = 1.75f, y1 = 1.75f;
314 uint32_t frameSize = ((m_frameWidth * m_frameHeight * 3) >> 1);
315
316 initQPIP = (int)(1. / 1.2 * pow(10.0, (log10(frameSize * 2. / 3. * ((float)m_hevcSeqParams->FrameRate.Numerator / ((float)m_hevcSeqParams->FrameRate.Denominator * (float)m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS))) - x0) * (y1 - y0) / (x1 - x0) + y0) + 0.5);
317
318 initQPIP += 2;
319
320 int32_t gopP = (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize - 1) / m_hevcSeqParams->GopRefDist) : 0;
321 int32_t gopB = m_hevcSeqParams->GopPicSize - 1 - gopP;
322 int32_t gopB1 = 0;
323 int32_t gopB2 = 0;
324 int32_t gopSize = 1 + gopP + gopB + gopB1 + gopB2;
325
326 if (gopSize == 1)
327 {
328 initQPIP += 12;
329 }
330 else if (gopSize < 15)
331 {
332 initQPIP += ((14 - gopSize) >> 1);
333 }
334
335 initQPIP = CodecHal_Clip3((int32_t)m_hevcPicParams->BRCMinQp, (int32_t)m_hevcPicParams->BRCMaxQp, initQPIP);
336 initQPIP--;
337
338 if (initQPIP < 0)
339 {
340 initQPIP = 1;
341 }
342
343 initQPB = ((initQPIP + initQPIP) * 563 >> 10) + 1;
344 initQPB = CodecHal_Clip3((int32_t)m_hevcPicParams->BRCMinQp, (int32_t)m_hevcPicParams->BRCMaxQp, initQPB);
345
346 if (gopSize > 300) //if intra frame is not inserted frequently
347 {
348 initQPIP -= 8;
349 initQPB -= 8;
350 }
351 else
352 {
353 initQPIP -= 2;
354 initQPB -= 2;
355 }
356
357 initQPIP = CodecHal_Clip3((int32_t)m_hevcPicParams->BRCMinQp, (int32_t)m_hevcPicParams->BRCMaxQp, initQPIP);
358 initQPB = CodecHal_Clip3((int32_t)m_hevcPicParams->BRCMinQp, (int32_t)m_hevcPicParams->BRCMaxQp, initQPB);
359 }
360
StoreHuCStatus2Register(PMOS_COMMAND_BUFFER cmdBuffer)361 MOS_STATUS CodechalVdencHevcState::StoreHuCStatus2Register(PMOS_COMMAND_BUFFER cmdBuffer)
362 {
363 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
364
365 CODECHAL_ENCODE_FUNCTION_ENTER;
366
367 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
368
369 // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded
370 MHW_MI_STORE_DATA_PARAMS storeDataParams;
371 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
372 storeDataParams.pOsResource = &m_resHucStatus2Buffer;
373 storeDataParams.dwResourceOffset = 0;
374 storeDataParams.dwValue = m_hucInterface->GetHucStatus2ImemLoadedMask();
375 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
376
377 // Store HUC_STATUS2 register
378 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
379 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
380 storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer;
381 storeRegParams.dwOffset = sizeof(uint32_t);
382 storeRegParams.dwRegister = m_hucInterface->GetMmioRegisters(m_vdboxIndex)->hucStatus2RegOffset;
383 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
384
385 return eStatus;
386 }
387
HuCBrcInitReset()388 MOS_STATUS CodechalVdencHevcState::HuCBrcInitReset()
389 {
390 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
391
392 CODECHAL_ENCODE_FUNCTION_ENTER;
393
394 MOS_COMMAND_BUFFER cmdBuffer;
395 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
396
397 if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) )
398 {
399 // Send command buffer header at the beginning (OS dependent)
400 bool requestFrameTracking = m_singleTaskPhaseSupported ?
401 m_firstTaskInPhase : 0;
402 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
403 }
404
405 // load kernel from WOPCM into L2 storage RAM
406 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
407 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
408 imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcInitKernelDescriptor;
409
410 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
411
412 // pipe mode select
413 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
414 pipeModeSelectParams.Mode = m_mode;
415 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
416
417 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
418
419 // set HuC DMEM param
420 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
421 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
422 dmemParams.presHucDataSource = &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx];
423 dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
424 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
425 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
426
427 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
428 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
429 virtualAddrParams.regionParams[0].presRegion = &m_vdencBrcHistoryBuffer;
430 virtualAddrParams.regionParams[0].isWritable = true;
431 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
432
433 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcDummyStreamObject(&cmdBuffer));
434
435 // Store HUC_STATUS2 register bit 6 before HUC_Start command
436 // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
437 // (HUC_Start command with last start bit set).
438 CODECHAL_DEBUG_TOOL(
439 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
440 )
441
442 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
443
444 // wait Huc completion (use HEVC bit for now)
445 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
446 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
447 vdPipeFlushParams.Flags.bFlushHEVC = 1;
448 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
449 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
450
451 // Flush the engine to ensure memory written out
452 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
453 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
454 flushDwParams.bVideoPipelineCacheInvalidate = true;
455 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
456
457 if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd))
458 {
459 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
460 }
461
462 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
463
464 if (!m_singleTaskPhaseSupported)
465 {
466 bool renderingFlags = m_videoContextUsesNullHw;
467
468 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
469 &cmdBuffer,
470 CODECHAL_MEDIA_STATE_BRC_INIT_RESET,
471 "ENC")));
472
473 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
474 }
475
476 CODECHAL_DEBUG_TOOL(DumpHucBrcInit());
477 return eStatus;
478 }
479
SetupBRCROIStreamIn(PMOS_RESOURCE streamIn,PMOS_RESOURCE deltaQpBuffer)480 MOS_STATUS CodechalVdencHevcState::SetupBRCROIStreamIn(PMOS_RESOURCE streamIn, PMOS_RESOURCE deltaQpBuffer)
481 {
482 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
483
484 CODECHAL_ENCODE_FUNCTION_ENTER;
485
486 CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
487 CODECHAL_ENCODE_CHK_NULL_RETURN(deltaQpBuffer);
488
489 MOS_LOCK_PARAMS lockFlags;
490 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
491 lockFlags.WriteOnly = true;
492
493 PDeltaQpForROI deltaQpData = (PDeltaQpForROI)m_osInterface->pfnLockResource(
494 m_osInterface,
495 deltaQpBuffer,
496 &lockFlags);
497 CODECHAL_ENCODE_CHK_NULL_RETURN(deltaQpData);
498
499 MOS_ZeroMemory(deltaQpData, m_deltaQpRoiBufferSize);
500
501 uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
502 uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
503 uint32_t deltaQpBufWidth = (MOS_ALIGN_CEIL(m_frameWidth, 32) / 32);
504 uint32_t deltaQpBufHeight = (MOS_ALIGN_CEIL(m_frameHeight, 32) / 32);
505 bool cu64Align = true;
506
507 for (auto i = m_hevcPicParams->NumROI - 1; i >= 0; i--)
508 {
509 //Check if the region is with in the borders
510 uint16_t top = (uint16_t)CodecHal_Clip3(0, (deltaQpBufHeight - 1), m_hevcPicParams->ROI[i].Top);
511 uint16_t bottom = (uint16_t)CodecHal_Clip3(0, deltaQpBufHeight, m_hevcPicParams->ROI[i].Bottom);
512 uint16_t left = (uint16_t)CodecHal_Clip3(0, (deltaQpBufWidth - 1), m_hevcPicParams->ROI[i].Left);
513 uint16_t right = (uint16_t)CodecHal_Clip3(0, deltaQpBufWidth, m_hevcPicParams->ROI[i].Right);
514
515 //Check if all the sides of ROI regions are aligned to 64CU
516 if ((top % 2 == 1) || (bottom % 2 == 1) || (left % 2 == 1) || (right % 2 == 1))
517 {
518 cu64Align = false;
519 }
520
521 SetBrcRoiDeltaQpMap(streamInWidth, top, bottom, left, right, (uint8_t)i, deltaQpData);
522 }
523
524 m_osInterface->pfnUnlockResource(
525 m_osInterface,
526 deltaQpBuffer);
527
528 uint8_t* data = (uint8_t*) m_osInterface->pfnLockResource(
529 m_osInterface,
530 streamIn,
531 &lockFlags);
532 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
533
534 MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
535 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
536 streaminDataParams.maxTuSize = 3; //Maximum TU Size allowed, restriction to be set to 3
537 streaminDataParams.maxCuSize = (cu64Align) ? 3 : 2;
538 switch (m_hevcSeqParams->TargetUsage)
539 {
540 case 1:
541 case 4:
542 streaminDataParams.numMergeCandidateCu64x64 = 4;
543 streaminDataParams.numMergeCandidateCu32x32 = 3;
544 streaminDataParams.numMergeCandidateCu16x16 = 2;
545 streaminDataParams.numMergeCandidateCu8x8 = 1;
546 streaminDataParams.numImePredictors = m_imgStateImePredictors;
547 break;
548 case 7:
549 streaminDataParams.numMergeCandidateCu64x64 = 2;
550 streaminDataParams.numMergeCandidateCu32x32 = 2;
551 streaminDataParams.numMergeCandidateCu16x16 = 2;
552 streaminDataParams.numMergeCandidateCu8x8 = 0;
553 streaminDataParams.numImePredictors = 4;
554 break;
555 }
556
557 int32_t streamInNumCUs = streamInWidth * streamInHeight;
558 for (auto i = 0; i < streamInNumCUs; i++)
559 {
560 SetStreaminDataPerLcu(&streaminDataParams, data+(i*64));
561 }
562
563 m_osInterface->pfnUnlockResource(
564 m_osInterface,
565 streamIn);
566
567 return eStatus;
568 }
569
SetBrcRoiDeltaQpMap(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,uint8_t regionId,PDeltaQpForROI deltaQpMap)570 void CodechalVdencHevcState::SetBrcRoiDeltaQpMap(
571 uint32_t streamInWidth,
572 uint32_t top,
573 uint32_t bottom,
574 uint32_t left,
575 uint32_t right,
576 uint8_t regionId,
577 PDeltaQpForROI deltaQpMap)
578 {
579 CODECHAL_ENCODE_FUNCTION_ENTER;
580
581 for (auto y = top; y < bottom; y++)
582 {
583 for (auto x = left; x < right; x++)
584 {
585 uint32_t offset = 0, xyOffset = 0;
586 StreaminZigZagToLinearMap(streamInWidth, x, y, &offset, &xyOffset);
587
588 (deltaQpMap + (offset + xyOffset))->iDeltaQp = m_hevcPicParams->ROI[regionId].PriorityLevelOrDQp;
589 }
590 }
591 }
592
ProcessRoiDeltaQp()593 void CodechalVdencHevcState::ProcessRoiDeltaQp()
594 {
595 CODECHAL_ENCODE_FUNCTION_ENTER;
596
597 // Intialize ROIDistinctDeltaQp to be min expected delta qp, setting to -128
598 // Check if forceQp is needed or not
599 // forceQp is enabled if there are greater than 3 distinct delta qps or if the deltaqp is beyond range (-8, 7)
600
601 for (auto k = 0; k < m_maxNumROI; k++)
602 {
603 m_hevcPicParams->ROIDistinctDeltaQp[k] = -128;
604 }
605
606 int32_t numQp = 0;
607 for (int32_t i = 0; i < m_hevcPicParams->NumROI; i++)
608 {
609 bool dqpNew = true;
610
611 //Get distinct delta Qps among all ROI regions, index 0 having the lowest delta qp
612 int32_t k = numQp - 1;
613 for (; k >= 0; k--)
614 {
615 if (m_hevcPicParams->ROI[i].PriorityLevelOrDQp == m_hevcPicParams->ROIDistinctDeltaQp[k] || m_hevcPicParams->ROI[i].PriorityLevelOrDQp == 0)
616 {
617 dqpNew = false;
618 break;
619 }
620 else if (m_hevcPicParams->ROI[i].PriorityLevelOrDQp < m_hevcPicParams->ROIDistinctDeltaQp[k])
621 {
622 continue;
623 }
624 else
625 {
626 break;
627 }
628 }
629
630 if (dqpNew)
631 {
632 for (int32_t j = numQp - 1; (j >= k + 1 && j >= 0); j--)
633 {
634 m_hevcPicParams->ROIDistinctDeltaQp[j + 1] = m_hevcPicParams->ROIDistinctDeltaQp[j];
635 }
636 m_hevcPicParams->ROIDistinctDeltaQp[k + 1] = m_hevcPicParams->ROI[i].PriorityLevelOrDQp;
637 numQp++;
638 }
639 }
640
641 //Set the ROI DeltaQp to zero for remaining array elements
642 for (auto k = numQp; k < m_maxNumROI; k++)
643 {
644 m_hevcPicParams->ROIDistinctDeltaQp[k] = 0;
645 }
646
647 m_vdencNativeROIEnabled = !(numQp > m_maxNumNativeROI || m_hevcPicParams->ROIDistinctDeltaQp[0] < -8 || m_hevcPicParams->ROIDistinctDeltaQp[numQp - 1] > 7);
648 }
649
SetupROIStreamIn(PMOS_RESOURCE streamIn)650 MOS_STATUS CodechalVdencHevcState::SetupROIStreamIn(PMOS_RESOURCE streamIn)
651 {
652 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
653
654 CODECHAL_ENCODE_FUNCTION_ENTER;
655
656 CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
657
658 MOS_LOCK_PARAMS lockFlags;
659 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
660 lockFlags.WriteOnly = true;
661
662 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
663 m_osInterface,
664 streamIn,
665 &lockFlags);
666 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
667
668 uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
669 uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
670 int32_t streamInNumCUs = streamInWidth * streamInHeight;
671
672 MOS_ZeroMemory(data, streamInNumCUs * 64);
673
674 MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
675
676 //ROI higher priority for smaller index.
677 bool cu64Align = true;
678 for (int32_t i = m_hevcPicParams->NumROI - 1; i >= 0; i--)
679 {
680
681 //Check if the region is with in the borders
682 uint16_t top = (uint16_t)CodecHal_Clip3(0, (streamInHeight - 1), m_hevcPicParams->ROI[i].Top);
683 uint16_t bottom = (uint16_t)CodecHal_Clip3(0, streamInHeight, m_hevcPicParams->ROI[i].Bottom);
684 uint16_t left = (uint16_t)CodecHal_Clip3(0, (streamInWidth - 1), m_hevcPicParams->ROI[i].Left);
685 uint16_t right = (uint16_t)CodecHal_Clip3(0, streamInWidth, m_hevcPicParams->ROI[i].Right);
686
687 //Check if all the sides of ROI regions are aligned to 64CU
688 if ((top % 2 == 1) || (bottom % 2 == 1) || (left % 2 == 1) || (right % 2 == 1))
689 {
690 cu64Align = false;
691 }
692
693 // For native ROI, determine Region ID based on distinct delta Qps and set ROI control
694 uint32_t roiCtrl = 0;
695 for (auto j = 0; j < m_maxNumNativeROI; j++)
696 {
697 if (m_hevcPicParams->ROIDistinctDeltaQp[j] == m_hevcPicParams->ROI[i].PriorityLevelOrDQp)
698 {
699 //All four 16x16 blocks within the 32x32 blocks should share the same region ID j
700 roiCtrl = j + 1;
701 for (auto k = 0; k < 3; k++)
702 {
703 roiCtrl = roiCtrl << 2;
704 roiCtrl = roiCtrl + j + 1;
705 }
706 break;
707 }
708 }
709 // Calculate ForceQp
710 int8_t forceQp = (int8_t)CodecHal_Clip3(10, 51, m_hevcPicParams->QpY + m_hevcPicParams->ROI[i].PriorityLevelOrDQp + m_hevcSliceParams->slice_qp_delta);
711
712 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
713 streaminDataParams.setQpRoiCtrl = true;
714 if (m_vdencNativeROIEnabled)
715 {
716 streaminDataParams.roiCtrl = (uint8_t)roiCtrl;
717 }
718 else
719 {
720 streaminDataParams.forceQp = forceQp;
721 }
722
723 SetStreaminDataPerRegion(streamInWidth, top, bottom, left, right, &streaminDataParams, data);
724 }
725
726 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
727 streaminDataParams.maxTuSize = 3; //Maximum TU Size allowed, restriction to be set to 3
728 streaminDataParams.maxCuSize = (cu64Align) ? 3 : 2;
729 switch (m_hevcSeqParams->TargetUsage)
730 {
731 case 1:
732 case 4:
733 streaminDataParams.numMergeCandidateCu64x64 = 4;
734 streaminDataParams.numMergeCandidateCu32x32 = 3;
735 streaminDataParams.numMergeCandidateCu16x16 = 2;
736 streaminDataParams.numMergeCandidateCu8x8 = 1;
737 streaminDataParams.numImePredictors = m_imgStateImePredictors;
738 break;
739 case 7:
740 streaminDataParams.numMergeCandidateCu64x64 = 2;
741 streaminDataParams.numMergeCandidateCu32x32 = 2;
742 streaminDataParams.numMergeCandidateCu16x16 = 2;
743 streaminDataParams.numMergeCandidateCu8x8 = 0;
744 streaminDataParams.numImePredictors = 4;
745 break;
746 }
747
748 for (auto i = 0; i < streamInNumCUs; i++)
749 {
750 SetStreaminDataPerLcu(&streaminDataParams, data + (i * 64));
751 }
752
753 m_osInterface->pfnUnlockResource(
754 m_osInterface,
755 streamIn);
756
757 return eStatus;
758 }
SetupMbQpStreamIn(PMOS_RESOURCE streamIn)759 MOS_STATUS CodechalVdencHevcState::SetupMbQpStreamIn(PMOS_RESOURCE streamIn)
760 {
761 CODECHAL_ENCODE_FUNCTION_ENTER;
762
763 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
764 CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
765
766 MOS_LOCK_PARAMS LockFlags;
767 MOS_ZeroMemory(&LockFlags, sizeof(MOS_LOCK_PARAMS));
768 LockFlags.WriteOnly = true;
769
770 auto data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface,
771 streamIn,
772 &LockFlags);
773 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
774
775 uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
776 uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
777 int32_t streamInNumCUs = streamInWidth * streamInHeight;
778
779 MOS_LOCK_PARAMS LockFlagsReadOnly;
780 MOS_ZeroMemory(&LockFlagsReadOnly, sizeof(MOS_LOCK_PARAMS));
781 LockFlagsReadOnly.ReadOnly = true;
782
783 auto pInputData = (uint8_t*)m_osInterface->pfnLockResource(
784 m_osInterface, &(m_encodeParams.psMbQpDataSurface->OsResource),
785 &LockFlagsReadOnly);
786
787 MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
788
789 for (uint32_t h = 0; h < streamInHeight; h++)
790 {
791 for (uint32_t w = 0; w < streamInWidth; w++)
792 {
793 //Calculate X Y Offset for the zig zag scan with in each 64x64 LCU
794 //dwOffset gives the 64 LCU row
795 // uint32_t Offset = StreamInWidth * (h/2) * 2;
796 // uint32_t YOffset = (h % 2) * 2;
797 // uint32_t XOffset = 2 * (w/2 * 2) + w % 2;
798
799 int32_t ForceQp = pInputData[(h/2) * m_encodeParams.psMbQpDataSurface->dwPitch + (w/2)];
800 // (pData + (Offset + XOffset + YOffset))->DW7.QpEnable = 0xf;
801 // (pData + (Offset + XOffset + YOffset))->DW14.ForceQp_0 = ForceQp;
802 // (pData + (Offset + XOffset + YOffset))->DW14.ForceQp_1 = ForceQp;
803 // (pData + (Offset + XOffset + YOffset))->DW14.ForceQp_2 = ForceQp;
804 // (pData + (Offset + XOffset + YOffset))->DW14.ForceQp_3 = ForceQp;
805 streaminDataParams.setQpRoiCtrl = true;
806 streaminDataParams.forceQp = (int8_t)ForceQp;
807 SetStreaminDataPerRegion(streamInWidth, h, h+1, w, w+1, &streaminDataParams, data);
808
809 }
810 }
811
812 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
813 streaminDataParams.maxTuSize = 3; //Maximum TU Size allowed, restriction to be set to 3
814 streaminDataParams.maxCuSize = 3;
815 switch (m_hevcSeqParams->TargetUsage)
816 {
817 case 1:
818 case 4:
819 streaminDataParams.numMergeCandidateCu64x64 = 4;
820 streaminDataParams.numMergeCandidateCu32x32 = 3;
821 streaminDataParams.numMergeCandidateCu16x16 = 2;
822 streaminDataParams.numMergeCandidateCu8x8 = 1;
823 streaminDataParams.numImePredictors = m_imgStateImePredictors;
824 break;
825 case 7:
826 streaminDataParams.numMergeCandidateCu64x64 = 2;
827 streaminDataParams.numMergeCandidateCu32x32 = 2;
828 streaminDataParams.numMergeCandidateCu16x16 = 2;
829 streaminDataParams.numMergeCandidateCu8x8 = 0;
830 streaminDataParams.numImePredictors = 4;
831 break;
832 }
833
834 for (auto i = 0; i < streamInNumCUs; i++)
835 {
836 SetStreaminDataPerLcu(&streaminDataParams, data + (i * 64));
837 }
838
839 m_osInterface->pfnUnlockResource(
840 m_osInterface,
841 &(m_encodeParams.psMbQpDataSurface->OsResource));
842 m_osInterface->pfnUnlockResource(
843 m_osInterface,
844 streamIn);
845 return eStatus;
846 }
847
StreaminSetDirtyRectRegion(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,uint8_t maxcu,void * streaminData)848 void CodechalVdencHevcState::StreaminSetDirtyRectRegion(
849 uint32_t streamInWidth,
850 uint32_t top,
851 uint32_t bottom,
852 uint32_t left,
853 uint32_t right,
854 uint8_t maxcu,
855 void* streaminData)
856 {
857 CODECHAL_ENCODE_FUNCTION_ENTER;
858
859 MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
860 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
861 streaminDataParams.maxTuSize = 3;
862 streaminDataParams.maxCuSize = maxcu;
863 streaminDataParams.puTypeCtrl = 0;
864
865 switch (m_hevcSeqParams->TargetUsage)
866 {
867 case 1:
868 case 4:
869 streaminDataParams.numMergeCandidateCu64x64 = 4;
870 streaminDataParams.numMergeCandidateCu32x32 = 3;
871 streaminDataParams.numMergeCandidateCu16x16 = 2;
872 streaminDataParams.numMergeCandidateCu8x8 = 1;
873 streaminDataParams.numImePredictors = m_imgStateImePredictors;
874 break;
875 case 7:
876 streaminDataParams.numMergeCandidateCu64x64 = 2;
877 streaminDataParams.numMergeCandidateCu32x32 = 2;
878 streaminDataParams.numMergeCandidateCu16x16 = 2;
879 streaminDataParams.numMergeCandidateCu8x8 = 0;
880 streaminDataParams.numImePredictors = 4;
881 break;
882 }
883
884 SetStreaminDataPerRegion(streamInWidth, top, bottom, left, right, &streaminDataParams, streaminData);
885 }
886
SetStreaminDataPerRegion(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,void * streaminData)887 void CodechalVdencHevcState::SetStreaminDataPerRegion(
888 uint32_t streamInWidth,
889 uint32_t top,
890 uint32_t bottom,
891 uint32_t left,
892 uint32_t right,
893 PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,
894 void* streaminData)
895 {
896 uint8_t* data = (uint8_t*)streaminData;
897
898 for (auto y = top; y < bottom; y++)
899 {
900 for (auto x = left; x < right; x++)
901 {
902 //Calculate X Y for the zig zag scan
903 uint32_t offset = 0, xyOffset = 0;
904 StreaminZigZagToLinearMap(streamInWidth, x, y, &offset, &xyOffset);
905
906 SetStreaminDataPerLcu(streaminParams, data + (offset + xyOffset) * 64);
907 }
908 }
909 }
910
StreaminZigZagToLinearMap(uint32_t streamInWidth,uint32_t x,uint32_t y,uint32_t * offset,uint32_t * xyOffset)911 void CodechalVdencHevcState::StreaminZigZagToLinearMap(
912 uint32_t streamInWidth,
913 uint32_t x,
914 uint32_t y,
915 uint32_t* offset,
916 uint32_t* xyOffset)
917 {
918 CODECHAL_ENCODE_FUNCTION_ENTER;
919
920 *offset = streamInWidth * y;
921 uint32_t yOffset = 0;
922 uint32_t xOffset = 2 * x;
923
924 //Calculate X Y Offset for the zig zag scan with in each 64x64 LCU
925 //dwOffset gives the 64 LCU row
926 if (y % 2)
927 {
928 *offset = streamInWidth * (y - 1);
929 yOffset = 2;
930 }
931
932 if (x % 2)
933 {
934 xOffset = (2 * x) - 1;
935 }
936
937 *xyOffset = xOffset + yOffset;
938 }
939
StreaminSetBorderNon64AlignStaticRegion(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,void * streaminData)940 void CodechalVdencHevcState::StreaminSetBorderNon64AlignStaticRegion(
941 uint32_t streamInWidth,
942 uint32_t top,
943 uint32_t bottom,
944 uint32_t left,
945 uint32_t right,
946 void* streaminData)
947 {
948 CODECHAL_ENCODE_FUNCTION_ENTER;
949
950 MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
951 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
952 streaminDataParams.maxTuSize = 3;
953 streaminDataParams.maxCuSize = 2;
954 streaminDataParams.numMergeCandidateCu64x64 = 0; // MergeCand setting for Force MV
955 streaminDataParams.numMergeCandidateCu32x32 = 1; // this is always set to 1
956 streaminDataParams.numMergeCandidateCu16x16 = 0;
957 streaminDataParams.numMergeCandidateCu8x8 = 0;
958 streaminDataParams.numImePredictors = 0;
959 streaminDataParams.puTypeCtrl = 0xff; //Force MV
960
961 SetStreaminDataPerRegion(streamInWidth, top, bottom, left, right, &streaminDataParams, streaminData);
962 }
963
SetupDirtyRectStreamIn(PMOS_RESOURCE streamIn)964 MOS_STATUS CodechalVdencHevcState::SetupDirtyRectStreamIn(PMOS_RESOURCE streamIn)
965 {
966 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
967
968 CODECHAL_ENCODE_FUNCTION_ENTER;
969
970 CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
971
972 MOS_LOCK_PARAMS lockFlags;
973 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
974 lockFlags.WriteOnly = true;
975
976 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
977 m_osInterface,
978 streamIn,
979 &lockFlags);
980 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
981
982 uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
983 uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
984 int32_t streamInNumCUs = streamInWidth * streamInHeight;
985
986 MOS_ZeroMemory(data, streamInNumCUs * 64);
987
988 MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
989 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
990 streaminDataParams.maxTuSize = 3;
991 streaminDataParams.maxCuSize = 3;
992 streaminDataParams.numImePredictors = 0;
993 streaminDataParams.puTypeCtrl = 0xff; //Force MV
994 streaminDataParams.numMergeCandidateCu64x64 = 1; // MergeCand setting for Force MV
995 streaminDataParams.numMergeCandidateCu32x32 = 0; // this is always set to 1
996 streaminDataParams.numMergeCandidateCu16x16 = 0;
997 streaminDataParams.numMergeCandidateCu8x8 = 0;
998
999 for (auto i = 0; i < streamInNumCUs; i++)
1000 {
1001 SetStreaminDataPerLcu(&streaminDataParams, data + (i * 64));
1002 }
1003
1004 uint32_t streamInWidthNo64Align = (MOS_ALIGN_CEIL(m_frameWidth, 32) / 32);
1005 uint32_t streamInHeightNo64Align = (MOS_ALIGN_CEIL(m_frameHeight, 32) / 32);
1006
1007 bool bActualWidth32Align = (m_frameWidth % 32) == 0;
1008 bool bActualHeight32Align = (m_frameHeight % 32) == 0;
1009
1010 // Set the static region when the width is not 64 CU aligned.
1011 if (streamInWidthNo64Align != streamInWidth || !bActualWidth32Align)
1012 {
1013 auto border_top = 0;
1014 auto border_bottom = streamInHeight;
1015 auto border_left = streamInWidthNo64Align - 1;
1016 auto border_right = streamInWidth;
1017
1018 if (!bActualWidth32Align)
1019 {
1020 StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 3, data);
1021 if (streamInWidthNo64Align == streamInWidth)
1022 {
1023 StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left-1, border_right-1, data);
1024 }
1025 }
1026 else
1027 {
1028 StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);
1029 }
1030 }
1031
1032 // Set the static region when the height is not 64 CU aligned.
1033 if (streamInHeightNo64Align != streamInHeight || !bActualHeight32Align)
1034 {
1035 auto border_top = streamInHeightNo64Align - 1;
1036 auto border_bottom = streamInHeight;
1037 auto border_left = 0;
1038 auto border_right = streamInWidth;
1039
1040 if (!bActualHeight32Align)
1041 {
1042 StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 3, data);
1043 if (streamInHeightNo64Align == streamInHeight)
1044 {
1045 StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top - 1, border_bottom - 1, border_left, border_right, data);
1046 }
1047 }
1048 else
1049 {
1050 StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);
1051 }
1052 }
1053
1054 for (int i = m_hevcPicParams->NumDirtyRects - 1; i >= 0; i--)
1055 {
1056 //Check if the region is with in the borders
1057 uint16_t top = (uint16_t)CodecHal_Clip3(0, (streamInHeight - 1), m_hevcPicParams->pDirtyRect[i].Top);
1058 uint16_t bottom = (uint16_t)CodecHal_Clip3(0, (streamInHeight - 1), m_hevcPicParams->pDirtyRect[i].Bottom) + 1;
1059 uint16_t left = (uint16_t)CodecHal_Clip3(0, (streamInWidth - 1), m_hevcPicParams->pDirtyRect[i].Left);
1060 uint16_t right = (uint16_t)CodecHal_Clip3(0, (streamInWidth - 1), m_hevcPicParams->pDirtyRect[i].Right) + 1;
1061
1062 auto dirtyrect_top = top;
1063 auto dirtyrect_bottom = bottom;
1064 auto dirtyrect_left = left;
1065 auto dirtyrect_right = right;
1066
1067 //If the border of the DirtyRect is not aligned with 64 CU, different setting in the border
1068 if (top % 2 != 0)
1069 {
1070 auto border_top = top;
1071 auto border_bottom = top + 1;
1072 auto border_left = left;
1073 auto border_right = right;
1074
1075 StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 2, data);
1076
1077 border_top = top - 1;
1078 border_bottom = top;
1079 border_left = (left % 2 != 0) ? left - 1 : left;
1080 border_right = (right % 2 != 0) ? right + 1 : right;
1081
1082 StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);
1083
1084 dirtyrect_top = top + 1;
1085 }
1086
1087 if (bottom % 2 != 0)
1088 {
1089 auto border_top = bottom - 1;
1090 auto border_bottom = bottom;
1091 auto border_left = left;
1092 auto border_right = right;
1093
1094 StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 2, data);
1095
1096 border_top = bottom;
1097 border_bottom = bottom + 1;
1098 border_left = (left % 2 != 0) ? left - 1 : left;
1099 border_right = (right % 2 != 0) ? right + 1 : right;
1100
1101 StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);
1102
1103 dirtyrect_bottom = bottom - 1;
1104 }
1105
1106 if (left % 2 != 0)
1107 {
1108 auto border_top = top;
1109 auto border_bottom = bottom;
1110 auto border_left = left;
1111 auto border_right = left + 1;
1112
1113 StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 2, data);
1114
1115 border_top = (top % 2 != 0) ? top - 1 : top;
1116 border_bottom = (bottom % 2 != 0) ? bottom + 1 : bottom;
1117 border_left = left - 1;
1118 border_right = left;
1119
1120 StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);
1121
1122 dirtyrect_left = left + 1;
1123 }
1124
1125 if (right % 2 != 0)
1126 {
1127 auto border_top = top;
1128 auto border_bottom = bottom;
1129 auto border_left = right - 1;
1130 auto border_right = right;
1131
1132 StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 2, data);
1133
1134 border_top = (top % 2 != 0) ? top - 1 : top;
1135 border_bottom = (bottom % 2 != 0) ? bottom + 1 : bottom;
1136 border_left = right;
1137 border_right = right + 1;
1138
1139 StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);
1140 dirtyrect_right = right - 1;
1141 }
1142
1143 StreaminSetDirtyRectRegion(streamInWidth, dirtyrect_top, dirtyrect_bottom, dirtyrect_left, dirtyrect_right, 3, data);
1144 }
1145
1146 m_osInterface->pfnUnlockResource(
1147 m_osInterface,
1148 streamIn);
1149
1150 return eStatus;
1151 }
1152
SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)1153 MOS_STATUS CodechalVdencHevcState::SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
1154 {
1155 CODECHAL_ENCODE_FUNCTION_ENTER;
1156
1157 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1158
1159 int32_t currentPass = GetCurrentPass();
1160 if (currentPass < 0)
1161 {
1162 eStatus = MOS_STATUS_INVALID_PARAMETER;
1163 return eStatus;
1164 }
1165 // Add Virtual addr
1166 MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
1167 virtualAddrParams->regionParams[0].presRegion = &m_vdencBrcHistoryBuffer; // Region 0 - History Buffer (Input/Output)
1168 virtualAddrParams->regionParams[0].isWritable = true;
1169 virtualAddrParams->regionParams[1].presRegion =
1170 (MOS_RESOURCE*)m_allocator->GetResource(m_standard, vdencStats); // Region 1 VDEnc Statistics Buffer (Input) - VDENC_HEVC_VP9_FRAME_BASED_STATISTICS_STREAMOUT
1171 virtualAddrParams->regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer; // Region 2 PAK Statistics Buffer (Input) - MFX_PAK_FRAME_STATISTICS
1172 virtualAddrParams->regionParams[3].presRegion = &m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass]; // Region 3 - Input SLB Buffer (Input)
1173 virtualAddrParams->regionParams[4].presRegion = &m_vdencBrcConstDataBuffer[m_currRecycledBufIdx]; // Region 4 - Constant Data (Input)
1174 virtualAddrParams->regionParams[5].presRegion = &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource; // Region 5 - Output SLB Buffer (Output)
1175 virtualAddrParams->regionParams[5].isWritable = true;
1176 virtualAddrParams->regionParams[6].presRegion = &m_dataFromPicsBuffer; // Region 6 - Data Buffer of Current and Reference Pictures for Weighted Prediction (Input/Output)
1177 virtualAddrParams->regionParams[6].isWritable = true;
1178 virtualAddrParams->regionParams[7].presRegion = &m_resLcuBaseAddressBuffer; // Region 7 Slice Stat Streamout (Input)
1179 virtualAddrParams->regionParams[8].presRegion =
1180 (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo); // Region 8 - PAK Information (Input)
1181 virtualAddrParams->regionParams[9].presRegion = &m_resVdencStreamInBuffer[m_currRecycledBufIdx]; // Region 9 � Streamin Buffer for ROI (Input)
1182 virtualAddrParams->regionParams[10].presRegion = &m_vdencDeltaQpBuffer[m_currRecycledBufIdx]; // Region 10 � Delta QP Buffer for ROI (Input)
1183 virtualAddrParams->regionParams[11].presRegion = &m_vdencOutputROIStreaminBuffer; // Region 11 � Streamin Buffer for ROI (Output)
1184 virtualAddrParams->regionParams[11].isWritable = true;
1185
1186 // region 15 always in clear
1187 virtualAddrParams->regionParams[15].presRegion = &m_vdencBrcDbgBuffer; // Region 15 - Debug Buffer (Output)
1188 virtualAddrParams->regionParams[15].isWritable = true;
1189
1190 return eStatus;
1191 }
1192
HuCBrcUpdate()1193 MOS_STATUS CodechalVdencHevcState::HuCBrcUpdate()
1194 {
1195 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1196
1197 CODECHAL_ENCODE_FUNCTION_ENTER;
1198
1199 MOS_COMMAND_BUFFER cmdBuffer;
1200 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
1201
1202 if (!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_brcInit))
1203 {
1204 // Send command buffer header at the beginning (OS dependent)
1205 bool requestFrameTracking = m_singleTaskPhaseSupported ?
1206 m_firstTaskInPhase : 0;
1207 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
1208 }
1209
1210 int32_t currentPass = GetCurrentPass();
1211 if (currentPass < 0)
1212 {
1213 eStatus = MOS_STATUS_INVALID_PARAMETER;
1214 return eStatus;
1215 }
1216 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructBatchBufferHuCBRC(&m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass]));
1217
1218 // load kernel from WOPCM into L2 storage RAM
1219 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
1220 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
1221
1222 if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) // Low Delay BRC
1223 {
1224 imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcLowdelayKernelDescriptor;
1225 }
1226 else
1227 {
1228 imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcUpdateKernelDescriptor;
1229 }
1230
1231 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
1232
1233 // pipe mode select
1234 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
1235 pipeModeSelectParams.Mode = m_mode;
1236 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
1237
1238 // DMEM set
1239 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
1240
1241 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
1242 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
1243 dmemParams.presHucDataSource = &(m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]);
1244 dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
1245 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
1246
1247 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
1248
1249 // Set Const Data buffer
1250 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetConstDataHuCBrcUpdate());
1251
1252 // Add Virtual addr
1253 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCBrcUpdate(&m_virtualAddrParams));
1254 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &m_virtualAddrParams));
1255
1256 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcDummyStreamObject(&cmdBuffer));
1257
1258 // Store HUC_STATUS2 register bit 6 before HUC_Start command
1259 // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
1260 // (HUC_Start command with last start bit set).
1261 CODECHAL_DEBUG_TOOL(
1262 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
1263 )
1264
1265 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
1266
1267 // wait Huc completion (use HEVC bit for now)
1268 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
1269 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
1270 vdPipeFlushParams.Flags.bFlushHEVC = 1;
1271 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
1272 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
1273
1274 // Flush the engine to ensure memory written out
1275 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
1276 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
1277 flushDwParams.bVideoPipelineCacheInvalidate = true;
1278 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
1279
1280 // Write HUC_STATUS mask: DW1 (mask value)
1281 MHW_MI_STORE_DATA_PARAMS storeDataParams;
1282 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
1283 storeDataParams.pOsResource = &m_resPakMmioBuffer;
1284 storeDataParams.dwResourceOffset = sizeof(uint32_t);
1285 storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK;
1286 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
1287
1288 // store HUC_STATUS register: DW0 (actual value)
1289 CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
1290 auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
1291 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
1292 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
1293 storeRegParams.presStoreBuffer = &m_resPakMmioBuffer;
1294 storeRegParams.dwOffset = 0;
1295 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
1296 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
1297
1298 // DW0 & DW1 will considered together for conditional batch buffer end cmd later
1299 if ((!m_singleTaskPhaseSupported) && (m_osInterface->bNoParsingAssistanceInKmd))
1300 {
1301 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
1302 }
1303
1304 // HuC Input
1305 CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(true));
1306
1307 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
1308
1309 if (!m_singleTaskPhaseSupported)
1310 {
1311 bool renderingFlags = m_videoContextUsesNullHw;
1312
1313 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
1314 &cmdBuffer,
1315 CODECHAL_MEDIA_STATE_BRC_UPDATE,
1316 "ENC")));
1317
1318 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
1319 }
1320
1321 CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false));
1322
1323 return eStatus;
1324 }
1325
HuCBrcDummyStreamObject(PMOS_COMMAND_BUFFER cmdBuffer)1326 MOS_STATUS CodechalVdencHevcState::HuCBrcDummyStreamObject(PMOS_COMMAND_BUFFER cmdBuffer)
1327 {
1328 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1329
1330 CODECHAL_ENCODE_FUNCTION_ENTER;
1331
1332 // pass dummy buffer by Ind Obj Addr command
1333 MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjParams;
1334 MOS_ZeroMemory(&indObjParams, sizeof(indObjParams));
1335 indObjParams.presDataBuffer = &m_vdencBrcDbgBuffer;
1336 indObjParams.dwDataSize = 1;
1337 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucIndObjBaseAddrStateCmd(cmdBuffer, &indObjParams));
1338
1339 MHW_VDBOX_HUC_STREAM_OBJ_PARAMS streamObjParams;
1340 MOS_ZeroMemory(&streamObjParams, sizeof(streamObjParams));
1341 streamObjParams.dwIndStreamInLength = 1;
1342 streamObjParams.bHucProcessing = true;
1343 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStreamObjectCmd(cmdBuffer, &streamObjParams));
1344
1345 return eStatus;
1346 }
1347
SetVdencPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & pipeModeSelectParams)1348 void CodechalVdencHevcState::SetVdencPipeModeSelectParams(
1349 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& pipeModeSelectParams)
1350 {
1351 pipeModeSelectParams.ucVdencBitDepthMinus8 = m_hevcSeqParams->bit_depth_luma_minus8;
1352 pipeModeSelectParams.bPakThresholdCheckEnable = m_hevcSeqParams->SliceSizeControl;
1353 pipeModeSelectParams.ChromaType = m_hevcSeqParams->chroma_format_idc;
1354 pipeModeSelectParams.bTlbPrefetchEnable = true;
1355 pipeModeSelectParams.Format = m_rawSurfaceToPak->Format;
1356
1357 // can be enabled by reg key (disabled by default)
1358 pipeModeSelectParams.bVdencPakObjCmdStreamOutEnable = m_vdencPakObjCmdStreamOutEnabled;
1359
1360 int32_t currentPass = GetCurrentPass();
1361
1362 // needs to be enabled for 1st pass in multi-pass case
1363 // This bit is ignored if PAK only second pass is enabled.
1364 if ((currentPass == 0) && (currentPass != m_numPasses))
1365 {
1366 pipeModeSelectParams.bVdencPakObjCmdStreamOutEnable = true;
1367 }
1368 }
1369
SetVdencSurfaceStateParams(MHW_VDBOX_SURFACE_PARAMS & srcSurfaceParams,MHW_VDBOX_SURFACE_PARAMS & reconSurfaceParams,MHW_VDBOX_SURFACE_PARAMS & ds8xSurfaceParams,MHW_VDBOX_SURFACE_PARAMS & ds4xSurfaceParams)1370 void CodechalVdencHevcState::SetVdencSurfaceStateParams(
1371 MHW_VDBOX_SURFACE_PARAMS& srcSurfaceParams,
1372 MHW_VDBOX_SURFACE_PARAMS& reconSurfaceParams,
1373 MHW_VDBOX_SURFACE_PARAMS& ds8xSurfaceParams,
1374 MHW_VDBOX_SURFACE_PARAMS& ds4xSurfaceParams)
1375 {
1376 // VDENC_SRC_SURFACE_STATE parameters
1377 srcSurfaceParams.dwActualWidth = ((m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3));
1378 srcSurfaceParams.dwActualHeight = ((m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3));
1379 srcSurfaceParams.bColorSpaceSelection = (m_hevcSeqParams->InputColorSpace == ECOLORSPACE_P709) ? 1 : 0;
1380
1381 // VDENC_REF_SURFACE_STATE parameters
1382 reconSurfaceParams.dwActualWidth = srcSurfaceParams.dwActualWidth;
1383 reconSurfaceParams.dwActualHeight = srcSurfaceParams.dwActualHeight;
1384 reconSurfaceParams.dwReconSurfHeight = m_rawSurfaceToPak->dwHeight;
1385
1386 // VDENC_DS_REF_SURFACE_STATE parameters
1387 MOS_ZeroMemory(&ds8xSurfaceParams, sizeof(ds8xSurfaceParams));
1388 ds8xSurfaceParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
1389 ds8xSurfaceParams.psSurface = m_trackedBuf->Get8xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
1390 ds8xSurfaceParams.ucSurfaceStateId = CODECHAL_MFX_DSRECON_SURFACE_ID;
1391 ds8xSurfaceParams.dwActualWidth = ds8xSurfaceParams.psSurface->dwWidth;
1392 ds8xSurfaceParams.dwActualHeight = ds8xSurfaceParams.psSurface->dwHeight;
1393
1394 MOS_ZeroMemory(&ds4xSurfaceParams, sizeof(ds4xSurfaceParams));
1395 ds4xSurfaceParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
1396 ds4xSurfaceParams.psSurface = m_trackedBuf->Get4xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
1397 ds4xSurfaceParams.ucSurfaceStateId = CODECHAL_MFX_DSRECON_SURFACE_ID;
1398 ds4xSurfaceParams.dwActualWidth = ds4xSurfaceParams.psSurface->dwWidth;
1399 ds4xSurfaceParams.dwActualHeight = ds4xSurfaceParams.psSurface->dwHeight;
1400 }
1401
SetVdencPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)1402 void CodechalVdencHevcState::SetVdencPipeBufAddrParams(
1403 MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams)
1404 {
1405 pipeBufAddrParams = {};
1406 pipeBufAddrParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
1407 pipeBufAddrParams.psRawSurface = m_rawSurfaceToPak;
1408 pipeBufAddrParams.ps4xDsSurface = m_trackedBuf->Get4xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
1409 pipeBufAddrParams.ps8xDsSurface = m_trackedBuf->Get8xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
1410 pipeBufAddrParams.presVdencStreamOutBuffer = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, vdencStats);
1411 pipeBufAddrParams.dwVdencStatsStreamOutOffset = 0;
1412 pipeBufAddrParams.presVdencIntraRowStoreScratchBuffer = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, vdencIntraRowStoreScratch);
1413 pipeBufAddrParams.presVdencPakObjCmdStreamOutBuffer = m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
1414 pipeBufAddrParams.dwNumRefIdxL0ActiveMinus1 = m_hevcSliceParams->num_ref_idx_l0_active_minus1;
1415 pipeBufAddrParams.dwNumRefIdxL1ActiveMinus1 = m_hevcSliceParams->num_ref_idx_l1_active_minus1;
1416
1417 if (m_vdencStreamInEnabled)
1418 {
1419 if (m_vdencHucUsed && m_hevcPicParams->NumROI && !m_vdencNativeROIEnabled)
1420 {
1421 pipeBufAddrParams.presVdencStreamInBuffer = &m_vdencOutputROIStreaminBuffer;
1422 }
1423 else
1424 {
1425 pipeBufAddrParams.presVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
1426 }
1427 }
1428
1429 PCODEC_PICTURE l0RefFrameList = m_hevcSliceParams->RefPicList[LIST_0];
1430 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
1431 {
1432 CODEC_PICTURE refPic = l0RefFrameList[refIdx];
1433
1434 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
1435 {
1436 // L0 references
1437 uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
1438 pipeBufAddrParams.presVdencReferences[refIdx] = &m_refList[refPicIdx]->sRefReconBuffer.OsResource;
1439
1440 // 4x/8x DS surface for VDEnc
1441 uint8_t scaledIdx = m_refList[refPicIdx]->ucScalingIdx;
1442 pipeBufAddrParams.presVdenc4xDsSurface[refIdx] = &(m_trackedBuf->Get4xDsReconSurface(scaledIdx))->OsResource;
1443 pipeBufAddrParams.presVdenc8xDsSurface[refIdx] = &(m_trackedBuf->Get8xDsReconSurface(scaledIdx))->OsResource;
1444 }
1445 }
1446
1447 if (!m_lowDelay)
1448 {
1449 PCODEC_PICTURE l1RefFrameList = m_hevcSliceParams->RefPicList[LIST_1];
1450 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l1_active_minus1; refIdx++)
1451 {
1452 CODEC_PICTURE refPic = l1RefFrameList[refIdx];
1453
1454 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
1455 {
1456 // L1 references
1457 uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
1458 pipeBufAddrParams.presVdencReferences[refIdx + m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1] =
1459 &m_refList[refPicIdx]->sRefReconBuffer.OsResource;
1460
1461 // 4x/8x DS surface for VDEnc
1462 uint8_t scaledIdx = m_refList[refPicIdx]->ucScalingIdx;
1463 pipeBufAddrParams.presVdenc4xDsSurface[refIdx + m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1] =
1464 &(m_trackedBuf->Get4xDsReconSurface(scaledIdx))->OsResource;
1465 pipeBufAddrParams.presVdenc8xDsSurface[refIdx + m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1] =
1466 &(m_trackedBuf->Get8xDsReconSurface(scaledIdx))->OsResource;
1467 }
1468 }
1469 }
1470
1471 uint8_t idxForTempMVP = 0xFF;
1472
1473 if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
1474 {
1475 uint8_t frameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
1476 idxForTempMVP = m_refList[frameIdx]->ucScalingIdx;
1477 }
1478
1479 if (idxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
1480 {
1481 // Temporal reference MV index is invalid and so disable the temporal MVP
1482 m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
1483 }
1484 else
1485 {
1486 pipeBufAddrParams.presColMvTempBuffer[0] = m_trackedBuf->GetMvTemporalBuffer(idxForTempMVP);
1487 }
1488
1489 // Disable temporal MVP for LDB frames which only refer to I frame
1490 if (m_pictureCodingType == I_TYPE)
1491 {
1492 m_currGopIFramePOC = m_hevcPicParams->CurrPicOrderCnt;
1493 }
1494
1495 if (m_hevcSeqParams->sps_temporal_mvp_enable_flag == 0 && m_hevcSliceParams->slice_temporal_mvp_enable_flag == 1)
1496 {
1497 CODECHAL_ENCODE_NORMALMESSAGE("Attention: temporal MVP flag is inconsistent between seq and slice.");
1498 m_hevcSliceParams->slice_temporal_mvp_enable_flag = 0;
1499 }
1500
1501 if (m_lowDelay && m_hevcSliceParams->num_ref_idx_l0_active_minus1 == 0
1502 && m_currGopIFramePOC != -1 && m_hevcSliceParams->slice_temporal_mvp_enable_flag != 0)
1503 {
1504 auto idx = m_picIdx[m_hevcSliceParams->RefPicList[0][0].FrameIdx].ucPicIdx;
1505
1506 if (m_refList[idx]->iFieldOrderCnt[0] == m_currGopIFramePOC)
1507 {
1508 m_hevcSliceParams->slice_temporal_mvp_enable_flag = 0;
1509 }
1510 }
1511
1512 }
1513
SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceStateParams)1514 void CodechalVdencHevcState::SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE& sliceStateParams)
1515 {
1516 CODECHAL_ENCODE_FUNCTION_ENTER;
1517
1518 CodechalEncodeHevcBase::SetHcpSliceStateCommonParams(sliceStateParams);
1519
1520 sliceStateParams.bVdencInUse = true;
1521 sliceStateParams.bVdencHucInUse = m_hevcVdencAcqpEnabled || m_brcEnabled;
1522 sliceStateParams.bWeightedPredInUse = m_hevcVdencWeightedPredEnabled;
1523 sliceStateParams.pVdencBatchBuffer = &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx];
1524
1525 // This bit disables Top intra Reference pixel fetch in VDENC mode.
1526 // In PAK only second pass, this bit should be set to one.
1527 // "IntraRefFetchDisable" in HCP SLICE STATE should be set to 0 in first pass and 1 in subsequent passes.
1528 // For dynamic slice, 2nd pass is still VDEnc + PAK pass, not PAK only pass.
1529 sliceStateParams.bIntraRefFetchDisable = m_pakOnlyPass;
1530 }
1531
AddHcpPakInsertSliceHeader(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_BATCH_BUFFER batchBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)1532 MOS_STATUS CodechalVdencHevcState::AddHcpPakInsertSliceHeader(
1533 PMOS_COMMAND_BUFFER cmdBuffer,
1534 PMHW_BATCH_BUFFER batchBuffer,
1535 PMHW_VDBOX_HEVC_SLICE_STATE params)
1536 {
1537 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1538
1539 CODECHAL_ENCODE_FUNCTION_ENTER;
1540
1541 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1542 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pBsBuffer);
1543 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSliceParams);
1544
1545 if (cmdBuffer == nullptr && batchBuffer == nullptr)
1546 {
1547 CODECHAL_ENCODE_ASSERTMESSAGE("There was no valid buffer to add the HW command to.");
1548 return MOS_STATUS_NULL_POINTER;
1549 }
1550
1551 // Insert slice header
1552 MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
1553 MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
1554 pakInsertObjectParams.bLastHeader = true;
1555 pakInsertObjectParams.bEmulationByteBitsInsert = true;
1556 pakInsertObjectParams.pBatchBufferForPakSlices = batchBuffer;
1557
1558 // App does the slice header packing, set the skip count passed by the app
1559 pakInsertObjectParams.uiSkipEmulationCheckCount = params->uiSkipEmulationCheckCount;
1560 pakInsertObjectParams.pBsBuffer = params->pBsBuffer;
1561 pakInsertObjectParams.dwBitSize = params->dwLength;
1562 pakInsertObjectParams.dwOffset = params->dwOffset;
1563 pakInsertObjectParams.bVdencInUse = params->bVdencInUse;
1564
1565 // For HEVC VDEnc Dynamic Slice
1566 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = params->pEncodeHevcSliceParams;
1567 if (m_hevcSeqParams->SliceSizeControl)
1568 {
1569 pakInsertObjectParams.bLastHeader = false;
1570 pakInsertObjectParams.bEmulationByteBitsInsert = false;
1571 pakInsertObjectParams.dwBitSize = hevcSlcParams->BitLengthSliceHeaderStartingPortion;
1572 pakInsertObjectParams.bResetBitstreamStartingPos = true;
1573 }
1574
1575 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(
1576 cmdBuffer,
1577 &pakInsertObjectParams));
1578
1579 if (m_hevcSeqParams->SliceSizeControl)
1580 {
1581 // Send HCP_PAK_INSERT_OBJ command. For dynamic slice, we are skipping the beginning part of slice header.
1582 pakInsertObjectParams.bLastHeader = true;
1583 pakInsertObjectParams.dwBitSize = params->dwLength - hevcSlcParams->BitLengthSliceHeaderStartingPortion;
1584 pakInsertObjectParams.dwOffset += ((hevcSlcParams->BitLengthSliceHeaderStartingPortion + 7) / 8); // Skips the first 5 bytes which is Start Code + Nal Unit Header
1585 pakInsertObjectParams.bResetBitstreamStartingPos = true;
1586 pakInsertObjectParams.bVdencInUse = params->bVdencInUse;
1587 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(
1588 cmdBuffer,
1589 &pakInsertObjectParams));
1590 }
1591
1592 return eStatus;
1593 }
1594
AddHcpWeightOffsetStateCmd(PMOS_COMMAND_BUFFER cmdBuffer,PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams)1595 MOS_STATUS CodechalVdencHevcState::AddHcpWeightOffsetStateCmd(
1596 PMOS_COMMAND_BUFFER cmdBuffer,
1597 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams)
1598 {
1599 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1600
1601 CODECHAL_ENCODE_FUNCTION_ENTER;
1602
1603 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
1604 CODECHAL_ENCODE_CHK_NULL_RETURN(hevcSlcParams);
1605
1606 MHW_VDBOX_HEVC_WEIGHTOFFSET_PARAMS hcpWeightOffsetParams;
1607 MOS_ZeroMemory(&hcpWeightOffsetParams, sizeof(hcpWeightOffsetParams));
1608
1609 for (auto k = 0; k < 2; k++) // k=0: LIST_0, k=1: LIST_1
1610 {
1611 // Luma, Chroma offset
1612 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
1613 {
1614 hcpWeightOffsetParams.LumaOffsets[k][i] = (int16_t)hevcSlcParams->luma_offset[k][i];
1615 // Cb, Cr
1616 for (auto j = 0; j < 2; j++)
1617 {
1618 hcpWeightOffsetParams.ChromaOffsets[k][i][j] = (int16_t)hevcSlcParams->chroma_offset[k][i][j];
1619 }
1620 }
1621
1622 // Luma Weight
1623 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
1624 &hcpWeightOffsetParams.LumaWeights[k],
1625 sizeof(hcpWeightOffsetParams.LumaWeights[k]),
1626 &hevcSlcParams->delta_luma_weight[k],
1627 sizeof(hevcSlcParams->delta_luma_weight[k])));
1628
1629 // Chroma Weight
1630 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
1631 &hcpWeightOffsetParams.ChromaWeights[k],
1632 sizeof(hcpWeightOffsetParams.ChromaWeights[k]),
1633 &hevcSlcParams->delta_chroma_weight[k],
1634 sizeof(hevcSlcParams->delta_chroma_weight[k])));
1635 }
1636
1637 if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_P_SLICE || hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
1638 {
1639 hcpWeightOffsetParams.ucList = LIST_0;
1640 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(cmdBuffer, nullptr, &hcpWeightOffsetParams));
1641 }
1642
1643 if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
1644 {
1645 hcpWeightOffsetParams.ucList = LIST_1;
1646 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(cmdBuffer, nullptr, &hcpWeightOffsetParams));
1647 }
1648
1649 return eStatus;
1650 }
1651
AddVdencWeightOffsetStateCmd(PMOS_COMMAND_BUFFER cmdBuffer,PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams)1652 MOS_STATUS CodechalVdencHevcState::AddVdencWeightOffsetStateCmd(
1653 PMOS_COMMAND_BUFFER cmdBuffer,
1654 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams)
1655 {
1656 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1657
1658 CODECHAL_ENCODE_FUNCTION_ENTER;
1659
1660 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
1661 CODECHAL_ENCODE_CHK_NULL_RETURN(hevcSlcParams);
1662
1663 MHW_VDBOX_VDENC_WEIGHT_OFFSET_PARAMS vdencWeightOffsetParams;
1664 MOS_ZeroMemory(&vdencWeightOffsetParams, sizeof(vdencWeightOffsetParams));
1665
1666 vdencWeightOffsetParams.bWeightedPredEnabled = m_hevcVdencWeightedPredEnabled;
1667
1668 if (vdencWeightOffsetParams.bWeightedPredEnabled)
1669 {
1670 vdencWeightOffsetParams.dwDenom = 1 << (hevcSlcParams->luma_log2_weight_denom);
1671
1672 // Luma offset
1673 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
1674 {
1675 vdencWeightOffsetParams.LumaOffsets[0][i] = (int16_t)hevcSlcParams->luma_offset[0][i];
1676 vdencWeightOffsetParams.LumaOffsets[1][i] = (int16_t)hevcSlcParams->luma_offset[1][i];
1677 }
1678
1679 // Luma Weight
1680 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy(
1681 &vdencWeightOffsetParams.LumaWeights[0],
1682 sizeof(vdencWeightOffsetParams.LumaWeights[0]),
1683 &hevcSlcParams->delta_luma_weight[0],
1684 sizeof(hevcSlcParams->delta_luma_weight[0])),
1685 "Failed to copy luma weight 0 memory.");
1686
1687 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy(
1688 &vdencWeightOffsetParams.LumaWeights[1],
1689 sizeof(vdencWeightOffsetParams.LumaWeights[1]),
1690 &hevcSlcParams->delta_luma_weight[1],
1691 sizeof(hevcSlcParams->delta_luma_weight[1])),
1692 "Failed to copy luma weight 1 memory.");
1693 }
1694
1695 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWeightsOffsetsStateCmd(
1696 cmdBuffer,
1697 nullptr,
1698 &vdencWeightOffsetParams));
1699
1700 return eStatus;
1701 }
1702
AddVdencWalkerStateCmd(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)1703 MOS_STATUS CodechalVdencHevcState::AddVdencWalkerStateCmd(
1704 PMOS_COMMAND_BUFFER cmdBuffer,
1705 PMHW_VDBOX_HEVC_SLICE_STATE params)
1706 {
1707 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1708
1709 CODECHAL_ENCODE_FUNCTION_ENTER;
1710
1711 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
1712 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1713
1714 MHW_VDBOX_VDENC_WALKER_STATE_PARAMS vdencWalkerStateParams;
1715 vdencWalkerStateParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
1716 vdencWalkerStateParams.pHevcEncSeqParams = params->pEncodeHevcSeqParams;
1717 vdencWalkerStateParams.pHevcEncPicParams = params->pEncodeHevcPicParams;
1718 vdencWalkerStateParams.pEncodeHevcSliceParams = params->pEncodeHevcSliceParams;
1719 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams));
1720
1721 return eStatus;
1722 }
1723
ReadBrcPakStats(PMOS_COMMAND_BUFFER cmdBuffer)1724 MOS_STATUS CodechalVdencHevcState::ReadBrcPakStats(
1725 PMOS_COMMAND_BUFFER cmdBuffer)
1726 {
1727 CODECHAL_ENCODE_FUNCTION_ENTER;
1728
1729 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1730
1731 uint32_t offset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
1732 m_encodeStatusBuf.dwNumPassesOffset + // Num passes offset
1733 sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
1734
1735 EncodeReadBrcPakStatsParams readBrcPakStatsParams;
1736 readBrcPakStatsParams.pHwInterface = m_hwInterface;
1737 readBrcPakStatsParams.presBrcPakStatisticBuffer = &m_vdencBrcBuffers.resBrcPakStatisticBuffer[m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite];
1738 readBrcPakStatsParams.presStatusBuffer = &m_encodeStatusBuf.resStatusBuffer;
1739 readBrcPakStatsParams.dwStatusBufNumPassesOffset = offset;
1740 readBrcPakStatsParams.ucPass = (uint8_t) GetCurrentPass();
1741 readBrcPakStatsParams.VideoContext = m_videoContext;
1742
1743 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStatistics(
1744 cmdBuffer,
1745 &readBrcPakStatsParams));
1746
1747 return eStatus;
1748 }
1749
StoreVdencStatistics(PMOS_COMMAND_BUFFER cmdBuffer)1750 MOS_STATUS CodechalVdencHevcState::StoreVdencStatistics(PMOS_COMMAND_BUFFER cmdBuffer)
1751 {
1752 CODECHAL_ENCODE_FUNCTION_ENTER;
1753
1754 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1755
1756 uint32_t offset = sizeof(CodechalVdencHevcLaStats) * m_currLaDataIdx;
1757 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
1758 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
1759 miCpyMemMemParams.presSrc = m_resVdencStatsBuffer; // 8X8 Normalized intra CU count is in m_resVdencStatsBuffer DW1
1760 miCpyMemMemParams.dwSrcOffset = 4;
1761 miCpyMemMemParams.presDst = &m_vdencLaStatsBuffer;
1762 miCpyMemMemParams.dwDstOffset = offset + CODECHAL_OFFSETOF(CodechalVdencHevcLaStats, intraCuCount);
1763 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
1764
1765 return eStatus;
1766 }
1767
StoreLookaheadStatistics(PMOS_COMMAND_BUFFER cmdBuffer)1768 MOS_STATUS CodechalVdencHevcState::StoreLookaheadStatistics(PMOS_COMMAND_BUFFER cmdBuffer)
1769 {
1770 CODECHAL_ENCODE_FUNCTION_ENTER;
1771
1772 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1773
1774 if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()) \
1775 {
1776 CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
1777 eStatus = MOS_STATUS_INVALID_PARAMETER;
1778 return eStatus;
1779 }
1780
1781 auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
1782
1783 uint32_t offset = sizeof(CodechalVdencHevcLaStats) * m_currLaDataIdx;
1784
1785 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
1786 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
1787 miStoreRegMemParams.presStoreBuffer = &m_vdencLaStatsBuffer;
1788 miStoreRegMemParams.dwOffset = offset + CODECHAL_OFFSETOF(CodechalVdencHevcLaStats, frameByteCount);
1789 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
1790 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
1791
1792 // Calculate header size including LCU header
1793 uint32_t headerBitSize = 0;
1794 for (uint32_t i = 0; i < HEVC_MAX_NAL_UNIT_TYPE; i++)
1795 {
1796 headerBitSize += m_nalUnitParams[i]->uiSize * 8;
1797 }
1798 for (uint32_t i = 0; i < m_numSlices; i++)
1799 {
1800 headerBitSize += m_slcData[i].BitSize;
1801 }
1802
1803 // Store to headerBitCount in CodechalVdencHevcLaStats
1804 MHW_MI_STORE_DATA_PARAMS storeDataParams;
1805 storeDataParams.pOsResource = &m_vdencLaStatsBuffer;
1806 storeDataParams.dwResourceOffset = offset + CODECHAL_OFFSETOF(CodechalVdencHevcLaStats, headerBitCount);
1807 storeDataParams.dwValue = headerBitSize;
1808 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
1809
1810 auto mmioRegistersMfx = m_mfxInterface->GetMmioRegisters(m_vdboxIndex);
1811 MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
1812 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
1813 MHW_MI_ATOMIC_PARAMS atomicParams;
1814
1815 MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
1816 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
1817 MOS_ZeroMemory(&atomicParams, sizeof(atomicParams));
1818 // VCS_GPR0_Lo = LCUHdrBits
1819 miLoadRegMemParams.presStoreBuffer = &m_resFrameStatStreamOutBuffer; // LCUHdrBits is in m_resFrameStatStreamOutBuffer DW4
1820 miLoadRegMemParams.dwOffset = 4 * sizeof(uint32_t);
1821 miLoadRegMemParams.dwRegister = mmioRegistersMfx->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo
1822 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(cmdBuffer, &miLoadRegMemParams));
1823
1824 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
1825 cmdBuffer,
1826 &flushDwParams));
1827
1828 // frame headerBitCount += LCUHdrBits
1829 atomicParams.pOsResource = &m_vdencLaStatsBuffer;
1830 atomicParams.dwResourceOffset = offset + CODECHAL_OFFSETOF(CodechalVdencHevcLaStats, headerBitCount);
1831 atomicParams.dwDataSize = sizeof(uint32_t);
1832 atomicParams.Operation = MHW_MI_ATOMIC_ADD;
1833 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(
1834 cmdBuffer,
1835 &atomicParams));
1836
1837 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreVdencStatistics(cmdBuffer));
1838
1839 return eStatus;
1840 }
1841
ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer)1842 MOS_STATUS CodechalVdencHevcState::ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer)
1843 {
1844 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1845
1846 CODECHAL_ENCODE_FUNCTION_ENTER;
1847
1848 MOS_LOCK_PARAMS lockFlags;
1849 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1850 lockFlags.WriteOnly = true;
1851
1852 uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize + sizeof(uint32_t) * 2); // encodeStatus is offset by 2 DWs in the resource
1853
1854 // Report slice size to app only when dynamic slice is enabled
1855 if (!m_hevcSeqParams->SliceSizeControl)
1856 {
1857 // Clear slice size report in EncodeStatus buffer
1858 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_encodeStatusBuf.resStatusBuffer, &lockFlags);
1859 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1860 EncodeStatus* dataStatus = (EncodeStatus*)(data + baseOffset);
1861 MOS_ZeroMemory(&(dataStatus->sliceReport), sizeof(EncodeStatusSliceReport));
1862 m_osInterface->pfnUnlockResource(m_osInterface, &m_encodeStatusBuf.resStatusBuffer);
1863
1864 return eStatus;
1865 }
1866
1867 uint32_t sizeOfSliceSizesBuffer = MOS_ALIGN_CEIL(CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6 * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1868
1869 if (IsFirstPass())
1870 {
1871 // Create/ Initialize slice report buffer once per frame, to be used across passes
1872 if (Mos_ResourceIsNull(&m_resSliceReport[m_encodeStatusBuf.wCurrIndex]))
1873 {
1874 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1875 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1876 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1877 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1878 allocParamsForBufferLinear.Format = Format_Buffer;
1879 allocParamsForBufferLinear.dwBytes = sizeOfSliceSizesBuffer;
1880
1881 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1882 m_osInterface,
1883 &allocParamsForBufferLinear,
1884 &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]),
1885 "Failed to create HEVC VDEnc Slice Report Buffer ");
1886 }
1887
1888 // Clear slice size structure to be sent in EncodeStatusReport buffer
1889 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex], &lockFlags);
1890 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1891 MOS_ZeroMemory(data, sizeOfSliceSizesBuffer);
1892 m_osInterface->pfnUnlockResource(m_osInterface, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]);
1893
1894 // Set slice size pointer in slice size structure
1895 data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface,(&m_encodeStatusBuf.resStatusBuffer), &lockFlags);
1896 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1897 EncodeStatus* dataStatus = (EncodeStatus*)(data + baseOffset);
1898 (dataStatus)->sliceReport.pSliceSize = &m_resSliceReport[m_encodeStatusBuf.wCurrIndex];
1899 m_osInterface->pfnUnlockResource(m_osInterface, &m_encodeStatusBuf.resStatusBuffer);
1900 }
1901
1902 // Copy Slize size data buffer from PAK to be sent back to App
1903 CODECHAL_ENCODE_CHK_STATUS_RETURN(CopyDataBlock(cmdBuffer,
1904 &m_resLcuBaseAddressBuffer, 0, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex], 0, sizeOfSliceSizesBuffer));
1905
1906 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
1907 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
1908 miCpyMemMemParams.presSrc = &m_resFrameStatStreamOutBuffer; // Slice size overflow is in m_resFrameStatStreamOutBuffer DW0[16]
1909 miCpyMemMemParams.dwSrcOffset = 0;
1910 miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer;
1911 miCpyMemMemParams.dwDstOffset = baseOffset + m_encodeStatusBuf.dwSliceReportOffset; // Slice size overflow is at DW0 EncodeStatusSliceReport
1912 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
1913
1914
1915 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
1916 miCpyMemMemParams.presSrc = m_resSliceCountBuffer; // Number of slice sizes are stored in this buffer. Updated at runtime
1917 miCpyMemMemParams.dwSrcOffset = 0;
1918 miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer;
1919 miCpyMemMemParams.dwDstOffset = baseOffset + m_encodeStatusBuf.dwSliceReportOffset + 1; // Num slices is located at DW1 EncodeStatusSliceReport
1920 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
1921
1922 return eStatus;
1923 }
1924
CopyDataBlock(PMOS_COMMAND_BUFFER cmdBuffer,PMOS_RESOURCE sourceSurface,uint32_t sourceOffset,PMOS_RESOURCE destSurface,uint32_t destOffset,uint32_t copySize)1925 MOS_STATUS CodechalVdencHevcState::CopyDataBlock(
1926 PMOS_COMMAND_BUFFER cmdBuffer,
1927 PMOS_RESOURCE sourceSurface,
1928 uint32_t sourceOffset,
1929 PMOS_RESOURCE destSurface,
1930 uint32_t destOffset,
1931 uint32_t copySize)
1932 {
1933 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1934
1935 CODECHAL_ENCODE_FUNCTION_ENTER;
1936
1937 CodechalHucStreamoutParams hucStreamOutParams;
1938 MOS_ZeroMemory(&hucStreamOutParams, sizeof(hucStreamOutParams));
1939
1940 // Ind Obj Addr command
1941 hucStreamOutParams.dataBuffer = sourceSurface;
1942 hucStreamOutParams.dataSize = copySize + sourceOffset;
1943 hucStreamOutParams.dataOffset = MOS_ALIGN_FLOOR(sourceOffset, CODECHAL_PAGE_SIZE);
1944 hucStreamOutParams.streamOutObjectBuffer = destSurface;
1945 hucStreamOutParams.streamOutObjectSize = copySize + destOffset;
1946 hucStreamOutParams.streamOutObjectOffset = MOS_ALIGN_FLOOR(destOffset, CODECHAL_PAGE_SIZE);
1947
1948 // Stream object params
1949 hucStreamOutParams.indStreamInLength = copySize;
1950 hucStreamOutParams.inputRelativeOffset = sourceOffset - hucStreamOutParams.dataOffset;
1951 hucStreamOutParams.outputRelativeOffset = destOffset - hucStreamOutParams.streamOutObjectOffset;
1952
1953
1954 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->PerformHucStreamOut(
1955 &hucStreamOutParams,
1956 cmdBuffer));
1957
1958 // wait Huc completion (use HEVC bit for now)
1959 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
1960 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
1961 vdPipeFlushParams.Flags.bFlushHEVC = 1;
1962 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
1963 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
1964
1965 // Flush the engine to ensure memory written out
1966 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
1967 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
1968 flushDwParams.bVideoPipelineCacheInvalidate = true;
1969 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
1970
1971 return eStatus;
1972 }
1973
ExecutePictureLevel()1974 MOS_STATUS CodechalVdencHevcState::ExecutePictureLevel()
1975 {
1976 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1977
1978 CODECHAL_ENCODE_FUNCTION_ENTER;
1979
1980 PerfTagSetting perfTag;
1981 perfTag.Value = 0;
1982 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
1983 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
1984 perfTag.PictureCodingType = m_pictureCodingType;
1985 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
1986
1987 if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()) \
1988 {
1989 CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
1990 eStatus = MOS_STATUS_INVALID_PARAMETER;
1991 return eStatus;
1992 }
1993
1994 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
1995
1996 if (!m_singleTaskPhaseSupportedInPak)
1997 {
1998 // Command buffer or patch list size are too small and so we cannot submit multiple pass of PAKs together
1999 m_firstTaskInPhase = true;
2000 m_lastTaskInPhase = true;
2001 }
2002
2003 // PAK pass type for each pass: VDEnc+PAK vs. PAK-only
2004 SetPakPassType();
2005
2006 bool pakOnlyMultipassEnable;
2007
2008 pakOnlyMultipassEnable = m_pakOnlyPass;
2009
2010 bool panicEnabled = (m_brcEnabled) && (m_panicEnable) && (GetCurrentPass() == 1) && !m_pakOnlyPass;
2011
2012 uint32_t rollingILimit = (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_ROW) ? MOS_ROUNDUP_DIVIDE(m_frameHeight, 32) : (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_COLUMN) ? MOS_ROUNDUP_DIVIDE(m_frameWidth, 32) : 0;
2013
2014 m_refList[m_currReconstructedPic.FrameIdx]->rollingIntraRefreshedPosition =
2015 CodecHal_Clip3(0, rollingILimit, m_hevcPicParams->IntraInsertionLocation + m_hevcPicParams->IntraInsertionSize);
2016
2017 // For ACQP / BRC, update pic params rolling intra reference location here before cmd buffer is prepared.
2018 PCODEC_PICTURE l0RefFrameList = m_hevcSliceParams->RefPicList[LIST_0];
2019 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
2020 {
2021 CODEC_PICTURE refPic = l0RefFrameList[refIdx];
2022
2023 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
2024 {
2025 uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
2026 m_hevcPicParams->RollingIntraReferenceLocation[refIdx] = m_refList[refPicIdx]->rollingIntraRefreshedPosition;
2027 }
2028 }
2029
2030 // clean-up per VDBOX semaphore memory
2031 int32_t currentPass = GetCurrentPass();
2032
2033 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CmdInitializerSetConstData(
2034 m_osInterface,
2035 m_miInterface,
2036 m_vdencInterface,
2037 m_hevcSeqParams,
2038 m_hevcPicParams,
2039 m_hevcSliceParams,
2040 m_pakOnlyPass,
2041 m_hevcVdencAcqpEnabled,
2042 m_brcEnabled,
2043 m_vdencStreamInEnabled,
2044 m_vdencNativeROIEnabled,
2045 m_hevcVdencRoundingEnabled,
2046 panicEnabled,
2047 currentPass));
2048
2049 // Send HuC BRC Init/ Update only on first pipe.
2050 if (m_vdencHucUsed)
2051 {
2052 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CmdInitializerExecute(true, &m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass]));
2053
2054 if (!m_singleTaskPhaseSupported)
2055 {
2056 //Reset earlier set PAK perf tag
2057 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2058
2059 // STF: HuC+VDEnc+PAK single BB, non-STF: HuC Init/HuC Update/(VDEnc+PAK) in separate BBs
2060 perfTag.Value = 0;
2061 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2062 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET;
2063 perfTag.PictureCodingType = m_pictureCodingType;
2064 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2065 }
2066 m_resVdencBrcUpdateDmemBufferPtr[0] = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo);
2067
2068 // Invoke BRC init/reset FW
2069 if (m_brcInit || m_brcReset)
2070 {
2071 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcInitReset());
2072 }
2073
2074 if (!m_singleTaskPhaseSupported)
2075 {
2076 //Reset performance buffer used for BRC init
2077 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2078 // STF: HuC+VDEnc+PAK single BB, non-STF: HuC Init/HuC Update/(VDEnc+PAK) in separate BBs
2079 perfTag.Value = 0;
2080 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2081 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE;
2082 perfTag.PictureCodingType = m_pictureCodingType;
2083 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2084 }
2085
2086 // Invoke BRC update FW
2087 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcUpdate());
2088 m_brcInit = m_brcReset = false;
2089 if (!m_singleTaskPhaseSupported)
2090 {
2091 //reset performance buffer used for BRC update
2092 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2093 }
2094 }
2095 else
2096 {
2097 ConstructBatchBufferHuCCQP(&m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource);
2098 }
2099
2100 MOS_COMMAND_BUFFER cmdBuffer;
2101 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2102
2103 if (!m_singleTaskPhaseSupported)
2104 {
2105 //PAK Perf Tag
2106 perfTag.Value = 0;
2107 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2108 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
2109 perfTag.PictureCodingType = m_pictureCodingType;
2110 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2111 }
2112
2113 if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_hevcVdencAcqpEnabled)) )
2114 {
2115 // Send command buffer header at the beginning (OS dependent)
2116 // frame tracking tag is only added in the last command buffer header
2117 bool requestFrameTracking = m_singleTaskPhaseSupported ?
2118 m_firstTaskInPhase :
2119 m_lastTaskInPhase;
2120
2121 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
2122 }
2123
2124 // ACQP + SSC, ACQP + WP, BRC, BRC + SSC, BRC + WP
2125 // 2nd pass for SSC, WP, BRC needs conditional batch buffer end cmd, which is decided by HUC_STATUS output from HuC
2126 if (currentPass && m_vdencHuCConditional2ndPass && (currentPass != m_uc2NdSaoPass))
2127 {
2128 MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
2129
2130 // Insert conditional batch buffer end
2131 MOS_ZeroMemory(
2132 &miConditionalBatchBufferEndParams,
2133 sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
2134
2135 // VDENC uses HuC FW generated semaphore for conditional 2nd pass
2136 miConditionalBatchBufferEndParams.presSemaphoreBuffer =
2137 &m_resPakMmioBuffer;
2138
2139 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
2140 &cmdBuffer,
2141 &miConditionalBatchBufferEndParams));
2142
2143 auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
2144
2145 uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
2146
2147 // Write back the HCP image control register for RC6 may clean it out
2148 MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
2149 MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
2150 miLoadRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2151 miLoadRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOffset;
2152 miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2153 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
2154
2155 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2156 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2157 miStoreRegMemParams.presStoreBuffer = &m_vdencBrcBuffers.resBrcPakStatisticBuffer[m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite];
2158 miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
2159 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2160 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2161
2162 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2163 miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2164 miStoreRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
2165 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2166 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2167 }
2168
2169 if (!currentPass && m_osInterface->bTagResourceSync)
2170 {
2171 // This is a short term solution to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB
2172 // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning
2173 // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine
2174 // as long as Dec/VP/Enc won't depend on this PAK so soon.
2175
2176 MOS_RESOURCE globalGpuContextSyncTagBuffer;
2177
2178 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
2179 m_osInterface,
2180 &globalGpuContextSyncTagBuffer));
2181
2182 MHW_MI_STORE_DATA_PARAMS params;
2183 params.pOsResource = &globalGpuContextSyncTagBuffer;
2184 params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2185 uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2186 params.dwValue = (value > 0) ? (value - 1) : 0;
2187 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, ¶ms));
2188 }
2189
2190 if (!m_lookaheadUpdate)
2191 {
2192 CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2193 }
2194
2195 MHW_VDBOX_SURFACE_PARAMS srcSurfaceParams;
2196 SetHcpSrcSurfaceParams(srcSurfaceParams);
2197
2198 MHW_VDBOX_SURFACE_PARAMS reconSurfaceParams;
2199 SetHcpReconSurfaceParams(reconSurfaceParams);
2200
2201 *m_pipeBufAddrParams = {};
2202 SetHcpPipeBufAddrParams(*m_pipeBufAddrParams);
2203 m_pipeBufAddrParams->pRawSurfParam = &srcSurfaceParams;
2204 m_pipeBufAddrParams->pDecodedReconParam = &reconSurfaceParams;
2205 m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams, &cmdBuffer);
2206
2207 SetHcpPipeModeSelectParams(*m_pipeModeSelectParams);
2208
2209 // HuC modifies HCP pipe mode select command, when 2nd pass SAO is required
2210 if (m_vdencHucUsed && m_b2NdSaoPassNeeded)
2211 {
2212 // current location to add cmds in 2nd level batch buffer
2213 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].iCurrent = 0;
2214 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2215 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = 0;
2216
2217 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2218
2219 // save offset for next 2nd level batch buffer usage
2220 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
2221 }
2222 else
2223 {
2224 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, m_pipeModeSelectParams));
2225 }
2226
2227 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &srcSurfaceParams));
2228
2229 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &reconSurfaceParams));
2230
2231 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(&cmdBuffer, m_pipeBufAddrParams));
2232
2233 MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
2234 SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2235 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams));
2236
2237 MHW_VDBOX_QM_PARAMS fqmParams, qmParams;
2238 SetHcpQmStateParams(fqmParams, qmParams);
2239 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpFqmStateCmd(&cmdBuffer, &fqmParams));
2240 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpQmStateCmd(&cmdBuffer, &qmParams));
2241
2242 SetVdencPipeModeSelectParams(*m_pipeModeSelectParams);
2243 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&cmdBuffer, m_pipeModeSelectParams));
2244
2245 MHW_VDBOX_SURFACE_PARAMS dsSurfaceParams[2];
2246 SetVdencSurfaceStateParams(srcSurfaceParams, reconSurfaceParams, dsSurfaceParams[0], dsSurfaceParams[1]);
2247 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencSrcSurfaceStateCmd(&cmdBuffer, &srcSurfaceParams));
2248 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &reconSurfaceParams));
2249 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencDsRefSurfaceStateCmd(&cmdBuffer, &dsSurfaceParams[0], 2));
2250
2251 SetVdencPipeBufAddrParams(*m_pipeBufAddrParams);
2252 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeBufAddrCmd(&cmdBuffer, m_pipeBufAddrParams));
2253
2254 MHW_VDBOX_HEVC_PIC_STATE picStateParams;
2255 SetHcpPicStateParams(picStateParams);
2256
2257 if (m_vdencHucUsed)
2258 {
2259 // 2nd level batch buffer
2260 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
2261 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2262
2263 // save offset for next 2nd level batch buffer usage
2264 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset += m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2265 }
2266 else
2267 {
2268 // current location to add cmds in 2nd level batch buffer
2269 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].iCurrent = 0;
2270 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2271 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = 0;
2272
2273 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2274 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2275 }
2276
2277 // Send HEVC_VP9_RDOQ_STATE command
2278 if (m_hevcRdoqEnabled)
2279 {
2280 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams));
2281 }
2282
2283 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2284
2285 return eStatus;
2286 }
2287
SendHwSliceEncodeCommand(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)2288 MOS_STATUS CodechalVdencHevcState::SendHwSliceEncodeCommand(
2289 PMOS_COMMAND_BUFFER cmdBuffer,
2290 PMHW_VDBOX_HEVC_SLICE_STATE params)
2291 {
2292 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2293
2294 CODECHAL_ENCODE_FUNCTION_ENTER;
2295
2296 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2297 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
2298 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pHevcPicIdx);
2299 CODECHAL_ENCODE_CHK_NULL_RETURN(params->presDataBuffer);
2300 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSeqParams);
2301 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcPicParams);
2302 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSliceParams);
2303 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pBsBuffer);
2304 CODECHAL_ENCODE_CHK_NULL_RETURN(params->ppNalUnitParams);
2305
2306 PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams = params->pEncodeHevcPicParams;
2307 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = params->pEncodeHevcSliceParams;
2308
2309 // VDENC does not use batch buffer for slice state
2310 // add HCP_REF_IDX command
2311 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpRefIdxCmd(cmdBuffer, nullptr, params));
2312
2313 if (params->bVdencHucInUse)
2314 {
2315 // 2nd level batch buffer
2316 PMHW_BATCH_BUFFER secondLevelBatchBufferUsed = params->pVdencBatchBuffer;
2317 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(cmdBuffer, secondLevelBatchBufferUsed));
2318 }
2319 else
2320 {
2321 // Weighted Prediction
2322 // This slice level command is issued, if the weighted_pred_flag or weighted_bipred_flag equals one.
2323 // If zero, then this command is not issued.
2324 if (params->bWeightedPredInUse)
2325 {
2326 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpWeightOffsetStateCmd(cmdBuffer, hevcSlcParams));
2327 }
2328
2329 // add HEVC Slice state commands
2330 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSliceStateCmd(cmdBuffer, params));
2331
2332 // add HCP_PAK_INSERT_OBJECTS command
2333 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPakInsertNALUs(cmdBuffer, params->pVdencBatchBuffer, params));
2334
2335 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPakInsertSliceHeader(cmdBuffer, params->pVdencBatchBuffer, params));
2336
2337 // Send VDENC_WEIGHT_OFFSETS_STATE command
2338 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddVdencWeightOffsetStateCmd(cmdBuffer, hevcSlcParams));
2339 }
2340
2341 // Send VDENC_WALKER_STATE command
2342 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddVdencWalkerStateCmd(cmdBuffer, params));
2343
2344 return eStatus;
2345 }
2346
ExecuteSliceLevel()2347 MOS_STATUS CodechalVdencHevcState::ExecuteSliceLevel()
2348 {
2349 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2350
2351 CODECHAL_ENCODE_FUNCTION_ENTER;
2352
2353 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBatchBufferForPakSlices());
2354
2355 MOS_COMMAND_BUFFER cmdBuffer;
2356 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2357
2358 CODECHAL_ENCODE_CHK_NULL_RETURN(m_sliceStateParams);
2359 SetHcpSliceStateCommonParams(*m_sliceStateParams);
2360
2361 // starting location for executing slice level cmds
2362 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2363
2364 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
2365 for (uint32_t startLcu = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
2366 {
2367 if (IsFirstPass())
2368 {
2369 slcData[slcCount].CmdOffset = startLcu * (m_hcpInterface->GetHcpPakObjSize()) * sizeof(uint32_t);
2370 }
2371
2372 SetHcpSliceStateParams(*m_sliceStateParams, slcData, slcCount);
2373
2374 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(&cmdBuffer, m_sliceStateParams));
2375
2376 startLcu += m_hevcSliceParams[slcCount].NumLCUsInSlice;
2377
2378 m_batchBufferForPakSlicesStartOffset = (uint32_t)m_batchBufferForPakSlices[m_currPakSliceIdx].iCurrent;
2379
2380 if (m_hevcVdencAcqpEnabled || m_brcEnabled)
2381 {
2382 // save offset for next 2nd level batch buffer usage
2383 // This is because we don't know how many times HCP_WEIGHTOFFSET_STATE & HCP_PAK_INSERT_OBJECT will be inserted for each slice
2384 // dwVdencBatchBufferPerSliceConstSize: constant size for each slice
2385 // m_vdencBatchBufferPerSliceVarSize: variable size for each slice
2386 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset += m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[slcCount];
2387 }
2388
2389 // Send VD_PIPELINE_FLUSH command
2390 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2391 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2392 vdPipelineFlushParams.Flags.bWaitDoneMFX = 1;
2393 vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
2394 vdPipelineFlushParams.Flags.bFlushVDENC = 1;
2395 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2396 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2397 }
2398
2399 if (m_useBatchBufferForPakSlices)
2400 {
2401 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_UnlockBb(
2402 m_osInterface,
2403 &m_batchBufferForPakSlices[m_currPakSliceIdx],
2404 m_lastTaskInPhase));
2405 }
2406
2407 // Insert end of sequence/stream if set
2408 if (m_lastPicInStream || m_lastPicInSeq)
2409 {
2410 MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
2411 MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
2412 pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq;
2413 pakInsertObjectParams.bLastPicInStream = m_lastPicInStream;
2414 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams));
2415 }
2416
2417 // Send MI_FLUSH command
2418 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2419 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2420 flushDwParams.bVideoPipelineCacheInvalidate = true;
2421 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2422
2423 // Send VD_PIPELINE_FLUSH command
2424 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2425 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2426 vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
2427 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
2428 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2429 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2430
2431 // Send MI_FLUSH command
2432 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2433 flushDwParams.bVideoPipelineCacheInvalidate = true;
2434 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2435
2436 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2437
2438 // BRC PAK statistics different for each pass
2439 if (m_brcEnabled)
2440 {
2441 uint32_t offset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
2442 m_encodeStatusBuf.dwNumPassesOffset + // Num passes offset
2443 sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
2444
2445 EncodeReadBrcPakStatsParams readBrcPakStatsParams;
2446 readBrcPakStatsParams.pHwInterface = m_hwInterface;
2447 readBrcPakStatsParams.presBrcPakStatisticBuffer = &m_vdencBrcBuffers.resBrcPakStatisticBuffer[m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite];
2448 readBrcPakStatsParams.presStatusBuffer = &m_encodeStatusBuf.resStatusBuffer;
2449 readBrcPakStatsParams.dwStatusBufNumPassesOffset = offset;
2450 readBrcPakStatsParams.ucPass = (uint8_t) GetCurrentPass();
2451 readBrcPakStatsParams.VideoContext = m_videoContext;
2452
2453 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStatistics(
2454 &cmdBuffer,
2455 &readBrcPakStatsParams));
2456 }
2457
2458 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer));
2459 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSliceSize(&cmdBuffer));
2460
2461 if (m_lookaheadPass)
2462 {
2463 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreLookaheadStatistics(&cmdBuffer));
2464 }
2465 #if USE_CODECHAL_DEBUG_TOOL
2466 if (m_brcEnabled && m_enableFakeHrdSize)
2467 {
2468 uint32_t sizeInByte = (m_pictureCodingType == I_TYPE) ? m_fakeIFrameHrdSize : m_fakePBFrameHrdSize;
2469 CODECHAL_ENCODE_CHK_STATUS_RETURN(ModifyEncodedFrameSizeWithFakeHeaderSize(
2470 &cmdBuffer,
2471 sizeInByte,
2472 m_resVdencBrcUpdateDmemBufferPtr[0],
2473 0,
2474 &m_resFrameStatStreamOutBuffer,
2475 sizeof(uint32_t) * 4));
2476 }
2477 #endif
2478
2479 if (!m_lookaheadUpdate)
2480 {
2481 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2482 }
2483
2484 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2485 {
2486 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
2487 }
2488
2489 std::string pakPassName = "PAK_PASS[" + std::to_string(GetCurrentPass())+"]";
2490 CODECHAL_DEBUG_TOOL(
2491 CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
2492 &cmdBuffer,
2493 CODECHAL_NUM_MEDIA_STATES,
2494 pakPassName.data()));)
2495
2496 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2497
2498 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2499 {
2500 bool renderingFlags = m_videoContextUsesNullHw;
2501
2502 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
2503
2504 CODECHAL_DEBUG_TOOL(
2505 if (m_mmcState)
2506 {
2507 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
2508 }
2509 )
2510
2511 if (IsLastPass() &&
2512 m_signalEnc &&
2513 m_currRefSync &&
2514 !Mos_ResourceIsNull(&m_currRefSync->resSyncObject))
2515 {
2516 // signal semaphore
2517 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
2518 syncParams.GpuContext = m_videoContext;
2519 syncParams.presSyncResource = &m_currRefSync->resSyncObject;
2520
2521 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
2522 m_currRefSync->uiSemaphoreObjCount++;
2523 m_currRefSync->bInUsed = true;
2524 }
2525 }
2526
2527 // HuC FW outputs are ready at this point if single task phase is enabled
2528 if (m_vdencHucUsed && m_singleTaskPhaseSupported)
2529 {
2530 // HuC Output STF=1
2531 CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false));
2532 }
2533
2534 // Reset parameters for next PAK execution
2535 if (IsLastPass())
2536 {
2537 if (!m_singleTaskPhaseSupported)
2538 {
2539 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2540 }
2541
2542 m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS;
2543
2544 if (m_hevcSeqParams->ParallelBRC)
2545 {
2546 m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite =
2547 (m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
2548 }
2549
2550 m_newPpsHeader = 0;
2551 m_newSeqHeader = 0;
2552 m_frameNum++;
2553 }
2554
2555 return eStatus;
2556 }
2557
ReadHcpStatus(PMOS_COMMAND_BUFFER cmdBuffer)2558 MOS_STATUS CodechalVdencHevcState::ReadHcpStatus(PMOS_COMMAND_BUFFER cmdBuffer)
2559 {
2560 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2561
2562 CODECHAL_ENCODE_FUNCTION_ENTER;
2563
2564 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2565
2566 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::ReadHcpStatus(cmdBuffer));
2567
2568 auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
2569 // Slice Size Conformance
2570 if (m_hevcSeqParams->SliceSizeControl)
2571 {
2572 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2573 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2574 miStoreRegMemParams.presStoreBuffer = m_resSliceCountBuffer;
2575 miStoreRegMemParams.dwOffset = 0;
2576 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncSliceCountRegOffset;
2577 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2578
2579 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2580 miStoreRegMemParams.presStoreBuffer = m_resVdencModeTimerBuffer;
2581 miStoreRegMemParams.dwOffset = 0;
2582 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncVdencModeTimerRegOffset;
2583 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2584 }
2585
2586 if (m_vdencHucUsed)
2587 {
2588 // Store PAK frameSize MMIO to PakInfo buffer
2589 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2590 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2591 miStoreRegMemParams.presStoreBuffer = m_resVdencBrcUpdateDmemBufferPtr[0];
2592 miStoreRegMemParams.dwOffset = 0;
2593 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
2594 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2595 }
2596
2597 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadImageStatus(cmdBuffer))
2598
2599 return eStatus;
2600 }
2601
SetSequenceStructs()2602 MOS_STATUS CodechalVdencHevcState::SetSequenceStructs()
2603 {
2604 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2605
2606 CODECHAL_ENCODE_FUNCTION_ENTER;
2607
2608 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::SetSequenceStructs());
2609
2610 switch (m_hevcSeqParams->TargetUsage)
2611 {
2612 case 1: case 2: // Quality mode
2613 m_hevcSeqParams->TargetUsage = 1;
2614 break;
2615 case 3: case 4: case 5: // Normal mode
2616 m_hevcSeqParams->TargetUsage = 4;
2617 break;
2618 case 6: case 7: // Speed mode
2619 m_hevcSeqParams->TargetUsage = 7;
2620 break;
2621 default:
2622 m_hevcSeqParams->TargetUsage = 4;
2623 break;
2624 }
2625
2626 m_targetUsage = (uint32_t)m_hevcSeqParams->TargetUsage;
2627
2628 // enable motion adaptive under game streamming scenario for better quality
2629 if (m_hevcSeqParams->ScenarioInfo == ESCENARIO_GAMESTREAMING)
2630 {
2631 m_enableMotionAdaptive = true;
2632 }
2633
2634 // ACQP is by default disabled, enable it when SSC/QpAdjust required.
2635 if (m_hevcSeqParams->SliceSizeControl == true ||
2636 m_hevcSeqParams->QpAdjustment == true)
2637 {
2638 m_hevcVdencAcqpEnabled = true;
2639 }
2640
2641 // Get row store cache offset as all the needed information is got here
2642 if (m_vdencInterface->IsRowStoreCachingSupported())
2643 {
2644 MHW_VDBOX_ROWSTORE_PARAMS rowStoreParams;
2645 rowStoreParams.Mode = m_mode;
2646 rowStoreParams.dwPicWidth = m_frameWidth;
2647 rowStoreParams.ucChromaFormat = m_chromaFormat;
2648 rowStoreParams.ucBitDepthMinus8 = m_hevcSeqParams->bit_depth_luma_minus8;
2649 rowStoreParams.ucLCUSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
2650 // VDEnc only support LCU64 for now
2651 CODECHAL_ENCODE_ASSERT(rowStoreParams.ucLCUSize == MAX_LCU_SIZE);
2652 m_hwInterface->SetRowstoreCachingOffsets(&rowStoreParams);
2653 }
2654
2655 m_lookaheadDepth = m_hevcSeqParams->LookaheadDepth;
2656 m_lookaheadPass = (m_lookaheadDepth > 0) && (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP);
2657
2658 return eStatus;
2659 }
2660
SetPictureStructs()2661 MOS_STATUS CodechalVdencHevcState::SetPictureStructs()
2662 {
2663 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2664
2665 CODECHAL_ENCODE_FUNCTION_ENTER;
2666
2667 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::SetPictureStructs());
2668
2669 m_virtualEngineBbIndex = m_currOriginalPic.FrameIdx;
2670
2671 //Enable only for TU1
2672 if (m_hevcSeqParams->TargetUsage != 1)
2673 {
2674 m_hmeEnabled = m_b16XMeEnabled = m_b32XMeEnabled = false;
2675 m_16xMeSupported = false;
2676 }
2677
2678 // SSC can be satisfied in single VDEnc+PAK pass when required.
2679 // However it is not 100% guaranteed due to delay in HW.
2680 // When it happens, PAK would indicate SSC violation in MMIO register
2681 // and HuC would adjust SSC threshold and triggers another VDEnc+PAK pass.
2682 // SSC requires HuC for all target usages. (allow 1 pass SSC temporarily for testing purpose)
2683 if (m_hevcSeqParams->SliceSizeControl)
2684 {
2685 m_vdencHuCConditional2ndPass = true;
2686 }
2687
2688 // Weighted Prediction is supported only with VDEnc, only applicable to P/B frames
2689 if (m_hevcPicParams->weighted_pred_flag || m_hevcPicParams->weighted_bipred_flag)
2690 {
2691 // with SAO, needs to increase total number of passes to 3 later (2 for SAO, 1 for WP)
2692 m_hevcVdencWeightedPredEnabled = true;
2693 m_vdencHuCConditional2ndPass = true;
2694
2695 // Set ACQP enabled if GPU base WP is required.
2696 if(m_hevcPicParams->bEnableGPUWeightedPrediction)
2697 {
2698 m_hevcVdencAcqpEnabled = true;
2699 }
2700 }
2701
2702 if (m_brcEnabled) // VDEnc BRC supports maximum 2 PAK passes
2703 {
2704 if (m_hevcPicParams->BRCPrecision == 1) // single-pass BRC, App requirment with first priority
2705 {
2706 m_numPasses = 0;
2707 // There is no need of additional pass for SSC, violation rate could be high but ok
2708 }
2709 else if (m_multipassBrcSupported) // multi-pass BRC is supported
2710 {
2711 m_numPasses = CODECHAL_VDENC_BRC_NUM_OF_PASSES - 1;
2712 m_vdencHuCConditional2ndPass = true;
2713 }
2714 else
2715 {
2716 m_numPasses = 0;
2717 }
2718
2719 m_vdencBrcEnabled = true;
2720 m_hevcVdencAcqpEnabled = false; // when BRC is enabled, ACQP has to be turned off
2721 }
2722 else // CQP, ACQP
2723 {
2724 m_numPasses = 0;
2725
2726 // ACQP + SSC, ACQP + WP. CQP + SSC/WP donot need 2nd pass
2727 // driver programs 2nd pass, but it will be decided by conditional batch buffer end cmd to execute 2nd pass
2728 if (m_vdencHuCConditional2ndPass && m_hevcVdencAcqpEnabled)
2729 {
2730 m_numPasses += 1;
2731 }
2732 }
2733
2734 CODECHAL_ENCODE_VERBOSEMESSAGE("m_numPasses = %d",m_numPasses);
2735
2736 m_vdencHucUsed = m_hevcVdencAcqpEnabled || m_vdencBrcEnabled;
2737
2738 // VDEnc always needs to enable due to pak fractional QP features
2739 // In VDENC mode, this field "Cu_Qp_Delta_Enabled_Flag" should always be set to 1.
2740 CODECHAL_ENCODE_ASSERT(m_hevcPicParams->cu_qp_delta_enabled_flag == 1);
2741
2742 // Restriction: If RollingI is enabled, ROI needs to be disabled
2743 if (m_hevcPicParams->bEnableRollingIntraRefresh)
2744 {
2745 m_hevcPicParams->NumROI = 0;
2746 }
2747
2748 //VDEnc StreamIn enabled if case of ROI (All frames), DirtyRect and SHME (ldB frames)
2749
2750 m_vdencStreamInEnabled = (m_vdencEnabled) && (m_hevcPicParams->NumROI ||
2751 (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType)) || (m_b16XMeEnabled));
2752
2753 CODECHAL_ENCODE_CHK_STATUS_RETURN(PrepareVDEncStreamInData());
2754
2755 return eStatus;
2756 }
2757
PrepareVDEncStreamInData()2758 MOS_STATUS CodechalVdencHevcState::PrepareVDEncStreamInData()
2759 {
2760 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2761
2762 CODECHAL_ENCODE_FUNCTION_ENTER;
2763
2764 if (m_vdencStreamInEnabled && m_encodeParams.bMbQpDataEnabled)
2765 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupMbQpStreamIn(&m_resVdencStreamInBuffer[m_currRecycledBufIdx]));
2766
2767 if (m_vdencStreamInEnabled && m_hevcPicParams->NumROI)
2768 {
2769 ProcessRoiDeltaQp();
2770
2771 if (m_vdencHucUsed && !m_vdencNativeROIEnabled)
2772 {
2773 //ForceQp ROI in ACQP, BRC mode only
2774 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBRCROIStreamIn(&m_resVdencStreamInBuffer[m_currRecycledBufIdx], &m_vdencDeltaQpBuffer[m_currRecycledBufIdx]));
2775 }
2776 else
2777 {
2778 //Native ROI
2779 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupROIStreamIn(&(m_resVdencStreamInBuffer[m_currRecycledBufIdx])));
2780 }
2781 }
2782 else if (m_vdencStreamInEnabled && (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType)))
2783 {
2784 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupDirtyRectStreamIn(&(m_resVdencStreamInBuffer[m_currRecycledBufIdx])));
2785 }
2786 return eStatus;
2787 }
2788
CalcScaledDimensions()2789 MOS_STATUS CodechalVdencHevcState::CalcScaledDimensions()
2790 {
2791 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2792
2793 CODECHAL_ENCODE_FUNCTION_ENTER;
2794
2795 // HME Scaling WxH
2796 m_downscaledWidthInMb4x =
2797 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_4x);
2798 m_downscaledHeightInMb4x =
2799 CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_4x);
2800 m_downscaledWidth4x =
2801 m_downscaledWidthInMb4x * CODECHAL_MACROBLOCK_WIDTH;
2802 m_downscaledHeight4x =
2803 m_downscaledHeightInMb4x * CODECHAL_MACROBLOCK_HEIGHT;
2804
2805 // SuperHME Scaling WxH
2806 m_downscaledWidthInMb16x =
2807 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_16x);
2808 m_downscaledHeightInMb16x =
2809 CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_16x);
2810 m_downscaledWidth16x =
2811 m_downscaledWidthInMb16x * CODECHAL_MACROBLOCK_WIDTH;
2812 m_downscaledHeight16x =
2813 m_downscaledHeightInMb16x * CODECHAL_MACROBLOCK_HEIGHT;
2814
2815 return eStatus;
2816 }
2817
ValidateRefFrameData(PCODEC_HEVC_ENCODE_SLICE_PARAMS slcParams)2818 MOS_STATUS CodechalVdencHevcState::ValidateRefFrameData(PCODEC_HEVC_ENCODE_SLICE_PARAMS slcParams)
2819 {
2820 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2821
2822 CODECHAL_ENCODE_CHK_NULL_RETURN(slcParams);
2823
2824 uint8_t maxNumRef0 = m_numMaxVdencL0Ref;
2825 uint8_t maxNumRef1 = m_numMaxVdencL1Ref;
2826
2827 if (slcParams->num_ref_idx_l0_active_minus1 > maxNumRef0 - 1)
2828 {
2829 CODECHAL_ENCODE_ASSERT(false);
2830 slcParams->num_ref_idx_l0_active_minus1 = maxNumRef0 - 1;
2831 }
2832
2833 if (slcParams->num_ref_idx_l1_active_minus1 > maxNumRef1 - 1)
2834 {
2835 CODECHAL_ENCODE_ASSERT(false);
2836 slcParams->num_ref_idx_l1_active_minus1 = maxNumRef1 - 1;
2837 }
2838
2839 // For HEVC VDEnc, L0 and L1 must contain the same (number of) elements. If not, the input slc param is not good for VDEnc.
2840 if (slcParams->num_ref_idx_l0_active_minus1 != slcParams->num_ref_idx_l1_active_minus1)
2841 {
2842 CODECHAL_ENCODE_ASSERT(false);
2843 slcParams->num_ref_idx_l1_active_minus1 = slcParams->num_ref_idx_l0_active_minus1;
2844 }
2845
2846 for (auto j = 0; j < CODEC_MAX_NUM_REF_FRAME_HEVC; j++)
2847 {
2848 if (slcParams->RefPicList[0][j].PicEntry != slcParams->RefPicList[1][j].PicEntry)
2849 {
2850 CODECHAL_ENCODE_ASSERT(false);
2851 eStatus = MOS_STATUS_INVALID_PARAMETER;
2852 return eStatus;
2853 }
2854 }
2855
2856 return eStatus;
2857 }
2858
InitializePicture(const EncoderParams & params)2859 MOS_STATUS CodechalVdencHevcState::InitializePicture(const EncoderParams& params)
2860 {
2861 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2862
2863 CODECHAL_ENCODE_FUNCTION_ENTER;
2864
2865 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::InitializePicture(params));
2866
2867 m_resVdencStatsBuffer = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, vdencStats);
2868 m_resPakStatsBuffer = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakStats);
2869 m_resSliceCountBuffer = &m_sliceCountBuffer;
2870 m_resVdencModeTimerBuffer = &m_vdencModeTimerBuffer;
2871
2872 CODECHAL_DEBUG_TOOL(
2873 if (m_newSeq) {
2874 CODECHAL_ENCODE_CHK_STATUS_RETURN(PopulateConstParam());
2875 }
2876
2877 for (uint32_t i = 0; i < m_numSlices; i++) {
2878 CODECHAL_ENCODE_CHK_STATUS_RETURN(PopulateDdiParam(
2879 m_hevcSeqParams,
2880 m_hevcPicParams,
2881 &m_hevcSliceParams[i]));
2882 })
2883 return eStatus;
2884 }
2885
UserFeatureKeyReport()2886 MOS_STATUS CodechalVdencHevcState::UserFeatureKeyReport()
2887 {
2888 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2889
2890 CODECHAL_ENCODE_FUNCTION_ENTER;
2891
2892 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::UserFeatureKeyReport());
2893
2894 #if (_DEBUG || _RELEASE_INTERNAL)
2895 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_VDENC_IN_USE_ID, m_vdencEnabled);
2896 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_ACQP_ENABLE_ID, m_hevcVdencAcqpEnabled);
2897 #endif
2898
2899 return eStatus;
2900 }
2901
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)2902 MOS_STATUS CodechalVdencHevcState::GetStatusReport(
2903 EncodeStatus *encodeStatus,
2904 EncodeStatusReport *encodeStatusReport)
2905 {
2906 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2907
2908 CODECHAL_ENCODE_FUNCTION_ENTER;
2909
2910 // common initilization
2911 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::GetStatusReport(encodeStatus, encodeStatusReport));
2912
2913 if (m_vdencHucUsed)
2914 {
2915 // Num of VDEn BRC pass is stored at PakMmio DW0
2916 MOS_LOCK_PARAMS lockFlags;
2917 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2918 lockFlags.WriteOnly = true;
2919
2920 MOS_RESOURCE *pakInfoBuffer = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo);
2921 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, pakInfoBuffer, &lockFlags);
2922 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
2923 uint32_t* insertion = (uint32_t*)(data + sizeof(uint32_t));
2924 *insertion = encodeStatus->ImageStatusCtrl.hcpTotalPass << 24;
2925 m_osInterface->pfnUnlockResource(m_osInterface, pakInfoBuffer);
2926 pakInfoBuffer = nullptr;
2927 }
2928
2929
2930 MOS_LOCK_PARAMS lockFlags;
2931 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2932 lockFlags.ReadOnly = 1;
2933
2934 uint32_t* sliceSize = nullptr;
2935 // pSliceSize is set/ allocated only when dynamic slice is enabled. Cannot use SSC flag here, as it is an asynchronous call
2936 if (encodeStatus->sliceReport.pSliceSize)
2937 {
2938 sliceSize = (uint32_t*)m_osInterface->pfnLockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize, &lockFlags);
2939 CODECHAL_ENCODE_CHK_NULL_RETURN(sliceSize);
2940
2941 encodeStatusReport->NumberSlices = encodeStatus->sliceReport.NumberSlices;
2942 encodeStatusReport->SizeOfSliceSizesBuffer = sizeof(uint16_t) * encodeStatus->sliceReport.NumberSlices;
2943 encodeStatusReport->SliceSizeOverflow = (encodeStatus->sliceReport.SliceSizeOverflow >> 16) & 1;
2944 encodeStatusReport->pSliceSizes = (uint16_t*)sliceSize;
2945
2946 uint16_t prevCumulativeSliceSize = 0;
2947 // HW writes out a DW for each slice size. Copy in place the DW into 16bit fields expected by App
2948 for (auto sliceCount = 0; sliceCount < encodeStatus->sliceReport.NumberSlices; sliceCount++)
2949 {
2950 // PAK output the sliceSize at 16DW intervals.
2951 CODECHAL_ENCODE_CHK_NULL_RETURN(&sliceSize[sliceCount * 16]);
2952 uint32_t CurrAccumulatedSliceSize = sliceSize[sliceCount * 16];
2953
2954 //convert cummulative slice size to individual, first slice may have PPS/SPS,
2955 encodeStatusReport->pSliceSizes[sliceCount] = CurrAccumulatedSliceSize - prevCumulativeSliceSize;
2956 prevCumulativeSliceSize += encodeStatusReport->pSliceSizes[sliceCount];
2957 }
2958 m_osInterface->pfnUnlockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize);
2959 }
2960
2961 encodeStatusReport->cqmHint = 0xFF;
2962 if (m_lookaheadPass && m_lookaheadUpdate)
2963 {
2964 encodeStatusReport->cqmHint = (uint8_t)(encodeStatus->lookaheadStatus & 0xFF);
2965 if (encodeStatusReport->cqmHint > 1)
2966 {
2967 // Currently only 0x00 and 0x01 are valid. Report invalid (0xFF) for other values.
2968 encodeStatusReport->cqmHint = 0xFF;
2969 }
2970 }
2971
2972 return eStatus;
2973 }
2974
AllocatePakResources()2975 MOS_STATUS CodechalVdencHevcState::AllocatePakResources()
2976 {
2977 CODECHAL_ENCODE_FUNCTION_ENTER;
2978
2979 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2980
2981 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::AllocatePakResources());
2982
2983 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
2984 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
2985 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
2986 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
2987 allocParamsForBufferLinear.Format = Format_Buffer;
2988
2989 // Allocate Frame Statistics Streamout Data Destination Buffer. DW98-100 in HCP PipeBufAddr command
2990 uint32_t size = MOS_ALIGN_CEIL(m_sizeOfHcpPakFrameStats * m_maxTileNumber, CODECHAL_PAGE_SIZE); //Each tile has 8 cache size bytes of data, Align to page is HuC requirement
2991 allocParamsForBufferLinear.dwBytes = size;
2992 allocParamsForBufferLinear.pBufName = "FrameStatStreamOutBuffer";
2993
2994 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
2995 m_osInterface,
2996 &allocParamsForBufferLinear,
2997 &m_resFrameStatStreamOutBuffer),
2998 "Failed to create VDENC FrameStatStreamOutBuffer Buffer");
2999
3000 // PAK Statistics buffer
3001 size = MOS_ALIGN_CEIL(m_vdencBrcPakStatsBufferSize, CODECHAL_PAGE_SIZE);
3002 CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
3003 m_standard, size, 1, pakStats, "pakStats"));
3004
3005 // Slice Count buffer 1 DW = 4 Bytes
3006 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
3007 allocParamsForBufferLinear.pBufName = "Slice Count Buffer";
3008
3009 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3010 m_osInterface,
3011 &allocParamsForBufferLinear,
3012 &m_sliceCountBuffer),
3013 "Failed to create VDENC Slice Count Buffer");
3014
3015 // VDEncMode Timer buffer 1 DW = 4 Bytes
3016 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
3017 allocParamsForBufferLinear.pBufName = "VDEncMode Timer Buffer";
3018
3019 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3020 m_osInterface,
3021 &allocParamsForBufferLinear,
3022 &m_vdencModeTimerBuffer),
3023 "Failed to create VDEncMode Timer Buffer");
3024
3025 return eStatus;
3026 }
3027
FreePakResources()3028 MOS_STATUS CodechalVdencHevcState::FreePakResources()
3029 {
3030 CODECHAL_ENCODE_FUNCTION_ENTER;
3031
3032 m_osInterface->pfnFreeResource(m_osInterface, &m_resFrameStatStreamOutBuffer);
3033 m_osInterface->pfnFreeResource(m_osInterface, &m_sliceCountBuffer);
3034 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencModeTimerBuffer);
3035
3036 for (uint32_t i = 0; i < CODECHAL_ENCODE_STATUS_NUM; i++)
3037 {
3038 if (!Mos_ResourceIsNull(&m_resSliceReport[i]))
3039 {
3040 m_osInterface->pfnFreeResource(m_osInterface, &m_resSliceReport[i]);
3041 }
3042 }
3043
3044 return CodechalEncodeHevcBase::FreePakResources();
3045 }
3046
AllocateEncResources()3047 MOS_STATUS CodechalVdencHevcState::AllocateEncResources()
3048 {
3049 CODECHAL_ENCODE_FUNCTION_ENTER;
3050
3051 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3052
3053 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3054 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3055 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3056 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3057 allocParamsForBufferLinear.Format = Format_Buffer;
3058
3059 // PAK stream-out buffer
3060 allocParamsForBufferLinear.dwBytes = CODECHAL_HEVC_PAK_STREAMOUT_SIZE;
3061 allocParamsForBufferLinear.pBufName = "Pak StreamOut Buffer";
3062 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3063 m_osInterface,
3064 &allocParamsForBufferLinear,
3065 &m_resStreamOutBuffer[0]),
3066 "Failed to allocate Pak Stream Out Buffer.");
3067
3068 // VDENC Intra Row Store Scratch buffer
3069 // 1 cacheline per MB
3070 uint32_t size = m_picWidthInMb * CODECHAL_CACHELINE_SIZE;
3071 CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
3072 m_standard, size, 1, vdencIntraRowStoreScratch, "vdencIntraRowStoreScratch"));
3073
3074 // VDENC Statistics buffer, only needed for BRC
3075 // The size is 19 CL for each tile, allocated with worst case, optimize later
3076 size = MOS_ALIGN_CEIL(m_vdencBrcStatsBufferSize * m_maxTileNumber, CODECHAL_PAGE_SIZE);
3077 CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
3078 m_standard, size, 1, vdencStats, "vdencStats"));
3079
3080 if (m_hucCmdInitializer)
3081 {
3082 m_hucCmdInitializer->CmdInitializerAllocateResources(m_hwInterface);
3083 }
3084
3085 return eStatus;
3086 }
3087
FreeEncResources()3088 MOS_STATUS CodechalVdencHevcState::FreeEncResources()
3089 {
3090 CODECHAL_ENCODE_FUNCTION_ENTER;
3091
3092 // PAK stream-out buffer de-allocated inside CodecHalEncodeReleaseResources()
3093
3094 if (m_hucCmdInitializer)
3095 {
3096 m_hucCmdInitializer->CmdInitializerFreeResources();
3097 }
3098 MOS_Delete(m_hucCmdInitializer);
3099
3100 return MOS_STATUS_SUCCESS;
3101 }
3102
AllocateBrcResources()3103 MOS_STATUS CodechalVdencHevcState::AllocateBrcResources()
3104 {
3105 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3106
3107 CODECHAL_ENCODE_FUNCTION_ENTER;
3108
3109 // initiate allocation paramters and lock flags
3110 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3111 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3112 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3113 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3114 allocParamsForBufferLinear.Format = Format_Buffer;
3115
3116 allocParamsForBufferLinear.dwBytes = m_hevcBrcPakStatisticsSize;
3117 allocParamsForBufferLinear.pBufName = "BRC PAK Statistics Buffer";
3118
3119 MOS_LOCK_PARAMS lockFlagsWriteOnly;
3120 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3121 lockFlagsWriteOnly.WriteOnly = true;
3122
3123 uint8_t *data = nullptr;
3124 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
3125 {
3126 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3127 m_osInterface,
3128 &allocParamsForBufferLinear,
3129 &m_vdencBrcBuffers.resBrcPakStatisticBuffer[i]),
3130 "Failed to allocate BRC PAK Statistics Buffer.");
3131
3132 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint8_t *)m_osInterface->pfnLockResource(
3133 m_osInterface,
3134 &(m_vdencBrcBuffers.resBrcPakStatisticBuffer[i]),
3135 &lockFlagsWriteOnly));
3136
3137 MOS_ZeroMemory(data, m_hevcBrcPakStatisticsSize);
3138 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcBuffers.resBrcPakStatisticBuffer[i]);
3139 }
3140
3141 // PAK Info buffer
3142 uint32_t size = MOS_ALIGN_CEIL(sizeof(CodechalVdencHevcPakInfo), CODECHAL_PAGE_SIZE);
3143 CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
3144 m_standard, size, 1, pakInfo, "pakInfo"));
3145
3146 // HuC FW Region 6: Data Buffer of Current Picture
3147 // Data (1024 bytes) for current
3148 // Data (1024 bytes) for ref0
3149 // Data (1024 bytes) for ref1
3150 // Data (1024 bytes) for ref2
3151 allocParamsForBufferLinear.dwBytes = CODECHAL_PAGE_SIZE * 4;
3152 allocParamsForBufferLinear.pBufName = "Data from Pictures Buffer for Weighted Prediction";
3153
3154 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3155 m_osInterface,
3156 &allocParamsForBufferLinear,
3157 &m_dataFromPicsBuffer),
3158 "Failed to create Data from Pictures Buffer for Weighted Prediction");
3159
3160 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
3161 {
3162 // Delta QP for ROI Buffer
3163 // 1 byte for each 32x32 block, maximum region size is 8192 bytes for 4K/2K resolution, currently the allocation size is fixed
3164 allocParamsForBufferLinear.dwBytes = m_deltaQpRoiBufferSize;
3165 allocParamsForBufferLinear.pBufName = "Delta QP for ROI Buffer";
3166
3167 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3168 m_osInterface,
3169 &allocParamsForBufferLinear,
3170 &m_vdencDeltaQpBuffer[k]),
3171 "Failed to create Delta QP for ROI Buffer");
3172
3173 // BRC update DMEM
3174 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencBrcUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
3175 allocParamsForBufferLinear.pBufName = "VDENC BrcUpdate DmemBuffer";
3176
3177 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES_FOR_TILE_REPLAY; i++)
3178 {
3179 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3180 m_osInterface,
3181 &allocParamsForBufferLinear,
3182 &m_vdencBrcUpdateDmemBuffer[k][i]),
3183 "Failed to create VDENC BrcUpdate DmemBuffer");
3184
3185 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint8_t *)m_osInterface->pfnLockResource(
3186 m_osInterface,
3187 &m_vdencBrcUpdateDmemBuffer[k][i],
3188 &lockFlagsWriteOnly));
3189
3190 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
3191 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcUpdateDmemBuffer[k][i]);
3192 }
3193
3194 // BRC init/reset DMEM
3195 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencBrcInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
3196 allocParamsForBufferLinear.pBufName = "VDENC BrcInit DmemBuffer";
3197
3198 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3199 m_osInterface,
3200 &allocParamsForBufferLinear,
3201 &m_vdencBrcInitDmemBuffer[k]),
3202 "Failed to create VDENC BrcInit DmemBuffer");
3203
3204 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint8_t *)m_osInterface->pfnLockResource(
3205 m_osInterface,
3206 &m_vdencBrcInitDmemBuffer[k],
3207 &lockFlagsWriteOnly));
3208
3209 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
3210 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcInitDmemBuffer[k]);
3211
3212 // Const Data buffer
3213 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencBrcConstDataBufferSize, CODECHAL_PAGE_SIZE);
3214 allocParamsForBufferLinear.pBufName = "VDENC BRC Const Data Buffer";
3215
3216 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3217 m_osInterface,
3218 &allocParamsForBufferLinear,
3219 &m_vdencBrcConstDataBuffer[k]),
3220 "Failed to create VDENC BRC Const Data Buffer");
3221
3222 // VDEnc read batch buffer (input for HuC FW)
3223 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
3224 allocParamsForBufferLinear.pBufName = "VDENC Read Batch Buffer";
3225
3226 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
3227 {
3228 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3229 m_osInterface,
3230 &allocParamsForBufferLinear,
3231 &m_vdencReadBatchBuffer[k][i]),
3232 "Failed to allocate VDENC Read Batch Buffer");
3233 }
3234
3235 // Lookahead Update DMEM
3236 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencLaUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
3237 allocParamsForBufferLinear.pBufName = "VDENC Lookahead update Dmem Buffer";
3238
3239 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3240 m_osInterface,
3241 &allocParamsForBufferLinear,
3242 &m_vdencLaUpdateDmemBuffer[k]),
3243 "Failed to create VDENC Lookahead Update Dmem Buffer");
3244 }
3245
3246 for (auto j = 0; j < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; j++)
3247 {
3248 // VDENC uses second level batch buffer
3249 MOS_ZeroMemory(&m_vdenc2ndLevelBatchBuffer[j], sizeof(MHW_BATCH_BUFFER));
3250 m_vdenc2ndLevelBatchBuffer[j].bSecondLevel = true;
3251 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
3252 m_osInterface,
3253 &m_vdenc2ndLevelBatchBuffer[j],
3254 nullptr,
3255 m_hwInterface->m_vdenc2ndLevelBatchBufferSize));
3256 }
3257
3258 // BRC history buffer
3259 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_brcHistoryBufSize, CODECHAL_PAGE_SIZE);
3260 allocParamsForBufferLinear.pBufName = "VDENC BRC History Buffer";
3261
3262 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3263 m_osInterface,
3264 &allocParamsForBufferLinear,
3265 &m_vdencBrcHistoryBuffer),
3266 "Failed to create VDENC BRC History Buffer");
3267
3268 // Lookahead Init DMEM
3269 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencLaInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
3270 allocParamsForBufferLinear.pBufName = "VDENC Lookahead Init DmemBuffer";
3271
3272 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3273 m_osInterface,
3274 &allocParamsForBufferLinear,
3275 &m_vdencLaInitDmemBuffer),
3276 "Failed to create VDENC Lookahead Init DmemBuffer");
3277
3278 // Lookahead history buffer
3279 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_LaHistoryBufSize, CODECHAL_PAGE_SIZE);
3280 allocParamsForBufferLinear.pBufName = "VDENC Lookahead History Buffer";
3281
3282 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3283 m_osInterface,
3284 &allocParamsForBufferLinear,
3285 &m_vdencLaHistoryBuffer),
3286 "Failed to create VDENC Lookahead History Buffer");
3287
3288 // Debug buffer
3289 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_brcDebugBufSize, CODECHAL_PAGE_SIZE);
3290 allocParamsForBufferLinear.pBufName = "VDENC BRC Debug Buffer";
3291
3292 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3293 m_osInterface,
3294 &allocParamsForBufferLinear,
3295 &m_vdencBrcDbgBuffer),
3296 "Failed to create VDENC BRC Debug Buffer");
3297
3298 // Output ROI Streamin Buffer
3299 // 16 DWORDs (VDENC_HEVC_VP9_STREAMIN_STATE) for each 32x32 block, maximum region size is 65536 bytes for 8K/8K resolution, currently the allocation size is fixed
3300 allocParamsForBufferLinear.dwBytes = m_roiStreamInBufferSize;
3301 allocParamsForBufferLinear.pBufName = "Output ROI Streamin Buffer";
3302
3303 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
3304 m_osInterface,
3305 &allocParamsForBufferLinear,
3306 &m_vdencOutputROIStreaminBuffer));
3307
3308 // Buffer to store VDEnc frame statistics for lookahead BRC
3309 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_brcLooaheadStatsBufferSize, CODECHAL_PAGE_SIZE);
3310 allocParamsForBufferLinear.pBufName = "VDENC Lookahead Statistics Buffer";
3311
3312 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3313 m_osInterface,
3314 &allocParamsForBufferLinear,
3315 &m_vdencLaStatsBuffer),
3316 "Failed to create VDENC Lookahead Statistics Buffer");
3317
3318 CodechalVdencHevcLaStats *lookaheadInfo = (CodechalVdencHevcLaStats *)m_osInterface->pfnLockResource(
3319 m_osInterface,
3320 &m_vdencLaStatsBuffer,
3321 &lockFlagsWriteOnly);
3322 CODECHAL_ENCODE_CHK_NULL_RETURN(lookaheadInfo);
3323 MOS_ZeroMemory(lookaheadInfo, allocParamsForBufferLinear.dwBytes);
3324 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaStatsBuffer);
3325
3326 return eStatus;
3327 }
3328
FreeBrcResources()3329 MOS_STATUS CodechalVdencHevcState::FreeBrcResources()
3330 {
3331 CODECHAL_ENCODE_FUNCTION_ENTER;
3332
3333 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
3334 {
3335 m_osInterface->pfnFreeResource(
3336 m_osInterface,
3337 &m_vdencBrcBuffers.resBrcPakStatisticBuffer[i]);
3338 }
3339
3340 m_osInterface->pfnFreeResource(m_osInterface, &m_dataFromPicsBuffer);
3341
3342 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
3343 {
3344 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencDeltaQpBuffer[k]);
3345
3346 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
3347 {
3348 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencReadBatchBuffer[k][i]);
3349 }
3350
3351 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES_FOR_TILE_REPLAY; i++)
3352 {
3353 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcUpdateDmemBuffer[k][i]);
3354 }
3355
3356 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcInitDmemBuffer[k]);
3357 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcConstDataBuffer[k]);
3358 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencLaUpdateDmemBuffer[k]);
3359 }
3360
3361 for (auto j = 0; j < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; j++)
3362 {
3363 Mhw_FreeBb(m_osInterface, &m_vdenc2ndLevelBatchBuffer[j], nullptr);
3364 }
3365
3366 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcHistoryBuffer);
3367 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcDbgBuffer);
3368 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencOutputROIStreaminBuffer);
3369 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencLaStatsBuffer);
3370 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencLaInitDmemBuffer);
3371 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencLaHistoryBuffer);
3372
3373 return MOS_STATUS_SUCCESS;
3374 }
3375
Initialize(CodechalSetting * settings)3376 MOS_STATUS CodechalVdencHevcState::Initialize(CodechalSetting * settings)
3377 {
3378 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3379
3380 CODECHAL_ENCODE_FUNCTION_ENTER;
3381
3382 // common initilization
3383 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::Initialize(settings));
3384
3385 m_vdencBrcBuffers.uiCurrBrcPakStasIdxForRead = 0;
3386 //Reading buffer is with 2 frames late for BRC kernel uses the PAK statstic info of the frame before the previous frame
3387 m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite =
3388 (m_vdencBrcBuffers.uiCurrBrcPakStasIdxForRead + 2) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
3389
3390 uint32_t vdencPictureStatesSize = 0, vdencPicturePatchListSize = 0;
3391 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencStateCommandsDataSize(
3392 CODECHAL_ENCODE_MODE_HEVC,
3393 &vdencPictureStatesSize,
3394 &vdencPicturePatchListSize));
3395
3396 //the following code used to calculate ulMBCodeSize:
3397 //pakObjCmdStreamOutDataSize = 2*BYTES_PER_DWORD*(numOfLcu*NUM_PAK_DWS_PER_LCU + numOfLcu*maxNumOfCUperLCU*NUM_DWS_PER_CU); // Multiply by 2 for sideband
3398 //const uint32_t maxNumOfCUperLCU = (64/8)*(64/8);
3399 // NUM_PAK_DWS_PER_LCU 5
3400 // NUM_DWS_PER_CU 8
3401 uint32_t numOfLCU = MOS_ROUNDUP_DIVIDE(m_frameWidth, MAX_LCU_SIZE) * MOS_ROUNDUP_DIVIDE(m_frameHeight, MAX_LCU_SIZE);
3402 m_mbCodeSize = MOS_ALIGN_CEIL(2 * sizeof(uint32_t) * numOfLCU * (5 + 64 * 8), CODECHAL_PAGE_SIZE);
3403
3404 m_defaultPictureStatesSize += vdencPictureStatesSize;
3405 m_defaultPicturePatchListSize += vdencPicturePatchListSize;
3406 m_extraPictureStatesSize += m_hwInterface->m_hucCommandBufferSize; // For slice size reporting, add the HuC copy commands
3407
3408 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
3409 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3410 MOS_UserFeature_ReadValue_ID(
3411 nullptr,
3412 __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
3413 &userFeatureData);
3414 m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
3415
3416 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3417 MOS_UserFeature_ReadValue_ID(
3418 nullptr,
3419 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_RDOQ_ENABLE_ID,
3420 &userFeatureData);
3421 m_hevcRdoqEnabled = userFeatureData.i32Data ? true : false;
3422
3423 // Multi-Pass BRC
3424 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3425 MOS_UserFeature_ReadValue_ID(
3426 nullptr,
3427 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_MULTIPASS_BRC_ENABLE_ID,
3428 &userFeatureData);
3429 m_multipassBrcSupported = (userFeatureData.i32Data) ? true : false;
3430
3431 if (m_codecFunction != CODECHAL_FUNCTION_PAK)
3432 {
3433 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3434 userFeatureData.i32Data = 1;
3435 userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
3436 MOS_UserFeature_ReadValue_ID(
3437 nullptr,
3438 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ME_ENABLE_ID,
3439 &userFeatureData);
3440 m_hmeSupported = (userFeatureData.i32Data) ? true : false;
3441
3442 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3443 userFeatureData.i32Data = 1;
3444 userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
3445 MOS_UserFeature_ReadValue_ID(
3446 nullptr,
3447 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_16xME_ENABLE_ID,
3448 &userFeatureData);
3449 m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
3450 }
3451
3452 if (m_codecFunction == CODECHAL_FUNCTION_ENC_VDENC_PAK)
3453 {
3454 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3455 MOS_UserFeature_ReadValue_ID(
3456 nullptr,
3457 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_ACQP_ENABLE_ID,
3458 &userFeatureData);
3459 m_hevcVdencAcqpEnabled = userFeatureData.i32Data ? true : false;
3460
3461 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3462 MOS_UserFeature_ReadValue_ID(
3463 nullptr,
3464 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_VQI_ENABLE_ID,
3465 &userFeatureData);
3466 m_hevcVisualQualityImprovement = userFeatureData.i32Data ? true : false;
3467
3468 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3469 MOS_UserFeature_ReadValue_ID(
3470 nullptr,
3471 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_ROUNDING_ENABLE_ID,
3472 &userFeatureData);
3473 m_hevcVdencRoundingEnabled = userFeatureData.i32Data ? true : false;
3474
3475 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3476 MOS_UserFeature_ReadValue_ID(
3477 nullptr,
3478 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_PAKOBJCMD_STREAMOUT_ENABLE_ID,
3479 &userFeatureData);
3480 m_vdencPakObjCmdStreamOutEnabled = userFeatureData.i32Data ? true : false;
3481
3482 #if (_DEBUG || _RELEASE_INTERNAL)
3483 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3484 MOS_UserFeature_ReadValue_ID(
3485 nullptr,
3486 __MEDIA_USER_FEATURE_VALUE_ENCODE_CQM_QP_THRESHOLD_ID,
3487 &userFeatureData);
3488 m_cqmQpThreshold = (uint8_t)userFeatureData.u32Data;
3489 #endif
3490 }
3491
3492 m_minScaledDimension = CODECHAL_ENCODE_MIN_SCALED_SURFACE_SIZE;
3493 m_minScaledDimensionInMb = (CODECHAL_ENCODE_MIN_SCALED_SURFACE_SIZE + 15) >> 4;
3494
3495 if (m_frameWidth < 128 || m_frameHeight < 128)
3496 {
3497 m_16xMeSupported = false;
3498 m_32xMeSupported = false;
3499 }
3500
3501 else if (m_frameWidth < 512 || m_frameHeight < 512)
3502 {
3503 m_16xMeSupported = true;
3504 m_32xMeSupported = false;
3505 }
3506
3507 else
3508 {
3509 m_16xMeSupported = true;
3510 m_32xMeSupported = true;
3511 }
3512
3513 if (m_16xMeSupported)
3514 {
3515 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3516 MOS_UserFeature_ReadValue_ID(
3517 nullptr,
3518 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_16xME_ENABLE_ID,
3519 &userFeatureData);
3520 m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
3521 }
3522
3523 if (m_32xMeSupported)
3524 {
3525 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3526 MOS_UserFeature_ReadValue_ID(
3527 nullptr,
3528 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_32xME_ENABLE_ID,
3529 &userFeatureData);
3530 m_32xMeSupported = (userFeatureData.i32Data) ? true : false;
3531 }
3532
3533 return eStatus;
3534 }
3535
CodechalVdencHevcState(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)3536 CodechalVdencHevcState::CodechalVdencHevcState(
3537 CodechalHwInterface* hwInterface,
3538 CodechalDebugInterface* debugInterface,
3539 PCODECHAL_STANDARD_INFO standardInfo)
3540 :CodechalEncodeHevcBase(hwInterface, debugInterface, standardInfo)
3541 {
3542 m_fieldScalingOutputInterleaved = false;
3543 m_2xMeSupported = false;
3544 m_combinedDownScaleAndDepthConversion = false;
3545 m_vdencBrcStatsBufferSize = m_brcStatsBufSize;
3546 m_vdencBrcPakStatsBufferSize = m_brcPakStatsBufSize;
3547 m_vdencLaInitDmemBufferSize = sizeof(CodechalVdencHevcLaDmem);
3548 m_vdencLaUpdateDmemBufferSize = sizeof(CodechalVdencHevcLaDmem);
3549
3550 MOS_ZeroMemory(&m_sliceCountBuffer, sizeof(m_sliceCountBuffer));
3551 MOS_ZeroMemory(&m_vdencModeTimerBuffer, sizeof(m_vdencModeTimerBuffer));
3552
3553 MOS_ZeroMemory(&m_vdencBrcBuffers, sizeof(m_vdencBrcBuffers));
3554 MOS_ZeroMemory(&m_dataFromPicsBuffer, sizeof(m_dataFromPicsBuffer));
3555 MOS_ZeroMemory(&m_vdencDeltaQpBuffer, sizeof(m_vdencDeltaQpBuffer));
3556 MOS_ZeroMemory(&m_vdencOutputROIStreaminBuffer, sizeof(m_vdencOutputROIStreaminBuffer));
3557 MOS_ZeroMemory(m_vdencBrcUpdateDmemBuffer, sizeof(m_vdencBrcUpdateDmemBuffer));
3558 MOS_ZeroMemory(&m_vdencBrcInitDmemBuffer, sizeof(m_vdencBrcInitDmemBuffer));
3559 MOS_ZeroMemory(&m_vdencBrcConstDataBuffer, sizeof(m_vdencBrcConstDataBuffer));
3560 MOS_ZeroMemory(&m_vdencBrcHistoryBuffer, sizeof(m_vdencBrcHistoryBuffer));
3561 MOS_ZeroMemory(&m_vdencReadBatchBuffer, sizeof(m_vdencReadBatchBuffer));
3562 MOS_ZeroMemory(&m_vdencReadBatchBuffer, sizeof(m_vdencGroup3BatchBuffer));
3563 MOS_ZeroMemory(&m_vdencBrcDbgBuffer, sizeof(m_vdencBrcDbgBuffer));
3564 MOS_ZeroMemory(&m_vdenc2ndLevelBatchBuffer, sizeof(m_vdenc2ndLevelBatchBuffer));
3565 MOS_ZeroMemory(m_resSliceReport, sizeof(m_resSliceReport));
3566 MOS_ZeroMemory(&m_vdencLaStatsBuffer, sizeof(m_vdencLaStatsBuffer));
3567
3568 }
3569
3570 #if USE_CODECHAL_DEBUG_TOOL
DumpHucBrcInit()3571 MOS_STATUS CodechalVdencHevcState::DumpHucBrcInit()
3572 {
3573 CODECHAL_ENCODE_FUNCTION_ENTER;
3574 int32_t currentPass = GetCurrentPass();
3575 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
3576 &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx],
3577 m_vdencBrcInitDmemBufferSize,
3578 currentPass,
3579 hucRegionDumpInit));
3580
3581 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3582 &m_vdencBrcHistoryBuffer,
3583 0,
3584 CODECHAL_VDENC_HEVC_BRC_HISTORY_BUF_SIZE,
3585 0,
3586 "_History",
3587 true,
3588 currentPass,
3589 hucRegionDumpInit));
3590 return MOS_STATUS_SUCCESS;
3591 }
3592
DumpHucBrcUpdate(bool isInput)3593 MOS_STATUS CodechalVdencHevcState::DumpHucBrcUpdate(bool isInput)
3594 {
3595 CODECHAL_ENCODE_FUNCTION_ENTER;
3596 int32_t currentPass = GetCurrentPass();
3597 if (isInput)
3598 {
3599 //Dump HucBrcUpdate input buffers
3600 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
3601 &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass],
3602 m_vdencBrcUpdateDmemBufferSize,
3603 currentPass,
3604 hucRegionDumpUpdate));
3605
3606 // Region 1 - VDENC Statistics Buffer dump
3607 auto vdencStatusBuffer = m_virtualAddrParams.regionParams[1].presRegion;
3608 auto vdencStatusOffset = m_virtualAddrParams.regionParams[1].dwOffset;
3609 if (vdencStatusBuffer)
3610 {
3611 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3612 vdencStatusBuffer,
3613 vdencStatusOffset,
3614 m_vdencBrcStatsBufferSize,
3615 1,
3616 "_VdencStats",
3617 true,
3618 currentPass,
3619 hucRegionDumpUpdate));
3620 }
3621
3622 // Region 2 - PAK Statistics Buffer dump
3623 auto frameStatStreamOutBuffer = m_virtualAddrParams.regionParams[2].presRegion;
3624 auto frameStatStreamOutOffset = m_virtualAddrParams.regionParams[2].dwOffset;
3625 if (frameStatStreamOutBuffer)
3626 {
3627 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3628 frameStatStreamOutBuffer,
3629 frameStatStreamOutOffset,
3630 m_vdencBrcPakStatsBufferSize,
3631 2,
3632 "_PakStats",
3633 true,
3634 currentPass,
3635 hucRegionDumpUpdate));
3636 }
3637
3638 // Region 3 - Input SLB Buffer
3639 auto vdencReadBatchBuffer = m_virtualAddrParams.regionParams[3].presRegion;
3640 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3641 vdencReadBatchBuffer,
3642 0,
3643 m_hwInterface->m_vdencReadBatchBufferSize,
3644 3,
3645 "_Slb",
3646 true,
3647 currentPass,
3648 hucRegionDumpUpdate));
3649
3650 // Region 4 - Constant Data Buffer dump
3651 auto vdencBrcConstDataBuffer = m_virtualAddrParams.regionParams[4].presRegion;
3652 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3653 vdencBrcConstDataBuffer,
3654 0,
3655 m_vdencBrcConstDataBufferSize,
3656 4,
3657 "_ConstData",
3658 true,
3659 currentPass,
3660 hucRegionDumpUpdate));
3661
3662 // Region 7 - Slice Stat Streamout (Input)
3663 auto lucBasedAddressBuffer = m_virtualAddrParams.regionParams[7].presRegion;
3664 auto lucBasedAddressOffset = m_virtualAddrParams.regionParams[7].dwOffset;
3665 if (lucBasedAddressBuffer)
3666 {
3667 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3668 lucBasedAddressBuffer,
3669 lucBasedAddressOffset,
3670 CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6 * CODECHAL_CACHELINE_SIZE,
3671 7,
3672 "_SliceStat",
3673 true,
3674 currentPass,
3675 hucRegionDumpUpdate));
3676 }
3677
3678 // Region 8 - PAK MMIO Buffer dump
3679 auto pakInfoBufffer = m_virtualAddrParams.regionParams[8].presRegion;
3680 if (pakInfoBufffer)
3681 {
3682 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3683 pakInfoBufffer,
3684 0,
3685 sizeof(CodechalVdencHevcPakInfo),
3686 8,
3687 "_PakMmio",
3688 true,
3689 currentPass,
3690 hucRegionDumpUpdate));
3691 }
3692
3693 // Region 9 - Streamin Buffer for ROI (Input)
3694 auto streamInBufferSize = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32) * (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32) * CODECHAL_CACHELINE_SIZE;
3695 auto stramInBuffer = m_virtualAddrParams.regionParams[9].presRegion;
3696 if (stramInBuffer)
3697 {
3698 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3699 stramInBuffer,
3700 0,
3701 streamInBufferSize,
3702 9,
3703 "_RoiStreamin",
3704 true,
3705 currentPass,
3706 hucRegionDumpUpdate));
3707 }
3708
3709 // Region 10 - Delta QP for ROI Buffer
3710 auto vdencDeltaQpBuffer = m_virtualAddrParams.regionParams[10].presRegion;
3711 if (vdencDeltaQpBuffer)
3712 {
3713 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3714 vdencDeltaQpBuffer,
3715 0,
3716 m_deltaQpRoiBufferSize,
3717 10,
3718 "_DeltaQp",
3719 true,
3720 currentPass,
3721 hucRegionDumpUpdate));
3722 }
3723
3724 // Region 12 - Input SLB Buffer
3725 auto slbBuffer = m_virtualAddrParams.regionParams[12].presRegion;
3726 if (slbBuffer)
3727 {
3728 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3729 slbBuffer,
3730 0,
3731 m_hwInterface->m_vdencGroup3BatchBufferSize,
3732 12,
3733 "_Slb",
3734 true,
3735 currentPass,
3736 hucRegionDumpUpdate));
3737 }
3738 }
3739 else
3740 {
3741 // Region 5 - Output SLB Buffer
3742 auto vdenc2ndLevelBatchBuffer = m_virtualAddrParams.regionParams[5].presRegion;
3743 if (vdenc2ndLevelBatchBuffer)
3744 {
3745 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3746 vdenc2ndLevelBatchBuffer,
3747 0,
3748 m_hwInterface->m_vdenc2ndLevelBatchBufferSize,
3749 5,
3750 "_Slb",
3751 false,
3752 currentPass,
3753 hucRegionDumpUpdate));
3754 }
3755
3756 // Region 11 - Output ROI Streamin Buffer
3757 auto vdencOutputROIStreaminBuffer = m_virtualAddrParams.regionParams[11].presRegion;
3758 if (vdencOutputROIStreaminBuffer)
3759 {
3760 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3761 vdencOutputROIStreaminBuffer,
3762 0,
3763 m_roiStreamInBufferSize,
3764 11,
3765 "_RoiStreamin",
3766 false,
3767 currentPass,
3768 hucRegionDumpUpdate));
3769 }
3770 }
3771
3772 // Region 0 - History Buffer dump (Input/Output)
3773 auto vdencBrcHistoryBuffer = m_virtualAddrParams.regionParams[0].presRegion;
3774 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3775 vdencBrcHistoryBuffer,
3776 0,
3777 m_brcHistoryBufSize,
3778 0,
3779 "_History",
3780 isInput,
3781 currentPass,
3782 hucRegionDumpUpdate));
3783
3784 // Region 6 - Data from Pictures for Weighted Prediction (Input/Output)
3785 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3786 &m_dataFromPicsBuffer,
3787 0,
3788 CODECHAL_PAGE_SIZE * 4,
3789 6,
3790 "_PicsData",
3791 isInput,
3792 currentPass,
3793 hucRegionDumpUpdate));
3794
3795 // Region 15 - Debug Output
3796 auto debugBuffer = m_virtualAddrParams.regionParams[15].presRegion;
3797 if (debugBuffer)
3798 {
3799 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
3800 debugBuffer,
3801 0,
3802 0x1000,
3803 15,
3804 "_Debug",
3805 isInput,
3806 currentPass,
3807 hucRegionDumpUpdate));
3808 }
3809 return MOS_STATUS_SUCCESS;
3810 }
3811
DumpVdencOutputs()3812 MOS_STATUS CodechalVdencHevcState::DumpVdencOutputs()
3813 {
3814 CODECHAL_ENCODE_FUNCTION_ENTER;
3815
3816 // Dump VDENC Stats Buffer
3817 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
3818 m_resVdencStatsBuffer,
3819 CodechalDbgAttr::attrVdencOutput,
3820 "_Stats",
3821 m_vdencBrcStatsBufferSize,
3822 0,
3823 CODECHAL_NUM_MEDIA_STATES));
3824
3825 // Dump PAK Stats Buffer
3826 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
3827 m_resPakStatsBuffer,
3828 CodechalDbgAttr::attrVdencOutput,
3829 "_PakStats",
3830 m_vdencBrcPakStatsBufferSize,
3831 0,
3832 CODECHAL_NUM_MEDIA_STATES));
3833
3834 // Dump PAK MMIO Buffer
3835 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
3836 &m_resPakMmioBuffer,
3837 CodechalDbgKernel::kernelBrcUpdate,
3838 m_currPass ? "_MmioReg_Output_Pass1" : "_MmioReg_Output_Pass0",
3839 sizeof(VdencBrcPakMmio),
3840 0,
3841 CODECHAL_NUM_MEDIA_STATES));
3842
3843 // Dump PAK Obj Cmd Buffer
3844 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
3845 m_resVdencPakObjCmdStreamOutBuffer,
3846 CodechalDbgAttr::attrVdencOutput,
3847 "_MbCode",
3848 m_mvOffset,
3849 0,
3850 CODECHAL_NUM_MEDIA_STATES));
3851
3852 // Dump CU Record Cmd Buffer
3853 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
3854 m_resVdencPakObjCmdStreamOutBuffer,
3855 CodechalDbgAttr::attrVdencOutput,
3856 "_CURecord",
3857 m_mbCodeSize - m_mvOffset,
3858 m_mvOffset,
3859 CODECHAL_NUM_MEDIA_STATES));
3860
3861 // Slice Size Conformance
3862 if (m_hevcSeqParams->SliceSizeControl)
3863 {
3864 uint32_t dwSize = CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6*CODECHAL_CACHELINE_SIZE;
3865 if (!m_hevcPicParams->tiles_enabled_flag)
3866 {
3867 // Slice Size StreamOut Surface
3868 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
3869 &m_resLcuBaseAddressBuffer,
3870 CodechalDbgAttr::attrVdencOutput,
3871 "_SliceSize",
3872 dwSize,
3873 0,
3874 CODECHAL_NUM_MEDIA_STATES));
3875 }
3876
3877 dwSize = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
3878 // Slice Count buffer 1 DW = 4 Bytes
3879 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
3880 m_resSliceCountBuffer,
3881 CodechalDbgAttr::attrVdencOutput,
3882 "_SliceCount",
3883 dwSize,
3884 0,
3885 CODECHAL_NUM_MEDIA_STATES));
3886
3887 // VDEncMode Timer buffer 1 DW = 4 Bytes
3888 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
3889 m_resVdencModeTimerBuffer,
3890 CodechalDbgAttr::attrVdencOutput,
3891 "_ModeTimer",
3892 dwSize,
3893 0,
3894 CODECHAL_NUM_MEDIA_STATES));
3895 }
3896
3897 return MOS_STATUS_SUCCESS;
3898 }
3899
DumpSeqParFile()3900 MOS_STATUS CodechalVdencHevcState::DumpSeqParFile()
3901 {
3902 CODECHAL_DEBUG_FUNCTION_ENTER;
3903
3904 CODECHAL_DEBUG_CHK_NULL(m_debugInterface);
3905
3906 if (!m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrDumpEncodePar))
3907 {
3908 return MOS_STATUS_SUCCESS;
3909 }
3910
3911 std::ostringstream oss;
3912 oss.setf(std::ios::showbase | std::ios::uppercase);
3913
3914 oss << "ISliceQP = " << std::dec << m_hevcPar->ISliceQP << std::endl;
3915 oss << "PSliceQP = " << std::dec << m_hevcPar->PSliceQP << std::endl;
3916 oss << "BSliceQP = " << std::dec << m_hevcPar->BSliceQP << std::endl;
3917 oss << "StartFrameNum = " << std::dec << m_hevcPar->StartFrameNum << std::endl;
3918 oss << "ProfileIDC = " << std::dec << m_hevcPar->ProfileIDC << std::endl;
3919 oss << "LevelIDC = " << std::dec << m_hevcPar->LevelIDC << std::endl;
3920 oss << "NumP = " << std::dec << m_hevcPar->NumP << std::endl;
3921 oss << "NumB = " << std::dec << m_hevcPar->NumB << std::endl;
3922 oss << "NumSlices = " << std::dec << m_hevcPar->NumSlices << std::endl;
3923 oss << "SliceStartLCU = " << std::dec << m_hevcPar->SliceStartLCU << std::endl;
3924 oss << "Log2MinCUSize = " << std::dec << m_hevcPar->Log2MinCUSize << std::endl;
3925 oss << "Log2MaxCUSize = " << std::dec << m_hevcPar->Log2MaxCUSize << std::endl;
3926 oss << "Log2MinTUSize = " << std::dec << m_hevcPar->Log2MinTUSize << std::endl;
3927 oss << "Log2MaxTUSize = " << std::dec << m_hevcPar->Log2MaxTUSize << std::endl;
3928 oss << "InputBitDepthLuma = " << std::dec << m_hevcPar->InputBitDepthLuma << std::endl;
3929 oss << "InputBitDepthChroma = " << std::dec << m_hevcPar->InputBitDepthChroma << std::endl;
3930 oss << "OutputBitDepthLuma = " << std::dec << m_hevcPar->OutputBitDepthLuma << std::endl;
3931 oss << "OutputBitDepthChroma = " << std::dec << m_hevcPar->OutputBitDepthChroma << std::endl;
3932 oss << "InternalBitDepthLuma = " << std::dec << m_hevcPar->InternalBitDepthLuma << std::endl;
3933 oss << "InternalBitDepthChroma = " << std::dec << m_hevcPar->InternalBitDepthChroma << std::endl;
3934 oss << "Log2TUMaxDepthInter = " << std::dec << m_hevcPar->Log2TUMaxDepthInter << std::endl;
3935 oss << "Log2TUMaxDepthIntra = " << std::dec << m_hevcPar->Log2TUMaxDepthIntra << std::endl;
3936 oss << "Log2ParallelMergeLevel = " << std::dec << m_hevcPar->Log2ParallelMergeLevel << std::endl;
3937 oss << "EnableTransquantBypass = " << std::dec << m_hevcPar->TransquantBypassEnableFlag << std::endl;
3938 oss << "EnableTransformSkip = " << std::dec << m_hevcPar->TransformSkipEnabledFlag << std::endl;
3939 oss << "TSDecisionEnabledFlag = " << std::dec << m_hevcPar->TSDecisionEnabledFlag << std::endl;
3940 oss << "EnableTemporalMvp = " << std::dec << m_hevcPar->TemporalMvpEnableFlag << std::endl;
3941 oss << "CollocatedFromL0Flag = " << std::dec << m_hevcPar->CollocatedFromL0Flag << std::endl;
3942 oss << "CollocatedRefIdx = " << std::dec << m_hevcPar->CollocatedRefIdx << std::endl;
3943 oss << "MvdL1ZeroFlag = " << std::dec << m_hevcPar->MvdL1ZeroFlag << std::endl;
3944 oss << "AmpEnabledFlag = " << std::dec << m_hevcPar->AmpEnabledFlag << std::endl;
3945 oss << "CuQpDeltaEnabledFlag = " << std::dec << m_hevcPar->CuQpDeltaEnabledFlag << std::endl;
3946 oss << "DiffCuQpDeltaDepth = " << std::dec << m_hevcPar->DiffCuQpDeltaDepth << std::endl;
3947 oss << "ChromaCbQpOffset = " << std::dec << m_hevcPar->ChromaCbQpOffset << std::endl;
3948 oss << "ChromaCrQpOffset = " << std::dec << m_hevcPar->ChromaCrQpOffset << std::endl;
3949 oss << "DeblockingFilterTc = " << std::dec << m_hevcPar->DeblockingTc << std::endl;
3950 oss << "DeblockingFilterBeta = " << std::dec << m_hevcPar->DeblockingTc << std::endl;
3951 oss << "DeblockingIDC = " << std::dec << m_hevcPar->DeblockingIDC << std::endl;
3952 oss << "LoopFilterAcrossSlicesEnabledFlag = " << std::dec << m_hevcPar->LoopFilterAcrossSlicesEnabledFlag << std::endl;
3953 oss << "SignDataHidingFlag = " << std::dec << m_hevcPar->SignDataHidingFlag << std::endl;
3954 oss << "CabacInitFlag = " << std::dec << m_hevcPar->CabacInitFlag << std::endl;
3955 oss << "ConstrainedIntraPred = " << std::dec << m_hevcPar->ConstrainedIntraPred << std::endl;
3956 oss << "LowDelay = " << std::dec << m_hevcPar->LowDelay << std::endl;
3957 oss << "EnableBAsRefs = " << std::dec << m_hevcPar->EnableBAsRefs << std::endl;
3958 oss << "BitRate = " << std::dec << m_hevcPar->BitRate << std::endl;
3959 oss << "MaxBitRate = " << std::dec << m_hevcPar->MaxBitRate << std::endl;
3960 oss << "VbvSzInBit = " << std::dec << m_hevcPar->VbvSzInBit << std::endl;
3961 oss << "InitVbvFullnessInBit = " << std::dec << m_hevcPar->InitVbvFullnessInBit << std::endl;
3962 oss << "CuRC = " << std::dec << m_hevcPar->CuRC << std::endl;
3963 oss << "EnableMultipass = " << std::dec << m_hevcPar->EnableMultipass << std::endl;
3964 oss << "MaxNumPakPassesI = " << std::dec << m_hevcPar->MaxNumPakPassesI << std::endl;
3965 oss << "MaxNumPakPassesPB = " << std::dec << m_hevcPar->MaxNumPakPassesPB << std::endl;
3966 oss << "UserMaxIFrame = " << std::dec << m_hevcPar->UserMaxIFrame << std::endl;
3967 oss << "UserMaxPBFrame = " << std::dec << m_hevcPar->UserMaxPBFrame << std::endl;
3968 oss << "FrameRateM = " << std::dec << m_hevcPar->FrameRateM << std::endl;
3969 oss << "FrameRateD = " << std::dec << m_hevcPar->FrameRateD << std::endl;
3970 oss << "IntraRefreshEnable = " << std::dec << m_hevcPar->IntraRefreshEnable << std::endl;
3971 oss << "IntraRefreshMode = " << std::dec << m_hevcPar->IntraRefreshMode << std::endl;
3972 oss << "IntraRefreshSizeIn32x32 = " << std::dec << m_hevcPar->IntraRefreshSizeIn32x32 << std::endl;
3973 oss << "IntraRefreshDeltaQP = " << std::dec << m_hevcPar->IntraRefreshDeltaQP << std::endl;
3974 oss << "HMECoarseRefPic = " << std::dec << m_hevcPar->HMECoarseRefPic << std::endl;
3975 oss << "FadeDetectionEnable = " << std::dec << m_hevcPar->FadeDetectionEnable << std::endl;
3976 oss << "WeightedPred = " << std::dec << m_hevcPar->WeightedPred << std::endl;
3977 oss << "WeightedBiPred = " << std::dec << m_hevcPar->WeightedBiPred << std::endl;
3978 oss << "RefListModforWeightPred = " << std::dec << m_hevcPar->RefListModforWeightPred << std::endl;
3979 oss << "EnableStatistics = " << std::dec << m_hevcPar->EnableStatistics << std::endl;
3980 oss << "OutputQualityType = " << std::dec << m_hevcPar->OutputQualityType << std::endl;
3981 oss << "SliceSizeCtrl = " << std::dec << m_hevcPar->SliceSizeCtrl << std::endl;
3982 oss << "SliceSizeThreshold = " << std::dec << m_hevcPar->SliceSizeThreshold << std::endl;
3983 oss << "MaxSliceSize = " << std::dec << m_hevcPar->MaxSliceSize << std::endl;
3984 oss << "VDEncMode = " << std::dec << m_hevcPar->VDEncMode << std::endl;
3985 oss << "DisableIntraLuma4x4Tu = " << std::dec << m_hevcPar->DisableIntraLuma4x4Tu << std::endl;
3986 oss << "HMERef1Disable = " << std::dec << m_hevcPar->HMERef1Disable << std::endl;
3987 oss << "NumBetaPreditors = " << std::dec << m_hevcPar->NumBetaPreditors << std::endl;
3988 oss << "MaxNumImePredictor = " << std::dec << m_hevcPar->MaxNumImePredictor << std::endl;
3989 oss << "NumMergeCandidateCu8x8 = " << std::dec << m_hevcPar->NumMergeCandidateCu8x8 << std::endl;
3990 oss << "NumMergeCandidateCu16x16 = " << std::dec << m_hevcPar->NumMergeCandidateCu16x16 << std::endl;
3991 oss << "NumMergeCandidateCu32x32 = " << std::dec << m_hevcPar->NumMergeCandidateCu32x32 << std::endl;
3992 oss << "NumMergeCandidateCu64x64 = " << std::dec << m_hevcPar->NumMergeCandidateCu64x64 << std::endl;
3993 oss << "BRCMethod = " << std::dec << m_hevcPar->BRCMethod << std::endl;
3994 oss << "BRCType = " << std::dec << m_hevcPar->BRCType << std::endl;
3995 oss << "SAOEnabledFlag = " << std::dec << m_hevcPar->SAOEnabledFlag << std::endl;
3996 oss << "IntraFrameRDOQEnabledFlag = " << std::dec << m_hevcPar->IntraFrameRDOQEnabledFlag << std::endl;
3997 oss << "InterFrameRDOQEnabledFlag = " << std::dec << m_hevcPar->InterFrameRDOQEnabledFlag << std::endl;
3998 oss << "StreamInEnable = " << std::dec << m_hevcPar->StreamInEn << std::endl;
3999 oss << "StreamInMvPredictorRef = " << std::dec << m_hevcPar->StreamInMvPredictorRef << std::endl;
4000 oss << "StaticFrameZMVPercent = " << std::dec << m_hevcPar->StaticFrameZMVPercent << std::endl;
4001 oss << "HMEStreamInRefCost = " << std::dec << m_hevcPar->HMEStreamInRefCost << std::endl;
4002 oss << "IntraPeriod = " << std::dec << m_hevcPar->IntraPeriod << std::endl;
4003 oss << "BGOPSize = " << std::dec << m_hevcPar->BGOPSize << std::endl;
4004 oss << "MaxRefIdxL0 = " << std::dec << m_hevcPar->MaxRefIdxL0 << std::endl;
4005 oss << "MaxRefIdxL1 = " << std::dec << m_hevcPar->MaxRefIdxL1 << std::endl;
4006
4007 const char *fileName = m_debugInterface->CreateFileName(
4008 "EncodeSequence",
4009 "EncodePar",
4010 CodechalDbgExtType::par);
4011
4012 std::ofstream ofs(fileName, std::ios::app);
4013 ofs << oss.str();
4014 ofs.close();
4015
4016 return MOS_STATUS_SUCCESS;
4017 }
4018
PopulateDdiParam(PCODEC_HEVC_ENCODE_SEQUENCE_PARAMS hevcSeqParams,PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams,PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams)4019 MOS_STATUS CodechalVdencHevcState::PopulateDdiParam(
4020 PCODEC_HEVC_ENCODE_SEQUENCE_PARAMS hevcSeqParams,
4021 PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams,
4022 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams)
4023 {
4024 if (!m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrDumpEncodePar))
4025 {
4026 return MOS_STATUS_SUCCESS;
4027 }
4028
4029 CODECHAL_ENCODE_CHK_STATUS_RETURN(
4030 CodechalEncodeHevcBase::PopulateDdiParam(
4031 hevcSeqParams,
4032 hevcPicParams,
4033 hevcSlcParams));
4034
4035 if (m_hevcVdencAcqpEnabled)
4036 {
4037 m_hevcPar->BRCMethod = 2;
4038 m_hevcPar->BRCType = 0;
4039 m_hevcPar->DisableCuQpAdj = 1;
4040 }
4041
4042 return MOS_STATUS_SUCCESS;
4043 }
4044
ModifyEncodedFrameSizeWithFakeHeaderSize(PMOS_COMMAND_BUFFER cmdBuffer,uint32_t fakeHeaderSizeInByte,PMOS_RESOURCE resBrcUpdateCurbe,uint32_t targetSizePos,PMOS_RESOURCE resPakStat,uint32_t slcHrdSizePos)4045 MOS_STATUS CodechalVdencHevcState::ModifyEncodedFrameSizeWithFakeHeaderSize(
4046 PMOS_COMMAND_BUFFER cmdBuffer,
4047 uint32_t fakeHeaderSizeInByte,
4048 PMOS_RESOURCE resBrcUpdateCurbe,
4049 uint32_t targetSizePos,
4050 PMOS_RESOURCE resPakStat,
4051 uint32_t slcHrdSizePos
4052 )
4053 {
4054 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4055
4056 CODECHAL_ENCODE_FUNCTION_ENTER;
4057
4058 //calculate slice headers size
4059 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
4060 CODECHAL_ENCODE_CHK_NULL_RETURN(slcData);
4061 uint32_t totalSliceHeaderSize = 0;
4062 for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
4063 {
4064 totalSliceHeaderSize += (slcData->BitSize + 7) >> 3;
4065 slcData++;
4066 }
4067
4068 uint32_t firstHdrSz = 0;
4069 for (uint32_t i = 0; i < m_encodeParams.uiNumNalUnits; i++)
4070 {
4071 firstHdrSz += m_encodeParams.ppNALUnitParams[i]->uiSize;
4072 }
4073
4074 totalSliceHeaderSize += firstHdrSz;
4075
4076 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddBufferWithIMMValue(
4077 cmdBuffer,
4078 resBrcUpdateCurbe,
4079 targetSizePos,
4080 fakeHeaderSizeInByte - totalSliceHeaderSize,
4081 true));
4082
4083 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddBufferWithIMMValue(
4084 cmdBuffer,
4085 resPakStat,
4086 slcHrdSizePos,
4087 fakeHeaderSizeInByte * 8,
4088 true));
4089
4090 return eStatus;
4091 }
4092 #endif
4093