1 // Copyright (c) 2018-2019 Intel Corporation
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in all
11 // copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 // SOFTWARE.
20 
21 #include <climits>
22 #include "mfx_vp9_encode_hw_utils.h"
23 #include "mfx_vp9_encode_hw_par.h"
24 #include "mfx_vp9_encode_hw_ddi.h"
25 
26 namespace MfxHwVP9Encode
27 {
28 
GetGuid(VP9MfxVideoParam const & par)29     GUID GetGuid(VP9MfxVideoParam const & par)
30     {
31         // Currently we don't support LP=OFF
32         // so it is mapped to GUID_NULL
33         // it will cause Query/Init fails with Unsupported
34         // ever when driver support LP=OFF
35         switch (par.mfx.CodecProfile)
36         {
37         case MFX_PROFILE_VP9_0:
38             return (par.mfx.LowPower != MFX_CODINGOPTION_OFF) ?
39                 DXVA2_Intel_LowpowerEncode_VP9_Profile0 : GUID_NULL; //DXVA2_Intel_Encode_VP9_Profile0;
40             break;
41         case MFX_PROFILE_VP9_1:
42             return (par.mfx.LowPower != MFX_CODINGOPTION_OFF) ?
43                 DXVA2_Intel_LowpowerEncode_VP9_Profile1 : GUID_NULL; //DXVA2_Intel_Encode_VP9_Profile1;
44             break;
45         case MFX_PROFILE_VP9_2:
46             return (par.mfx.LowPower != MFX_CODINGOPTION_OFF) ?
47                 DXVA2_Intel_LowpowerEncode_VP9_10bit_Profile2 : GUID_NULL; // DXVA2_Intel_Encode_VP9_10bit_Profile2;
48             break;
49         case MFX_PROFILE_VP9_3:
50             return (par.mfx.LowPower != MFX_CODINGOPTION_OFF) ?
51                 DXVA2_Intel_LowpowerEncode_VP9_10bit_Profile3 : GUID_NULL; // DXVA2_Intel_Encode_VP9_10bit_Profile3;
52             break;
53         default:
54             // profile cannot be identified. Use Profile0 so far
55             return (par.mfx.LowPower != MFX_CODINGOPTION_OFF) ?
56                 DXVA2_Intel_LowpowerEncode_VP9_Profile0 : GUID_NULL; // DXVA2_Intel_Encode_VP9_Profile0;
57         }
58     }
59 
QueryCaps(VideoCORE * pCore,ENCODE_CAPS_VP9 & caps,GUID guid,VP9MfxVideoParam const & par)60     mfxStatus QueryCaps(VideoCORE* pCore, ENCODE_CAPS_VP9 & caps, GUID guid, VP9MfxVideoParam const & par)
61     {
62         std::unique_ptr<DriverEncoder> ddi;
63 
64         ddi.reset(CreatePlatformVp9Encoder(pCore));
65         MFX_CHECK(ddi.get() != NULL, MFX_WRN_PARTIAL_ACCELERATION);
66 
67         mfxStatus sts = ddi.get()->CreateAuxilliaryDevice(pCore, guid, par);
68         MFX_CHECK_STS(sts);
69 
70         sts = ddi.get()->QueryEncodeCaps(caps);
71         MFX_CHECK_STS(sts);
72 
73         return MFX_ERR_NONE;
74     }
75 
// uncompressed header packing
77 
// Marker written as the first 2 bits of the uncompressed header.
#define VP9_FRAME_MARKER    0x2

// Three sync-code bytes written for key frames and intra-only frames.
#define VP9_SYNC_CODE_0     0x49
#define VP9_SYNC_CODE_1     0x83
#define VP9_SYNC_CODE_2     0x42

// Bit width of the frame context index field, and the resulting context count.
#define FRAME_CONTEXTS_LOG2 2
#define FRAME_CONTEXTS      (1 << FRAME_CONTEXTS_LOG2)

// Bit width of the base quantizer index field.
#define QINDEX_BITS         8

// Tile width limits, in 64x64 superblocks, used to bound log2 tile columns.
#define MAX_TILE_WIDTH_B64  64
#define MIN_TILE_WIDTH_B64  4

// Mode-info (mi) geometry relative to the 64x64 maximum block.
#define MI_SIZE_LOG2        3
#define MI_BLOCK_SIZE_LOG2  (6 - MI_SIZE_LOG2)  // 64 = 2^6
#define MI_BLOCK_SIZE       (1 << MI_BLOCK_SIZE_LOG2)  // mi-units per max block
95 
WriteBit(BitBuffer & buf,mfxU8 bit)96     void WriteBit(BitBuffer &buf, mfxU8 bit)
97     {
98         const mfxU16 byteOffset = buf.bitOffset / CHAR_BIT;
99         const mfxU8 bitsLeftInByte = CHAR_BIT - 1 - buf.bitOffset % CHAR_BIT;
100         if (bitsLeftInByte == CHAR_BIT - 1)
101         {
102             buf.pBuffer[byteOffset] = mfxU8(bit << bitsLeftInByte);
103         }
104         else
105         {
106             buf.pBuffer[byteOffset] &= ~(1 << bitsLeftInByte);
107             buf.pBuffer[byteOffset] |= bit << bitsLeftInByte;
108         }
109         buf.bitOffset = buf.bitOffset + 1;
110     };
111 
WriteLiteral(BitBuffer & buf,mfxU64 data,mfxU64 bits)112     void WriteLiteral(BitBuffer &buf, mfxU64 data, mfxU64 bits)
113     {
114         for (mfxI64 bit = bits - 1; bit >= 0; bit--)
115         {
116             WriteBit(buf, (data >> bit) & 1);
117         }
118     }
119 
WriteColorConfig(BitBuffer & buf,VP9SeqLevelParam const & seqPar)120     void WriteColorConfig(BitBuffer &buf, VP9SeqLevelParam const &seqPar)
121     {
122         if (seqPar.profile >= PROFILE_2)
123         {
124             assert(seqPar.bitDepth > BITDEPTH_8);
125             WriteBit(buf, seqPar.bitDepth == BITDEPTH_10 ? 0 : 1);
126         }
127         WriteLiteral(buf, seqPar.colorSpace, 3);
128         if (seqPar.colorSpace != SRGB)
129         {
130             WriteBit(buf, seqPar.colorRange);
131             if (seqPar.profile == PROFILE_1 || seqPar.profile == PROFILE_3)
132             {
133                 assert(seqPar.subsamplingX != 1 || seqPar.subsamplingY != 1);
134                 WriteBit(buf, seqPar.subsamplingX);
135                 WriteBit(buf, seqPar.subsamplingY);
136                 WriteBit(buf, 0);  // unused
137             }
138             else
139             {
140                 assert(seqPar.subsamplingX == 1 && seqPar.subsamplingY == 1);
141             }
142         }
143         else
144         {
145             assert(seqPar.profile == PROFILE_1 || seqPar.profile == PROFILE_3);
146             WriteBit(buf, 0);  // unused
147         }
148     }
149 
WriteFrameSize(BitBuffer & buf,VP9FrameLevelParam const & framePar)150     void WriteFrameSize(BitBuffer &buf, VP9FrameLevelParam const &framePar)
151     {
152         WriteLiteral(buf, framePar.width - 1, 16);
153         WriteLiteral(buf, framePar.height - 1, 16);
154 
155         const mfxU8 renderFrameSizeDifferent = 0; // TODO: add support
156         WriteBit(buf, renderFrameSizeDifferent);
157         /*if (renderFrameSizeDifferent)
158         {
159             WriteLiteral(buf, framePar.renderWidth - 1, 16);
160             WriteLiteral(buf, framePar.renderHeight - 1, 16);
161         }*/
162     }
163 
WriteQIndexDelta(BitBuffer & buf,mfxI16 qDelta)164     void WriteQIndexDelta(BitBuffer &buf, mfxI16 qDelta)
165     {
166         if (qDelta != 0)
167         {
168             WriteBit(buf, 1);
169             WriteLiteral(buf, abs(qDelta), 4);
170             WriteBit(buf, qDelta < 0);
171         }
172         else
173         {
174             WriteBit(buf, 0);
175         }
176     }
177 
WriteUncompressedHeader(BitBuffer & localBuf,Task const & task,VP9SeqLevelParam const & seqPar,BitOffsets & offsets)178     mfxU16 WriteUncompressedHeader(BitBuffer &localBuf,
179                                    Task const &task,
180                                    VP9SeqLevelParam const &seqPar,
181                                    BitOffsets &offsets)
182     {
183         VP9FrameLevelParam const &framePar = task.m_frameParam;
184 
185         Zero(offsets);
186 
187         offsets.BitOffsetUncompressedHeader = (mfxU16)localBuf.bitOffset;
188 
189         WriteLiteral(localBuf, VP9_FRAME_MARKER, 2);
190 
191         // profile
192         switch (seqPar.profile)
193         {
194             case PROFILE_0: WriteLiteral(localBuf, 0, 2); break;
195             case PROFILE_1: WriteLiteral(localBuf, 2, 2); break;
196             case PROFILE_2: WriteLiteral(localBuf, 1, 2); break;
197             case PROFILE_3: WriteLiteral(localBuf, 6, 3); break;
198             default: assert(0);
199         }
200 
201         WriteBit(localBuf, 0);  // show_existing_frame
202         WriteBit(localBuf, framePar.frameType);
203         WriteBit(localBuf, framePar.showFrame);
204         WriteBit(localBuf, framePar.errorResilentMode);
205 
206         if (framePar.frameType == KEY_FRAME) // Key frame
207         {
208             // sync code
209             WriteLiteral(localBuf, VP9_SYNC_CODE_0, 8);
210             WriteLiteral(localBuf, VP9_SYNC_CODE_1, 8);
211             WriteLiteral(localBuf, VP9_SYNC_CODE_2, 8);
212 
213             // color config
214             WriteColorConfig(localBuf, seqPar);
215 
216             // frame, render size
217             WriteFrameSize(localBuf, framePar);
218         }
219         else // Inter frame
220         {
221             if (!framePar.showFrame)
222             {
223                 WriteBit(localBuf, framePar.intraOnly);
224             }
225 
226             if (!framePar.errorResilentMode)
227             {
228                 WriteLiteral(localBuf, framePar.resetFrameContext, 2);
229             }
230 
231             // prepare refresh frame mask
232             mfxU8 refreshFamesMask = 0;
233             for (mfxU8 i = 0; i < DPB_SIZE; i++)
234             {
235                 refreshFamesMask |= (framePar.refreshRefFrames[i] << i);
236             }
237 
238             if (framePar.intraOnly)
239             {
240                 // sync code
241                 WriteLiteral(localBuf, VP9_SYNC_CODE_0, 8);
242                 WriteLiteral(localBuf, VP9_SYNC_CODE_1, 8);
243                 WriteLiteral(localBuf, VP9_SYNC_CODE_2, 8);
244 
245                 // Note for profile 0, 420 8bpp is assumed.
246                 if (seqPar.profile > PROFILE_0)
247                 {
248                     WriteColorConfig(localBuf, seqPar);
249                 }
250 
251                 // refresh frame info
252                 WriteLiteral(localBuf, refreshFamesMask, REF_FRAMES);
253 
254                 // frame, render size
255                 WriteFrameSize(localBuf, framePar);
256             }
257             else
258             {
259                 WriteLiteral(localBuf, refreshFamesMask, REF_FRAMES);
260                 for (mfxI8 refFrame = LAST_FRAME; refFrame <= ALTREF_FRAME; refFrame ++)
261                 {
262                     WriteLiteral(localBuf, framePar.refList[int(refFrame)], REF_FRAMES_LOG2);
263                     WriteBit(localBuf, framePar.refBiases[int(refFrame)]);
264                 }
265 
266                 // frame size with refs
267                 mfxU8 found = 1;
268                 if (task.m_frameOrderInRefStructure == 0)
269                 {
270                     // reference structure is reset which means resolution change
271                     // don't inherit resolution of reference frames
272                     found = 0;
273                 }
274 
275                 for (mfxI8 refFrame = LAST_FRAME; refFrame <= ALTREF_FRAME; refFrame ++)
276                 {
277                     // TODO: implement correct logic for [found] flag
278                     WriteBit(localBuf, found);
279                     if (found) break;
280                 }
281 
282                 if (!found)
283                 {
284                     WriteLiteral(localBuf, framePar.width - 1, 16);
285                     WriteLiteral(localBuf, framePar.height - 1, 16);
286                 }
287 
288                 const mfxU8 renderFrameSizeDifferent = 0; // TODO: add support
289                 WriteBit(localBuf, renderFrameSizeDifferent);
290                 /*if (renderFrameSizeDifferent)
291                 {
292                     WriteLiteral(localBuf, framePar.renderWidth - 1, 16);
293                     WriteLiteral(localBuf, framePar.renderHeight - 1, 16);
294                 }*/
295 
296                 WriteBit(localBuf, framePar.allowHighPrecisionMV);
297 
298                 // interpolation filter syntax
299                 const mfxU8 filterToLiteralMap[] = { 1, 0, 2, 3 };
300 
301                 assert(framePar.interpFilter <= SWITCHABLE);
302                 WriteBit(localBuf, framePar.interpFilter == SWITCHABLE);
303                 if (framePar.interpFilter < SWITCHABLE)
304                 {
305                     WriteLiteral(localBuf, filterToLiteralMap[framePar.interpFilter], 2);
306                 }
307             }
308         }
309 
310         if (!framePar.errorResilentMode)
311         {
312             WriteBit(localBuf, framePar.refreshFrameContext);
313             WriteBit(localBuf, seqPar.frameParallelDecoding);
314         }
315 
316         WriteLiteral(localBuf, framePar.frameContextIdx, FRAME_CONTEXTS_LOG2);
317 
318         offsets.BitOffsetForLFLevel = (mfxU16)localBuf.bitOffset;
319         // loop filter syntax
320         WriteLiteral(localBuf, framePar.lfLevel, 6);
321         WriteLiteral(localBuf, framePar.sharpness, 3);
322 
323         WriteBit(localBuf, framePar.modeRefDeltaEnabled);
324 
325         if (framePar.modeRefDeltaEnabled)
326         {
327             WriteBit(localBuf, framePar.modeRefDeltaUpdate);
328             if (framePar.modeRefDeltaUpdate)
329             {
330                 offsets.BitOffsetForLFRefDelta = (mfxU16)localBuf.bitOffset;
331                 for (mfxI8 i = 0; i < MAX_REF_LF_DELTAS; i++)
332                 {
333                     // always write deltas explicitly to allow BRC modify them
334                     const mfxI8 delta = framePar.lfRefDelta[int(i)];
335                     WriteBit(localBuf, 1);
336                     WriteLiteral(localBuf, abs(delta) & 0x3F, 6);
337                     WriteBit(localBuf, delta < 0);
338                 }
339 
340                 offsets.BitOffsetForLFModeDelta = (mfxU16)localBuf.bitOffset;
341                 for (mfxI8 i = 0; i < MAX_MODE_LF_DELTAS; i++)
342                 {
343                     // always write deltas explicitly to allow BRC modify them
344                     const mfxI8 delta = framePar.lfModeDelta[int(i)];
345                     WriteBit(localBuf, 1);
346                     WriteLiteral(localBuf, abs(delta) & 0x3F, 6);
347                     WriteBit(localBuf, delta < 0);
348                 }
349             }
350         }
351 
352         offsets.BitOffsetForQIndex = (mfxU16)localBuf.bitOffset;
353 
354         // quantization params
355         WriteLiteral(localBuf, framePar.baseQIndex, QINDEX_BITS);
356         WriteQIndexDelta(localBuf, framePar.qIndexDeltaLumaDC);
357         WriteQIndexDelta(localBuf, framePar.qIndexDeltaChromaDC);
358         WriteQIndexDelta(localBuf, framePar.qIndexDeltaChromaAC);
359 
360         offsets.BitOffsetForSegmentation = (mfxU16)localBuf.bitOffset;
361 
362         //segmentation
363         bool segmentation = framePar.segmentation != NO_SEGMENTATION;
364         WriteBit(localBuf, segmentation);
365         if (segmentation)
366         {
367             // for both cases (APP_SEGMENTATION and BRC_SEGMENTATION) segmentation_params() will be completely re-written by HW accelerator
368             // so just writing dummy parameters here
369             WriteBit(localBuf, 0);
370             WriteBit(localBuf, 0);
371         }
372 
373         offsets.BitSizeForSegmentation = (mfxU16)localBuf.bitOffset - offsets.BitOffsetForSegmentation;
374 
375         // tile info
376         mfxU8 minLog2TileCols = 0;
377         mfxU8 maxLog2TileCols = 1;
378         mfxU8 ones;
379 
380         const mfxU16 sb64Cols = (mfx::align2_value(framePar.modeInfoCols, 1 << MI_BLOCK_SIZE_LOG2)) >> MI_BLOCK_SIZE_LOG2;
381         while ((MAX_TILE_WIDTH_B64 << minLog2TileCols) < sb64Cols)
382         {
383             minLog2TileCols ++;
384         }
385         while ((sb64Cols >> maxLog2TileCols) >= MIN_TILE_WIDTH_B64)
386         {
387             maxLog2TileCols ++;
388         }
389         maxLog2TileCols--;
390 
391         ones = framePar.log2TileCols - minLog2TileCols;
392         while (ones--)
393         {
394             WriteBit(localBuf, 1);
395         }
396         if (framePar.log2TileCols < maxLog2TileCols)
397         {
398             WriteBit(localBuf, 0);
399         }
400 
401         WriteBit(localBuf, framePar.log2TileRows != 0);
402         if (framePar.log2TileRows != 0)
403         {
404             WriteBit(localBuf, framePar.log2TileRows != 1);
405         }
406 
407         offsets.BitOffsetForFirstPartitionSize = (mfxU16)localBuf.bitOffset;;
408 
409         // size of compressed header (unknown so far, will be written by driver/HuC)
410         WriteLiteral(localBuf, 0, 16);
411 
412         return localBuf.bitOffset;
413     };
414 
PrepareFrameHeader(VP9MfxVideoParam const & par,mfxU8 * pBuf,mfxU32 bufferSizeBytes,Task const & task,VP9SeqLevelParam const & seqPar,BitOffsets & offsets)415     mfxU16 PrepareFrameHeader(VP9MfxVideoParam const &par,
416         mfxU8 *pBuf,
417         mfxU32 bufferSizeBytes,
418         Task const& task,
419         VP9SeqLevelParam const &seqPar,
420         BitOffsets &offsets)
421     {
422         if (bufferSizeBytes < VP9_MAX_UNCOMPRESSED_HEADER_SIZE + MAX_IVF_HEADER_SIZE)
423         {
424             return 0; // zero size of header - indication that something went wrong
425         }
426 
427         BitBuffer localBuf;
428         localBuf.pBuffer = pBuf;
429         localBuf.bitOffset = 0;
430 
431         mfxExtVP9Param& opt = GetExtBufferRef(par);
432         mfxU16 ivfHeaderSize = 0;
433 
434         if (opt.WriteIVFHeaders != MFX_CODINGOPTION_OFF)
435         {
436             if (task.m_insertIVFSeqHeader)
437             {
438                 mfxStatus sts = AddSeqHeader(task.m_frameParam.width,
439                                              task.m_frameParam.height,
440                                              par.mfx.FrameInfo.FrameRateExtN,
441                                              par.mfx.FrameInfo.FrameRateExtD,
442                                              0,
443                                              localBuf.pBuffer,
444                                              bufferSizeBytes);
445                 MFX_CHECK_STS(sts);
446 
447                 ivfHeaderSize += IVF_SEQ_HEADER_SIZE_BYTES;
448             }
449 
450             mfxStatus sts = AddPictureHeader(localBuf.pBuffer + ivfHeaderSize, bufferSizeBytes - ivfHeaderSize);
451             MFX_CHECK_STS(sts);
452 
453             ivfHeaderSize += IVF_PIC_HEADER_SIZE_BYTES;
454         }
455 
456         localBuf.bitOffset += ivfHeaderSize * 8;
457 
458         mfxU16 totalBitsWritten = WriteUncompressedHeader(localBuf,
459             task,
460             seqPar,
461             offsets);
462 
463         return (totalBitsWritten + 7) / 8;
464     }
465 } // MfxHwVP9Encode
466