1 // Copyright (c) 2018-2019 Intel Corporation 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in all 11 // copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 // SOFTWARE. 20 21 #include <climits> 22 #include "mfx_vp9_encode_hw_utils.h" 23 #include "mfx_vp9_encode_hw_par.h" 24 #include "mfx_vp9_encode_hw_ddi.h" 25 26 namespace MfxHwVP9Encode 27 { 28 GetGuid(VP9MfxVideoParam const & par)29 GUID GetGuid(VP9MfxVideoParam const & par) 30 { 31 // Currently we don't support LP=OFF 32 // so it is mapped to GUID_NULL 33 // it will cause Query/Init fails with Unsupported 34 // ever when driver support LP=OFF 35 switch (par.mfx.CodecProfile) 36 { 37 case MFX_PROFILE_VP9_0: 38 return (par.mfx.LowPower != MFX_CODINGOPTION_OFF) ? 39 DXVA2_Intel_LowpowerEncode_VP9_Profile0 : GUID_NULL; //DXVA2_Intel_Encode_VP9_Profile0; 40 break; 41 case MFX_PROFILE_VP9_1: 42 return (par.mfx.LowPower != MFX_CODINGOPTION_OFF) ? 43 DXVA2_Intel_LowpowerEncode_VP9_Profile1 : GUID_NULL; //DXVA2_Intel_Encode_VP9_Profile1; 44 break; 45 case MFX_PROFILE_VP9_2: 46 return (par.mfx.LowPower != MFX_CODINGOPTION_OFF) ? 47 DXVA2_Intel_LowpowerEncode_VP9_10bit_Profile2 : GUID_NULL; // DXVA2_Intel_Encode_VP9_10bit_Profile2; 48 break; 49 case MFX_PROFILE_VP9_3: 50 return (par.mfx.LowPower != MFX_CODINGOPTION_OFF) ? 51 DXVA2_Intel_LowpowerEncode_VP9_10bit_Profile3 : GUID_NULL; // DXVA2_Intel_Encode_VP9_10bit_Profile3; 52 break; 53 default: 54 // profile cannot be identified. Use Profile0 so far 55 return (par.mfx.LowPower != MFX_CODINGOPTION_OFF) ? 56 DXVA2_Intel_LowpowerEncode_VP9_Profile0 : GUID_NULL; // DXVA2_Intel_Encode_VP9_Profile0; 57 } 58 } 59 QueryCaps(VideoCORE * pCore,ENCODE_CAPS_VP9 & caps,GUID guid,VP9MfxVideoParam const & par)60 mfxStatus QueryCaps(VideoCORE* pCore, ENCODE_CAPS_VP9 & caps, GUID guid, VP9MfxVideoParam const & par) 61 { 62 std::unique_ptr<DriverEncoder> ddi; 63 64 ddi.reset(CreatePlatformVp9Encoder(pCore)); 65 MFX_CHECK(ddi.get() != NULL, MFX_WRN_PARTIAL_ACCELERATION); 66 67 mfxStatus sts = ddi.get()->CreateAuxilliaryDevice(pCore, guid, par); 68 MFX_CHECK_STS(sts); 69 70 sts = ddi.get()->QueryEncodeCaps(caps); 71 MFX_CHECK_STS(sts); 72 73 return MFX_ERR_NONE; 74 } 75 76 // uncompressed headencompressedr packing 77 78 #define VP9_FRAME_MARKER 0x2 79 80 #define VP9_SYNC_CODE_0 0x49 81 #define VP9_SYNC_CODE_1 0x83 82 #define VP9_SYNC_CODE_2 0x42 83 84 #define FRAME_CONTEXTS_LOG2 2 85 #define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2) 86 87 #define QINDEX_BITS 8 88 89 #define MAX_TILE_WIDTH_B64 64 90 #define MIN_TILE_WIDTH_B64 4 91 92 #define MI_SIZE_LOG2 3 93 #define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) // 64 = 2^6 94 #define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2) // mi-units per max block 95 WriteBit(BitBuffer & buf,mfxU8 bit)96 void WriteBit(BitBuffer &buf, mfxU8 bit) 97 { 98 const mfxU16 byteOffset = buf.bitOffset / CHAR_BIT; 99 const mfxU8 bitsLeftInByte = CHAR_BIT - 1 - buf.bitOffset % CHAR_BIT; 100 if (bitsLeftInByte == CHAR_BIT - 1) 101 { 102 buf.pBuffer[byteOffset] = mfxU8(bit << bitsLeftInByte); 103 } 104 else 105 { 106 buf.pBuffer[byteOffset] &= ~(1 << bitsLeftInByte); 107 buf.pBuffer[byteOffset] |= bit << bitsLeftInByte; 108 } 109 buf.bitOffset = buf.bitOffset + 1; 110 }; 111 WriteLiteral(BitBuffer & buf,mfxU64 data,mfxU64 bits)112 void WriteLiteral(BitBuffer &buf, mfxU64 data, mfxU64 bits) 113 { 114 for (mfxI64 bit = bits - 1; bit >= 0; bit--) 115 { 116 WriteBit(buf, (data >> bit) & 1); 117 } 118 } 119 WriteColorConfig(BitBuffer & buf,VP9SeqLevelParam const & seqPar)120 void WriteColorConfig(BitBuffer &buf, VP9SeqLevelParam const &seqPar) 121 { 122 if (seqPar.profile >= PROFILE_2) 123 { 124 assert(seqPar.bitDepth > BITDEPTH_8); 125 WriteBit(buf, seqPar.bitDepth == BITDEPTH_10 ? 0 : 1); 126 } 127 WriteLiteral(buf, seqPar.colorSpace, 3); 128 if (seqPar.colorSpace != SRGB) 129 { 130 WriteBit(buf, seqPar.colorRange); 131 if (seqPar.profile == PROFILE_1 || seqPar.profile == PROFILE_3) 132 { 133 assert(seqPar.subsamplingX != 1 || seqPar.subsamplingY != 1); 134 WriteBit(buf, seqPar.subsamplingX); 135 WriteBit(buf, seqPar.subsamplingY); 136 WriteBit(buf, 0); // unused 137 } 138 else 139 { 140 assert(seqPar.subsamplingX == 1 && seqPar.subsamplingY == 1); 141 } 142 } 143 else 144 { 145 assert(seqPar.profile == PROFILE_1 || seqPar.profile == PROFILE_3); 146 WriteBit(buf, 0); // unused 147 } 148 } 149 WriteFrameSize(BitBuffer & buf,VP9FrameLevelParam const & framePar)150 void WriteFrameSize(BitBuffer &buf, VP9FrameLevelParam const &framePar) 151 { 152 WriteLiteral(buf, framePar.width - 1, 16); 153 WriteLiteral(buf, framePar.height - 1, 16); 154 155 const mfxU8 renderFrameSizeDifferent = 0; // TODO: add support 156 WriteBit(buf, renderFrameSizeDifferent); 157 /*if (renderFrameSizeDifferent) 158 { 159 WriteLiteral(buf, framePar.renderWidth - 1, 16); 160 WriteLiteral(buf, framePar.renderHeight - 1, 16); 161 }*/ 162 } 163 WriteQIndexDelta(BitBuffer & buf,mfxI16 qDelta)164 void WriteQIndexDelta(BitBuffer &buf, mfxI16 qDelta) 165 { 166 if (qDelta != 0) 167 { 168 WriteBit(buf, 1); 169 WriteLiteral(buf, abs(qDelta), 4); 170 WriteBit(buf, qDelta < 0); 171 } 172 else 173 { 174 WriteBit(buf, 0); 175 } 176 } 177 WriteUncompressedHeader(BitBuffer & localBuf,Task const & task,VP9SeqLevelParam const & seqPar,BitOffsets & offsets)178 mfxU16 WriteUncompressedHeader(BitBuffer &localBuf, 179 Task const &task, 180 VP9SeqLevelParam const &seqPar, 181 BitOffsets &offsets) 182 { 183 VP9FrameLevelParam const &framePar = task.m_frameParam; 184 185 Zero(offsets); 186 187 offsets.BitOffsetUncompressedHeader = (mfxU16)localBuf.bitOffset; 188 189 WriteLiteral(localBuf, VP9_FRAME_MARKER, 2); 190 191 // profile 192 switch (seqPar.profile) 193 { 194 case PROFILE_0: WriteLiteral(localBuf, 0, 2); break; 195 case PROFILE_1: WriteLiteral(localBuf, 2, 2); break; 196 case PROFILE_2: WriteLiteral(localBuf, 1, 2); break; 197 case PROFILE_3: WriteLiteral(localBuf, 6, 3); break; 198 default: assert(0); 199 } 200 201 WriteBit(localBuf, 0); // show_existing_frame 202 WriteBit(localBuf, framePar.frameType); 203 WriteBit(localBuf, framePar.showFrame); 204 WriteBit(localBuf, framePar.errorResilentMode); 205 206 if (framePar.frameType == KEY_FRAME) // Key frame 207 { 208 // sync code 209 WriteLiteral(localBuf, VP9_SYNC_CODE_0, 8); 210 WriteLiteral(localBuf, VP9_SYNC_CODE_1, 8); 211 WriteLiteral(localBuf, VP9_SYNC_CODE_2, 8); 212 213 // color config 214 WriteColorConfig(localBuf, seqPar); 215 216 // frame, render size 217 WriteFrameSize(localBuf, framePar); 218 } 219 else // Inter frame 220 { 221 if (!framePar.showFrame) 222 { 223 WriteBit(localBuf, framePar.intraOnly); 224 } 225 226 if (!framePar.errorResilentMode) 227 { 228 WriteLiteral(localBuf, framePar.resetFrameContext, 2); 229 } 230 231 // prepare refresh frame mask 232 mfxU8 refreshFamesMask = 0; 233 for (mfxU8 i = 0; i < DPB_SIZE; i++) 234 { 235 refreshFamesMask |= (framePar.refreshRefFrames[i] << i); 236 } 237 238 if (framePar.intraOnly) 239 { 240 // sync code 241 WriteLiteral(localBuf, VP9_SYNC_CODE_0, 8); 242 WriteLiteral(localBuf, VP9_SYNC_CODE_1, 8); 243 WriteLiteral(localBuf, VP9_SYNC_CODE_2, 8); 244 245 // Note for profile 0, 420 8bpp is assumed. 246 if (seqPar.profile > PROFILE_0) 247 { 248 WriteColorConfig(localBuf, seqPar); 249 } 250 251 // refresh frame info 252 WriteLiteral(localBuf, refreshFamesMask, REF_FRAMES); 253 254 // frame, render size 255 WriteFrameSize(localBuf, framePar); 256 } 257 else 258 { 259 WriteLiteral(localBuf, refreshFamesMask, REF_FRAMES); 260 for (mfxI8 refFrame = LAST_FRAME; refFrame <= ALTREF_FRAME; refFrame ++) 261 { 262 WriteLiteral(localBuf, framePar.refList[int(refFrame)], REF_FRAMES_LOG2); 263 WriteBit(localBuf, framePar.refBiases[int(refFrame)]); 264 } 265 266 // frame size with refs 267 mfxU8 found = 1; 268 if (task.m_frameOrderInRefStructure == 0) 269 { 270 // reference structure is reset which means resolution change 271 // don't inherit resolution of reference frames 272 found = 0; 273 } 274 275 for (mfxI8 refFrame = LAST_FRAME; refFrame <= ALTREF_FRAME; refFrame ++) 276 { 277 // TODO: implement correct logic for [found] flag 278 WriteBit(localBuf, found); 279 if (found) break; 280 } 281 282 if (!found) 283 { 284 WriteLiteral(localBuf, framePar.width - 1, 16); 285 WriteLiteral(localBuf, framePar.height - 1, 16); 286 } 287 288 const mfxU8 renderFrameSizeDifferent = 0; // TODO: add support 289 WriteBit(localBuf, renderFrameSizeDifferent); 290 /*if (renderFrameSizeDifferent) 291 { 292 WriteLiteral(localBuf, framePar.renderWidth - 1, 16); 293 WriteLiteral(localBuf, framePar.renderHeight - 1, 16); 294 }*/ 295 296 WriteBit(localBuf, framePar.allowHighPrecisionMV); 297 298 // interpolation filter syntax 299 const mfxU8 filterToLiteralMap[] = { 1, 0, 2, 3 }; 300 301 assert(framePar.interpFilter <= SWITCHABLE); 302 WriteBit(localBuf, framePar.interpFilter == SWITCHABLE); 303 if (framePar.interpFilter < SWITCHABLE) 304 { 305 WriteLiteral(localBuf, filterToLiteralMap[framePar.interpFilter], 2); 306 } 307 } 308 } 309 310 if (!framePar.errorResilentMode) 311 { 312 WriteBit(localBuf, framePar.refreshFrameContext); 313 WriteBit(localBuf, seqPar.frameParallelDecoding); 314 } 315 316 WriteLiteral(localBuf, framePar.frameContextIdx, FRAME_CONTEXTS_LOG2); 317 318 offsets.BitOffsetForLFLevel = (mfxU16)localBuf.bitOffset; 319 // loop filter syntax 320 WriteLiteral(localBuf, framePar.lfLevel, 6); 321 WriteLiteral(localBuf, framePar.sharpness, 3); 322 323 WriteBit(localBuf, framePar.modeRefDeltaEnabled); 324 325 if (framePar.modeRefDeltaEnabled) 326 { 327 WriteBit(localBuf, framePar.modeRefDeltaUpdate); 328 if (framePar.modeRefDeltaUpdate) 329 { 330 offsets.BitOffsetForLFRefDelta = (mfxU16)localBuf.bitOffset; 331 for (mfxI8 i = 0; i < MAX_REF_LF_DELTAS; i++) 332 { 333 // always write deltas explicitly to allow BRC modify them 334 const mfxI8 delta = framePar.lfRefDelta[int(i)]; 335 WriteBit(localBuf, 1); 336 WriteLiteral(localBuf, abs(delta) & 0x3F, 6); 337 WriteBit(localBuf, delta < 0); 338 } 339 340 offsets.BitOffsetForLFModeDelta = (mfxU16)localBuf.bitOffset; 341 for (mfxI8 i = 0; i < MAX_MODE_LF_DELTAS; i++) 342 { 343 // always write deltas explicitly to allow BRC modify them 344 const mfxI8 delta = framePar.lfModeDelta[int(i)]; 345 WriteBit(localBuf, 1); 346 WriteLiteral(localBuf, abs(delta) & 0x3F, 6); 347 WriteBit(localBuf, delta < 0); 348 } 349 } 350 } 351 352 offsets.BitOffsetForQIndex = (mfxU16)localBuf.bitOffset; 353 354 // quantization params 355 WriteLiteral(localBuf, framePar.baseQIndex, QINDEX_BITS); 356 WriteQIndexDelta(localBuf, framePar.qIndexDeltaLumaDC); 357 WriteQIndexDelta(localBuf, framePar.qIndexDeltaChromaDC); 358 WriteQIndexDelta(localBuf, framePar.qIndexDeltaChromaAC); 359 360 offsets.BitOffsetForSegmentation = (mfxU16)localBuf.bitOffset; 361 362 //segmentation 363 bool segmentation = framePar.segmentation != NO_SEGMENTATION; 364 WriteBit(localBuf, segmentation); 365 if (segmentation) 366 { 367 // for both cases (APP_SEGMENTATION and BRC_SEGMENTATION) segmentation_params() will be completely re-written by HW accelerator 368 // so just writing dummy parameters here 369 WriteBit(localBuf, 0); 370 WriteBit(localBuf, 0); 371 } 372 373 offsets.BitSizeForSegmentation = (mfxU16)localBuf.bitOffset - offsets.BitOffsetForSegmentation; 374 375 // tile info 376 mfxU8 minLog2TileCols = 0; 377 mfxU8 maxLog2TileCols = 1; 378 mfxU8 ones; 379 380 const mfxU16 sb64Cols = (mfx::align2_value(framePar.modeInfoCols, 1 << MI_BLOCK_SIZE_LOG2)) >> MI_BLOCK_SIZE_LOG2; 381 while ((MAX_TILE_WIDTH_B64 << minLog2TileCols) < sb64Cols) 382 { 383 minLog2TileCols ++; 384 } 385 while ((sb64Cols >> maxLog2TileCols) >= MIN_TILE_WIDTH_B64) 386 { 387 maxLog2TileCols ++; 388 } 389 maxLog2TileCols--; 390 391 ones = framePar.log2TileCols - minLog2TileCols; 392 while (ones--) 393 { 394 WriteBit(localBuf, 1); 395 } 396 if (framePar.log2TileCols < maxLog2TileCols) 397 { 398 WriteBit(localBuf, 0); 399 } 400 401 WriteBit(localBuf, framePar.log2TileRows != 0); 402 if (framePar.log2TileRows != 0) 403 { 404 WriteBit(localBuf, framePar.log2TileRows != 1); 405 } 406 407 offsets.BitOffsetForFirstPartitionSize = (mfxU16)localBuf.bitOffset;; 408 409 // size of compressed header (unknown so far, will be written by driver/HuC) 410 WriteLiteral(localBuf, 0, 16); 411 412 return localBuf.bitOffset; 413 }; 414 PrepareFrameHeader(VP9MfxVideoParam const & par,mfxU8 * pBuf,mfxU32 bufferSizeBytes,Task const & task,VP9SeqLevelParam const & seqPar,BitOffsets & offsets)415 mfxU16 PrepareFrameHeader(VP9MfxVideoParam const &par, 416 mfxU8 *pBuf, 417 mfxU32 bufferSizeBytes, 418 Task const& task, 419 VP9SeqLevelParam const &seqPar, 420 BitOffsets &offsets) 421 { 422 if (bufferSizeBytes < VP9_MAX_UNCOMPRESSED_HEADER_SIZE + MAX_IVF_HEADER_SIZE) 423 { 424 return 0; // zero size of header - indication that something went wrong 425 } 426 427 BitBuffer localBuf; 428 localBuf.pBuffer = pBuf; 429 localBuf.bitOffset = 0; 430 431 mfxExtVP9Param& opt = GetExtBufferRef(par); 432 mfxU16 ivfHeaderSize = 0; 433 434 if (opt.WriteIVFHeaders != MFX_CODINGOPTION_OFF) 435 { 436 if (task.m_insertIVFSeqHeader) 437 { 438 mfxStatus sts = AddSeqHeader(task.m_frameParam.width, 439 task.m_frameParam.height, 440 par.mfx.FrameInfo.FrameRateExtN, 441 par.mfx.FrameInfo.FrameRateExtD, 442 0, 443 localBuf.pBuffer, 444 bufferSizeBytes); 445 MFX_CHECK_STS(sts); 446 447 ivfHeaderSize += IVF_SEQ_HEADER_SIZE_BYTES; 448 } 449 450 mfxStatus sts = AddPictureHeader(localBuf.pBuffer + ivfHeaderSize, bufferSizeBytes - ivfHeaderSize); 451 MFX_CHECK_STS(sts); 452 453 ivfHeaderSize += IVF_PIC_HEADER_SIZE_BYTES; 454 } 455 456 localBuf.bitOffset += ivfHeaderSize * 8; 457 458 mfxU16 totalBitsWritten = WriteUncompressedHeader(localBuf, 459 task, 460 seqPar, 461 offsets); 462 463 return (totalBitsWritten + 7) / 8; 464 } 465 } // MfxHwVP9Encode 466