1 /* ********************************************************************* *\
2 
3 Copyright (C) 2013 Intel Corporation.  All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 - Redistributions of source code must retain the above copyright notice,
8 this list of conditions and the following disclaimer.
9 - Redistributions in binary form must reproduce the above copyright notice,
10 this list of conditions and the following disclaimer in the documentation
11 and/or other materials provided with the distribution.
12 - Neither the name of Intel Corporation nor the names of its contributors
13 may be used to endorse or promote products derived from this software
14 without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL INTEL CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 \* ********************************************************************* */
28 
29 #ifndef HANDBRAKE_QSV_LIBAV_H
30 #define HANDBRAKE_QSV_LIBAV_H
31 
32 /**
33  * @file
34  * @ingroup lavc_codec_hwaccel_qsv
35  * Common header for QSV/MediaSDK acceleration
36  */
37 
38 /**
39  * @defgroup lavc_codec_hwaccel_qsv QSV/MediaSDK based Decode/Encode and VPP
40  * @ingroup lavc_codec_hwaccel
41  *
 *  Intel Quick Sync Video (QSV) can decode, preprocess and encode with HW
 *  acceleration.
44  *
45  *  Supported features:
46  *    - access:
47  *      - format AV_PIX_FMT_QSV_H264, AVCodec decoder based implementation
48  *      - name "h264_qsv", avcodec_find_decoder_by_name( "h264_qsv")
49  *    - IO Pattern:
50  *      - Opaque memory: MFX_IOPATTERN_OUT_OPAQUE_MEMORY // Video memory is
51  *                       MFX_IMPL_HARDWARE or MFX_IMPL_AUTO and runtime support,
52  *                       otherwise: System Memory
53  *      - System memory: MFX_IOPATTERN_OUT_SYSTEM_MEMORY
54  *    - Allocators:
55  *      - default allocator for System memory: MFX_MEMTYPE_SYSTEM_MEMORY
56  *    - details:
57  *      implementation as "per frame"
58  *
59  *  TODO list:
60  *    - access:
61  *      - format AV_PIX_FMT_QSV_MPEG2
62  *      - format AV_PIX_FMT_QSV_VC1
63  *      - format AV_PIX_FMT_QSV, see "details" below
64  *    - IO Pattern:
65  *      - VIDEO_MEMORY  // MFX_IOPATTERN_OUT_VIDEO_MEMORY
66  *    - Allocators:
67  *      - Video memory: MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET /
68  *                      MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET
69  *    - details:
70  *      "per slice" support: AV_PIX_FMT_QSV with AVHWAccel based implementation
71  *
 *  Note: a filled-in hb_qsv_config struct is required and has to be passed via
 *  AVCodecContext.hwaccel_context
74  *
75  *  As per frame, note AVFrame.data[2] (qsv_atom) used for frame atom id,
76  *  data/linesize should be used together with SYSTEM_MEMORY and tested
77  *
78  *  Note: Compilation would require:
79  *   - Intel MediaSDK headers, Full SDK is available from the original web site:
80  *                     http://software.intel.com/en-us/vcsource/tools/media-SDK
81  *     Will be referenced as mfx*.h (mfxdefs.h, mfxstructures.h, ... )
82  *  and
83  *  - Final application has to link against Intel MediaSDK dispatcher, available
84  *     at MediaSDK as well
85  *
86  *  Target OS: as per available dispatcher and driver support
87  *
88  *  Implementation details:
 *   The provided struct hb_qsv_context contains several struct hb_qsv_space(s)
 *   for decode, VPP and encode.
 *   An hb_qsv_space just contains the environment needed for the appropriate action.
 *   Based on this, a pipeline (see pipes) will be built to pass details such as
 *   mfxFrameSurface1* and mfxSyncPoint* from one action to the next.
94  *
95  *  Resources re-usage (hb_qsv_flush_stages):
96  *     hb_qsv_context *qsv = (hb_qsv_context *)video_codec_ctx->priv_data;
97  *     hb_qsv_list *pipe = (hb_qsv_list *)video_frame->data[2];
98  *     hb_qsv_flush_stages( qsv->pipes, &pipe );
99  *
100  *  DTS re-usage:
101  *     hb_qsv_dts_pop(qsv);
102  *
 *   For video (DX9/11) memory, it has to be Unlock'ed as well.
104  *
105  *  Implementation is thread aware and uses synchronization point(s) from MediaSDK
106  *  as per configuration.
107  *
108  *  For the details of MediaSDK usage and options available - please refer to the
109  *  available documentation at MediaSDK.
110  *
111  *  Feature set used from MSDK is defined by HB_QSV_MSDK_VERSION_MAJOR and
112  *  HB_QSV_MSDK_VERSION_MINOR
113  *
114  * @{
115  */
116 
117 #include <stdint.h>
118 #include <string.h>
119 #include "mfx/mfxvideo.h"
120 #include "libavutil/mem.h"
121 #include "libavutil/time.h"
122 #include "libavcodec/avcodec.h"
123 
/*
 * Threading header and atomic increment/decrement helpers, selected per
 * toolchain:
 *  - GCC-compatible compilers: pthreads plus the __sync builtins, which
 *    return the updated value;
 *  - otherwise, when HAVE_WINDOWS_H is set (MSVC case): the Interlocked API
 *    plus either pthreads or the w32pthreads wrapper.
 * No fallback is defined when neither branch applies.
 */
#if defined(__GNUC__)
#  include <pthread.h>
#  define ff_qsv_atomic_inc(ptr) __sync_add_and_fetch((ptr), 1)
#  define ff_qsv_atomic_dec(ptr) __sync_sub_and_fetch((ptr), 1)
#elif HAVE_WINDOWS_H
#  include <windows.h>
#  if HAVE_PTHREADS
#    include <pthread.h>
#  elif HAVE_W32THREADS
#    include "w32pthreads.h"
#  endif
#  define ff_qsv_atomic_inc(ptr) InterlockedIncrement(ptr)
#  define ff_qsv_atomic_dec(ptr) InterlockedDecrement(ptr)
#endif
138 
139 
/*
 * General helper macros.
 *
 * The statement-like macros expand to a brace block (not do/while(0)) so that
 * existing call sites written without a trailing semicolon keep compiling.
 * Consequence: do not use them as the unbraced body of an `if` that has an
 * `else`.
 */

/* Sleep for x milliseconds; the argument is scaled by 1000 for av_usleep(). */
#define hb_qsv_sleep(x) av_usleep((x)*1000)

/* Zero-fill the object VAR (pass the object itself, not a pointer to it). */
#define HB_QSV_ZERO_MEMORY(VAR)                    {memset(&(VAR), 0, sizeof(VAR));}
/* Round X up to the next multiple of 32; the result has type mfxU32. */
#define HB_QSV_ALIGN32(X)                      (((mfxU32)((X)+31)) & (~ (mfxU32)31))
/*
 * Round value up to the next multiple of 16.
 * The argument is parenthesized so that expressions with low-precedence
 * operators (e.g. `a | b`, `c ? x : y`) are rounded as a whole.
 */
#define HB_QSV_ALIGN16(value)                  ((((value) + 15) >> 4) << 4)

/*
 * Print an error code together with the enclosing function name and line to
 * stderr. Can be overridden by defining it before including this header.
 * Needs <stdio.h> to be visible where the macro is expanded.
 */
#ifndef HB_QSV_PRINT_RET_MSG
#define HB_QSV_PRINT_RET_MSG(ERR)              { fprintf(stderr, "Error code %d,\t%s\t%d\n", (ERR), __FUNCTION__, __LINE__); }
#endif

/*
 * Print "ASSERT: <y>" to stderr when x is TRUE, i.e. x is the failure
 * condition, not the condition expected to hold. Does not abort.
 * Can be overridden by defining it before including this header.
 */
#ifndef HB_QSV_DEBUG_ASSERT
#define HB_QSV_DEBUG_ASSERT(x,y)               { if ((x)) { fprintf(stderr, "\nASSERT: %s\n", (y)); } }
#endif

/* If P is strictly less than X: report ERR and return from a void function. */
#define HB_QSV_CHECK_RET(P, X, ERR)                {if ((X) > (P)) {HB_QSV_PRINT_RET_MSG(ERR); return;}}
/* If P is strictly less than X: report ERR and return ERR. */
#define HB_QSV_CHECK_RESULT(P, X, ERR)             {if ((X) > (P)) {HB_QSV_PRINT_RET_MSG(ERR); return ERR;}}
/* If pointer P is null: report ERR and return ERR. */
#define HB_QSV_CHECK_POINTER(P, ERR)               {if (!(P)) {HB_QSV_PRINT_RET_MSG(ERR); return ERR;}}
/* If status lvalue P equals X, reset P to MFX_ERR_NONE. */
#define HB_QSV_IGNORE_MFX_STS(P, X)                {if ((X) == (P)) {(P) = MFX_ERR_NONE;}}
158 
/* FourCC identifiers 'BUFF' and 'FRME'. */
#define HB_QSV_ID_BUFFER MFX_MAKEFOURCC('B','U','F','F')
#define HB_QSV_ID_FRAME  MFX_MAKEFOURCC('F','R','M','E')

/*
 * Pool sizes and defaults.
 * Expression-valued macros are fully parenthesized so they expand correctly
 * inside larger expressions (e.g. `n / HB_QSV_SYNC_NUM`).
 */
#define HB_QSV_SURFACE_NUM              80
/* Three quarters of the surface count (60). */
#define HB_QSV_SYNC_NUM                 (HB_QSV_SURFACE_NUM*3/4)
#define HB_QSV_BUF_SIZE_DEFAULT         (4096*2160*10)
#define HB_QSV_JOB_SIZE_DEFAULT         10
#define HB_QSV_SYNC_TIME_DEFAULT        10000
// Retry count, see hb_qsv_get_free_sync and hb_qsv_get_free_surface:
// 100 repeats with a 10 ms sleep each (usleep(10*1000)) amount to about 1 sec.
#define HB_QSV_REPEAT_NUM_DEFAULT      100
#define HB_QSV_ASYNC_DEPTH_DEFAULT     4

// version of MSDK/QSV API currently used
#define HB_QSV_MSDK_VERSION_MAJOR  1
#define HB_QSV_MSDK_VERSION_MINOR  3
174 
/*
 * Bit masks selecting a family of pipeline stage types.
 * Each HB_QSV_STAGE_TYPE value below falls inside exactly one of the
 * DECODE / VPP / ENCODE masks; HB_QSV_ANY_MASK covers all of them.
 */
#define HB_QSV_DECODE_MASK   0x001
#define HB_QSV_VPP_MASK      0x0F0
#define HB_QSV_ENCODE_MASK   0x100
#define HB_QSV_ANY_MASK      0xFFF
/* Historical spelling of HB_QSV_ENCODE_MASK, kept so existing users compile. */
#define av_QSV_ENCODE_MASK   HB_QSV_ENCODE_MASK

/** Kind of work a pipeline stage performs. */
typedef enum HB_QSV_STAGE_TYPE {
    HB_QSV_DECODE      = 0x001,

    // "Mandatory VPP filter" , might be with "Hint-based VPP filters"
    HB_QSV_VPP_DEFAULT = 0x010,
    // "User Modules" etc
    HB_QSV_VPP_USER    = 0x020,

    HB_QSV_ENCODE      = 0x100
} HB_QSV_STAGE_TYPE;
190 
191 typedef struct QSVMid {
192     AVBufferRef *hw_frames_ref;
193     mfxHDLPair *handle_pair;
194 
195     AVFrame *locked_frame;
196     AVFrame *hw_frame;
197     mfxFrameSurface1 surf;
198 } QSVMid;
199 
200 typedef struct QSVFrame {
201     AVFrame *frame;
202     mfxFrameSurface1 surface;
203     mfxEncodeCtrl enc_ctrl;
204     mfxExtDecodedFrameInfo dec_info;
205     mfxExtBuffer *ext_param;
206 
207     int queued;
208     int used;
209 
210     struct QSVFrame *next;
211 } QSVFrame;
212 
213 #define HB_QSV_POOL_FFMPEG_SURFACE_SIZE (64)
214 #define HB_QSV_POOL_SURFACE_SIZE (64)
215 
216 typedef struct HBQSVFramesContext {
217     AVBufferRef *hw_frames_ctx;
218     //void *logctx;
219 
220     /* The memory ids for the external frames.
221      * Refcounted, since we need one reference owned by the HBQSVFramesContext
222      * (i.e. by the encoder/decoder) and another one given to the MFX session
223      * from the frame allocator. */
224     AVBufferRef *mids_buf;
225     QSVMid *mids;
226     int  nb_mids;
227     int pool[HB_QSV_POOL_SURFACE_SIZE];
228     void *input_texture;
229 } HBQSVFramesContext;
230 
/**
 * Array of untyped item pointers with an attached mutex.
 * Accessed through the hb_qsv_list_* functions declared in this header.
 */
typedef struct hb_qsv_list {
    void                *mutex;       /* in practice a pthread_mutex_t */
    pthread_mutexattr_t  mta;         /* mutex attributes */

    void               **items;       /* item pointer storage */
    int                  items_alloc; /* NOTE(review): presumably allocated slots — confirm */

    int                  items_count; /* NOTE(review): presumably slots in use — confirm */
} hb_qsv_list;
241 
242 typedef struct hb_qsv_sync {
243     mfxSyncPoint*   p_sync;
244     int             in_use;
245 } hb_qsv_sync;
246 
247 typedef struct hb_qsv_stage {
248     HB_QSV_STAGE_TYPE type;
249     struct {
250         mfxBitstream *p_bs;
251         mfxFrameSurface1 *p_surface;
252         HBQSVFramesContext *p_frames_ctx;
253     } in;
254     struct {
255         mfxBitstream *p_bs;
256         mfxFrameSurface1 *p_surface;
257         hb_qsv_sync *sync;
258     } out;
259     hb_qsv_list *pending;
260 } hb_qsv_stage;
261 
262 typedef struct hb_qsv_task {
263     mfxBitstream *bs;
264     hb_qsv_stage *stage;
265 } hb_qsv_task;
266 
267 
268 typedef struct hb_qsv_space {
269 
270     uint8_t is_init_done;
271 
272     HB_QSV_STAGE_TYPE type;
273 
274     mfxVideoParam m_mfxVideoParam;
275 
276     mfxFrameAllocResponse response;
277     mfxFrameAllocRequest request[2];    // [0] - in, [1] - out, if needed
278 
279     mfxExtOpaqueSurfaceAlloc ext_opaque_alloc;
280     mfxExtBuffer **p_ext_params;
281     uint16_t p_ext_param_num;
282 
283     uint16_t surface_num_max_used;
284     uint16_t surface_num;
285     mfxFrameSurface1 *p_surfaces[HB_QSV_SURFACE_NUM];
286 
287     uint16_t sync_num_max_used;
288     uint16_t sync_num;
289     hb_qsv_sync *p_syncp[HB_QSV_SYNC_NUM];
290 
291     mfxBitstream bs;
292     uint8_t *p_buf;
293     size_t p_buf_max_size;
294 
295     // only for encode and tasks
296     hb_qsv_list *tasks;
297 
298     hb_qsv_list *pending;
299 
300     // storage for allocations/mfxMemId*
301     mfxMemId *mids;
302 } hb_qsv_space;
303 
304 typedef struct hb_qsv_context {
305     volatile int is_context_active;
306 
307     mfxIMPL impl;
308     mfxSession mfx_session;
309     mfxVersion ver;
310 
311     // decode
312     hb_qsv_space *dec_space;
313     // encode
314     hb_qsv_space *enc_space;
315     // vpp
316     hb_qsv_list *vpp_space;
317 
318     hb_qsv_list *pipes;
319 
320     // MediaSDK starting from API version 1.6 includes DecodeTimeStamp
321     // in addition to TimeStamp
322     // see also HB_QSV_MSDK_VERSION_MINOR , HB_QSV_MSDK_VERSION_MAJOR
323     hb_qsv_list *dts_seq;
324 
325     // practically pthread_mutex_t
326     void *qts_seq_mutex;
327 
328     int is_anex;
329 
330     void *qsv_config;
331 
332     int num_cpu_filters;
333     int la_is_enabled;
334     int qsv_filters_are_enabled;
335     int full_path_is_enabled;
336     char *vpp_scale_mode;
337     char *vpp_interpolation_method;
338     char *qsv_device;
339     int dx_index;
340     AVBufferRef *hb_hw_device_ctx;
341     HBQSVFramesContext *hb_dec_qsv_frames_ctx;
342     HBQSVFramesContext *hb_vpp_qsv_frames_ctx;
343 
344     mfxHDL device_manager_handle;
345     mfxHandleType device_manager_handle_type;
346     void *device_context;
347 } hb_qsv_context;
348 
/** Part selector accepted by hb_qsv_get_free_surface(). */
typedef enum {
    QSV_PART_ANY   = 0,
    QSV_PART_LOWER = 1,
    QSV_PART_UPPER = 2
} hb_qsv_split;
354 
/** Single decode timestamp entry. */
typedef struct {
    int64_t dts;    /* timestamp value */
} hb_qsv_dts;
358 
359 typedef struct hb_qsv_alloc_frame {
360     mfxU32 id;
361     mfxFrameInfo info;
362 } hb_qsv_alloc_frame;
363 
364 typedef struct hb_qsv_alloc_buffer {
365     mfxU32 id;
366     mfxU32 nbytes;
367     mfxU16 type;
368 } hb_qsv_alloc_buffer;
369 
370 typedef struct hb_qsv_allocators_space {
371     hb_qsv_space *space;
372     mfxFrameAllocator frame_alloc;
373     mfxBufferAllocator buffer_alloc;
374 } hb_qsv_allocators_space;
375 
376 typedef struct hb_qsv_config {
377     /**
378      * Set asynch depth of processing with QSV
379      * Format: 0 and more
380      *
381      * - encoding: Set by user.
382      * - decoding: Set by user.
383      */
384     int async_depth;
385 
386     /**
387      * Range of numbers that indicate trade-offs between quality and speed.
388      * Format: from 1/MFX_TARGETUSAGE_BEST_QUALITY to 7/MFX_TARGETUSAGE_BEST_SPEED inclusive
389      *
390      * - encoding: Set by user.
391      * - decoding: unused
392      */
393     int target_usage;
394 
395     /**
396      * Number of reference frames; if NumRefFrame = 0, this parameter is not specified.
397      * Format: 0 and more
398      *
399      * - encoding: Set by user.
400      * - decoding: unused
401      */
402     int num_ref_frame;
403 
404     /**
405      * Distance between I- or P- key frames; if it is zero, the GOP structure is unspecified.
406      * Note: If GopRefDist = 1, there are no B-frames used.
407      *
408      * - encoding: Set by user.
409      * - decoding: unused
410      */
411      int gop_ref_dist;
412 
413     /**
414      * Number of pictures within the current GOP (Group of Pictures); if GopPicSize=0,
415      * then the GOP size is unspecified. If GopPicSize=1, only I-frames are used.
416      *
417      * - encoding: Set by user.
418      * - decoding: unused
419      */
420      int gop_pic_size;
421 
422     /**
423      * Set type of surfaces used with QSV
424      * Format: "IOPattern enum" of Media SDK
425      *
426      * - encoding: Set by user.
427      * - decoding: Set by user.
428      */
429     int io_pattern;
430 
431     /**
432      * Set amount of additional surfaces might be needed
433      * Format: amount of additional buffers(surfaces+syncs)
434      * to allocate in advance
435      *
436      * - encoding: Set by user.
437      * - decoding: Set by user.
438      */
439     int additional_buffers;
440 
441     /**
442      * If pipeline should be sync.
443      * Format: wait time in milliseconds,
444      *         HB_QSV_SYNC_TIME_DEFAULT/10000 might be a good value
445      *
446      * - encoding: Set by user.
447      * - decoding: Set by user.
448      */
449     int sync_need;
450 
451     /**
452      * Type of implementation needed
453      *
454      * - encoding: Set by user.
455      * - decoding: Set by user.
456      */
457     int impl_requested;
458 
459     /**
460      * if QSV usage is multithreaded.
461      * Format: Yes/No, 1/0
462      *
463      * - encoding: Set by user.
464      * - decoding: Set by user.
465      */
466     int usage_threaded;
467 
468     /**
469      * if QSV use an external allocation (valid per session/mfxSession)
470      * Format: pointer to allocators, if default: 0
471      *
472      * note that:
473      * System Memory:   can be used without provided and external allocator,
474      *  meaning MediaSDK will use an internal one
475      * Video Memory:    in this case - we must provide an external allocator
476      * Also, Media SDK session doesn't require external allocator if the application
477      *  uses opaque memory
478      *
479      * Calls SetFrameAllocator/SetBufferAllocator
480      * (MFXVideoCORE_SetFrameAllocator/MFXVideoCORE_SetBufferAllocator)
481      * are to pass allocators to Media SDK
482      *
483      * - encoding: Set by user.
484      * - decoding: Set by user.
485      */
486     hb_qsv_allocators_space *allocators;
487 
488 } hb_qsv_config;
489 
/* ANEX_* state values: unknown, prefix present, no prefix. */
#define ANEX_UNKNOWN    0
#define ANEX_PREFIX     1
#define ANEX_NO_PREFIX  2

/* Four-byte prefix code 00 00 00 01. */
static const uint8_t ff_prefix_code[] = {
    0x00, 0x00, 0x00, 0x01
};
495 
496 int hb_qsv_get_free_sync(hb_qsv_space *, hb_qsv_context *);
497 int hb_qsv_get_free_surface(hb_qsv_space *, hb_qsv_context *, mfxFrameInfo *,
498                      hb_qsv_split);
499 int hb_qsv_get_free_encode_task(hb_qsv_list *);
500 
501 int av_is_qsv_available(mfxIMPL, mfxVersion *);
502 int hb_qsv_wait_on_sync(hb_qsv_context *, hb_qsv_stage *);
503 
504 void hb_qsv_add_context_usage(hb_qsv_context *, int);
505 
506 void hb_qsv_pipe_list_create(hb_qsv_list **, int);
507 void hb_qsv_pipe_list_clean(hb_qsv_list **);
508 
509 void hb_qsv_add_stagee(hb_qsv_list **, hb_qsv_stage *, int);
510 hb_qsv_stage *hb_qsv_get_last_stage(hb_qsv_list *);
511 hb_qsv_list *hb_qsv_pipe_by_stage(hb_qsv_list *, hb_qsv_stage *);
512 void hb_qsv_flush_stages(hb_qsv_list *, hb_qsv_list **);
513 
514 void hb_qsv_dts_ordered_insert(hb_qsv_context *, int, int, int64_t, int);
515 void hb_qsv_dts_pop(hb_qsv_context *);
516 
517 hb_qsv_stage *hb_qsv_stage_init(void);
518 void hb_qsv_stage_clean(hb_qsv_stage **);
519 int hb_qsv_context_clean(hb_qsv_context *, int);
520 
521 int ff_qsv_is_sync_in_pipe(mfxSyncPoint *, hb_qsv_context *);
522 int ff_qsv_is_surface_in_pipe(mfxFrameSurface1 *, hb_qsv_context *);
523 
524 hb_qsv_list *hb_qsv_list_init(int);
525 int hb_qsv_list_lock(hb_qsv_list *);
526 int hb_qsv_list_unlock(hb_qsv_list *);
527 int hb_qsv_list_add(hb_qsv_list *, void *);
528 void hb_qsv_list_rem(hb_qsv_list *, void *);
529 void hb_qsv_list_insert(hb_qsv_list *, int, void *);
530 void hb_qsv_list_close(hb_qsv_list **);
531 
532 int hb_qsv_list_count(hb_qsv_list *);
533 void *hb_qsv_list_item(hb_qsv_list *, int);
534 
535 /* @} */
536 
537 #endif // HANDBRAKE_QSV_LIBAV_H
538