1# This file is part of Xpra.
2# Copyright (C) 2013-2021 Antoine Martin <antoine@xpra.org>
3# Xpra is released under the terms of the GNU GPL v2, or, at your option, any
4# later version. See the file COPYING for details.
5
6import binascii
7import os
8import numpy
9import platform
10from collections import deque
11from time import monotonic
12import ctypes
13from ctypes import cdll, POINTER
14from threading import Lock
15from pycuda import driver
16
17from xpra.os_util import WIN32, LINUX, strtobytes
18from xpra.make_thread import start_thread
19from xpra.util import AtomicInteger, engs, csv, pver, envint, envbool, first_time, typedict
20from xpra.codecs.cuda_common.cuda_context import (
21    init_all_devices, get_devices, get_device_name,
22    get_cuda_info, get_pycuda_info, reset_state,
23    get_CUDA_function, record_device_failure, record_device_success, CUDA_ERRORS_INFO,
24    )
25from xpra.codecs.codec_constants import video_spec, TransientCodecException
26from xpra.codecs.image_wrapper import ImageWrapper
27from xpra.codecs.nv_util import (
28    get_nvidia_module_version, get_license_keys,
29    validate_driver_yuv444lossless, get_cards,
30    )
31from xpra.log import Logger
32
33log = Logger("encoder", "nvenc")
34
35from libc.stdint cimport uintptr_t, uint8_t, uint16_t, uint32_t, int32_t, uint64_t  #pylint: disable=syntax-error
36from libc.stdlib cimport free, malloc
37from libc.string cimport memset, memcpy
38
#list of encoding names, read from the comma separated XPRA_NVENC_ENCODINGS
#environment variable (entries are used verbatim, no whitespace stripping):
TEST_ENCODINGS = os.environ.get("XPRA_NVENC_ENCODINGS", "h264,h265").split(",")
#every entry must be one of the known encodings.
#(this check used to wrap a generator expression in the assert:
# a generator object is always truthy, so invalid values were never rejected)
assert all(x in ("h264", "h265") for x in TEST_ENCODINGS), "invalid list of encodings: %s" % (TEST_ENCODINGS,)
assert len(TEST_ENCODINGS)>0, "no encodings enabled!"
#preset name read from XPRA_NVENC_PRESET, the empty string when unset:
DESIRED_PRESET = os.environ.get("XPRA_NVENC_PRESET", "")
#NVENC requires compute capability value 0x30 or above:
cdef int MIN_COMPUTE = 0x30

#Module-level tunables: each one is initialised when the module is loaded,
#from the XPRA_NVENC_* environment variable named in the call to
#envbool() / envint(), the second argument being the default passed in.
#The results are stored as C ints.
cdef int SUPPORT_30BPP = envbool("XPRA_NVENC_SUPPORT_30BPP", True)
cdef int YUV444_THRESHOLD = envint("XPRA_NVENC_YUV444_THRESHOLD", 85)
cdef int LOSSLESS_THRESHOLD = envint("XPRA_NVENC_LOSSLESS_THRESHOLD", 100)
#the default handed to envbool() below is int(not WIN32):
cdef int NATIVE_RGB = int(not WIN32)
NATIVE_RGB = envbool("XPRA_NVENC_NATIVE_RGB", NATIVE_RGB)
cdef int LOSSLESS_ENABLED = envbool("XPRA_NVENC_LOSSLESS", True)
cdef int YUV420_ENABLED = envbool("XPRA_NVENC_YUV420P", True)
cdef int YUV444_ENABLED = envbool("XPRA_NVENC_YUV444P", True)
cdef int DEBUG_API = envbool("XPRA_NVENC_DEBUG_API", False)
cdef int GPU_MEMCOPY = envbool("XPRA_NVENC_GPU_MEMCOPY", True)
cdef int CONTEXT_LIMIT = envint("XPRA_NVENC_CONTEXT_LIMIT", 32)
cdef int THREADED_INIT = envbool("XPRA_NVENC_THREADED_INIT", True)
cdef int SLOW_DOWN_INIT = envint("XPRA_NVENC_SLOW_DOWN_INIT", 0)
59
60
#module-wide threading.Lock
#NOTE(review): presumably serialises device selection / setup,
#the code taking this lock is not in this part of the file - confirm
device_lock = Lock()


cdef int QP_MAX_VALUE = 51   #newer versions of ffmpeg can decode up to 63

#per-encoding flags, both start out as False
#NOTE(review): presumably updated once the hardware has been probed - confirm against the code writing to it
YUV444_CODEC_SUPPORT = {
    "h264"  : False,
    "h265"  : False,
    }
#starts out empty
#NOTE(review): presumably keyed by encoding like YUV444_CODEC_SUPPORT - confirm
LOSSLESS_CODEC_SUPPORT = {}

#so we can warn just once per unknown preset:
UNKNOWN_PRESETS = []
74
75
cdef inline int MIN(int a, int b):
    #smaller of the two C ints (`a` when they are equal)
    return a if a<=b else b
cdef inline int MAX(int a, int b):
    #larger of the two C ints (`a` when they are equal)
    return a if a>=b else b
84
85
#ctypes aliases: CUresult is a C int, CUcontext a void pointer
#NOTE(review): presumably mirroring the CUDA driver API types of the same name - confirm against cuda.h
CUresult = ctypes.c_int
CUcontext = ctypes.c_void_p
88
89
90cdef extern from "nvEncodeAPI.h":
    #scalar and pointer typedefs from nvEncodeAPI.h:
    #NVENCSTATUS is declared as a plain int, the three *_PTR types as void pointers
    ctypedef int NVENCSTATUS
    ctypedef void* NV_ENC_INPUT_PTR
    ctypedef void* NV_ENC_OUTPUT_PTR
    ctypedef void* NV_ENC_REGISTERED_PTR
95
96    #not available with driver version 367.35
97    #NVENCSTATUS NvEncodeAPIGetMaxSupportedVersion(uint32_t* version)
98
    #enumerator names of NV_ENC_CAPS: being inside a "cdef extern" block,
    #only the names are declared here, the values come from nvEncodeAPI.h
    #NOTE(review): presumably the values stored in NV_ENC_CAPS_PARAM.capsToQuery - confirm
    ctypedef enum NV_ENC_CAPS:
        NV_ENC_CAPS_NUM_MAX_BFRAMES
        NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES
        NV_ENC_CAPS_SUPPORT_FIELD_ENCODING
        NV_ENC_CAPS_SUPPORT_MONOCHROME
        NV_ENC_CAPS_SUPPORT_FMO
        NV_ENC_CAPS_SUPPORT_QPELMV
        NV_ENC_CAPS_SUPPORT_BDIRECT_MODE
        NV_ENC_CAPS_SUPPORT_CABAC
        NV_ENC_CAPS_SUPPORT_ADAPTIVE_TRANSFORM
        NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS
        NV_ENC_CAPS_SUPPORT_HIERARCHICAL_PFRAMES
        NV_ENC_CAPS_SUPPORT_HIERARCHICAL_BFRAMES
        NV_ENC_CAPS_LEVEL_MAX
        NV_ENC_CAPS_LEVEL_MIN
        NV_ENC_CAPS_SEPARATE_COLOUR_PLANE
        NV_ENC_CAPS_WIDTH_MAX
        NV_ENC_CAPS_HEIGHT_MAX
        NV_ENC_CAPS_SUPPORT_TEMPORAL_SVC
        NV_ENC_CAPS_SUPPORT_DYN_RES_CHANGE
        NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE
        NV_ENC_CAPS_SUPPORT_DYN_FORCE_CONSTQP
        NV_ENC_CAPS_SUPPORT_DYN_RCMODE_CHANGE
        NV_ENC_CAPS_SUPPORT_SUBFRAME_READBACK
        NV_ENC_CAPS_SUPPORT_CONSTRAINED_ENCODING
        NV_ENC_CAPS_SUPPORT_INTRA_REFRESH
        NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE
        NV_ENC_CAPS_SUPPORT_DYNAMIC_SLICE_MODE
        NV_ENC_CAPS_SUPPORT_REF_PIC_INVALIDATION
        NV_ENC_CAPS_PREPROC_SUPPORT
        NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT
        NV_ENC_CAPS_MB_NUM_MAX
        NV_ENC_CAPS_EXPOSED_COUNT
        NV_ENC_CAPS_SUPPORT_YUV444_ENCODE
        NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE
        NV_ENC_CAPS_SUPPORT_SAO
        NV_ENC_CAPS_SUPPORT_MEONLY_MODE
        NV_ENC_CAPS_SUPPORT_LOOKAHEAD
        NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ
        NV_ENC_CAPS_SUPPORT_10BIT_ENCODE
        NV_ENC_CAPS_NUM_MAX_LTR_FRAMES
        NV_ENC_CAPS_SUPPORT_WEIGHTED_PREDICTION
        NV_ENC_CAPS_DYNAMIC_QUERY_ENCODER_CAPACITY
        NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE
        NV_ENC_CAPS_SUPPORT_EMPHASIS_LEVEL_MAP
        #added in 9.1 (left undeclared here):
        #NV_ENC_CAPS_WIDTH_MIN
        #NV_ENC_CAPS_HEIGHT_MIN
        #NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES
148
149
    #device type names from nvEncodeAPI.h,
    #this is the type of NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS.deviceType
    ctypedef enum NV_ENC_DEVICE_TYPE:
        NV_ENC_DEVICE_TYPE_DIRECTX
        NV_ENC_DEVICE_TYPE_CUDA
        NV_ENC_DEVICE_TYPE_OPENGL
154
    #input resource type names from nvEncodeAPI.h,
    #this is the type of NV_ENC_REGISTER_RESOURCE.resourceType
    ctypedef enum NV_ENC_INPUT_RESOURCE_TYPE:
        NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX
        NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR
        NV_ENC_INPUT_RESOURCE_TYPE_CUDAARRAY
        NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX
160
    #type of the "memoryHeap" field of NV_ENC_CREATE_INPUT_BUFFER and
    #NV_ENC_CREATE_BITSTREAM_BUFFER (both fields are marked as deprecated there)
    ctypedef enum NV_ENC_MEMORY_HEAP:
        NV_ENC_MEMORY_HEAP_AUTOSELECT
        NV_ENC_MEMORY_HEAP_VID
        NV_ENC_MEMORY_HEAP_SYSMEM_CACHED
        NV_ENC_MEMORY_HEAP_SYSMEM_UNCACHED
166
    #type of NV_ENC_CONFIG_H264.entropyCodingMode
    ctypedef enum NV_ENC_H264_ENTROPY_CODING_MODE:
        NV_ENC_H264_ENTROPY_CODING_MODE_AUTOSELECT
        NV_ENC_H264_ENTROPY_CODING_MODE_CABAC
        NV_ENC_H264_ENTROPY_CODING_MODE_CAVLC
171
    #type of NV_ENC_CONFIG_H264.stereoMode
    ctypedef enum NV_ENC_STEREO_PACKING_MODE:
        NV_ENC_STEREO_PACKING_MODE_NONE
        NV_ENC_STEREO_PACKING_MODE_CHECKERBOARD
        NV_ENC_STEREO_PACKING_MODE_COLINTERLEAVE
        NV_ENC_STEREO_PACKING_MODE_ROWINTERLEAVE
        NV_ENC_STEREO_PACKING_MODE_SIDEBYSIDE
        NV_ENC_STEREO_PACKING_MODE_TOPBOTTOM
        NV_ENC_STEREO_PACKING_MODE_FRAMESEQ
180
    #type of NV_ENC_CONFIG_H264.fmoMode
    ctypedef enum NV_ENC_H264_FMO_MODE:
        NV_ENC_H264_FMO_AUTOSELECT
        NV_ENC_H264_FMO_ENABLE
        NV_ENC_H264_FMO_DISABLE
185
    #type of NV_ENC_CONFIG_H264.bdirectMode
    ctypedef enum NV_ENC_H264_BDIRECT_MODE:
        NV_ENC_H264_BDIRECT_MODE_AUTOSELECT
        NV_ENC_H264_BDIRECT_MODE_DISABLE
        NV_ENC_H264_BDIRECT_MODE_TEMPORAL
        NV_ENC_H264_BDIRECT_MODE_SPATIAL
191
    #type of NV_ENC_CONFIG_H264.adaptiveTransformMode
    ctypedef enum NV_ENC_H264_ADAPTIVE_TRANSFORM_MODE:
        NV_ENC_H264_ADAPTIVE_TRANSFORM_AUTOSELECT
        NV_ENC_H264_ADAPTIVE_TRANSFORM_DISABLE
        NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE
196
    #frame / field mode names from nvEncodeAPI.h
    #NOTE(review): not referenced by the declarations in this part of the file
    ctypedef enum NV_ENC_PARAMS_FRAME_FIELD_MODE:
        NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME
        NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD
        NV_ENC_PARAMS_FRAME_FIELD_MODE_MBAFF
201
    #buffer format names from nvEncodeAPI.h, this is the type of:
    #NV_ENC_MAP_INPUT_RESOURCE.mappedBufferFmt, NV_ENC_REGISTER_RESOURCE.bufferFormat
    #and NV_ENC_CREATE_INPUT_BUFFER.bufferFmt
    ctypedef enum NV_ENC_BUFFER_FORMAT:
        NV_ENC_BUFFER_FORMAT_UNDEFINED
        NV_ENC_BUFFER_FORMAT_NV12
        NV_ENC_BUFFER_FORMAT_YV12
        NV_ENC_BUFFER_FORMAT_IYUV
        NV_ENC_BUFFER_FORMAT_YUV444
        NV_ENC_BUFFER_FORMAT_YUV420_10BIT
        NV_ENC_BUFFER_FORMAT_YUV444_10BIT
        NV_ENC_BUFFER_FORMAT_ARGB
        NV_ENC_BUFFER_FORMAT_ARGB10
        NV_ENC_BUFFER_FORMAT_AYUV
        NV_ENC_BUFFER_FORMAT_ABGR
        NV_ENC_BUFFER_FORMAT_ABGR10
215
    #picture flag names from nvEncodeAPI.h
    #NOTE(review): presumably bit flags that can be OR-ed together - confirm against the header
    ctypedef enum NV_ENC_PIC_FLAGS:
        NV_ENC_PIC_FLAG_FORCEINTRA
        NV_ENC_PIC_FLAG_FORCEIDR
        NV_ENC_PIC_FLAG_OUTPUT_SPSPPS
        NV_ENC_PIC_FLAG_EOS
221
    #type of NV_ENC_LOCK_BITSTREAM.pictureStruct
    ctypedef enum NV_ENC_PIC_STRUCT:
        NV_ENC_PIC_STRUCT_FRAME
        NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM
        NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP
226
    #type of NV_ENC_LOCK_BITSTREAM.pictureType
    #(NV_ENC_STAT.picType is a uint32_t documented as holding one of these)
    ctypedef enum NV_ENC_PIC_TYPE:
        NV_ENC_PIC_TYPE_P
        NV_ENC_PIC_TYPE_B
        NV_ENC_PIC_TYPE_I
        NV_ENC_PIC_TYPE_IDR
        NV_ENC_PIC_TYPE_BI
        NV_ENC_PIC_TYPE_SKIPPED
        NV_ENC_PIC_TYPE_INTRA_REFRESH
        NV_ENC_PIC_TYPE_UNKNOWN
236
    #slice type names from nvEncodeAPI.h
    #NOTE(review): not referenced by the declarations in this part of the file
    ctypedef enum NV_ENC_SLICE_TYPE:
        NV_ENC_SLICE_TYPE_DEFAULT
        NV_ENC_SLICE_TYPE_I
        NV_ENC_SLICE_TYPE_UNKNOWN
241
    #motion vector precision names from nvEncodeAPI.h
    #NOTE(review): not referenced by the declarations in this part of the file
    ctypedef enum  NV_ENC_MV_PRECISION:
        NV_ENC_MV_PRECISION_FULL_PEL
        NV_ENC_MV_PRECISION_HALF_PEL
        NV_ENC_MV_PRECISION_QUARTER_PEL
246
    #encoding level names from nvEncodeAPI.h, for both H264 and HEVC
    #(NV_ENC_CONFIG_H264.level is a uint32_t documented as taking NV_ENC_LEVEL_AUTOSELECT)
    ctypedef enum NV_ENC_LEVEL:
        NV_ENC_LEVEL_AUTOSELECT
        NV_ENC_LEVEL_H264_1
        NV_ENC_LEVEL_H264_1b
        NV_ENC_LEVEL_H264_11
        NV_ENC_LEVEL_H264_12
        NV_ENC_LEVEL_H264_13
        NV_ENC_LEVEL_H264_2
        NV_ENC_LEVEL_H264_21
        NV_ENC_LEVEL_H264_22
        NV_ENC_LEVEL_H264_3
        NV_ENC_LEVEL_H264_31
        NV_ENC_LEVEL_H264_32
        NV_ENC_LEVEL_H264_4
        NV_ENC_LEVEL_H264_41
        NV_ENC_LEVEL_H264_42
        NV_ENC_LEVEL_H264_5
        NV_ENC_LEVEL_H264_51
        NV_ENC_LEVEL_H264_52
        NV_ENC_LEVEL_HEVC_1
        NV_ENC_LEVEL_HEVC_2
        NV_ENC_LEVEL_HEVC_21
        NV_ENC_LEVEL_HEVC_3
        NV_ENC_LEVEL_HEVC_31
        NV_ENC_LEVEL_HEVC_4
        NV_ENC_LEVEL_HEVC_41
        NV_ENC_LEVEL_HEVC_5
        NV_ENC_LEVEL_HEVC_51
        NV_ENC_LEVEL_HEVC_52
        NV_ENC_LEVEL_HEVC_6
        NV_ENC_LEVEL_HEVC_61
        NV_ENC_LEVEL_HEVC_62
279
    #rate control mode names from nvEncodeAPI.h
    #NOTE(review): a header only defines the names of its own SDK version,
    #so the newer and the deprecated names below may not all be available at once - confirm
    ctypedef enum NV_ENC_PARAMS_RC_MODE:
        NV_ENC_PARAMS_RC_CONSTQP            #Constant QP mode
        NV_ENC_PARAMS_RC_VBR                #Variable bitrate mode
        NV_ENC_PARAMS_RC_CBR                #Constant bitrate mode
        NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ    #low-delay CBR, high quality
        NV_ENC_PARAMS_RC_CBR_HQ             #CBR, high quality (slower)
        NV_ENC_PARAMS_RC_VBR_HQ
        #SDK 7 names (deprecated):
        NV_ENC_PARAMS_RC_VBR_MINQP          #Variable bitrate mode with MinQP
        NV_ENC_PARAMS_RC_2_PASS_QUALITY     #Multi pass encoding optimized for image quality and works only with low latency mode
        NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP   #Multi pass encoding optimized for maintaining frame size and works only with low latency mode
        NV_ENC_PARAMS_RC_2_PASS_VBR         #Multi pass VBR
        NV_ENC_PARAMS_RC_CBR2               #(deprecated)
293
    #HEVC CU size names from nvEncodeAPI.h
    #NOTE(review): not referenced by the declarations in this part of the file
    ctypedef enum NV_ENC_HEVC_CUSIZE:
        NV_ENC_HEVC_CUSIZE_AUTOSELECT
        NV_ENC_HEVC_CUSIZE_8x8
        NV_ENC_HEVC_CUSIZE_16x16
        NV_ENC_HEVC_CUSIZE_32x32
        NV_ENC_HEVC_CUSIZE_64x64
300
301
    #field declarations for the NV_ENC_LOCK_BITSTREAM struct of nvEncodeAPI.h,
    #the [in] / [out] markers are reproduced from the header's field documentation
    ctypedef struct NV_ENC_LOCK_BITSTREAM:
        uint32_t    version             #[in]: Struct version. Must be set to ::NV_ENC_LOCK_BITSTREAM_VER.
        uint32_t    doNotWait           #[in]: If this flag is set, the NvEncodeAPI interface will return buffer pointer even if operation is not completed. If not set, the call will block until operation completes.
        uint32_t    ltrFrame            #[out]: Flag indicating this frame is marked as LTR frame
        uint32_t    reservedBitFields   #[in]: Reserved bit fields and must be set to 0
        void*       outputBitstream     #[in]: Pointer to the bitstream buffer being locked.
        uint32_t*   sliceOffsets        #[in,out]: Array which receives the slice offsets. Currently used only when NV_ENC_CONFIG_H264::sliceMode == 3. Array size must be equal to NV_ENC_CONFIG_H264::sliceModeData.
        uint32_t    frameIdx            #[out]: Frame no. for which the bitstream is being retrieved.
        uint32_t    hwEncodeStatus      #[out]: The NvEncodeAPI interface status for the locked picture.
        uint32_t    numSlices           #[out]: Number of slices in the encoded picture. Will be reported only if NV_ENC_INITIALIZE_PARAMS::reportSliceOffsets set to 1.
        uint32_t    bitstreamSizeInBytes#[out]: Actual number of bytes generated and copied to the memory pointed by bitstreamBufferPtr.
        uint64_t    outputTimeStamp     #[out]: Presentation timestamp associated with the encoded output.
        uint64_t    outputDuration      #[out]: Presentation duration associated with the encoded output.
        void*       bitstreamBufferPtr  #[out]: Pointer to the generated output bitstream. Client should allocate sufficiently large buffer to hold the encoded output. Client is responsible for managing this memory.
        NV_ENC_PIC_TYPE     pictureType #[out]: Picture type of the encoded picture.
        NV_ENC_PIC_STRUCT   pictureStruct   #[out]: Structure of the generated output picture.
        uint32_t    frameAvgQP          #[out]: Average QP of the frame.
        uint32_t    frameSatd           #[out]: Total SATD cost for whole frame.
        uint32_t    ltrFrameIdx         #[out]: Frame index associated with this LTR frame.
        uint32_t    ltrFrameBitmap      #[out]: Bitmap of LTR frames indices which were used for encoding this frame. Value of 0 if no LTR frames were used.
        uint32_t    reserved[236]       #[in]: Reserved and must be set to 0
        void*       reserved2[64]       #[in]: Reserved and must be set to NULL
324
    #field declarations for the NV_ENC_LOCK_INPUT_BUFFER struct of nvEncodeAPI.h
    ctypedef struct NV_ENC_LOCK_INPUT_BUFFER:
        uint32_t    version             #[in]:  Struct version. Must be set to ::NV_ENC_LOCK_INPUT_BUFFER_VER.
        uint32_t    doNotWait           #[in]:  Set to 1 to make ::NvEncLockInputBuffer() a non-blocking call. If the encoding is not completed, driver will return ::NV_ENC_ERR_ENCODER_BUSY error code.
        uint32_t    reservedBitFields   #[in]:  Reserved bitfields and must be set to 0
        NV_ENC_INPUT_PTR inputBuffer    #[in]:  Pointer to the input buffer to be locked, client should pass the pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource API.
        void*       bufferDataPtr       #[out]: Pointer to the locked input buffer data. Client can only access input buffer using the \p bufferDataPtr.
        uint32_t    pitch               #[out]: Pitch of the locked input buffer.
        uint32_t    reserved1[251]      #[in]:  Reserved and must be set to 0
        void*       reserved2[64]       #[in]:  Reserved and must be set to NULL
334
    #field declarations for the NV_ENC_STAT struct of nvEncodeAPI.h
    ctypedef struct NV_ENC_STAT:
        uint32_t    version             #[in]:  Struct version. Must be set to ::NV_ENC_STAT_VER.
        uint32_t    reserved            #[in]:  Reserved and must be set to 0
        NV_ENC_OUTPUT_PTR outputBitStream   #[out]: Specifies the pointer to output bitstream.
        uint32_t    bitStreamSize       #[out]: Size of generated bitstream in bytes.
        uint32_t    picType             #[out]: Picture type of encoded picture. See ::NV_ENC_PIC_TYPE.
        uint32_t    lastValidByteOffset #[out]: Offset of last valid bytes of completed bitstream
        uint32_t    sliceOffsets[16]    #[out]: Offsets of each slice
        uint32_t    picIdx              #[out]: Picture number
        uint32_t    reserved1[233]      #[in]:  Reserved and must be set to 0
        void*       reserved2[64]       #[in]:  Reserved and must be set to NULL
346
    #declared without any fields: the type name is usable from this module
    #but none of the members defined in nvEncodeAPI.h are exposed
    ctypedef struct NV_ENC_SEQUENCE_PARAM_PAYLOAD:
        pass
    #declared without any fields: the type name is usable from this module
    #but none of the members defined in nvEncodeAPI.h are exposed
    ctypedef struct NV_ENC_EVENT_PARAMS:
        pass
    #field declarations for the NV_ENC_MAP_INPUT_RESOURCE struct of nvEncodeAPI.h
    ctypedef struct NV_ENC_MAP_INPUT_RESOURCE:
        uint32_t    version             #[in]:  Struct version. Must be set to ::NV_ENC_MAP_INPUT_RESOURCE_VER.
        uint32_t    subResourceIndex    #[in]:  Deprecated. Do not use.
        void*       inputResource       #[in]:  Deprecated. Do not use.
        NV_ENC_REGISTERED_PTR registeredResource    #[in]:  The Registered resource handle obtained by calling NvEncRegisterInputResource.
        NV_ENC_INPUT_PTR mappedResource #[out]: Mapped pointer corresponding to the registeredResource. This pointer must be used in NV_ENC_PIC_PARAMS::inputBuffer parameter in ::NvEncEncodePicture() API.
        NV_ENC_BUFFER_FORMAT mappedBufferFmt    #[out]: Buffer format of the outputResource. This buffer format must be used in NV_ENC_PIC_PARAMS::bufferFmt if client using the above mapped resource pointer.
        uint32_t    reserved1[251]      #[in]:  Reserved and must be set to 0.
        void*       reserved2[63]       #[in]:  Reserved and must be set to NULL
    #field declarations for the NV_ENC_REGISTER_RESOURCE struct of nvEncodeAPI.h
    ctypedef struct NV_ENC_REGISTER_RESOURCE:
        uint32_t    version             #[in]: Struct version. Must be set to ::NV_ENC_REGISTER_RESOURCE_VER.
        NV_ENC_INPUT_RESOURCE_TYPE  resourceType    #[in]: Specifies the type of resource to be registered. Supported values are ::NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX, ::NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR.
        uint32_t    width               #[in]: Input buffer Width.
        uint32_t    height              #[in]: Input buffer Height.
        uint32_t    pitch               #[in]: Input buffer Pitch.
        uint32_t    subResourceIndex    #[in]: Subresource Index of the DirectX resource to be registered. Should be set to 0 for other interfaces.
        void*       resourceToRegister  #[in]: Handle to the resource that is being registered.
        NV_ENC_REGISTERED_PTR   registeredResource  #[out]: Registered resource handle. This should be used in future interactions with the Nvidia Video Encoder Interface.
        NV_ENC_BUFFER_FORMAT    bufferFormat        #[in]: Buffer format of resource to be registered.
        uint32_t    reserved1[248]      #[in]: Reserved and must be set to 0.
        void*       reserved2[62]       #[in]: Reserved and must be set to NULL.
372
    #16 byte identifier: one 32-bit field, two 16-bit fields and 8 single bytes.
    #This is the type of the codec, profile and preset constants declared from nvEncodeAPI.h
    ctypedef struct GUID:
        uint32_t Data1
        uint16_t Data2
        uint16_t Data3
        uint8_t  Data4[8]
378
    #The GUID constants below are only declared here:
    #their values are the ones defined by nvEncodeAPI.h

    #Encode Codec GUIDS supported by the NvEncodeAPI interface.
    GUID NV_ENC_CODEC_H264_GUID
    GUID NV_ENC_CODEC_HEVC_GUID

    #Profiles:
    GUID NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID
    GUID NV_ENC_H264_PROFILE_BASELINE_GUID
    GUID NV_ENC_H264_PROFILE_MAIN_GUID
    GUID NV_ENC_H264_PROFILE_HIGH_GUID
    GUID NV_ENC_H264_PROFILE_HIGH_444_GUID
    GUID NV_ENC_H264_PROFILE_STEREO_GUID
    #GUID NV_ENC_H264_PROFILE_SVC_TEMPORAL_SCALABILTY
    GUID NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID
    GUID NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID

    GUID NV_ENC_HEVC_PROFILE_MAIN_GUID
    GUID NV_ENC_HEVC_PROFILE_MAIN10_GUID
    GUID NV_ENC_HEVC_PROFILE_FREXT_GUID

    #Presets:
    GUID NV_ENC_PRESET_DEFAULT_GUID
    GUID NV_ENC_PRESET_HP_GUID
    GUID NV_ENC_PRESET_HQ_GUID
    GUID NV_ENC_PRESET_BD_GUID
    #V3 ONLY PRESETS:
    GUID NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID
    GUID NV_ENC_PRESET_LOW_LATENCY_HQ_GUID
    GUID NV_ENC_PRESET_LOW_LATENCY_HP_GUID
    #V4 ONLY PRESETS:
    GUID NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID
    GUID NV_ENC_PRESET_LOSSLESS_HP_GUID
    #V10 (the GUID value is shown next to each name):
    GUID NV_ENC_PRESET_P1_GUID  #FC0A8D3E-45F8-4CF8-80C7-298871590EBF
    GUID NV_ENC_PRESET_P2_GUID  #F581CFB8-88D6-4381-93F0-DF13F9C27DAB
    GUID NV_ENC_PRESET_P3_GUID  #36850110-3A07-441F-94D5-3670631F91F6
    GUID NV_ENC_PRESET_P4_GUID  #90A7B826-DF06-4862-B9D2-CD6D73A08681
    GUID NV_ENC_PRESET_P5_GUID  #21C6E6B4-297A-4CBA-998F-B6CBDE72ADE3
    GUID NV_ENC_PRESET_P6_GUID  #8E75C279-6299-4AB6-8302-0B215A335CF5
    GUID NV_ENC_PRESET_P7_GUID  #84848C12-6F71-4C13-931B-53E283F57974
418
    #field declarations for the NV_ENC_CAPS_PARAM struct of nvEncodeAPI.h
    #NOTE(review): capsToQuery presumably takes one of the NV_ENC_CAPS values - confirm
    ctypedef struct NV_ENC_CAPS_PARAM:
        uint32_t    version
        uint32_t    capsToQuery
        uint32_t    reserved[62]
423
    #field declarations for the NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS struct of nvEncodeAPI.h
    ctypedef struct NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS:
        uint32_t    version         #[in]: Struct version. Must be set to ::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER.
        NV_ENC_DEVICE_TYPE deviceType   #[in]: (NV_ENC_DEVICE_TYPE) Specifies the device type
        void        *device         #[in]: Pointer to client device.
        GUID        *reserved       #[in]: Pointer to a GUID key issued to the client.
        uint32_t    apiVersion      #[in]: API version. Should be set to NVENCAPI_VERSION.
        uint32_t    reserved1[253]  #[in]: Reserved and must be set to 0
        void        *reserved2[64]  #[in]: Reserved and must be set to NULL
432
    #field declarations for the NV_ENC_CREATE_INPUT_BUFFER struct of nvEncodeAPI.h
    ctypedef struct NV_ENC_CREATE_INPUT_BUFFER:
        uint32_t    version         #[in]: Struct version. Must be set to ::NV_ENC_CREATE_INPUT_BUFFER_VER
        uint32_t    width           #[in]: Input buffer width
        uint32_t    height          #[in]: Input buffer height
        NV_ENC_MEMORY_HEAP memoryHeap       #[in]: Deprecated. Do not use
        NV_ENC_BUFFER_FORMAT bufferFmt      #[in]: Input buffer format
        uint32_t    reserved        #[in]: Reserved and must be set to 0
        void        *inputBuffer    #[out]: Pointer to input buffer
        void        *pSysMemBuffer  #[in]: Pointer to existing sysmem buffer
        uint32_t    reserved1[57]   #[in]: Reserved and must be set to 0
        void        *reserved2[63]  #[in]: Reserved and must be set to NULL
444
    #field declarations for the NV_ENC_CREATE_BITSTREAM_BUFFER struct of nvEncodeAPI.h
    ctypedef struct NV_ENC_CREATE_BITSTREAM_BUFFER:
        uint32_t    version         #[in]: Struct version. Must be set to ::NV_ENC_CREATE_BITSTREAM_BUFFER_VER
        uint32_t    size            #[in]: Size of the bitstream buffer to be created
        NV_ENC_MEMORY_HEAP memoryHeap      #[in]: Deprecated. Do not use
        uint32_t    reserved        #[in]: Reserved and must be set to 0
        void        *bitstreamBuffer#[out]: Pointer to the output bitstream buffer
        void        *bitstreamBufferPtr #[out]: Reserved and should not be used
        uint32_t    reserved1[58]   #[in]: Reserved and should be set to 0
        void*       reserved2[64]   #[in]: Reserved and should be set to NULL
454
    #three unsigned 32-bit QP values
    #NOTE(review): presumably one each for P, B and intra frames, going by the field names - confirm
    ctypedef struct NV_ENC_QP:
        uint32_t    qpInterP
        uint32_t    qpInterB
        uint32_t    qpIntra
459
    #field declarations for the HEVC VUI parameters struct of nvEncodeAPI.h,
    #the fields are the same, in the same order, as NV_ENC_CONFIG_H264_VUI_PARAMETERS
    ctypedef struct NV_ENC_CONFIG_HEVC_VUI_PARAMETERS:
        uint32_t    overscanInfoPresentFlag         #[in]: if set to 1, it specifies that the overscanInfo is present
        uint32_t    overscanInfo                    #[in]: Specifies the overscan info (as defined in Annex E of the ITU-T Specification).
        uint32_t    videoSignalTypePresentFlag      #[in]: If set to 1, it specifies that the videoFormat, videoFullRangeFlag and colourDescriptionPresentFlag are present.
        uint32_t    videoFormat                     #[in]: Specifies the source video format (as defined in Annex E of the ITU-T Specification).
        uint32_t    videoFullRangeFlag              #[in]: Specifies the output range of the luma and chroma samples (as defined in Annex E of the ITU-T Specification).
        uint32_t    colourDescriptionPresentFlag    #[in]: If set to 1, it specifies that the colourPrimaries, transferCharacteristics and colourMatrix are present.
        uint32_t    colourPrimaries                 #[in]: Specifies color primaries for converting to RGB (as defined in Annex E of the ITU-T Specification)
        uint32_t    transferCharacteristics         #[in]: Specifies the opto-electronic transfer characteristics to use (as defined in Annex E of the ITU-T Specification)
        uint32_t    colourMatrix                    #[in]: Specifies the matrix coefficients used in deriving the luma and chroma from the RGB primaries (as defined in Annex E of the ITU-T Specification).
        uint32_t    chromaSampleLocationFlag        #[in]: if set to 1, it specifies that the chromaSampleLocationTop and chromaSampleLocationBot are present.
        uint32_t    chromaSampleLocationTop         #[in]: Specifies the chroma sample location for top field (as defined in Annex E of the ITU-T Specification)
        uint32_t    chromaSampleLocationBot         #[in]: Specifies the chroma sample location for bottom field (as defined in Annex E of the ITU-T Specification)
        uint32_t    bitstreamRestrictionFlag        #[in]: if set to 1, it specifies the bitstream restriction parameters are present in the bitstream.
        uint32_t    reserved[15]
475
    #field declarations for the H264 VUI parameters struct of nvEncodeAPI.h,
    #this is the type of NV_ENC_CONFIG_H264.h264VUIParameters
    ctypedef struct NV_ENC_CONFIG_H264_VUI_PARAMETERS:
        uint32_t    overscanInfoPresentFlag         #[in]: if set to 1, it specifies that the overscanInfo is present
        uint32_t    overscanInfo                    #[in]: Specifies the overscan info (as defined in Annex E of the ITU-T Specification).
        uint32_t    videoSignalTypePresentFlag      #[in]: If set to 1, it specifies that the videoFormat, videoFullRangeFlag and colourDescriptionPresentFlag are present.
        uint32_t    videoFormat                     #[in]: Specifies the source video format (as defined in Annex E of the ITU-T Specification).
        uint32_t    videoFullRangeFlag              #[in]: Specifies the output range of the luma and chroma samples (as defined in Annex E of the ITU-T Specification).
        uint32_t    colourDescriptionPresentFlag    #[in]: If set to 1, it specifies that the colourPrimaries, transferCharacteristics and colourMatrix are present.
        uint32_t    colourPrimaries                 #[in]: Specifies color primaries for converting to RGB (as defined in Annex E of the ITU-T Specification)
        uint32_t    transferCharacteristics         #[in]: Specifies the opto-electronic transfer characteristics to use (as defined in Annex E of the ITU-T Specification)
        uint32_t    colourMatrix                    #[in]: Specifies the matrix coefficients used in deriving the luma and chroma from the RGB primaries (as defined in Annex E of the ITU-T Specification).
        uint32_t    chromaSampleLocationFlag        #[in]: if set to 1, it specifies that the chromaSampleLocationTop and chromaSampleLocationBot are present.
        uint32_t    chromaSampleLocationTop         #[in]: Specifies the chroma sample location for top field (as defined in Annex E of the ITU-T Specification)
        uint32_t    chromaSampleLocationBot         #[in]: Specifies the chroma sample location for bottom field (as defined in Annex E of the ITU-T Specification)
        uint32_t    bitstreamRestrictionFlag        #[in]: if set to 1, it specifies the bitstream restriction parameters are present in the bitstream.
        uint32_t    reserved[15]
491
492    ctypedef struct NV_ENC_CONFIG_H264:
493        uint32_t    enableTemporalSVC   #[in]: Set to 1 to enable SVC temporal
494        uint32_t    enableStereoMVC     #[in]: Set to 1 to enable stereo MVC
495        uint32_t    hierarchicalPFrames #[in]: Set to 1 to enable hierarchical PFrames
496        uint32_t    hierarchicalBFrames #[in]: Set to 1 to enable hierarchical BFrames
497        uint32_t    outputBufferingPeriodSEI    #[in]: Set to 1 to write SEI buffering period syntax in the bitstream
498        uint32_t    outputPictureTimingSEI      #[in]: Set to 1 to write SEI picture timing syntax in the bitstream
499        uint32_t    outputAUD                   #[in]: Set to 1 to write access unit delimiter syntax in bitstream
500        uint32_t    disableSPSPPS               #[in]: Set to 1 to disable writing of Sequence and Picture parameter info in bitstream
501        uint32_t    outputFramePackingSEI       #[in]: Set to 1 to enable writing of frame packing arrangement SEI messages to bitstream
502        uint32_t    outputRecoveryPointSEI      #[in]: Set to 1 to enable writing of recovery point SEI message
503        uint32_t    enableIntraRefresh          #[in]: Set to 1 to enable gradual decoder refresh or intra refresh. If the GOP structure uses B frames this will be ignored
504        uint32_t    enableConstrainedEncoding   #[in]: Set this to 1 to enable constrainedFrame encoding where each slice in the constarined picture is independent of other slices
505                                                #Check support for constrained encoding using ::NV_ENC_CAPS_SUPPORT_CONSTRAINED_ENCODING caps.
506        uint32_t    repeatSPSPPS        #[in]: Set to 1 to enable writing of Sequence and Picture parameter for every IDR frame
507        uint32_t    enableVFR           #[in]: Set to 1 to enable variable frame rate.
508        uint32_t    enableLTR           #[in]: Currently this feature is not available and must be set to 0. Set to 1 to enable LTR support and auto-mark the first
509        uint32_t    qpPrimeYZeroTransformBypassFlag #[in]  To enable lossless encode set this to 1, set QP to 0 and RC_mode to NV_ENC_PARAMS_RC_CONSTQP and profile to HIGH_444_PREDICTIVE_PROFILE
510                                                    #Check support for lossless encoding using ::NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE caps.
511        uint32_t    useConstrainedIntraPred         #[in]: Set 1 to enable constrained intra prediction.
512        uint32_t    reservedBitFields[15]       #[in]: Reserved bitfields and must be set to 0
513        uint32_t    level               #[in]: Specifies the encoding level. Client is recommended to set this to NV_ENC_LEVEL_AUTOSELECT in order to enable the NvEncodeAPI interface to select the correct level.
514        uint32_t    idrPeriod           #[in]: Specifies the IDR interval. If not set, this is made equal to gopLength in NV_ENC_CONFIG.Low latency application client can set IDR interval to NVENC_INFINITE_GOPLENGTH so that IDR frames are not inserted automatically.
515        uint32_t    separateColourPlaneFlag     #[in]: Set to 1 to enable 4:4:4 separate colour planes
516        uint32_t    disableDeblockingFilterIDC  #[in]: Specifies the deblocking filter mode. Permissible value range: [0,2]
517        uint32_t    numTemporalLayers   #[in]: Specifies max temporal layers to be used for hierarchical coding. Valid value range is [1,::NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS]
518        uint32_t    spsId               #[in]: Specifies the SPS id of the sequence header.
519        uint32_t    ppsId               #[in]: Specifies the PPS id of the picture header.
520        NV_ENC_H264_ADAPTIVE_TRANSFORM_MODE adaptiveTransformMode   #[in]: Specifies the AdaptiveTransform Mode. Check support for AdaptiveTransform mode using ::NV_ENC_CAPS_SUPPORT_ADAPTIVE_TRANSFORM caps.
521        NV_ENC_H264_FMO_MODE fmoMode    #[in]: Specified the FMO Mode. Check support for FMO using ::NV_ENC_CAPS_SUPPORT_FMO caps.
522        NV_ENC_H264_BDIRECT_MODE bdirectMode    #[in]: Specifies the BDirect mode. Check support for BDirect mode using ::NV_ENC_CAPS_SUPPORT_BDIRECT_MODE caps.
523        NV_ENC_H264_ENTROPY_CODING_MODE entropyCodingMode   #[in]: Specifies the entropy coding mode. Check support for CABAC mode using ::NV_ENC_CAPS_SUPPORT_CABAC caps.
524        NV_ENC_STEREO_PACKING_MODE stereoMode   #[in]: Specifies the stereo frame packing mode which is to be signalled in frame packing arrangement SEI
525        uint32_t    intraRefreshPeriod  #[in]: Specifies the interval between successive intra refresh if enableIntrarefresh is set. Requires enableIntraRefresh to be set.
526                                        #Will be disabled if NV_ENC_CONFIG::gopLength is not set to NVENC_INFINITE_GOPLENGTH.
527        uint32_t    intraRefreshCnt     #[in]: Specifies the length of intra refresh in number of frames for periodic intra refresh. This value should be smaller than intraRefreshPeriod
528        uint32_t    maxNumRefFrames     #[in]: Specifies the DPB size used for encoding. Setting it to 0 will let driver use the default dpb size.
529                                        #The low latency application which wants to invalidate reference frame as an error resilience tool
530                                        #is recommended to use a large DPB size so that the encoder can keep old reference frames which can be used if recent
531                                        #frames are invalidated.
532        uint32_t    sliceMode           #[in]: This parameter in conjunction with sliceModeData specifies the way in which the picture is divided into slices
533                                        #sliceMode = 0 MB based slices, sliceMode = 1 Byte based slices, sliceMode = 2 MB row based slices, sliceMode = 3, numSlices in Picture
534                                        #When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting
535                                        #When sliceMode == 0 and sliceModeData == 0 whole picture will be coded with one slice
536        uint32_t    sliceModeData       #[in]: Specifies the parameter needed for sliceMode. For:
537                                        #sliceMode = 0, sliceModeData specifies # of MBs in each slice (except last slice)
538                                        #sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice)
539                                        #sliceMode = 2, sliceModeData specifies # of MB rows in each slice (except last slice)
540                                        #sliceMode = 3, sliceModeData specifies number of slices in the picture. Driver will divide picture into slices optimally
541        NV_ENC_CONFIG_H264_VUI_PARAMETERS h264VUIParameters   #[in]: Specifies the H264 video usability info pamameters
542        uint32_t    ltrNumFrames        #[in]: Specifies the number of LTR frames used. Additionally, encoder will mark the first numLTRFrames base layer reference frames within each IDR interval as LTR
543        uint32_t    ltrTrustMode        #[in]: Specifies the LTR operating mode. Set to 0 to disallow encoding using LTR frames until later specified. Set to 1 to allow encoding using LTR frames unless later invalidated.
544        uint32_t    chromaFormatIDC     #[in]: Specifies the chroma format. Should be set to 1 for yuv420 input, 3 for yuv444 input.
545                                        #Check support for YUV444 encoding using ::NV_ENC_CAPS_SUPPORT_YUV444_ENCODE caps.
546        uint32_t    maxTemporalLayers   #[in]: Specifies the max temporal layer used for hierarchical coding.
547        uint32_t    reserved1[270]      #[in]: Reserved and must be set to 0
548        void        *reserved2[64]      #[in]: Reserved and must be set to NULL
549
550    ctypedef struct NV_ENC_CONFIG_HEVC:     #HEVC-specific encoder configuration (embedded in NV_ENC_CODEC_CONFIG as hevcConfig)
551        uint32_t    level               #[in]: Specifies the level of the encoded bitstream.
552        uint32_t    tier                #[in]: Specifies the level tier of the encoded bitstream.
553        NV_ENC_HEVC_CUSIZE minCUSize    #[in]: Specifies the minimum size of luma coding unit.
554        NV_ENC_HEVC_CUSIZE maxCUSize    #[in]: Specifies the maximum size of luma coding unit. Currently NVENC SDK only supports maxCUSize equal to NV_ENC_HEVC_CUSIZE_32x32.
555        uint32_t    useConstrainedIntraPred             #[in]: Set 1 to enable constrained intra prediction.
556        uint32_t    disableDeblockAcrossSliceBoundary   #[in]: Set 1 to disable in loop filtering across slice boundary.
557        uint32_t    outputBufferingPeriodSEI            #[in]: Set 1 to write SEI buffering period syntax in the bitstream
558        uint32_t    outputPictureTimingSEI              #[in]: Set 1 to write SEI picture timing syntax in the bitstream
559        uint32_t    outputAUD                           #[in]: Set 1 to write Access Unit Delimiter syntax.
560        uint32_t    enableLTR                           #[in]: Set 1 to enable use of long term reference pictures for inter prediction.
561        uint32_t    disableSPSPPS                       #[in]: Set 1 to disable VPS,SPS and PPS signalling in the bitstream.
562        uint32_t    repeatSPSPPS                        #[in]: Set 1 to output VPS,SPS and PPS for every IDR frame.
563        uint32_t    enableIntraRefresh                  #[in]: Set 1 to enable gradual decoder refresh or intra refresh. If the GOP structure uses B frames this will be ignored
564        uint32_t    chromaFormatIDC                     #[in]: Specifies the chroma format. Should be set to 1 for yuv420 input, 3 for yuv444 input.
565        uint32_t    pixelBitDepthMinus8                 #[in]: Specifies pixel bit depth minus 8. Should be set to 0 for 8 bit input, 2 for 10 bit input.
566        uint32_t    reserved                            #[in]: Reserved bitfields.
567        uint32_t    idrPeriod                           #[in]: Specifies the IDR interval. If not set, this is made equal to gopLength in NV_ENC_CONFIG. Low latency application client can set IDR interval to NVENC_INFINITE_GOPLENGTH so that IDR frames are not inserted automatically.
568        uint32_t    intraRefreshPeriod                  #[in]: Specifies the interval between successive intra refresh if enableIntraRefresh is set. Requires enableIntraRefresh to be set.
569                                                        #Will be disabled if NV_ENC_CONFIG::gopLength is not set to NVENC_INFINITE_GOPLENGTH.
570        uint32_t    intraRefreshCnt                     #[in]: Specifies the length of intra refresh in number of frames for periodic intra refresh. This value should be smaller than intraRefreshPeriod
571        uint32_t    maxNumRefFramesInDPB                #[in]: Specifies the maximum number of references frames in the DPB.
572        uint32_t    ltrNumFrames                        #[in]: Specifies the maximum number of long term references that can be used for prediction
573        uint32_t    vpsId                               #[in]: Specifies the VPS id of the video parameter set. Currently reserved and must be set to 0.
574        uint32_t    spsId                               #[in]: Specifies the SPS id of the sequence header. Currently reserved and must be set to 0.
575        uint32_t    ppsId                               #[in]: Specifies the PPS id of the picture header. Currently reserved and must be set to 0.
576        uint32_t    sliceMode                           #[in]: This parameter in conjunction with sliceModeData specifies the way in which the picture is divided into slices
577                                                        #sliceMode = 0 CTU based slices, sliceMode = 1 Byte based slices, sliceMode = 2 CTU row based slices, sliceMode = 3, numSlices in Picture
578                                                        #When sliceMode == 0 and sliceModeData == 0 whole picture will be coded with one slice
579        uint32_t    sliceModeData                       #[in]: Specifies the parameter needed for sliceMode. For:
580                                                        #sliceMode = 0, sliceModeData specifies # of CTUs in each slice (except last slice)
581                                                        #sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice)
582                                                        #sliceMode = 2, sliceModeData specifies # of CTU rows in each slice (except last slice)
583                                                        #sliceMode = 3, sliceModeData specifies number of slices in the picture. Driver will divide picture into slices optimally
584        uint32_t    maxTemporalLayersMinus1             #[in]: Specifies the max temporal layer used for hierarchical coding.
585        NV_ENC_CONFIG_HEVC_VUI_PARAMETERS hevcVUIParameters #[in]: Specifies the HEVC video usability info parameters
586        uint32_t    reserved1[218]                      #[in]: Reserved and must be set to 0.
587        void*       reserved2[64]                       #[in]: Reserved and must be set to NULL
588
589    ctypedef union NV_ENC_CODEC_CONFIG:     #codec specific encoder configuration; declared as a union, consistent with NV_ENC_CONFIG::encodeCodecConfig ("through this union") and NV_ENC_CODEC_PIC_PARAMS
590        NV_ENC_CONFIG_H264  h264Config                  #[in]: Specifies the H.264-specific encoder configuration
591        NV_ENC_CONFIG_HEVC  hevcConfig                  #[in]: Specifies the HEVC-specific encoder configuration. (NOTE(review): the SDK text "currently unsupported and must not be used" looks stale, h265 is enabled by default in XPRA_NVENC_ENCODINGS - confirm)
592        uint32_t            reserved[256]               #[in]: Reserved and must be set to 0
593
594    ctypedef struct NV_ENC_RC_PARAMS:       #rate control parameters (embedded in NV_ENC_CONFIG as rcParams)
595        uint32_t    version             #struct version (presumably must be set to NV_ENC_RC_PARAMS_VER - confirm against nvEncodeAPI.h)
596        NV_ENC_PARAMS_RC_MODE rateControlMode   #[in]: Specifies the rate control mode. Check support for various rate control modes using ::NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES caps.
597        NV_ENC_QP   constQP             #[in]: Specifies the initial QP to be used for encoding, these values would be used for all frames if in Constant QP mode.
598        uint32_t    averageBitRate      #[in]: Specifies the average bitrate(in bits/sec) used for encoding.
599        uint32_t    maxBitRate          #[in]: Specifies the maximum bitrate for the encoded output. This is used for VBR and ignored for CBR mode.
600        uint32_t    vbvBufferSize       #[in]: Specifies the VBV(HRD) buffer size. in bits. Set 0 to use the default VBV  buffer size.
601        uint32_t    vbvInitialDelay     #[in]: Specifies the VBV(HRD) initial delay in bits. Set 0 to use the default VBV  initial delay
602        uint32_t    enableMinQP         #[in]: Set this to 1 if minimum QP used for rate control.
603        uint32_t    enableMaxQP         #[in]: Set this to 1 if maximum QP used for rate control.
604        uint32_t    enableInitialRCQP   #[in]: Set this to 1 if user supplied initial QP is used for rate control.
605        uint32_t    enableAQ            #[in]: Set this to 1 to enable adaptive quantization.
606        uint32_t    reservedBitField1   #[in]: Reserved bitfields and must be set to 0
607        uint32_t    reservedBitFields[27] #[in]: Reserved bitfields and must be set to 0
608        NV_ENC_QP   minQP               #[in]: Specifies the minimum QP used for rate control. Client must set NV_ENC_RC_PARAMS::enableMinQP to 1.
609        NV_ENC_QP   maxQP               #[in]: Specifies the maximum QP used for rate control. Client must set NV_ENC_RC_PARAMS::enableMaxQP to 1.
610        NV_ENC_QP   initialRCQP         #[in]: Specifies the initial QP used for rate control. Client must set NV_ENC_RC_PARAMS::enableInitialRCQP to 1.
611        uint32_t    temporallayerIdxMask#[in]: Specifies the temporal layers (as a bitmask) whose QPs have changed. Valid max bitmask is [2^NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS - 1]
612        uint8_t     temporalLayerQP[8]  #[in]: Specifies the temporal layer QPs used for rate control. Temporal layer index is used as the array index
613        uint8_t     targetQuality       #[in]: Target CQ (Constant Quality) level for VBR mode (range 0-51 with 0-automatic)
614        uint8_t     targetQualityLSB    #[in]: Fractional part of target quality (as 8.8 fixed point format)
615        uint16_t    lookaheadDepth      #[in]: Maximum depth of lookahead with range 0-32 (only used if enableLookahead=1)
616        uint32_t    reserved[9]
617
618    ctypedef struct NV_ENC_CONFIG:          #encoder configuration: profile, GOP structure, rate control and codec specific settings
619        uint32_t    version             #[in]: Struct version. Must be set to ::NV_ENC_CONFIG_VER.
620        GUID        profileGUID         #[in]: Specifies the codec profile guid. If client specifies \p NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID the NvEncodeAPI interface will select the appropriate codec profile.
621        uint32_t    gopLength           #[in]: Specifies the number of pictures in one GOP. Low latency application client can set goplength to NVENC_INFINITE_GOPLENGTH so that keyframes are not inserted automatically.
622        int32_t     frameIntervalP      #[in]: Specifies the GOP pattern as follows: \p frameIntervalP = 0: I, 1: IPP, 2: IBP, 3: IBBP  If goplength is set to NVENC_INFINITE_GOPLENGTH \p frameIntervalP should be set to 1.
623        uint32_t    monoChromeEncoding  #[in]: Set this to 1 to enable monochrome encoding for this session.
624        NV_ENC_PARAMS_FRAME_FIELD_MODE  frameFieldMode      #[in]: Specifies the frame/field mode. Check support for field encoding using ::NV_ENC_CAPS_SUPPORT_FIELD_ENCODING caps.
625        NV_ENC_MV_PRECISION mvPrecision #[in]: Specifies the desired motion vector prediction precision.
626        NV_ENC_RC_PARAMS    rcParams    #[in]: Specifies the rate control parameters for the current encoding session.
627        NV_ENC_CODEC_CONFIG encodeCodecConfig   #[in]: Specifies the codec specific config parameters through this union.
628        uint32_t    reserved[278]       #[in]: Reserved and must be set to 0
629        void        *reserved2[64]      #[in]: Reserved and must be set to NULL
630
631    ctypedef enum NV_ENC_TUNING_INFO:       #tuning targets that can be applied to the presets
632        NV_ENC_TUNING_INFO_UNDEFINED            #Undefined tuningInfo. Invalid value for encoding
633        NV_ENC_TUNING_INFO_HIGH_QUALITY         #Tune presets for latency tolerant encoding
634        NV_ENC_TUNING_INFO_LOW_LATENCY          #Tune presets for low latency streaming
635        NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY    #Tune presets for ultra low latency streaming
636        NV_ENC_TUNING_INFO_LOSSLESS             #Tune presets for lossless encoding
637        NV_ENC_TUNING_INFO_COUNT                #Number of tuningInfo values; not a valid value for encoding
638
639
640    ctypedef struct NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE:    #external ME hint candidate counts per block type (see NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock)
641        uint32_t    numCandsPerBlk16x16 #[in]: Specifies the number of candidates per 16x16 block.
642        uint32_t    numCandsPerBlk16x8  #[in]: Specifies the number of candidates per 16x8 block.
643        uint32_t    numCandsPerBlk8x16  #[in]: Specifies the number of candidates per 8x16 block.
644        uint32_t    numCandsPerBlk8x8   #[in]: Specifies the number of candidates per 8x8 block.
645        uint32_t    reserved            #[in]: Reserved for padding.
646        uint32_t    reserved1[3]        #[in]: Reserved for future use.
647
648    ctypedef struct NV_ENC_INITIALIZE_PARAMS:   #encode session initialization parameters, as consumed by ::NvEncInitializeEncoder()
649        uint32_t    version             #[in]: Struct version. Must be set to ::NV_ENC_INITIALIZE_PARAMS_VER.
650        GUID        encodeGUID          #[in]: Specifies the Encode GUID for which the encoder is being created. ::NvEncInitializeEncoder() API will fail if this is not set, or set to unsupported value.
651        GUID        presetGUID          #[in]: Specifies the preset for encoding. If the preset GUID is set then the preset configuration will be applied before any other parameter.
652        uint32_t    encodeWidth         #[in]: Specifies the encode width. If not set ::NvEncInitializeEncoder() API will fail.
653        uint32_t    encodeHeight        #[in]: Specifies the encode height. If not set ::NvEncInitializeEncoder() API will fail.
654        uint32_t    darWidth            #[in]: Specifies the display aspect ratio Width.
655        uint32_t    darHeight           #[in]: Specifies the display aspect ratio height.
656        uint32_t    frameRateNum        #[in]: Specifies the numerator for frame rate used for encoding in frames per second ( Frame rate = frameRateNum / frameRateDen ).
657        uint32_t    frameRateDen        #[in]: Specifies the denominator for frame rate used for encoding in frames per second ( Frame rate = frameRateNum / frameRateDen ).
658        uint32_t    enableEncodeAsync   #[in]: Set this to 1 to enable asynchronous mode and is expected to use events to get picture completion notification.
659        uint32_t    enablePTD           #[in]: Set this to 1 to enable the Picture Type Decision to be taken by the NvEncodeAPI interface.
660        uint32_t    reportSliceOffsets  #[in]: Set this to 1 to enable reporting slice offsets in ::_NV_ENC_LOCK_BITSTREAM. Currently supported only for H264. Client must set this to 0 if NV_ENC_CONFIG_H264::sliceMode is 1
661        uint32_t    enableSubFrameWrite #[in]: Set this to 1 to write out available bitstream to memory at subframe intervals
662        uint32_t    enableExternalMEHints   #[in]: Set to 1 to enable external ME hints for the current frame. Currently this feature is supported only if NV_ENC_INITIALIZE_PARAMS::enablePTD is set to 0 or \p frameIntervalP = 1 (i.e no B frames).
663        uint32_t    enableMEOnlyMode    #[in]: Set to 1 to enable ME Only Mode
664        uint32_t    reservedBitFields[28]   #[in]: Reserved bitfields and must be set to 0
665        uint32_t    privDataSize        #[in]: Reserved private data buffer size and must be set to 0
666        void        *privData           #[in]: Reserved private data buffer and must be set to NULL
667        NV_ENC_CONFIG *encodeConfig     #[in]: Specifies the advanced codec specific structure. If client has sent a valid codec config structure, it will override parameters set by the NV_ENC_INITIALIZE_PARAMS::presetGUID parameter. If set to NULL the NvEncodeAPI interface will use the NV_ENC_INITIALIZE_PARAMS::presetGUID to set the codec specific parameters.
668                                        #Client can also optionally query the NvEncodeAPI interface to get codec specific parameters for a presetGUID using ::NvEncGetEncodePresetConfig() API. It can then modify (if required) some of the codec config parameters and send down a custom config structure as part of ::_NV_ENC_INITIALIZE_PARAMS.
669                                        #Even in this case client is recommended to pass the same preset guid it has used in ::NvEncGetEncodePresetConfig() API to query the config structure; as NV_ENC_INITIALIZE_PARAMS::presetGUID. This will not override the custom config structure but will be used to determine other Encoder HW specific parameters not exposed in the API.
670        uint32_t    maxEncodeWidth      #[in]: Maximum encode width to be used for current Encode session.
671                                        #Client should allocate output buffers according to this dimension for dynamic resolution change. If set to 0, Encoder will not allow dynamic resolution change.
672        uint32_t    maxEncodeHeight     #[in]: Maximum encode height to be allowed for current Encode session.
673                                        #Client should allocate output buffers according to this dimension for dynamic resolution change. If set to 0, Encode will not allow dynamic resolution change.
674        NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE maxMEHintCountsPerBlock[2]  #[in]: If Client wants to pass external motion vectors in NV_ENC_PIC_PARAMS::meExternalHints buffer it must specify the maximum number of hint candidates per block per direction for the encode session.
675                                        #The NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[0] is for L0 predictors and NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[1] is for L1 predictors.
676                                        #This client must also set NV_ENC_INITIALIZE_PARAMS::enableExternalMEHints to 1.
677        uint32_t    reserved[289]       #[in]: Reserved and must be set to 0
678        void        *reserved2[64]      #[in]: Reserved and must be set to NULL
679
680    ctypedef struct NV_ENC_RECONFIGURE_PARAMS:  #wraps a set of NV_ENC_INITIALIZE_PARAMS together with the reset / force-IDR flags
681        uint32_t    version             #[in]: Struct version. Must be set to ::NV_ENC_RECONFIGURE_PARAMS_VER.
682        NV_ENC_INITIALIZE_PARAMS reInitEncodeParams     #[in]: the initialization parameters to re-apply (presumably the new session settings - confirm against nvEncodeAPI.h)
683        uint32_t    resetEncoder        #[in]: This resets the rate control states and other internal encoder states. This should be used only with an IDR frame.
684                                        #If NV_ENC_INITIALIZE_PARAMS::enablePTD is set to 1, encoder will force the frame type to IDR
685        uint32_t    forceIDR            #[in]: Encode the current picture as an IDR picture. This flag is only valid when Picture type decision is taken by the Encoder
686                                        #[_NV_ENC_INITIALIZE_PARAMS::enablePTD == 1].
687        uint32_t    reserved
688
689    ctypedef struct NV_ENC_PRESET_CONFIG:       #container used to receive the NV_ENC_CONFIG of a preset
690        uint32_t    version             #[in]:  Struct version. Must be set to ::NV_ENC_PRESET_CONFIG_VER.
691        NV_ENC_CONFIG presetCfg         #[out]: preset config returned by the Nvidia Video Encoder interface.
692        uint32_t    reserved1[255]      #[in]: Reserved and must be set to 0
693        void*       reserved2[64]       #[in]: Reserved and must be set to NULL
694
695    ctypedef struct NV_ENC_H264_SEI_PAYLOAD:    #a single SEI payload: size, type and pointer to the data
696        uint32_t    payloadSize         #[in]: SEI payload size in bytes. SEI payload must be byte aligned, as described in Annex D
697        uint32_t    payloadType         #[in]: SEI payload types and syntax can be found in Annex D of the H.264 Specification.
698        uint8_t     *payload            #[in]: pointer to user data
699    ctypedef NV_ENC_H264_SEI_PAYLOAD NV_ENC_SEI_PAYLOAD     #alias used by the seiPayloadArray field of both the H264 and HEVC picture params
700
701    ctypedef struct NV_ENC_PIC_PARAMS_H264:     #H264 specific per-picture encode parameters (member of NV_ENC_CODEC_PIC_PARAMS)
702        uint32_t    displayPOCSyntax    #[in]: Specifies the display POC syntax This is required to be set if client is handling the picture type decision.
703        uint32_t    reserved3           #[in]: Reserved and must be set to 0
704        uint32_t    refPicFlag          #[in]: Set to 1 for a reference picture. This is ignored if NV_ENC_INITIALIZE_PARAMS::enablePTD is set to 1.
705        uint32_t    colourPlaneId       #[in]: Specifies the colour plane ID associated with the current input.
706        uint32_t    forceIntraRefreshWithFrameCnt   #[in]: Forces an intra refresh with duration equal to intraRefreshFrameCnt.
707                                        #When outputRecoveryPointSEI is set this value is used for recovery_frame_cnt in recovery point SEI message
708                                        #forceIntraRefreshWithFrameCnt cannot be used if B frames are used in the GOP structure specified
709        uint32_t    constrainedFrame    #[in]: Set to 1 if client wants to encode this frame with each slice completely independent of other slices in the frame.
710                                        #NV_ENC_INITIALIZE_PARAMS::enableConstrainedEncoding should be set to 1
711        uint32_t    sliceModeDataUpdate #[in]: Set to 1 if client wants to change the sliceModeData field to specify new sliceSize Parameter
712                                        #When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting
713        uint32_t    ltrMarkFrame        #[in]: Set to 1 if client wants to mark this frame as LTR
714        uint32_t    ltrUseFrames        #[in]: Set to 1 if client allows encoding this frame using the LTR frames specified in ltrFrameBitmap
715        uint32_t    reservedBitFields   #[in]: Reserved bit fields and must be set to 0
716        uint8_t*    sliceTypeData       #[in]: Array which specifies the slice type used to force intra slice for a particular slice. Currently supported only for NV_ENC_CONFIG_H264::sliceMode == 3.
717                                        #Client should allocate array of size sliceModeData where sliceModeData is specified in field of ::_NV_ENC_CONFIG_H264
718                                        #Array element with index n corresponds to nth slice. To force a particular slice to intra client should set corresponding array element to NV_ENC_SLICE_TYPE_I
719                                        #all other array elements should be set to NV_ENC_SLICE_TYPE_DEFAULT
720        uint32_t    sliceTypeArrayCnt   #[in]: Client should set this to the number of elements allocated in sliceTypeData array. If sliceTypeData is NULL then this should be set to 0
721        uint32_t    seiPayloadArrayCnt  #[in]: Specifies the number of elements allocated in  seiPayloadArray array.
722        NV_ENC_SEI_PAYLOAD *seiPayloadArray    #[in]: Array of SEI payloads which will be inserted for this frame.
723        uint32_t    sliceMode           #[in]: This parameter in conjunction with sliceModeData specifies the way in which the picture is divided into slices
724                                        #sliceMode = 0 MB based slices, sliceMode = 1 Byte based slices, sliceMode = 2 MB row based slices, sliceMode = 3, numSlices in Picture
725                                        #When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting
726                                        #When sliceMode == 0 and sliceModeData == 0 whole picture will be coded with one slice
727        uint32_t    sliceModeData       #[in]: Specifies the parameter needed for sliceMode. For:
728                                        #sliceMode = 0, sliceModeData specifies # of MBs in each slice (except last slice)
729                                        #sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice)
730                                        #sliceMode = 2, sliceModeData specifies # of MB rows in each slice (except last slice)
731                                        #sliceMode = 3, sliceModeData specifies number of slices in the picture. Driver will divide picture into slices optimally
732        uint32_t    ltrMarkFrameIdx     #[in]: Specifies the long term reference frame index to use for marking this frame as LTR.
733        uint32_t    ltrUseFrameBitmap   #[in]: Specifies the associated bitmap of LTR frame indices when encoding this frame.
734        uint32_t    ltrUsageMode        #[in]: Specifies additional usage constraints for encoding using LTR frames from this point further. 0: no constraints, 1: no short term refs older than current, no previous LTR frames.
735        uint32_t    reserved[243]       #[in]: Reserved and must be set to 0.
736        void*       reserved2[62]       #[in]: Reserved and must be set to NULL.
737
738    ctypedef struct NV_ENC_PIC_PARAMS_HEVC:     #HEVC specific per-picture encode parameters (member of NV_ENC_CODEC_PIC_PARAMS)
739        uint32_t displayPOCSyntax       #[in]: Specifies the display POC syntax This is required to be set if client is handling the picture type decision.
740        uint32_t refPicFlag             #[in]: Set to 1 for a reference picture. This is ignored if NV_ENC_INITIALIZE_PARAMS::enablePTD is set to 1.
741        uint32_t temporalId             #[in]: Specifies the temporal id of the picture
742        uint32_t forceIntraRefreshWithFrameCnt  #[in]: Forces an intra refresh with duration equal to intraRefreshFrameCnt.
743                                        #When outputRecoveryPointSEI is set this value is used for recovery_frame_cnt in recovery point SEI message
744                                        #forceIntraRefreshWithFrameCnt cannot be used if B frames are used in the GOP structure specified
745        uint32_t constrainedFrame       #[in]: Set to 1 if client wants to encode this frame with each slice completely independent of other slices in the frame
746                                        #NV_ENC_INITIALIZE_PARAMS::enableConstrainedEncoding should be set to 1
747        uint32_t sliceModeDataUpdate    #[in]: Set to 1 if client wants to change the sliceModeData field to specify new sliceSize Parameter
748                                        #When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting
749        uint32_t ltrMarkFrame           #[in]: Set to 1 if client wants to mark this frame as LTR
750        uint32_t ltrUseFrames           #[in]: Set to 1 if client allows encoding this frame using the LTR frames specified in ltrFrameBitmap
751        uint32_t reservedBitFields      #[in]: Reserved bit fields and must be set to 0
752        uint8_t* sliceTypeData          #[in]: Array which specifies the slice type used to force intra slice for a particular slice. Currently supported only for NV_ENC_CONFIG_H264::sliceMode == 3.
753                                        #Client should allocate array of size sliceModeData where sliceModeData is specified in field of ::_NV_ENC_CONFIG_H264
754                                        #Array element with index n corresponds to nth slice. To force a particular slice to intra client should set corresponding array element to NV_ENC_SLICE_TYPE_I
755                                        #all other array elements should be set to NV_ENC_SLICE_TYPE_DEFAULT
756        uint32_t sliceTypeArrayCnt      #[in]: Client should set this to the number of elements allocated in sliceTypeData array. If sliceTypeData is NULL then this should be set to 0
757        uint32_t sliceMode              #[in]: This parameter in conjunction with sliceModeData specifies the way in which the picture is divided into slices
758                                        #sliceMode = 0 CTU based slices, sliceMode = 1 Byte based slices, sliceMode = 2 CTU row based slices, sliceMode = 3, numSlices in Picture
759                                        #When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting
760                                        #When sliceMode == 0 and sliceModeData == 0 whole picture will be coded with one slice
761        uint32_t sliceModeData          #[in]: Specifies the parameter needed for sliceMode. For:
762                                        #sliceMode = 0, sliceModeData specifies # of CTUs in each slice (except last slice)
763                                        #sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice)
764                                        #sliceMode = 2, sliceModeData specifies # of CTU rows in each slice (except last slice)
765                                        #sliceMode = 3, sliceModeData specifies number of slices in the picture. Driver will divide picture into slices optimally
766        uint32_t ltrMarkFrameIdx        #[in]: Specifies the long term reference frame index to use for marking this frame as LTR.
767        uint32_t ltrUseFrameBitmap      #[in]: Specifies the associated bitmap of LTR frame indices to use when encoding this frame.
768        uint32_t ltrUsageMode           #[in]: Not supported. Reserved for future use and must be set to 0.
769        uint32_t seiPayloadArrayCnt     #[in]: Specifies the number of elements allocated in  seiPayloadArray array.
770        uint32_t reserved               #[in]: Reserved and must be set to 0.
771        NV_ENC_SEI_PAYLOAD* seiPayloadArray #[in]: Array of SEI payloads which will be inserted for this frame.
772        uint32_t reserved2 [244]        #[in]: Reserved and must be set to 0.
773        void*    reserved3[61]          #[in]: Reserved and must be set to NULL.
774
775    ctypedef union NV_ENC_CODEC_PIC_PARAMS:     #union of the codec specific per-picture encode parameters
776        NV_ENC_PIC_PARAMS_H264 h264PicParams    #[in]: H264 encode picture params.
777        NV_ENC_PIC_PARAMS_HEVC hevcPicParams    #[in]: HEVC encode picture params.
778        uint32_t               reserved[256]    #[in]: Reserved and must be set to 0.
779
780    ctypedef struct NV_ENC_MEONLY_PARAMS:       #input / reference / output buffers for a motion-estimation-only request (presumably used with NV_ENC_INITIALIZE_PARAMS::enableMEOnlyMode - confirm)
781        uint32_t    version             #[in]: Struct version. Must be set to NV_ENC_MEONLY_PARAMS_VER.
782        uint32_t    inputWidth          #[in]: Specifies the input buffer width
783        uint32_t    inputHeight         #[in]: Specifies the input buffer height
784        NV_ENC_INPUT_PTR inputBuffer    #[in]: Specifies the input buffer pointer. Client must use a pointer obtained from NvEncCreateInputBuffer() or NvEncMapInputResource() APIs.
785        NV_ENC_INPUT_PTR referenceFrame #[in]: Specifies the reference frame pointer
786        NV_ENC_OUTPUT_PTR outputMV      #[in,out]: Specifies the pointer to output motion vector data buffer allocated by NvEncCreateMVBuffer.
787        NV_ENC_BUFFER_FORMAT bufferFmt  #[in]: Specifies the input buffer format.
788        uint32_t    reserved1[252]      #[in]: Reserved and must be set to 0
789        void* reserved2[61]             #[in]: Reserved and must be set to NULL
790
    #One externally supplied motion vector hint:
    #NV_ENC_PIC_PARAMS.meExternalHints points to an array of these.
    ctypedef struct NVENC_EXTERNAL_ME_HINT:
        int32_t     mvx                 #[in]: Specifies the x component of integer pixel MV (relative to current MB) S12.0.
        int32_t     mvy                 #[in]: Specifies the y component of integer pixel MV (relative to current MB) S10.0
        int32_t     refidx              #[in]: Specifies the reference index (31=invalid). Currently we support only 1 reference frame per direction for external hints, so \p refidx must be 0.
        int32_t     dir                 #[in]: Specifies the direction of motion estimation . 0=L0 1=L1.
        int32_t     partType            #[in]: Specifies the block partition type. 0=16x16 1=16x8 2=8x16 3=8x8 (blocks in partition must be consecutive).
        int32_t     lastofPart          #[in]: Set to 1 for the last MV of (sub) partition
        int32_t     lastOfMB            #[in]: Set to 1 for the last MV of macroblock.
799
    #Per-frame encode parameters: a pointer to this struct is the argument
    #of the PNVENCENCODEPICTURE function type (nvEncEncodePicture).
    ctypedef struct NV_ENC_PIC_PARAMS:
        uint32_t    version             #[in]: Struct version. Must be set to ::NV_ENC_PIC_PARAMS_VER.
        uint32_t    inputWidth          #[in]: Specifies the input buffer width
        uint32_t    inputHeight         #[in]: Specifies the input buffer height
        uint32_t    inputPitch          #[in]: Specifies the input buffer pitch. If pitch value is not known, set this to inputWidth.
        uint32_t    encodePicFlags      #[in]: Specifies bit-wise OR`ed encode pic flags. See ::NV_ENC_PIC_FLAGS enum.
        uint32_t    frameIdx            #[in]: Specifies the frame index associated with the input frame [optional].
        uint64_t    inputTimeStamp      #[in]: Specifies presentation timestamp associated with the input picture.
        uint64_t    inputDuration       #[in]: Specifies duration of the input picture
        NV_ENC_INPUT_PTR  inputBuffer   #[in]: Specifies the input buffer pointer. Client must use a pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource() APIs.
        NV_ENC_OUTPUT_PTR outputBitstream #[in]: Specifies the pointer to output buffer. Client should use a pointer obtained from ::NvEncCreateBitstreamBuffer() API.
        void*       completionEvent     #[in]: Specifies an event to be signalled on completion of encoding of this Frame [only if operating in Asynchronous mode]. Each output buffer should be associated with a distinct event pointer.
        NV_ENC_BUFFER_FORMAT bufferFmt  #[in]: Specifies the input buffer format.
        NV_ENC_PIC_STRUCT pictureStruct #[in]: Specifies structure of the input picture.
        NV_ENC_PIC_TYPE pictureType     #[in]: Specifies input picture type. Must be set explicitly by the client if the client has not set NV_ENC_INITALIZE_PARAMS::enablePTD to 1 while calling NvInitializeEncoder.
        NV_ENC_CODEC_PIC_PARAMS codecPicParams  #[in]: Specifies the codec specific per-picture encoding parameters.
        uint32_t    newEncodeWidth      #[in]: Specifies the new output width for current Encoding session, in case of dynamic resolution change. Client should only set this in combination with NV_ENC_PIC_FLAGS::NV_ENC_PIC_FLAG_DYN_RES_CHANGE.
                                        #Additionally, if Picture Type decision is handled by the Client [_NV_ENC_INITIALIZE_PARAMS::enablePTD == 0], the client should set the _NV_ENC_PIC_PARAMS::pictureType as ::NV_ENC_PIC_TYPE_IDR.
                                        #If _NV_ENC_INITIALIZE_PARAMS::enablePTD == 1, then the Encoder will generate an IDR frame corresponding to this input.
        uint32_t    newEncodeHeight     #[in]: Specifies the new output height for current Encoding session, in case of dynamic resolution change. Client should only set this in combination with NV_ENC_PIC_FLAGS::NV_ENC_PIC_FLAG_DYN_RES_CHANGE.
                                        #Additionally, if Picture Type decision is handled by the Client [_NV_ENC_INITIALIZE_PARAMS::enablePTD == 0], the client should set the _NV_ENC_PIC_PARAMS::pictureType as ::NV_ENC_PIC_TYPE_IDR.
                                        #If _NV_ENC_INITIALIZE_PARAMS::enablePTD == 1, then the Encoder will generate an IDR frame corresponding to this input.
        NV_ENC_RC_PARAMS rcParams       #[in]: Specifies the rate control parameters for the current encoding session.
        NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE meHintCountsPerBlock[2] #[in]: Specifies the number of hint candidates per block per direction for the current frame. meHintCountsPerBlock[0] is for L0 predictors and meHintCountsPerBlock[1] is for L1 predictors.
                                        #The candidate count in NV_ENC_PIC_PARAMS::meHintCountsPerBlock[lx] must never exceed NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[lx] provided during encoder initialization.
        NVENC_EXTERNAL_ME_HINT *meExternalHints     #[in]: Specifies the pointer to ME external hints for the current frame. The size of ME hint buffer should be equal to number of macroblocks multiplied by the total number of candidates per macroblock.
                                        #The total number of candidates per MB per direction = 1*meHintCountsPerBlock[Lx].numCandsPerBlk16x16 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk16x8 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk8x8
                                        # + 4*meHintCountsPerBlock[Lx].numCandsPerBlk8x8. For frames using bidirectional ME , the total number of candidates for single macroblock is sum of total number of candidates per MB for each direction (L0 and L1)
                                        #NOTE(review): numCandsPerBlk8x8 appears twice in the formula above, the "2*" term is presumably meant to be numCandsPerBlk8x16 - confirm against nvEncodeAPI.h
        uint32_t    newDarWidth         #[in]: Specifies the new display aspect ratio width for current Encoding session, in case of dynamic resolution change. Client should only set this in combination with NV_ENC_PIC_FLAGS::NV_ENC_PIC_FLAG_DYN_RES_CHANGE.
                                        #Additionally, if Picture Type decision is handled by the Client [_NV_ENC_INITIALIZE_PARAMS::enablePTD == 0], the client should set the _NV_ENC_PIC_PARAMS::pictureType as ::NV_ENC_PIC_TYPE_IDR.
                                        #If _NV_ENC_INITIALIZE_PARAMS::enablePTD == 1, then the Encoder will generate an IDR frame corresponding to this input.
        uint32_t    newDarHeight        #[in]: Specifies the new display aspect ratio height for current Encoding session, in case of dynamic resolution change. Client should only set this in combination with NV_ENC_PIC_FLAGS::NV_ENC_PIC_FLAG_DYN_RES_CHANGE.
                                        #If _NV_ENC_INITIALIZE_PARAMS::enablePTD == 1, then the Encoder will generate an IDR frame corresponding to this input.
        uint32_t    reserved1[259]      #[in]: Reserved and must be set to 0
        void*       reserved2[63]       #[in]: Reserved and must be set to NULL
835
836    #NVENCSTATUS NvEncodeAPICreateInstance(NV_ENCODE_API_FUNCTION_LIST *functionList)
837
838    ctypedef NVENCSTATUS (*PNVENCOPENENCODESESSION)         (void* device, uint32_t deviceType, void** encoder) nogil
839    ctypedef NVENCSTATUS (*PNVENCGETENCODEGUIDCOUNT)        (void* encoder, uint32_t* encodeGUIDCount) nogil
840    ctypedef NVENCSTATUS (*PNVENCGETENCODEGUIDS)            (void* encoder, GUID* GUIDs, uint32_t guidArraySize, uint32_t* GUIDCount) nogil
841    ctypedef NVENCSTATUS (*PNVENCGETENCODEPROFILEGUIDCOUNT) (void* encoder, GUID encodeGUID, uint32_t* encodeProfileGUIDCount) nogil
842    ctypedef NVENCSTATUS (*PNVENCGETENCODEPROFILEGUIDS)     (void* encoder, GUID encodeGUID, GUID* profileGUIDs, uint32_t guidArraySize, uint32_t* GUIDCount) nogil
843    ctypedef NVENCSTATUS (*PNVENCGETINPUTFORMATCOUNT)       (void* encoder, GUID encodeGUID, uint32_t* inputFmtCount) nogil
844    ctypedef NVENCSTATUS (*PNVENCGETINPUTFORMATS)           (void* encoder, GUID encodeGUID, NV_ENC_BUFFER_FORMAT* inputFmts, uint32_t inputFmtArraySize, uint32_t* inputFmtCount) nogil
845    ctypedef NVENCSTATUS (*PNVENCGETENCODECAPS)             (void* encoder, GUID encodeGUID, NV_ENC_CAPS_PARAM* capsParam, int* capsVal) nogil
846    ctypedef NVENCSTATUS (*PNVENCGETENCODEPRESETCOUNT)      (void* encoder, GUID encodeGUID, uint32_t* encodePresetGUIDCount) nogil
847    ctypedef NVENCSTATUS (*PNVENCGETENCODEPRESETGUIDS)      (void* encoder, GUID encodeGUID, GUID* presetGUIDs, uint32_t guidArraySize, uint32_t* encodePresetGUIDCount) nogil
848    ctypedef NVENCSTATUS (*PNVENCGETENCODEPRESETCONFIG)     (void* encoder, GUID encodeGUID, GUID  presetGUID, NV_ENC_PRESET_CONFIG* presetConfig) nogil
849    ctypedef NVENCSTATUS (*PNVENCGETENCODEPRESETCONFIGEX)   (void* encoder, GUID encodeGUID, GUID  presetGUID, NV_ENC_TUNING_INFO tuningInfo, NV_ENC_PRESET_CONFIG* presetConfig)
850    ctypedef NVENCSTATUS (*PNVENCINITIALIZEENCODER)         (void* encoder, NV_ENC_INITIALIZE_PARAMS* createEncodeParams) nogil
851    ctypedef NVENCSTATUS (*PNVENCCREATEINPUTBUFFER)         (void* encoder, NV_ENC_CREATE_INPUT_BUFFER* createInputBufferParams) nogil
852    ctypedef NVENCSTATUS (*PNVENCDESTROYINPUTBUFFER)        (void* encoder, NV_ENC_INPUT_PTR inputBuffer) nogil
853    ctypedef NVENCSTATUS (*PNVENCCREATEBITSTREAMBUFFER)     (void* encoder, NV_ENC_CREATE_BITSTREAM_BUFFER* createBitstreamBufferParams) nogil
854    ctypedef NVENCSTATUS (*PNVENCDESTROYBITSTREAMBUFFER)    (void* encoder, NV_ENC_OUTPUT_PTR bitstreamBuffer) nogil
855    ctypedef NVENCSTATUS (*PNVENCENCODEPICTURE)             (void* encoder, NV_ENC_PIC_PARAMS* encodePicParams) nogil
856    ctypedef NVENCSTATUS (*PNVENCLOCKBITSTREAM)             (void* encoder, NV_ENC_LOCK_BITSTREAM* lockBitstreamBufferParams) nogil
857    ctypedef NVENCSTATUS (*PNVENCUNLOCKBITSTREAM)           (void* encoder, NV_ENC_OUTPUT_PTR bitstreamBuffer) nogil
858    ctypedef NVENCSTATUS (*PNVENCLOCKINPUTBUFFER)           (void* encoder, NV_ENC_LOCK_INPUT_BUFFER* lockInputBufferParams) nogil
859    ctypedef NVENCSTATUS (*PNVENCUNLOCKINPUTBUFFER)         (void* encoder, NV_ENC_INPUT_PTR inputBuffer) nogil
860    ctypedef NVENCSTATUS (*PNVENCGETENCODESTATS)            (void* encoder, NV_ENC_STAT* encodeStats) nogil
861    ctypedef NVENCSTATUS (*PNVENCGETSEQUENCEPARAMS)         (void* encoder, NV_ENC_SEQUENCE_PARAM_PAYLOAD* sequenceParamPayload) nogil
862    ctypedef NVENCSTATUS (*PNVENCREGISTERASYNCEVENT)        (void* encoder, NV_ENC_EVENT_PARAMS* eventParams) nogil
863    ctypedef NVENCSTATUS (*PNVENCUNREGISTERASYNCEVENT)      (void* encoder, NV_ENC_EVENT_PARAMS* eventParams) nogil
864    ctypedef NVENCSTATUS (*PNVENCMAPINPUTRESOURCE)          (void* encoder, NV_ENC_MAP_INPUT_RESOURCE* mapInputResParams) nogil
865    ctypedef NVENCSTATUS (*PNVENCUNMAPINPUTRESOURCE)        (void* encoder, NV_ENC_INPUT_PTR mappedInputBuffer) nogil
866    ctypedef NVENCSTATUS (*PNVENCDESTROYENCODER)            (void* encoder) nogil
867    ctypedef NVENCSTATUS (*PNVENCINVALIDATEREFFRAMES)       (void* encoder, uint64_t invalidRefFrameTimeStamp) nogil
868    ctypedef NVENCSTATUS (*PNVENCOPENENCODESESSIONEX)       (NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS *openSessionExParams, void** encoder) nogil
869    ctypedef NVENCSTATUS (*PNVENCREGISTERRESOURCE)          (void* encoder, NV_ENC_REGISTER_RESOURCE* registerResParams) nogil
870    ctypedef NVENCSTATUS (*PNVENCUNREGISTERRESOURCE)        (void* encoder, NV_ENC_REGISTERED_PTR registeredRes) nogil
871    ctypedef NVENCSTATUS (*PNVENCRECONFIGUREENCODER)        (void* encoder, NV_ENC_RECONFIGURE_PARAMS* reInitEncodeParams) nogil
872
    #Table of NVENC entry points, one function pointer per API call.
    #The prototype comment in this file shows NvEncodeAPICreateInstance()
    #taking a pointer to this struct; presumably that call is what fills
    #in the function pointers (the caller is not visible here).
    ctypedef struct NV_ENCODE_API_FUNCTION_LIST:
        uint32_t    version         #[in]: Client should pass NV_ENCODE_API_FUNCTION_LIST_VER.
        uint32_t    reserved        #[in]: Reserved and should be set to 0.
        PNVENCOPENENCODESESSION         nvEncOpenEncodeSession
        PNVENCGETENCODEGUIDCOUNT        nvEncGetEncodeGUIDCount
        PNVENCGETENCODEPROFILEGUIDCOUNT nvEncGetEncodeProfileGUIDCount
        PNVENCGETENCODEPROFILEGUIDS     nvEncGetEncodeProfileGUIDs
        PNVENCGETENCODEGUIDS            nvEncGetEncodeGUIDs
        PNVENCGETINPUTFORMATCOUNT       nvEncGetInputFormatCount
        PNVENCGETINPUTFORMATS           nvEncGetInputFormats
        PNVENCGETENCODECAPS             nvEncGetEncodeCaps
        PNVENCGETENCODEPRESETCOUNT      nvEncGetEncodePresetCount
        PNVENCGETENCODEPRESETGUIDS      nvEncGetEncodePresetGUIDs
        PNVENCGETENCODEPRESETCONFIG     nvEncGetEncodePresetConfig
        PNVENCGETENCODEPRESETCONFIGEX   nvEncGetEncodePresetConfigEx
        PNVENCINITIALIZEENCODER         nvEncInitializeEncoder
        PNVENCCREATEINPUTBUFFER         nvEncCreateInputBuffer
        PNVENCDESTROYINPUTBUFFER        nvEncDestroyInputBuffer
        PNVENCCREATEBITSTREAMBUFFER     nvEncCreateBitstreamBuffer
        PNVENCDESTROYBITSTREAMBUFFER    nvEncDestroyBitstreamBuffer
        PNVENCENCODEPICTURE             nvEncEncodePicture
        PNVENCLOCKBITSTREAM             nvEncLockBitstream
        PNVENCUNLOCKBITSTREAM           nvEncUnlockBitstream
        PNVENCLOCKINPUTBUFFER           nvEncLockInputBuffer
        PNVENCUNLOCKINPUTBUFFER         nvEncUnlockInputBuffer
        PNVENCGETENCODESTATS            nvEncGetEncodeStats
        PNVENCGETSEQUENCEPARAMS         nvEncGetSequenceParams
        PNVENCREGISTERASYNCEVENT        nvEncRegisterAsyncEvent
        PNVENCUNREGISTERASYNCEVENT      nvEncUnregisterAsyncEvent
        PNVENCMAPINPUTRESOURCE          nvEncMapInputResource
        PNVENCUNMAPINPUTRESOURCE        nvEncUnmapInputResource
        PNVENCDESTROYENCODER            nvEncDestroyEncoder
        PNVENCINVALIDATEREFFRAMES       nvEncInvalidateRefFrames
        PNVENCOPENENCODESESSIONEX       nvEncOpenEncodeSessionEx
        PNVENCREGISTERRESOURCE          nvEncRegisterResource
        PNVENCUNREGISTERRESOURCE        nvEncUnregisterResource
        PNVENCRECONFIGUREENCODER        nvEncReconfigureEncoder
        void*                           reserved2[285]                  #[in]:  Reserved and must be set to NULL
911
    #constants:
    #API version numbers:
    unsigned int NVENCAPI_MAJOR_VERSION
    unsigned int NVENCAPI_MINOR_VERSION
    uint32_t NVENCAPI_VERSION
    #struct version values, to be placed in the "version" field
    #of the struct each one is named after:
    unsigned int NV_ENCODE_API_FUNCTION_LIST_VER
    unsigned int NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER
    unsigned int NV_ENC_INITIALIZE_PARAMS_VER
    unsigned int NV_ENC_PRESET_CONFIG_VER
    unsigned int NV_ENC_CONFIG_VER
    unsigned int NV_ENC_CREATE_INPUT_BUFFER_VER
    unsigned int NV_ENC_CREATE_BITSTREAM_BUFFER_VER
    unsigned int NV_ENC_CAPS_PARAM_VER
    unsigned int NV_ENC_LOCK_INPUT_BUFFER_VER
    unsigned int NV_ENC_LOCK_BITSTREAM_VER
    unsigned int NV_ENC_PIC_PARAMS_VER
    unsigned int NV_ENC_RC_PARAMS_VER
    unsigned int NV_ENC_REGISTER_RESOURCE_VER
    unsigned int NV_ENC_MAP_INPUT_RESOURCE_VER
    unsigned int NVENC_INFINITE_GOPLENGTH
    #status codes (each one is described in NV_ENC_STATUS_TXT):
    unsigned int NV_ENC_SUCCESS
    unsigned int NV_ENC_ERR_NO_ENCODE_DEVICE
    unsigned int NV_ENC_ERR_UNSUPPORTED_DEVICE
    unsigned int NV_ENC_ERR_INVALID_ENCODERDEVICE
    unsigned int NV_ENC_ERR_INVALID_DEVICE
    unsigned int NV_ENC_ERR_DEVICE_NOT_EXIST
    unsigned int NV_ENC_ERR_INVALID_PTR
    unsigned int NV_ENC_ERR_INVALID_EVENT
    unsigned int NV_ENC_ERR_INVALID_PARAM
    unsigned int NV_ENC_ERR_INVALID_CALL
    unsigned int NV_ENC_ERR_OUT_OF_MEMORY
    unsigned int NV_ENC_ERR_ENCODER_NOT_INITIALIZED
    unsigned int NV_ENC_ERR_UNSUPPORTED_PARAM
    unsigned int NV_ENC_ERR_LOCK_BUSY
    unsigned int NV_ENC_ERR_NOT_ENOUGH_BUFFER
    unsigned int NV_ENC_ERR_INVALID_VERSION
    unsigned int NV_ENC_ERR_MAP_FAILED
    unsigned int NV_ENC_ERR_NEED_MORE_INPUT
    unsigned int NV_ENC_ERR_ENCODER_BUSY
    unsigned int NV_ENC_ERR_EVENT_NOT_REGISTERD
    unsigned int NV_ENC_ERR_GENERIC
    unsigned int NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY
    unsigned int NV_ENC_ERR_UNIMPLEMENTED
    unsigned int NV_ENC_ERR_RESOURCE_REGISTER_FAILED
    unsigned int NV_ENC_ERR_RESOURCE_NOT_REGISTERED
    unsigned int NV_ENC_ERR_RESOURCE_NOT_MAPPED

    unsigned int NV_ENC_CAPS_MB_PER_SEC_MAX
    unsigned int NV_ENC_RECONFIGURE_PARAMS_VER
960
961
#Human readable description of each NVENC status code
#(NV_ENC_SUCCESS and every NV_ENC_ERR_* value declared above).
#Some of the descriptions span multiple lines.
NV_ENC_STATUS_TXT = {
    NV_ENC_SUCCESS : "This indicates that API call returned with no errors.",
    NV_ENC_ERR_NO_ENCODE_DEVICE       : "This indicates that no encode capable devices were detected",
    NV_ENC_ERR_UNSUPPORTED_DEVICE     : "This indicates that devices pass by the client is not supported.",
    NV_ENC_ERR_INVALID_ENCODERDEVICE  : "This indicates that the encoder device supplied by the client is not valid.",
    NV_ENC_ERR_INVALID_DEVICE         : "This indicates that device passed to the API call is invalid.",
    NV_ENC_ERR_DEVICE_NOT_EXIST       : """This indicates that device passed to the API call is no longer available and
 needs to be reinitialized. The clients need to destroy the current encoder
 session by freeing the allocated input output buffers and destroying the device
 and create a new encoding session.""",
    NV_ENC_ERR_INVALID_PTR            : "This indicates that one or more of the pointers passed to the API call is invalid.",
    NV_ENC_ERR_INVALID_EVENT          : "This indicates that completion event passed in ::NvEncEncodePicture() call is invalid.",
    NV_ENC_ERR_INVALID_PARAM          : "This indicates that one or more of the parameter passed to the API call is invalid.",
    NV_ENC_ERR_INVALID_CALL           : "This indicates that an API call was made in wrong sequence/order.",
    NV_ENC_ERR_OUT_OF_MEMORY          : "This indicates that the API call failed because it was unable to allocate enough memory to perform the requested operation.",
    NV_ENC_ERR_ENCODER_NOT_INITIALIZED: """This indicates that the encoder has not been initialized with
::NvEncInitializeEncoder() or that initialization has failed.
The client cannot allocate input or output buffers or do any encoding
related operation before successfully initializing the encoder.""",
    NV_ENC_ERR_UNSUPPORTED_PARAM      : "This indicates that an unsupported parameter was passed by the client.",
    NV_ENC_ERR_LOCK_BUSY              : """This indicates that the ::NvEncLockBitstream() failed to lock the output
buffer. This happens when the client makes a non blocking lock call to
access the output bitstream by passing NV_ENC_LOCK_BITSTREAM::doNotWait flag.
This is not a fatal error and client should retry the same operation after
few milliseconds.""",
    NV_ENC_ERR_NOT_ENOUGH_BUFFER      : "This indicates that the size of the user buffer passed by the client is insufficient for the requested operation.",
    NV_ENC_ERR_INVALID_VERSION        : "This indicates that an invalid struct version was used by the client.",
    NV_ENC_ERR_MAP_FAILED             : "This indicates that ::NvEncMapInputResource() API failed to map the client provided input resource.",
    NV_ENC_ERR_NEED_MORE_INPUT        : """
This indicates encode driver requires more input buffers to produce an output
bitstream. If this error is returned from ::NvEncEncodePicture() API, this
is not a fatal error. If the client is encoding with B frames then,
::NvEncEncodePicture() API might be buffering the input frame for re-ordering.
A client operating in synchronous mode cannot call ::NvEncLockBitstream()
API on the output bitstream buffer if ::NvEncEncodePicture() returned the
::NV_ENC_ERR_NEED_MORE_INPUT error code.
The client must continue providing input frames until encode driver returns
::NV_ENC_SUCCESS. After receiving ::NV_ENC_SUCCESS status the client can call
::NvEncLockBitstream() API on the output buffers in the same order in which
it has called ::NvEncEncodePicture().
""",
    NV_ENC_ERR_ENCODER_BUSY : """This indicates that the HW encoder is busy encoding and is unable to encode
the input. The client should call ::NvEncEncodePicture() again after few milliseconds.""",
    NV_ENC_ERR_EVENT_NOT_REGISTERD : """This indicates that the completion event passed in ::NvEncEncodePicture()
API has not been registered with encoder driver using ::NvEncRegisterAsyncEvent().""",
    NV_ENC_ERR_GENERIC : "This indicates that an unknown internal error has occurred.",
    NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY  : "This indicates that the client is attempting to use a feature that is not available for the license type for the current system.",
    NV_ENC_ERR_UNIMPLEMENTED : "This indicates that the client is attempting to use a feature that is not implemented for the current version.",
    NV_ENC_ERR_RESOURCE_REGISTER_FAILED : "This indicates that the ::NvEncRegisterResource API failed to register the resource.",
    NV_ENC_ERR_RESOURCE_NOT_REGISTERED : "This indicates that the client is attempting to unregister a resource that has not been successfuly registered.",
    NV_ENC_ERR_RESOURCE_NOT_MAPPED : "This indicates that the client is attempting to unmap a resource that has not been successfuly mapped.",
      }
1014
#Status codes classified as "transient" errors.
#NOTE(review): going by the name, these are presumably checked when opening
#an encode session - the code using this tuple is not part of this section.
#The commented out entries are device errors which are deliberately left out.
OPEN_TRANSIENT_ERROR = (
    NV_ENC_ERR_NO_ENCODE_DEVICE,
    #NV_ENC_ERR_UNSUPPORTED_DEVICE,
    #NV_ENC_ERR_INVALID_ENCODERDEVICE,
    #NV_ENC_ERR_INVALID_DEVICE,
    NV_ENC_ERR_DEVICE_NOT_EXIST,
    NV_ENC_ERR_OUT_OF_MEMORY,
    NV_ENC_ERR_ENCODER_BUSY,
    NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY,
    )
1025
#Maps each NV_ENC_CAPS_* capability identifier to a short name:
#the identifier itself without its "NV_ENC_CAPS_" prefix.
CAPS_NAMES = {
        NV_ENC_CAPS_NUM_MAX_BFRAMES             : "NUM_MAX_BFRAMES",
        NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES : "SUPPORTED_RATECONTROL_MODES",
        NV_ENC_CAPS_SUPPORT_FIELD_ENCODING      : "SUPPORT_FIELD_ENCODING",
        NV_ENC_CAPS_SUPPORT_MONOCHROME          : "SUPPORT_MONOCHROME",
        NV_ENC_CAPS_SUPPORT_FMO                 : "SUPPORT_FMO",
        NV_ENC_CAPS_SUPPORT_QPELMV              : "SUPPORT_QPELMV",
        NV_ENC_CAPS_SUPPORT_BDIRECT_MODE        : "SUPPORT_BDIRECT_MODE",
        NV_ENC_CAPS_SUPPORT_CABAC               : "SUPPORT_CABAC",
        NV_ENC_CAPS_SUPPORT_ADAPTIVE_TRANSFORM  : "SUPPORT_ADAPTIVE_TRANSFORM",
        NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS     : "NUM_MAX_TEMPORAL_LAYERS",
        NV_ENC_CAPS_SUPPORT_HIERARCHICAL_PFRAMES: "SUPPORT_HIERARCHICAL_PFRAMES",
        NV_ENC_CAPS_SUPPORT_HIERARCHICAL_BFRAMES: "SUPPORT_HIERARCHICAL_BFRAMES",
        NV_ENC_CAPS_LEVEL_MAX                   : "LEVEL_MAX",
        NV_ENC_CAPS_LEVEL_MIN                   : "LEVEL_MIN",
        NV_ENC_CAPS_SEPARATE_COLOUR_PLANE       : "SEPARATE_COLOUR_PLANE",
        NV_ENC_CAPS_WIDTH_MAX                   : "WIDTH_MAX",
        NV_ENC_CAPS_HEIGHT_MAX                  : "HEIGHT_MAX",
        NV_ENC_CAPS_SUPPORT_TEMPORAL_SVC        : "SUPPORT_TEMPORAL_SVC",
        NV_ENC_CAPS_SUPPORT_DYN_RES_CHANGE      : "SUPPORT_DYN_RES_CHANGE",
        NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE  : "SUPPORT_DYN_BITRATE_CHANGE",
        NV_ENC_CAPS_SUPPORT_DYN_FORCE_CONSTQP   : "SUPPORT_DYN_FORCE_CONSTQP",
        NV_ENC_CAPS_SUPPORT_DYN_RCMODE_CHANGE   : "SUPPORT_DYN_RCMODE_CHANGE",
        NV_ENC_CAPS_SUPPORT_SUBFRAME_READBACK   : "SUPPORT_SUBFRAME_READBACK",
        NV_ENC_CAPS_SUPPORT_CONSTRAINED_ENCODING: "SUPPORT_CONSTRAINED_ENCODING",
        NV_ENC_CAPS_SUPPORT_INTRA_REFRESH       : "SUPPORT_INTRA_REFRESH",
        NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE : "SUPPORT_CUSTOM_VBV_BUF_SIZE",
        NV_ENC_CAPS_SUPPORT_DYNAMIC_SLICE_MODE  : "SUPPORT_DYNAMIC_SLICE_MODE",
        NV_ENC_CAPS_SUPPORT_REF_PIC_INVALIDATION: "SUPPORT_REF_PIC_INVALIDATION",
        NV_ENC_CAPS_PREPROC_SUPPORT             : "PREPROC_SUPPORT",
        NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT        : "ASYNC_ENCODE_SUPPORT",
        NV_ENC_CAPS_MB_NUM_MAX                  : "MB_NUM_MAX",
        NV_ENC_CAPS_EXPOSED_COUNT               : "EXPOSED_COUNT",
        NV_ENC_CAPS_SUPPORT_YUV444_ENCODE       : "SUPPORT_YUV444_ENCODE",
        NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE     : "SUPPORT_LOSSLESS_ENCODE",
        NV_ENC_CAPS_SUPPORT_SAO                 : "SUPPORT_SAO",
        NV_ENC_CAPS_SUPPORT_MEONLY_MODE         : "SUPPORT_MEONLY_MODE",
        NV_ENC_CAPS_SUPPORT_LOOKAHEAD           : "SUPPORT_LOOKAHEAD",
        NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ         : "SUPPORT_TEMPORAL_AQ",
        NV_ENC_CAPS_SUPPORT_10BIT_ENCODE        : "SUPPORT_10BIT_ENCODE",
        NV_ENC_CAPS_NUM_MAX_LTR_FRAMES          : "NUM_MAX_LTR_FRAMES",
        NV_ENC_CAPS_SUPPORT_WEIGHTED_PREDICTION : "SUPPORT_WEIGHTED_PREDICTION",
        NV_ENC_CAPS_DYNAMIC_QUERY_ENCODER_CAPACITY  : "DYNAMIC_QUERY_ENCODER_CAPACITY",
        NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE     : "SUPPORT_BFRAME_REF_MODE",
        NV_ENC_CAPS_SUPPORT_EMPHASIS_LEVEL_MAP  : "SUPPORT_EMPHASIS_LEVEL_MAP",
        }
1072
#Maps each NV_ENC_PIC_TYPE_* value to a short label
#(the identifier without its "NV_ENC_PIC_TYPE_" prefix).
PIC_TYPES = {
             NV_ENC_PIC_TYPE_P              : "P",
             NV_ENC_PIC_TYPE_B              : "B",
             NV_ENC_PIC_TYPE_I              : "I",
             NV_ENC_PIC_TYPE_IDR            : "IDR",
             NV_ENC_PIC_TYPE_BI             : "BI",
             NV_ENC_PIC_TYPE_SKIPPED        : "SKIPPED",
             NV_ENC_PIC_TYPE_INTRA_REFRESH  : "INTRA_REFRESH",
             NV_ENC_PIC_TYPE_UNKNOWN        : "UNKNOWN",
            }
1083
#Maps each NV_ENC_TUNING_INFO_* value to a lowercase, hyphenated name.
TUNING_STR = {
        NV_ENC_TUNING_INFO_UNDEFINED            : "undefined",
        NV_ENC_TUNING_INFO_HIGH_QUALITY         : "high-quality",
        NV_ENC_TUNING_INFO_LOW_LATENCY          : "low-latency",
        NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY    : "ultra-low-latency",
        NV_ENC_TUNING_INFO_LOSSLESS             : "lossless",
        }
1091
#ctypes function handles: both are None until init_nvencode_library()
#has loaded the libraries and assigned them.
NvEncodeAPICreateInstance = None
cuCtxGetCurrent = None
1094
def _nvenc_load_library(load, libname):
    #load a single shared library with the loader given,
    #any failure is logged and re-raised as an ImportError
    log("init_nvencode_library() will try to load %s", libname)
    try:
        lib = load(libname)
        log("init_nvencode_library() %s(%s)=%s", load, libname, lib)
    except Exception as e:
        log("failed to load '%s'", libname, exc_info=True)
        raise ImportError("nvenc: the required library %s cannot be loaded: %s" % (libname, e)) from None
    return lib


def init_nvencode_library():
    """
    Loads the CUDA and nvidia-encode shared libraries using ctypes
    and stores the two entry points we need as module globals:
    `cuCtxGetCurrent` and `NvEncodeAPICreateInstance`.
    Raises an ImportError if either library cannot be loaded.
    """
    global NvEncodeAPICreateInstance, cuCtxGetCurrent
    #the loader and the library names are platform specific:
    if WIN32:
        load = ctypes.WinDLL
        cuda_libname = "nvcuda.dll"
        nvenc_libname = "nvencodeapi64.dll"
    else:
        #assert os.name=="posix"
        load = cdll.LoadLibrary
        cuda_libname = "libcuda.so"
        nvenc_libname = "libnvidia-encode.so.1"

    #CUDA:
    cuda_lib = _nvenc_load_library(load, cuda_libname)
    cuCtxGetCurrent = cuda_lib.cuCtxGetCurrent
    cuCtxGetCurrent.restype = ctypes.c_int          # CUresult == int
    cuCtxGetCurrent.argtypes = [POINTER(CUcontext)] # CUcontext *pctx
    log("init_nvencode_library() %s.cuCtxGetCurrent=%s", os.path.splitext(cuda_libname)[0], cuCtxGetCurrent)

    #nvidia-encode:
    #NVENCSTATUS NvEncodeAPICreateInstance(NV_ENCODE_API_FUNCTION_LIST *functionList)
    nvenc_lib = _nvenc_load_library(load, nvenc_libname)
    NvEncodeAPICreateInstance = nvenc_lib.NvEncodeAPICreateInstance
    NvEncodeAPICreateInstance.restype = ctypes.c_int
    NvEncodeAPICreateInstance.argtypes = [ctypes.c_void_p]
    log("init_nvencode_library() NvEncodeAPICreateInstance=%s", NvEncodeAPICreateInstance)
1131
1132
cdef guidstr(GUID guid):
    #Formats a GUID struct as upper case hex text, ie:
    #"CE788D20-AAA9-4318-92BB-AC7E858C8D36"
    #Data1, Data2 and Data3 are written most significant byte first
    #using 4, 2 and 2 bytes respectively:
    chunks = []
    for value, size in ((guid.Data1, 4), (guid.Data2, 2), (guid.Data3, 2)):
        chunks.append(value.to_bytes(size, "big"))
    #Data4 is emitted as-is, split into a 2 byte group and a 6 byte group:
    data4 = guid.get("Data4")
    chunks.append(bytearray(data4[:2]))
    chunks.append(bytearray(data4[2:8]))
    hexgroups = [binascii.hexlify(chunk).upper().decode("latin1") for chunk in chunks]
    return "-".join(hexgroups)
1148
cdef GUID c_parseguid(src) except *:
    #Parses GUID text into a GUID struct.
    #The text is upper-cased first, then it must match the layout of
    #this template exactly: same length, dashes in the same positions
    #and hex digits everywhere else. Anything else raises an Exception.
    template = b"CE788D20-AAA9-4318-92BB-AC7E858C8D36"
    cdef int i
    cdef GUID guid
    raw = strtobytes(src.upper())
    if len(raw)!=len(template):
        raise Exception("invalid GUID format: expected %s characters but got %s" % (len(template), len(src)))
    #check every character against the template:
    hex_codes = tuple(b"0123456789ABCDEF")
    for i in range(len(template)):
        c = raw[i]
        if template[i]!=ord(b"-"):
            #must be an hex number:
            if c not in hex_codes:
                raise Exception("invalid GUID format: character at position %s is not in hex: %s" % (i, chr(c)))
        elif c!=ord(b"-"):
            #dash must be in the same place:
            raise Exception("invalid GUID format: character at position %s is not '-': %s" % (i, src[i]))
    fields = raw.split(b"-")    #ie: ["CE788D20", "AAA9", ...]
    #the first three groups are plain big-endian hex numbers:
    guid.Data1 = int(fields[0], 16)
    guid.Data2 = int(fields[1], 16)
    guid.Data3 = int(fields[2], 16)
    #the last two groups (2+6 bytes) fill the 8 bytes of Data4, in order:
    tail = binascii.unhexlify(fields[3] + fields[4])
    for i in range(8):
        guid.Data4[i] = <uint8_t> tail[i]
    log("c_parseguid(%s)=%s", src, guid)
    return guid
1189
def parseguid(s):
    """
    Python-callable wrapper around c_parseguid():
    parses the GUID text `s` and returns the parsed value.
    """
    parsed = c_parseguid(s)
    return parsed
1192
def test_parse():
    """
    Self-check: parsing a known GUID string with c_parseguid()
    and formatting it back with guidstr() must give the same text.
    """
    expected = "CE788D20-AAA9-4318-92BB-AC7E858C8D36"
    actual = guidstr(c_parseguid(expected))
    assert actual==expected, "expected %s but got %s" % (expected, actual)
#runs once, when the module is loaded:
test_parse()
1199
1200
#The client key starts out zeroed and is overwritten by every
#license key that parses, so it ends up holding the last valid one.
cdef GUID CLIENT_KEY_GUID
memset(&CLIENT_KEY_GUID, 0, sizeof(GUID))
CLIENT_KEYS_STR = get_license_keys(NVENCAPI_MAJOR_VERSION) + get_license_keys()
if CLIENT_KEYS_STR:
    #only keep the keys that parse as a GUID,
    #empty entries are skipped and invalid ones are logged:
    validated = []
    for x in CLIENT_KEYS_STR:
        if not x:
            continue
        try:
            CLIENT_KEY_GUID = c_parseguid(x)
        except Exception as e:
            log.error("invalid nvenc client key specified: '%s' (%s)", x, e)
            del e
        else:
            validated.append(x)
    CLIENT_KEYS_STR = validated
1216
#Maps the text form (as produced by guidstr) of each codec GUID
#to the codec's display name, used by codecstr().
CODEC_GUIDS = {
    guidstr(NV_ENC_CODEC_H264_GUID)         : "H264",
    guidstr(NV_ENC_CODEC_HEVC_GUID)         : "HEVC",
    }
1221
cdef codecstr(GUID guid):
    #Returns the codec name registered in CODEC_GUIDS for this GUID,
    #or the GUID's text form if the codec is not a known one.
    guid_text = guidstr(guid)
    if guid_text in CODEC_GUIDS:
        return CODEC_GUIDS[guid_text]
    return guid_text
1225
1226
#For each codec (keyed by the text form of its GUID):
#a dictionary mapping the text form of each profile GUID to the profile name.
#The autoselect profile GUID is listed under both codecs.
CODEC_PROFILES_GUIDS = {
    guidstr(NV_ENC_CODEC_H264_GUID) : {
        guidstr(NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID)       : "auto",
        guidstr(NV_ENC_H264_PROFILE_BASELINE_GUID)          : "baseline",
        guidstr(NV_ENC_H264_PROFILE_MAIN_GUID)              : "main",
        guidstr(NV_ENC_H264_PROFILE_HIGH_GUID)              : "high",
        guidstr(NV_ENC_H264_PROFILE_STEREO_GUID)            : "stereo",
        #guidstr(NV_ENC_H264_PROFILE_SVC_TEMPORAL_SCALABILTY): "temporal",
        guidstr(NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID)  : "progressive-high",
        guidstr(NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID)  : "constrained-high",
        #new in SDK v4:
        guidstr(NV_ENC_H264_PROFILE_HIGH_444_GUID)          : "high-444",
        },
    guidstr(NV_ENC_CODEC_HEVC_GUID) : {
        guidstr(NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID)       : "auto",
        guidstr(NV_ENC_HEVC_PROFILE_MAIN_GUID)              : "main",
        guidstr(NV_ENC_HEVC_PROFILE_MAIN10_GUID)            : "main10",
        guidstr(NV_ENC_HEVC_PROFILE_FREXT_GUID)             : "frext",
        },
    }
1247
#flat lookup of profile GUID string -> profile name, for all the codecs
#(a GUID shared by several codecs keeps the name from the last codec listed):
PROFILE_STR = {}
for profiles in CODEC_PROFILES_GUIDS.values():
    PROFILE_STR.update(profiles)
1252
1253
#this one is not defined anywhere but in the OBS source
#(I think they have access to information we do not have):
#GUID NV_ENC_PRESET_STREAMING = c_parseguid("7ADD423D-D035-4F6F-AEA5-50885658643C")

#maps the string form of each preset GUID to a short preset name:
CODEC_PRESETS_GUIDS = {
    guidstr(NV_ENC_PRESET_DEFAULT_GUID)                     : "default",
    guidstr(NV_ENC_PRESET_HP_GUID)                          : "hp",
    guidstr(NV_ENC_PRESET_HQ_GUID)                          : "hq",
    guidstr(NV_ENC_PRESET_BD_GUID)                          : "bd",
    guidstr(NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID)         : "low-latency",
    guidstr(NV_ENC_PRESET_LOW_LATENCY_HQ_GUID)              : "low-latency-hq",
    guidstr(NV_ENC_PRESET_LOW_LATENCY_HP_GUID)              : "low-latency-hp",
    #new in SDK4:
    guidstr(NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID)            : "lossless",
    guidstr(NV_ENC_PRESET_LOSSLESS_HP_GUID)                 : "lossless-hp",
    #the OBS "streaming" preset, which has no SDK constant:
    #its GUID is given directly as a string literal
    "7ADD423D-D035-4F6F-AEA5-50885658643C"                  : "streaming",
    #SDK 10:
    guidstr(NV_ENC_PRESET_P1_GUID)  : "P1",
    guidstr(NV_ENC_PRESET_P2_GUID)  : "P2",
    guidstr(NV_ENC_PRESET_P3_GUID)  : "P3",
    guidstr(NV_ENC_PRESET_P4_GUID)  : "P4",
    guidstr(NV_ENC_PRESET_P5_GUID)  : "P5",
    guidstr(NV_ENC_PRESET_P6_GUID)  : "P6",
    guidstr(NV_ENC_PRESET_P7_GUID)  : "P7",
    }
1279
#the presets which get_preset() treats as lossless:
LOSSLESS_PRESETS = ("lossless", "lossless-hp",)
#"high-444" followed by all the lossless presets:
YUV444_PRESETS = ("high-444",) + LOSSLESS_PRESETS
1282
cdef presetstr(GUID preset):
    #short preset name for this GUID,
    #or the GUID string itself if the preset is not one we know about:
    guid_text = guidstr(preset)
    if guid_text in CODEC_PRESETS_GUIDS:
        return CODEC_PRESETS_GUIDS[guid_text]
    return guid_text
1286
1287
#try to map preset names to a "speed" value:
#(the "P1" to "P7" presets have no entry here)
PRESET_SPEED = {
    "lossless": 0,
    "lossless-hp": 30,
    "bd": 40,
    "hq": 50,
    "default": 50,
    "hp": 60,
    "low-latency-hq": 70,
    "low-latency": 80,
    "low-latency-hp": 100,
    #disabled for now:
    "streaming": -1000,
}
#and the same preset names mapped to a "quality" value:
PRESET_QUALITY = {
    "lossless": 100,
    "lossless-hp": 100,
    "bd": 80,
    "hq": 70,
    "low-latency-hq": 60,
    "default": 50,
    "hp": 40,
    "low-latency": 20,
    "low-latency-hp": 0,
    #disabled for now:
    "streaming": -1000,
    "P1": 10,
    "P2": 25,
    "P3": 40,
    "P4": 55,
    "P5": 70,
    "P6": 85,
    "P7": 100,
}
1320
1321
1322
#maps the NV_ENC_BUFFER_FORMAT values to a name
#(init_params() uses it to check that the encoder supports our input format):
BUFFER_FORMAT = {
        NV_ENC_BUFFER_FORMAT_UNDEFINED              : "undefined",
        NV_ENC_BUFFER_FORMAT_NV12                   : "NV12_PL",
        NV_ENC_BUFFER_FORMAT_YV12                   : "YV12_PL",
        NV_ENC_BUFFER_FORMAT_IYUV                   : "IYUV_PL",
        NV_ENC_BUFFER_FORMAT_YUV444                 : "YUV444_PL",
        NV_ENC_BUFFER_FORMAT_YUV420_10BIT           : "YUV420_10BIT",
        NV_ENC_BUFFER_FORMAT_YUV444_10BIT           : "YUV444_10BIT",
        NV_ENC_BUFFER_FORMAT_ARGB                   : "ARGB",
        NV_ENC_BUFFER_FORMAT_ARGB10                 : "ARGB10",
        NV_ENC_BUFFER_FORMAT_AYUV                   : "AYUV",
        NV_ENC_BUFFER_FORMAT_ABGR                   : "ABGR",
        NV_ENC_BUFFER_FORMAT_ABGR10                 : "ABGR10",
        }
1337
1338
def get_COLORSPACES(encoding):
    """
    Returns a dictionary mapping each input colorspace
    to the output colorspaces available for the given encoding.
    """
    global YUV420_ENABLED, YUV444_ENABLED, YUV444_CODEC_SUPPORT
    yuv_modes = ["YUV420P"] if YUV420_ENABLED else []
    if YUV444_CODEC_SUPPORT.get(encoding.lower(), YUV444_ENABLED) or NATIVE_RGB:
        yuv_modes.append("YUV444P")
    #all the 32-bit RGB inputs share the very same list object:
    COLORSPACES = dict.fromkeys(("BGRX", "BGRA", "XRGB", "ARGB"), yuv_modes)
    if SUPPORT_30BPP:
        COLORSPACES["r210"] = ("GBRP10", )
    return COLORSPACES
1355
def get_input_colorspaces(encoding):
    """
    Returns the list of input colorspaces accepted for the given encoding:
    the keys of `get_COLORSPACES(encoding)`.
    """
    colorspaces = get_COLORSPACES(encoding)
    return list(colorspaces)
1358
def get_output_colorspaces(encoding, input_colorspace):
    """
    Returns the output colorspaces that can be produced
    for the given encoding and input colorspace.

    Raises an AssertionError if the input colorspace is not supported
    (or if no output colorspace is available for it);
    the message lists the input colorspaces which are valid.
    """
    cs = get_COLORSPACES(encoding)
    out = cs.get(input_colorspace)
    #the valid choices are the keys of the dictionary:
    #`out` is always empty when this assertion fires, so it cannot be used in the message
    assert out, "invalid input colorspace %s for encoding %s (must be one of: %s)" % (input_colorspace, encoding, csv(cs.keys()))
    #the output will actually be in one of those two formats once decoded
    #because internally that's what we convert to before encoding
    #(well, NV12... which is equivalent to YUV420P here...)
    return out
1367
1368
#size masks with the lowest bit cleared (ie: even values only),
#get_spec() passes them to video_spec as width_mask and height_mask:
WIDTH_MASK = 0xFFFE
HEIGHT_MASK = 0xFFFE

#Note: these counters should be per-device, but
#when we call get_runtime_factor(), we don't know which device is going to get used!
#since we have load balancing, using an overall factor isn't too bad
context_counter = AtomicInteger()
context_gen_counter = AtomicInteger()
#monotonic() timestamp of the last context failure, zero if there was none
#(set by Encoder.init_cuda, read by get_runtime_factor and get_info):
cdef double last_context_failure = 0

# per-device preset denylist - should be mutated with device_lock held
# (keyed by device id, each value is a list of preset names)
bad_presets = {}
1381
def get_runtime_factor() -> float:
    """
    Returns a scaling factor which gets lower as more contexts are in use,
    and lower still if a context failure was recorded during the last minute.
    """
    global last_context_failure, context_counter
    ndevices = len(init_all_devices())
    hard_limit = CONTEXT_LIMIT * ndevices
    in_use = context_counter.get()
    #try to avoid using too many contexts
    #(usually, we can have up to 32 contexts per card)
    soft_limit = min(CONTEXT_LIMIT, 1 + CONTEXT_LIMIT// 2) * ndevices
    #how far we are above the soft limit, relative to the room left above it:
    excess = max(0, in_use-soft_limit)
    headroom = max(1, hard_limit-soft_limit)
    f = max(0, 1.0 - (excess/headroom))
    #if we have had errors recently, lower our chances further:
    cdef double failure_elapsed = monotonic()-last_context_failure
    #discount factor gradually for 1 minute:
    #(the divisor goes from 61 right after a failure, down to 1 after 60 seconds)
    f /= 61-min(60, failure_elapsed)
    log("nvenc.get_runtime_factor()=%s", f)
    return f
1397
1398
#maximum (width, height) per encoding, get_spec() falls back to 4096x4096:
MAX_SIZE = {}

def get_spec(encoding, colorspace):
    """
    Returns the `video_spec` describing this encoder
    for the given encoding and input colorspace.

    Raises an AssertionError if the encoding or the colorspace is not supported.
    """
    global MAX_SIZE
    encodings = get_encodings()
    assert encoding in encodings, "invalid format: %s (must be one of %s)" % (encoding, encodings)
    #only build the colorspace dictionary once:
    colorspaces = get_COLORSPACES(encoding)
    assert colorspace in colorspaces, "invalid colorspace: %s (must be one of %s)" % (colorspace, colorspaces)
    #ratings: quality, speed, setup cost, cpu cost, gpu cost, latency, max_w, max_h
    #undocumented and found the hard way, see:
    #https://github.com/Xpra-org/xpra/issues/1046#issuecomment-765450102
    #https://github.com/Xpra-org/xpra/issues/1550
    min_w, min_h = 128, 128
    #FIXME: we should probe this using WIDTH_MAX, HEIGHT_MAX!
    max_w, max_h = MAX_SIZE.get(encoding, (4096, 4096))
    #RGB input with h264 gets a quality rating of 100 instead of 60
    #(this is distinct from the `has_lossless_mode` argument below):
    rgb_h264 = colorspace in ("XRGB", "ARGB", "BGRX", "BGRA", "r210") and encoding=="h264"
    cs = video_spec(encoding=encoding, input_colorspace=colorspace, output_colorspaces=colorspaces[colorspace], has_lossless_mode=LOSSLESS_CODEC_SUPPORT.get(encoding, LOSSLESS_ENABLED),
                      codec_class=Encoder, codec_type=get_type(),
                      quality=60+rgb_h264*40, speed=100, size_efficiency=100,
                      setup_cost=80, cpu_cost=10, gpu_cost=100,
                      #using a hardware encoder for something this small is silly:
                      min_w=min_w, min_h=min_h,
                      max_w=max_w, max_h=max_h,
                      can_scale=colorspace!="r210",
                      width_mask=WIDTH_MASK, height_mask=HEIGHT_MASK)
    cs.get_runtime_factor = get_runtime_factor
    return cs
1424
#the (major, minor) version of the NVENC API, as a tuple of integers
#ie: NVENCAPI_VERSION=0x30 -> PRETTY_VERSION = (3, 0)
PRETTY_VERSION = (int(NVENCAPI_MAJOR_VERSION), int(NVENCAPI_MINOR_VERSION))

def get_version():
    """
    Returns the NVENC API version as a `(major, minor)` tuple.
    """
    return PRETTY_VERSION
1430
def get_type() -> str:
    """
    Returns the name identifying this codec module: "nvenc".
    """
    return "nvenc"
1433
def get_info() -> dict:
    """
    Returns a dictionary of information about this module:
    API version, device and context counters, and when known,
    the cards, the kernel module version, the kernel version (Linux only)
    and the number of seconds since the last context failure.
    """
    global last_context_failure, context_counter, context_gen_counter
    devices = get_devices() or []
    info = {
        "version": PRETTY_VERSION,
        "device_count": len(devices),
        "context_count": context_counter.get(),
        "generation": context_gen_counter.get(),
    }
    cards = get_cards()
    if cards:
        info["cards"] = cards
    #only show the version if we have it already (don't probe now)
    module_version = get_nvidia_module_version(False)
    if module_version:
        info["kernel_module_version"] = module_version
    if LINUX:
        info["kernel_version"] = platform.uname()[2]
    if last_context_failure>0:
        #number of whole seconds elapsed since the failure:
        info["last_failure"] = int(monotonic()-last_context_failure)
    return info
1454
1455
#the encodings this module can be used for, empty at this point:
ENCODINGS = []
def get_encodings():
    """
    Returns the module-level `ENCODINGS` list (the object itself, not a copy).
    """
    return ENCODINGS
1460
cdef inline int roundup(int n, int m):
    #round n up to the next multiple of m
    #(this bitmask arithmetic is only correct when m is a power of two)
    cdef int mask = m - 1
    return (n + mask) & ~mask
1463
1464
cdef uintptr_t cmalloc(size_t size, what) except 0:
    #malloc() wrapper which raises an exception instead of returning NULL,
    #`what` describes the allocation and is only used in the error message
    cdef void *ptr = malloc(size)
    if ptr!=NULL:
        return <uintptr_t> ptr
    raise Exception("failed to allocate %i bytes of memory for %s" % (size, what))
1470
cdef nvencStatusInfo(NVENCSTATUS ret):
    #look up the text for an NVENC status code in NV_ENC_STATUS_TXT,
    #returns None if the code is not found there
    return NV_ENC_STATUS_TXT.get(ret)
1473
class NVENCException(Exception):
    """
    Raised when an NVENC API call returns a non-zero status.

    Attributes:
        function: description of the call that failed, as given by the caller
        code: the status code returned
        api_message: the text matching the status code, if there is one
    """
    def __init__(self, code, fn):
        self.function = fn
        self.code = code
        api_message = nvencStatusInfo(code)
        self.api_message = api_message
        text = "%s - returned %i" % (fn, code)
        if api_message:
            #append the API's own description of the error:
            text += ": %s" % api_message
        super().__init__(text)
1483
cdef inline raiseNVENC(NVENCSTATUS ret, msg):
    #raises an NVENCException unless `ret` is zero,
    #`msg` describes the API call that returned `ret`
    if DEBUG_API:
        log("raiseNVENC(%i, %s)", ret, msg)
    if ret==0:
        return
    raise NVENCException(ret, msg)
1489
1490
1491cdef class Encoder:
1492    cdef unsigned int width
1493    cdef unsigned int height
1494    cdef unsigned int scaled_width
1495    cdef unsigned int scaled_height
1496    cdef unsigned int input_width
1497    cdef unsigned int input_height
1498    cdef unsigned int encoder_width
1499    cdef unsigned int encoder_height
1500    cdef object encoding
1501    cdef object src_format
1502    cdef object dst_formats
1503    cdef int scaling
1504    cdef int speed
1505    cdef int quality
1506    cdef uint32_t target_bitrate
1507    cdef uint32_t max_bitrate
1508    #PyCUDA:
1509    cdef object driver
1510    cdef object cuda_info
1511    cdef object pycuda_info
1512    cdef object cuda_device_info
1513    cdef object cuda_device_context
1514    cdef void *cuda_context_ptr
1515    cdef object kernel
1516    cdef object kernel_name
1517    cdef object max_block_sizes
1518    cdef object max_grid_sizes
1519    cdef unsigned long max_threads_per_block
1520    cdef uint64_t free_memory
1521    cdef uint64_t total_memory
1522    #NVENC:
1523    cdef NV_ENCODE_API_FUNCTION_LIST *functionList
1524    cdef NV_ENC_INITIALIZE_PARAMS *params
1525    cdef void *context
1526    cdef GUID codec
1527    cdef NV_ENC_REGISTERED_PTR inputHandle
1528    cdef object inputBuffer
1529    cdef object cudaInputBuffer
1530    cdef object cudaOutputBuffer
1531    cdef unsigned int inputPitch                    #note: this isn't the pitch (aka rowstride) we actually use!
1532                                                    #just the value returned from the allocation call
1533    cdef unsigned int outputPitch
1534    cdef void *bitstreamBuffer
1535    cdef NV_ENC_BUFFER_FORMAT bufferFmt
1536    cdef object codec_name
1537    cdef object preset_name
1538    cdef object profile_name
1539    cdef object pixel_format
1540    cdef uint8_t lossless
1541    #statistics, etc:
1542    cdef double time
1543    cdef uint64_t first_frame_timestamp
1544    cdef unsigned long frames
1545    cdef unsigned long index
1546    cdef object last_frame_times
1547    cdef uint64_t bytes_in
1548    cdef uint64_t bytes_out
1549    cdef uint8_t ready
1550    cdef uint8_t closed
1551    cdef uint8_t threaded_init
1552
1553    cdef object __weakref__
1554
1555    cdef GUID init_codec(self) except *:
1556        log("init_codec()")
1557        codecs = self.query_codecs()
1558        #codecs={'H264': {"guid" : '6BC82762-4E63-4CA4-AA85-1E50F321F6BF', .. }
1559        internal_name = {"H265" : "HEVC"}.get(self.codec_name.upper(), self.codec_name.upper())
1560        guid_str = codecs.get(internal_name, {}).get("guid")
1561        assert guid_str, "%s not supported! (only available: %s)" % (self.codec_name, csv(codecs.keys()))
1562        self.codec = c_parseguid(guid_str)
1563        return self.codec
1564
    cdef GUID get_codec(self):
        #the codec GUID, as stored in self.codec by init_codec()
        return self.codec
1567
1568    cdef GUID get_preset(self, GUID codec) except *:
1569        global bad_presets
1570        presets = self.query_presets(codec)
1571        options = {}
1572        #if a preset was specified, give it the best score possible (-1):
1573        if DESIRED_PRESET:
1574            options[-1] = DESIRED_PRESET
1575        #for new style presets (P1 - P7),
1576        #we only care about the quality here,
1577        #the speed is set using the "tuning"
1578        for i in range(1, 8):
1579            name = "P%i" % i
1580            guid = presets.get(name)
1581            if not guid:
1582                continue
1583            preset_quality = PRESET_QUALITY.get(name, 50)
1584            distance = abs(self.quality-preset_quality)
1585            options.setdefault(distance, []).append((name, guid))
1586        #TODO: figure out why the new-style presets fail
1587        options = {}
1588        #no new-style presets found,
1589        #fallback to older lookup code:
1590        if not options:
1591            #add all presets ranked by how far they are from the target speed and quality:
1592            log("presets for %s: %s (pixel format=%s)", guidstr(codec), csv(presets.keys()), self.pixel_format)
1593            for name, x in presets.items():
1594                preset_speed = PRESET_SPEED.get(name, 50)
1595                preset_quality = PRESET_QUALITY.get(name, 50)
1596                is_lossless = name in LOSSLESS_PRESETS
1597                log("preset %16s: speed=%5i, quality=%5i (lossless=%s - want lossless=%s)", name, preset_speed, preset_quality, is_lossless, bool(self.lossless))
1598                if is_lossless and self.pixel_format!="YUV444P":
1599                    continue
1600                if preset_speed>=0 and preset_quality>=0:
1601                    #quality (3) weighs more than speed (2):
1602                    v = 2 * abs(preset_speed-self.speed) + 3 * abs(preset_quality-self.quality)
1603                    if self.lossless!=is_lossless:
1604                        v -= 100
1605                    l = options.setdefault(v, [])
1606                    if x not in l:
1607                        l.append((name, x))
1608        log("get_preset(%s) speed=%s, quality=%s, lossless=%s, pixel_format=%s, options=%s", codecstr(codec), self.speed, self.quality, bool(self.lossless), self.pixel_format, options)
1609        device_id = self.cuda_device_context.device_id
1610        for score in sorted(options.keys()):
1611            for preset, preset_guid in options.get(score):
1612                if preset in bad_presets.get(device_id, []):
1613                    log("skipping bad preset '%s' (speed=%s, quality=%s, lossless=%s, pixel_format=%s)", preset, self.speed, self.quality, self.lossless, self.pixel_format)
1614                    continue
1615
1616                if preset and (preset in presets.keys()):
1617                    log("using preset '%s' for speed=%s, quality=%s, lossless=%s, pixel_format=%s", preset, self.speed, self.quality, self.lossless, self.pixel_format)
1618                    return c_parseguid(preset_guid)
1619        raise Exception("no matching presets available for '%s' with speed=%i and quality=%i" % (self.codec_name, self.speed, self.quality))
1620
    def init_context(self, encoding, unsigned int width, unsigned int height, src_format, options:typedict=None):
        """
        Configure this encoder for the given encoding, picture size and source
        pixel format, then initialize the device: in a separate thread if
        "threaded-init" is enabled, or synchronously otherwise.

        `options` must contain "cuda-device-context" and "dst-formats"
        (which must include "YUV420P" or "YUV444P"), it may also contain
        "quality", "speed", "scaled-width", "scaled-height" and "threaded-init".
        Invalid arguments trigger an AssertionError.
        """
        assert NvEncodeAPICreateInstance is not None, "encoder module is not initialized"
        log("init_context%s", (encoding, width, height, src_format, options))
        options = options or typedict()
        cuda_device_context = options.get("cuda-device-context")
        assert cuda_device_context, "no cuda device context"
        self.cuda_device_context = cuda_device_context
        assert src_format in ("ARGB", "XRGB", "BGRA", "BGRX", "r210"), "invalid source format %s" % src_format
        dst_formats = options.strtupleget("dst-formats")
        assert "YUV420P" in dst_formats or "YUV444P" in dst_formats
        self.width = width
        self.height = height
        self.quality = options.intget("quality", 50)
        self.speed = options.intget("speed", 50)
        #without scaling options, the output has the same size as the input:
        self.scaled_width = options.intget("scaled-width", width)
        self.scaled_height = options.intget("scaled-height", height)
        self.scaling = bool(self.scaled_width!=self.width or self.scaled_height!=self.height)
        #the buffer dimensions are rounded up to a multiple of 32:
        self.input_width = roundup(width, 32)
        self.input_height = roundup(height, 32)
        self.encoder_width = roundup(self.scaled_width, 32)
        self.encoder_height = roundup(self.scaled_height, 32)
        self.src_format = src_format
        self.dst_formats = dst_formats
        self.encoding = encoding
        self.codec_name = encoding.upper()      #ie: "H264"
        self.preset_name = None
        self.frames = 0
        self.pixel_format = ""
        self.last_frame_times = deque(maxlen=200)
        self.update_bitrate()

        #NOTE(review): options was already defaulted above, this looks redundant
        options = options or typedict()
        #the pixel format we feed into the encoder
        self.pixel_format = self.get_target_pixel_format(self.quality)
        self.profile_name = self._get_profile(options)
        self.lossless = self.get_target_lossless(self.pixel_format, self.quality)
        log("using %s %s compression at %s%% quality with pixel format %s",
            ["lossy","lossless"][self.lossless], encoding, self.quality, self.pixel_format)

        self.threaded_init = options.boolget("threaded-init", THREADED_INIT)
        if self.threaded_init:
            #the encoder is not ready until the thread has completed init_device(),
            #see is_ready()
            start_thread(self.threaded_init_device, "threaded-init-device", daemon=True, args=(options,))
        else:
            self.init_device(options)
1665
1666
1667    cdef _get_profile(self, options):
1668        #convert the pixel format into a "colourspace" string:
1669        csc_mode = "YUV420P"
1670        if self.pixel_format in ("BGRX", "BGRA", "YUV444P"):
1671            csc_mode = "YUV444P"
1672        elif self.pixel_format=="r210":
1673            csc_mode = "YUV444P10"
1674
1675        #use the environment as default if present:
1676        profile = os.environ.get("XPRA_NVENC_PROFILE", "")
1677        profile = os.environ.get("XPRA_NVENC_%s_PROFILE" % csc_mode, profile)
1678        #now see if the client has requested a different value:
1679        profile = options.strget("h264.%s.profile" % csc_mode, profile)
1680        return profile
1681
1682
1683    def threaded_init_device(self, options : typedict):
1684        global device_lock
1685        with device_lock:
1686            if SLOW_DOWN_INIT:
1687                import time
1688                time.sleep(SLOW_DOWN_INIT)
1689            try:
1690                self.init_device(options)
1691            except NVENCException as e:
1692                log("threaded_init_device(%s)", options, exc_info=True)
1693                log.warn("Warning: failed to initialize NVENC device")
1694                if not e.api_message:
1695                    log.warn(" unknown error %i", e.code)
1696                else:
1697                    log.warn(" error %i:", e.code)
1698                    log.warn(" '%s'", e.api_message)
1699                self.clean()
1700            except Exception as e:
1701                log("threaded_init_device(%s)", options, exc_info=True)
1702                log.warn("Warning: failed to initialize device:")
1703                log.warn(" %s", e)
1704                self.clean()
1705
1706    def init_device(self, options : typedict):
1707        global bad_presets
1708        cdef double start = monotonic()
1709        with self.cuda_device_context as cuda_context:
1710            self.init_cuda(cuda_context)
1711            self.init_cuda_kernel(cuda_context)
1712
1713        device_id = self.cuda_device_context.device_id
1714        try:
1715            #the example code accesses the cuda context after a context.pop()
1716            #(which is weird)
1717            self.init_nvenc()
1718
1719            record_device_success(device_id)
1720        except Exception as e:
1721            log("init_cuda failed", exc_info=True)
1722            if self.preset_name and isinstance(e, NVENCException) and e.code==NV_ENC_ERR_INVALID_PARAM:
1723                log("adding preset '%s' to bad presets", self.preset_name)
1724                bad_presets.setdefault(device_id, []).append(self.preset_name)
1725            else:
1726                record_device_failure(device_id)
1727
1728            raise
1729        cdef double end = monotonic()
1730        self.ready = 1
1731        log("init_device(%s) took %1.fms", options, (end-start)*1000.0)
1732
    def is_ready(self):
        """
        Returns True once `init_device` has completed successfully.
        """
        return bool(self.ready)
1735
1736
1737    def get_target_pixel_format(self, quality):
1738        global NATIVE_RGB, YUV420_ENABLED, YUV444_ENABLED, LOSSLESS_ENABLED, YUV444_THRESHOLD, YUV444_CODEC_SUPPORT
1739        v = None
1740        hasyuv444 = YUV444_CODEC_SUPPORT.get(self.encoding, YUV444_ENABLED) and "YUV444P" in self.dst_formats
1741        nativergb = NATIVE_RGB and hasyuv444
1742        if nativergb and self.src_format in ("BGRX", "BGRA"):
1743            v = "BGRX"
1744        elif self.src_format=="r210":
1745            v = "r210"
1746        else:
1747            hasyuv420 = YUV420_ENABLED and "YUV420P" in self.dst_formats
1748            if hasyuv444:
1749                #NVENC and the client can handle it,
1750                #now check quality and scaling:
1751                #(don't use YUV444 is we're going to downscale or use low quality anyway)
1752                if (quality>=YUV444_THRESHOLD and not self.scaling) or not hasyuv420:
1753                    v = "YUV444P"
1754            if not v:
1755                if hasyuv420:
1756                    v = "NV12"
1757                else:
1758                    raise Exception("no compatible formats found for quality=%i, scaling=%s, YUV420 support=%s, YUV444 support=%s, codec=%s, dst-formats=%s" % (
1759                        quality, self.scaling, hasyuv420, hasyuv444, self.codec_name, self.dst_formats))
1760        log("get_target_pixel_format(%i)=%s for encoding=%s, scaling=%s, NATIVE_RGB=%s, YUV444_CODEC_SUPPORT=%s, YUV420_ENABLED=%s, YUV444_ENABLED=%s, YUV444_THRESHOLD=%s, LOSSLESS_ENABLED=%s, src_format=%s, dst_formats=%s",
1761            quality, v, self.encoding, self.scaling, bool(NATIVE_RGB), YUV444_CODEC_SUPPORT, bool(YUV420_ENABLED), bool(YUV444_ENABLED), YUV444_THRESHOLD, bool(LOSSLESS_ENABLED), self.src_format, csv(self.dst_formats))
1762        return v
1763
1764    def get_target_lossless(self, pixel_format : str, quality : int):
1765        global LOSSLESS_ENABLED, LOSSLESS_CODEC_SUPPORT
1766        if pixel_format not in ("YUV444P", "r210"):
1767            return False
1768        if not LOSSLESS_CODEC_SUPPORT.get(self.encoding, LOSSLESS_ENABLED):
1769            return False
1770        return quality>=LOSSLESS_THRESHOLD
1771
    def init_cuda(self, cuda_context):
        """
        Collects the CUDA / PyCUDA information and stores the pointer
        to the current CUDA context in `self.cuda_context_ptr`.

        Raises an AssertionError if the context pointer cannot be obtained,
        and a TransientCodecException if PyCUDA reports a memory error
        (in which case the time of the failure is also recorded).
        """
        cdef int result
        cdef uintptr_t context_pointer

        global last_context_failure
        log("init_cuda(%s) pixel format=%s", cuda_context, self.pixel_format)
        try:
            log("init_cuda(%s)", cuda_context)
            self.cuda_info = get_cuda_info()
            log("init_cuda cuda info=%s", self.cuda_info)
            self.pycuda_info = get_pycuda_info()
            if self.cuda_device_context:
                log("init_cuda pycuda info=%s", self.pycuda_info)
                self.cuda_device_info = self.cuda_device_context.get_info()

            #get the CUDA context (C pointer):
            #a bit of magic to pass a cython pointer to ctypes:
            #(we give cuCtxGetCurrent the address of our own attribute,
            # so that it writes the context pointer directly into it)
            context_pointer = <uintptr_t> (&self.cuda_context_ptr)
            result = cuCtxGetCurrent(ctypes.cast(context_pointer, POINTER(ctypes.c_void_p)))
            if DEBUG_API:
                log("cuCtxGetCurrent() returned %s, context_pointer=%#x, cuda context pointer=%#x",
                    CUDA_ERRORS_INFO.get(result, result), context_pointer, <uintptr_t> self.cuda_context_ptr)
            assert result==0, "failed to get current cuda context, cuCtxGetCurrent returned %s" % CUDA_ERRORS_INFO.get(result, result)
            assert (<uintptr_t> self.cuda_context_ptr)!=0, "invalid cuda context pointer"
        except driver.MemoryError as e:
            #remember when this happened: get_runtime_factor() uses this timestamp
            last_context_failure = monotonic()
            log("init_cuda %s", e)
            raise TransientCodecException("could not initialize cuda: %s" % e) from None
1800
    cdef init_cuda_kernel(self, cuda_context):
        #Selects the NVENC buffer format and the CUDA conversion kernel (if any)
        #matching self.pixel_format, then allocates the buffers:
        #* the CUDA "output" buffer, which is what we feed into the encoder
        #* the CUDA "input" buffer, only when a conversion kernel is used
        #* the page-locked host buffer, which is what we upload to the device
        #Raises an Exception if self.pixel_format is not one we can handle.
        log("init_cuda_kernel(..)")
        global YUV420_ENABLED, YUV444_ENABLED, YUV444_CODEC_SUPPORT, NATIVE_RGB
        #wmult, hmult and plane_size_div are only used to size the output buffer:
        #width = encoder_width*wmult, height = encoder_height*hmult//plane_size_div
        cdef unsigned int plane_size_div, wmult, hmult, max_input_stride
        #use alias to make code easier to read:
        da = driver.device_attribute
        if self.pixel_format=="BGRX":
            assert NATIVE_RGB
            kernel_name = None
            self.bufferFmt = NV_ENC_BUFFER_FORMAT_ARGB
            plane_size_div= 1
            wmult = 4
            hmult = 1
        elif self.pixel_format=="r210":
            #NOTE(review): get_target_pixel_format() selects "r210" without
            #checking NATIVE_RGB, so this assertion could fail - confirm
            assert NATIVE_RGB
            kernel_name = None
            self.bufferFmt = NV_ENC_BUFFER_FORMAT_ARGB10
            plane_size_div= 1
            wmult = 4
            hmult = 1
        #if supported (separate plane flag), use YUV444P:
        elif self.pixel_format=="YUV444P":
            assert YUV444_CODEC_SUPPORT.get(self.encoding, YUV444_ENABLED), "YUV444 is not enabled for %s" % self.encoding
            kernel_name = "%s_to_YUV444" % (self.src_format.replace("A", "X"))  #ie: XRGB_to_YUV444
            self.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV444
            #3 full planes:
            plane_size_div = 1
            wmult = 1
            hmult = 3
        elif self.pixel_format=="NV12":
            assert YUV420_ENABLED
            kernel_name = "%s_to_NV12" % (self.src_format.replace("A", "X"))  #ie: BGRX_to_NV12
            self.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12
            #1 full Y plane and 2 U+V planes subsampled by 4:
            #(so the buffer height is 3/2 of the picture height)
            plane_size_div = 2
            wmult = 1
            hmult = 3
        else:
            raise Exception("BUG: invalid dst format: %s" % self.pixel_format)

        #allocate CUDA "output" buffer (on device):
        #this is the buffer we feed into the encoder
        #the data may come from the CUDA kernel,
        #or it may be uploaded directly there (ie: BGRX)
        self.cudaOutputBuffer, self.outputPitch = driver.mem_alloc_pitch(self.encoder_width*wmult, self.encoder_height*hmult//plane_size_div, 16)
        log("CUDA Output Buffer=%#x, pitch=%s", int(self.cudaOutputBuffer), self.outputPitch)

        if kernel_name:
            #load the kernel:
            self.kernel = get_CUDA_function(kernel_name)
            self.kernel_name = kernel_name
            assert self.kernel, "failed to load %s for cuda context %s" % (self.kernel_name, cuda_context)
            #allocate CUDA input buffer (on device) 32-bit RGBX
            #(and make it bigger just in case - subregions from XShm can have a huge rowstride)
            #(this is the buffer we feed into the kernel)
            max_input_stride = MAX(2560, self.input_width)*4
            self.cudaInputBuffer, self.inputPitch = driver.mem_alloc_pitch(max_input_stride, self.input_height, 16)
            log("CUDA Input Buffer=%#x, pitch=%s", int(self.cudaInputBuffer), self.inputPitch)
            #CUDA
            #record the device limits and the kernel's thread limit:
            d = self.cuda_device_context.device
            self.max_block_sizes = d.get_attribute(da.MAX_BLOCK_DIM_X), d.get_attribute(da.MAX_BLOCK_DIM_Y), d.get_attribute(da.MAX_BLOCK_DIM_Z)
            self.max_grid_sizes = d.get_attribute(da.MAX_GRID_DIM_X), d.get_attribute(da.MAX_GRID_DIM_Y), d.get_attribute(da.MAX_GRID_DIM_Z)
            log("max_block_sizes=%s, max_grid_sizes=%s", self.max_block_sizes, self.max_grid_sizes)
            self.max_threads_per_block = self.kernel.get_attribute(driver.function_attribute.MAX_THREADS_PER_BLOCK)
            log("max_threads_per_block=%s", self.max_threads_per_block)
        else:
            #we don't use a CUDA kernel
            self.kernel_name = None
            self.kernel = None
            self.cudaInputBuffer = None
            #there is no separate input buffer: use the pitch of the output buffer
            self.inputPitch = self.outputPitch
            self.max_block_sizes = 0
            self.max_grid_sizes = 0
            self.max_threads_per_block = 0

        #allocate input buffer on host:
        #this is the buffer we upload to the device
        self.inputBuffer = driver.pagelocked_zeros(self.inputPitch*self.input_height, dtype=numpy.byte)
        log("inputBuffer=%s (size=%s)", self.inputBuffer, self.inputPitch*self.input_height)
1880
1881
    def init_nvenc(self):
        """
        Initialize NVENC: open the encode session,
        then initialize the encoder and the buffers, in that order.
        """
        log("init_nvenc()")
        self.open_encode_session()
        self.init_encoder()
        self.init_buffers()
1887
1888    def init_encoder(self):
1889        log("init_encoder()")
1890        cdef GUID codec = self.init_codec()
1891        cdef NVENCSTATUS r
1892        cdef NV_ENC_INITIALIZE_PARAMS *params = <NV_ENC_INITIALIZE_PARAMS*> cmalloc(sizeof(NV_ENC_INITIALIZE_PARAMS), "initialization params")
1893        assert memset(params, 0, sizeof(NV_ENC_INITIALIZE_PARAMS))!=NULL
1894        try:
1895            self.init_params(codec, params)
1896            if DEBUG_API:
1897                log("nvEncInitializeEncoder using encode=%s", codecstr(codec))
1898            with nogil:
1899                r = self.functionList.nvEncInitializeEncoder(self.context, params)
1900            raiseNVENC(r, "initializing encoder")
1901            log("NVENC initialized with '%s' codec and '%s' preset" % (self.codec_name, self.preset_name))
1902
1903            self.dump_caps(self.codec_name, codec)
1904        finally:
1905            if params.encodeConfig!=NULL:
1906                free(params.encodeConfig)
1907            free(params)
1908
1909    cdef dump_caps(self, codec_name, GUID codec):
1910        #test all caps:
1911        caps = {}
1912        for cap, descr in CAPS_NAMES.items():
1913            if cap!=NV_ENC_CAPS_EXPOSED_COUNT:
1914                v = self.query_encoder_caps(codec, cap)
1915                caps[descr] = v
1916        log("caps(%s)=%s", codec_name, caps)
1917
1918    cdef init_params(self, GUID codec, NV_ENC_INITIALIZE_PARAMS *params):
1919        #caller must free the config!
1920        assert self.context, "context is not initialized"
1921        cdef GUID preset = self.get_preset(self.codec)
1922        self.preset_name = CODEC_PRESETS_GUIDS.get(guidstr(preset), guidstr(preset))
1923        log("init_params(%s) using preset=%s", codecstr(codec), presetstr(preset))
1924        profiles = self.query_profiles(codec)
1925        if self.profile_name and profiles and self.profile_name not in profiles:
1926            self.profile_name = tuple(profiles.keys())[0]
1927        profile_guidstr = profiles.get(self.profile_name)
1928        cdef GUID profile
1929        if profile_guidstr:
1930            profile = c_parseguid(profile_guidstr)
1931        else:
1932            profile = NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID
1933        log("using profile=%s", PROFILE_STR.get(guidstr(profile)))
1934
1935        input_format = BUFFER_FORMAT[self.bufferFmt]
1936        input_formats = self.query_input_formats(codec)
1937        assert input_format in input_formats, "%s does not support %s (only: %s)" %  (self.codec_name, input_format, input_formats)
1938
1939        assert memset(params, 0, sizeof(NV_ENC_INITIALIZE_PARAMS))!=NULL
1940        params.version = NV_ENC_INITIALIZE_PARAMS_VER
1941        params.encodeGUID = codec
1942        params.presetGUID = preset
1943        params.encodeWidth = self.encoder_width
1944        params.encodeHeight = self.encoder_height
1945        params.maxEncodeWidth = self.encoder_width
1946        params.maxEncodeHeight = self.encoder_height
1947        params.darWidth = self.encoder_width
1948        params.darHeight = self.encoder_height
1949        params.enableEncodeAsync = 0            #not supported on Linux
1950        params.enablePTD = 1                    #not supported in sync mode!?
1951        params.frameRateNum = 30
1952        params.frameRateDen = 1
1953
1954        #apply preset:
1955        cdef NV_ENC_PRESET_CONFIG *presetConfig = self.get_preset_config(self.preset_name, codec, preset)
1956        assert presetConfig!=NULL, "could not find preset %s" % self.preset_name
1957        cdef NV_ENC_CONFIG *config = <NV_ENC_CONFIG*> cmalloc(sizeof(NV_ENC_CONFIG), "encoder config")
1958        assert memcpy(config, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG))!=NULL
1959        free(presetConfig)
1960        config.profileGUID = profile
1961        self.tune_preset(config)
1962        params.encodeConfig = config
1963
1964
1965    cdef tune_preset(self, NV_ENC_CONFIG *config):
1966        #config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR     #FIXME: check NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES caps
1967        #config.rcParams.enableMinQP = 1
1968        #config.rcParams.enableMaxQP = 1
1969        config.gopLength = NVENC_INFINITE_GOPLENGTH
1970        config.frameIntervalP = 1
1971        #0=max quality, 63 lowest quality
1972        qpmin = QP_MAX_VALUE-min(QP_MAX_VALUE, int(QP_MAX_VALUE*(self.quality)//100))
1973        qpmax = QP_MAX_VALUE-max(0, int(QP_MAX_VALUE*self.quality//100))
1974        config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME
1975        #config.mvPrecision = NV_ENC_MV_PRECISION_FULL_PEL
1976        if True:
1977            #const QP:
1978            config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP
1979            if self.lossless:
1980                qp = 0
1981            else:
1982                qp = min(QP_MAX_VALUE, max(0, (qpmin + qpmax)//2))
1983            config.rcParams.constQP.qpInterP = qp
1984            config.rcParams.constQP.qpInterB = qp
1985            config.rcParams.constQP.qpIntra = qp
1986            log("constQP: %i", qp)
1987        else:
1988            config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR
1989            config.rcParams.averageBitRate = 500000
1990            config.rcParams.maxBitRate = 600000
1991            #config.rcParams.vbvBufferSize = 0
1992            #config.rcParams.vbvInitialDelay = 0
1993            #config.rcParams.enableInitialRCQP = 1
1994            #config.rcParams.initialRCQP.qpInterP  = qpmin
1995            #config.rcParams.initialRCQP.qpIntra = qpmin
1996            #config.rcParams.initialRCQP.qpInterB = qpmin
1997
1998        if self.pixel_format=="BGRX":
1999            chromaFormatIDC = 3
2000        elif self.pixel_format=="r210":
2001            chromaFormatIDC = 3
2002        elif self.pixel_format=="NV12":
2003            chromaFormatIDC = 1
2004        elif self.pixel_format=="YUV444P":
2005            chromaFormatIDC = 3
2006        else:
2007            raise Exception("unknown pixel format %s" % self.pixel_format)
2008        log("chromaFormatIDC(%s)=%s", self.pixel_format, chromaFormatIDC)
2009
2010        if self.codec_name=="H264":
2011            config.encodeCodecConfig.h264Config.chromaFormatIDC = chromaFormatIDC
2012            #config.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag = 0
2013            #config.encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag = 0
2014            config.encodeCodecConfig.h264Config.idrPeriod = config.gopLength
2015            config.encodeCodecConfig.h264Config.enableIntraRefresh = 0
2016            #config.encodeCodecConfig.h264Config.maxNumRefFrames = 16
2017            #config.encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix = 1      #AVCOL_SPC_BT709 ?
2018            #config.encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries = 1   #AVCOL_PRI_BT709 ?
2019            #config.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics = 1   #AVCOL_TRC_BT709 ?
2020            #config.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag = 1
2021        else:
2022            assert self.codec_name=="H265"
2023            config.encodeCodecConfig.hevcConfig.chromaFormatIDC = chromaFormatIDC
2024            #config.encodeCodecConfig.hevcConfig.level = NV_ENC_LEVEL_HEVC_5
2025            config.encodeCodecConfig.hevcConfig.idrPeriod = config.gopLength
2026            config.encodeCodecConfig.hevcConfig.enableIntraRefresh = 0
2027            #config.encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 = 2*int(self.bufferFmt==NV_ENC_BUFFER_FORMAT_ARGB10)
2028            #config.encodeCodecConfig.hevcConfig.maxNumRefFramesInDPB = 16
2029            #config.encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFormat = ...
2030
    def init_buffers(self):
        """
        Register the CUDA output buffer with NVENC as its input resource,
        then create the bitstream buffer which receives the compressed data.

        Stores the resulting handles in `self.inputHandle` and `self.bitstreamBuffer`.
        Errors reported by NVENC are raised via `raiseNVENC`.
        """
        log("init_buffers()")
        cdef NV_ENC_REGISTER_RESOURCE registerResource
        cdef NV_ENC_CREATE_BITSTREAM_BUFFER createBitstreamBufferParams
        assert self.context, "context is not initialized"
        #register CUDA input buffer:
        #(what NVENC reads from is `cudaOutputBuffer`: the "output" of the CUDA side)
        memset(&registerResource, 0, sizeof(NV_ENC_REGISTER_RESOURCE))
        registerResource.version = NV_ENC_REGISTER_RESOURCE_VER
        registerResource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR
        #the device allocation is converted to its integer address, then passed as a raw pointer:
        cdef uintptr_t resource = int(self.cudaOutputBuffer)
        registerResource.resourceToRegister = <void *> resource
        registerResource.width = self.encoder_width
        registerResource.height = self.encoder_height
        registerResource.pitch = self.outputPitch
        registerResource.bufferFormat = self.bufferFmt
        if DEBUG_API:
            log("nvEncRegisterResource(%#x)", <uintptr_t> &registerResource)
        cdef NVENCSTATUS r                  #status code returned by the NVENC API calls below
        with nogil:
            r = self.functionList.nvEncRegisterResource(self.context, &registerResource)
        raiseNVENC(r, "registering CUDA input buffer")
        self.inputHandle = registerResource.registeredResource
        log("input handle for CUDA buffer: %#x", <uintptr_t> self.inputHandle)

        #allocate output buffer:
        memset(&createBitstreamBufferParams, 0, sizeof(NV_ENC_CREATE_BITSTREAM_BUFFER))
        createBitstreamBufferParams.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER
        #requested size: 1.5 bytes per pixel of the encoder dimensions, capped at 2MB
        #- must be big enough for the compressed stream:
        createBitstreamBufferParams.size = min(1024*1024*2, self.encoder_width*self.encoder_height*3//2)
        createBitstreamBufferParams.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED
        if DEBUG_API:
            log("nvEncCreateBitstreamBuffer(%#x)", <uintptr_t> &createBitstreamBufferParams)
        with nogil:
            r = self.functionList.nvEncCreateBitstreamBuffer(self.context, &createBitstreamBufferParams)
        raiseNVENC(r, "creating output buffer")
        self.bitstreamBuffer = createBitstreamBufferParams.bitstreamBuffer
        log("output bitstream buffer=%#x", <uintptr_t> self.bitstreamBuffer)
        assert self.bitstreamBuffer!=NULL
2069
2070
2071    def get_info(self) -> dict:
2072        global YUV444_CODEC_SUPPORT, YUV444_ENABLED, LOSSLESS_CODEC_SUPPORT, LOSSLESS_ENABLED
2073        cdef double pps
2074        info = get_info()
2075        info.update({
2076                "width"     : self.width,
2077                "height"    : self.height,
2078                "frames"    : int(self.frames),
2079                "codec"     : self.codec_name,
2080                "encoder_width"     : self.encoder_width,
2081                "encoder_height"    : self.encoder_height,
2082                "bitrate"           : self.target_bitrate,
2083                "quality"           : self.quality,
2084                "speed"             : self.speed,
2085                "lossless"  : {
2086                               ""          : self.lossless,
2087                               "supported" : LOSSLESS_CODEC_SUPPORT.get(self.encoding, LOSSLESS_ENABLED),
2088                               "threshold" : LOSSLESS_THRESHOLD
2089                    },
2090                "yuv444" : {
2091                            "supported" : YUV444_CODEC_SUPPORT.get(self.encoding, YUV444_ENABLED),
2092                            "threshold" : YUV444_THRESHOLD,
2093                            },
2094                "cuda-device"   : self.cuda_device_info,
2095                "cuda"          : self.cuda_info,
2096                "pycuda"        : self.pycuda_info,
2097                })
2098        if self.scaling:
2099            info.update({
2100                "input_width"       : self.input_width,
2101                "input_height"      : self.input_height,
2102                })
2103        if self.src_format:
2104            info["src_format"] = self.src_format
2105        if self.pixel_format:
2106            info["pixel_format"] = self.pixel_format
2107        cdef unsigned long long b = self.bytes_in
2108        if b>0 and self.bytes_out>0:
2109            info.update({
2110                "bytes_in"  : self.bytes_in,
2111                "bytes_out" : self.bytes_out,
2112                "ratio_pct" : int(100 * self.bytes_out // b)})
2113        if self.preset_name:
2114            info["preset"] = self.preset_name
2115        if self.profile_name:
2116            info["profile"] = self.profile_name
2117        cdef double t = self.time
2118        info["total_time_ms"] = int(self.time*1000.0)
2119        if self.frames>0 and t>0:
2120            pps = self.width * self.height * self.frames / t
2121            info["pixels_per_second"] = int(pps)
2122        info["free_memory"] = int(self.free_memory)
2123        info["total_memory"] = int(self.total_memory)
2124        cdef uint64_t m = self.total_memory
2125        if m>0:
2126            info["free_memory_pct"] = int(100.0*self.free_memory/m)
2127        #calculate fps:
2128        cdef int f = 0
2129        cdef double now = monotonic()
2130        cdef double last_time = now
2131        cdef double cut_off = now-10.0
2132        cdef double ms_per_frame = 0
2133        for start,end in tuple(self.last_frame_times):
2134            if end>cut_off:
2135                f += 1
2136                last_time = min(last_time, end)
2137                ms_per_frame += (end-start)
2138        if f>0 and last_time<now:
2139            info["fps"] = int(0.5+f/(now-last_time))
2140            info["ms_per_frame"] = int(1000.0*ms_per_frame/f)
2141        return info
2142
2143    def __repr__(self):
2144        return "nvenc(%s/%s/%s - %s - %4ix%-4i)" % (self.src_format, self.pixel_format, self.codec_name, self.preset_name, self.width, self.height)
2145
2146    def is_closed(self) -> bool:
2147        return bool(self.closed)
2148
    def __dealloc__(self):
        #last resort cleanup, for encoders that were never closed explicitly.
        #NOTE(review): the Cython documentation advises against calling other methods
        #of the object from __dealloc__, and clean() may hand `self.threaded_clean`
        #to a new thread - confirm this is safe, or require an explicit clean().
        if not self.closed:
            self.clean()
2152
2153
2154    def clean(self):
2155        if not self.closed:
2156            self.closed = 1
2157            if self.threaded_init:
2158                start_thread(self.threaded_clean, "threaded-clean", daemon=True)
2159            else:
2160                self.do_clean()
2161
2162    def threaded_clean(self):
2163        global device_lock
2164        with device_lock:
2165            self.do_clean()
2166
    def do_clean(self):
        """
        Release the CUDA / NVENC resources (with the CUDA device context active, if we have one),
        then reset all the encoder attributes to their empty values.
        """
        cdc = self.cuda_device_context
        log("clean() cuda_context=%s, encoder context=%#x", cdc, <uintptr_t> self.context)
        if cdc:
            #cuda_clean() runs inside the device context manager:
            with cdc:
                self.cuda_clean()
                #drop our reference, the local `cdc` keeps it alive until the `with` block exits:
                self.cuda_device_context = None
        #NOTE(review): if cuda_clean() raises, none of the resets below are applied
        self.width = 0
        self.height = 0
        self.input_width = 0
        self.input_height = 0
        self.encoder_width = 0
        self.encoder_height = 0
        self.src_format = ""
        self.dst_formats = []
        self.scaling = 0
        self.speed = 0
        self.quality = 0
        #PyCUDA:
        self.driver = 0
        self.cuda_info = None
        self.pycuda_info = None
        self.cuda_device_info = None
        self.kernel = None
        self.kernel_name = None
        self.max_block_sizes = 0
        self.max_grid_sizes = 0
        self.max_threads_per_block = 0
        self.free_memory = 0
        self.total_memory = 0
        #NVENC (mostly already cleaned up in cuda_clean):
        self.inputPitch = 0
        self.outputPitch = 0
        self.bitstreamBuffer = NULL
        self.bufferFmt = NV_ENC_BUFFER_FORMAT_UNDEFINED
        self.codec_name = ""
        self.preset_name = ""
        self.pixel_format = ""
        #statistics, etc:
        self.time = 0
        self.frames = 0
        self.first_frame_timestamp = 0
        self.last_frame_times = []
        self.bytes_in = 0
        self.bytes_out = 0
        log("clean() done")
2213
2214
    cdef cuda_clean(self):
        #Tear down the NVENC side of the encoder:
        #flush it (if it has encoded any frames), release the buffers,
        #destroy the bitstream buffer and then the encoder itself.
        #Errors reported by NVENC are raised via raiseNVENC.
        log("cuda_clean()")
        cdef NVENCSTATUS r
        if self.context!=NULL and self.frames>0:
            #flushing is best effort: failures are logged and cleanup continues
            try:
                self.flushEncoder()
            except Exception as e:
                log.warn("got exception on flushEncoder, continuing anyway", exc_info=True)
        #NOTE(review): if buffer_clean() or the bitstream buffer destruction raises,
        #the encoder is not destroyed and context_counter is not decreased
        self.buffer_clean()
        if self.context!=NULL:
            if self.bitstreamBuffer!=NULL:
                log("cuda_clean() destroying output bitstream buffer %#x", <uintptr_t> self.bitstreamBuffer)
                if DEBUG_API:
                    log("nvEncDestroyBitstreamBuffer(%#x)", <uintptr_t> self.bitstreamBuffer)
                with nogil:
                    r = self.functionList.nvEncDestroyBitstreamBuffer(self.context, self.bitstreamBuffer)
                raiseNVENC(r, "destroying output buffer")
                self.bitstreamBuffer = NULL
            log("cuda_clean() destroying encoder %#x", <uintptr_t> self.context)
            if DEBUG_API:
                log("nvEncDestroyEncoder(%#x)", <uintptr_t> self.context)
            with nogil:
                r = self.functionList.nvEncDestroyEncoder(self.context)
            raiseNVENC(r, "destroying context")
            #NOTE(review): the function list pointer is cleared without being freed here
            #- confirm who owns that memory
            self.functionList = NULL
            self.context = NULL
            #one less encoder context in use:
            global context_counter
            context_counter.decrease()
            log("cuda_clean() (still %s context%s in use)", context_counter, engs(context_counter))
        else:
            log("skipping encoder context cleanup")
        self.cuda_context_ptr = <void *> 0
2247
2248    def buffer_clean(self):
2249        if self.inputHandle!=NULL and self.context!=NULL:
2250            log("buffer_clean() unregistering CUDA output buffer input handle %#x", <uintptr_t> self.inputHandle)
2251            if DEBUG_API:
2252                log("nvEncUnregisterResource(%#x)", <uintptr_t> self.inputHandle)
2253            with nogil:
2254                r = self.functionList.nvEncUnregisterResource(self.context, self.inputHandle)
2255            raiseNVENC(r, "unregistering CUDA input buffer")
2256            self.inputHandle = NULL
2257        if self.inputBuffer is not None:
2258            log("buffer_clean() freeing CUDA host buffer %s", self.inputBuffer)
2259            self.inputBuffer = None
2260        if self.cudaInputBuffer is not None:
2261            log("buffer_clean() freeing CUDA input buffer %#x", int(self.cudaInputBuffer))
2262            self.cudaInputBuffer.free()
2263            self.cudaInputBuffer = None
2264        if self.cudaOutputBuffer is not None:
2265            log("buffer_clean() freeing CUDA output buffer %#x", int(self.cudaOutputBuffer))
2266            self.cudaOutputBuffer.free()
2267            self.cudaOutputBuffer = None
2268
2269    def get_width(self) -> int:
2270        return self.width
2271
2272    def get_height(self) -> int:
2273        return self.height
2274
2275    def get_type(self) -> str:
2276        return "nvenc"
2277
2278    def get_encoding(self) -> str:
2279        return self.encoding
2280
2281    def get_src_format(self) -> str:
2282        return self.src_format
2283
2284    def set_encoding_speed(self, int speed):
2285        if self.speed!=speed:
2286            self.speed = speed
2287            self.update_bitrate()
2288
2289    def set_encoding_quality(self, int quality):
2290        #cdef NV_ENC_RECONFIGURE_PARAMS reconfigure_params
2291        assert self.context, "context is not initialized"
2292        if self.quality==quality:
2293            return
2294        log("set_encoding_quality(%s) current quality=%s", quality, self.quality)
2295        if quality<LOSSLESS_THRESHOLD:
2296            #edge resistance:
2297            raw_delta = quality-self.quality
2298            max_delta = max(-1, min(1, raw_delta))*10
2299            if abs(raw_delta)<abs(max_delta):
2300                delta = raw_delta
2301            else:
2302                delta = max_delta
2303            target_quality = quality-delta
2304        else:
2305            target_quality = 100
2306        self.quality = quality
2307        log("set_encoding_quality(%s) target quality=%s", quality, target_quality)
2308        #code removed:
2309        #new_pixel_format = self.get_target_pixel_format(target_quality)
2310        #etc...
2311        #we can't switch pixel format,
2312        #because we would need to free the buffers and re-allocate new ones
2313        #best to just tear down the encoder context and create a new one
2314        return
2315
2316    def update_bitrate(self):
2317        #use an exponential scale so for a 1Kx1K image (after scaling), roughly:
2318        #speed=0   -> 1Mbit/s
2319        #speed=50  -> 10Mbit/s
2320        #speed=90  -> 66Mbit/s
2321        #speed=100 -> 100Mbit/s
2322        MPixels = (self.encoder_width * self.encoder_height) / (1000.0 * 1000.0)
2323        if self.pixel_format=="NV12":
2324            #subsampling halves the input size:
2325            mult = 0.5
2326        else:
2327            #yuv444p preserves it:
2328            mult = 1.0
2329        lim = 100*1000000
2330        self.target_bitrate = min(lim, max(1000000, int(((0.5+self.speed/200.0)**8)*lim*MPixels*mult)))
2331        self.max_bitrate = 2*self.target_bitrate
2332
2333
    cdef flushEncoder(self):
        #Submit a picture with only the end-of-stream flag set (no input buffer)
        #to flush the encoder.
        #Errors reported by NVENC are raised via raiseNVENC.
        cdef NV_ENC_PIC_PARAMS picParams
        cdef NVENCSTATUS r
        assert self.context, "context is not initialized"
        #every field is zeroed, only the version and EOS flag are set:
        memset(&picParams, 0, sizeof(NV_ENC_PIC_PARAMS))
        picParams.version = NV_ENC_PIC_PARAMS_VER
        picParams.encodePicFlags = NV_ENC_PIC_FLAG_EOS
        if DEBUG_API:
            log("nvEncEncodePicture(%#x)", <uintptr_t> &picParams)
        with nogil:
            r = self.functionList.nvEncEncodePicture(self.context, &picParams)
        raiseNVENC(r, "flushing encoder buffer")
2346
2347    def compress_image(self, image, options=None, int retry=0):
2348        options = options or {}
2349        cuda_device_context = options.get("cuda-device-context")
2350        assert cuda_device_context, "no cuda device context"
2351        #cuda_device_context.__enter__ does self.context.push()
2352        with cuda_device_context as cuda_context:
2353            quality = options.get("quality", -1)
2354            if quality>=0:
2355                self.set_encoding_quality(quality)
2356            speed = options.get("speed", -1)
2357            if speed>=0:
2358                self.set_encoding_speed(speed)
2359            return self.do_compress_image(cuda_context, image)
2360
2361    cdef do_compress_image(self, cuda_context, image):
2362        assert self.context, "nvenc context is not initialized"
2363        assert cuda_context, "missing device context"
2364        cdef unsigned int w = image.get_width()
2365        cdef unsigned int h = image.get_height()
2366        gpu_buffer = image.get_gpu_buffer()
2367        cdef unsigned int stride = image.get_rowstride()
2368        log("do_compress_image(%s) kernel=%s, GPU buffer=%#x, stride=%i, input pitch=%i, output pitch=%i",
2369            image, self.kernel_name, int(gpu_buffer or 0), stride, self.inputPitch, self.outputPitch)
2370        assert image.get_planes()==ImageWrapper.PACKED, "invalid number of planes: %s" % image.get_planes()
2371        assert (w & WIDTH_MASK)<=self.input_width, "invalid width: %s" % w
2372        assert (h & HEIGHT_MASK)<=self.input_height, "invalid height: %s" % h
2373        assert self.inputBuffer is not None, "BUG: encoder is closed?"
2374
2375        if self.frames==0:
2376            #first frame, record pts:
2377            self.first_frame_timestamp = image.get_timestamp()
2378
2379        cdef unsigned long input_size
2380        if self.kernel:
2381            #copy to input buffer, CUDA kernel converts into output buffer:
2382            if GPU_MEMCOPY and gpu_buffer and stride<=self.inputPitch:
2383                driver.memcpy_dtod(self.cudaInputBuffer, int(gpu_buffer), stride*h)
2384                log("GPU memcopy %i bytes from %#x to %#x", stride*h, int(gpu_buffer), int(self.cudaInputBuffer))
2385            else:
2386                stride = self.copy_image(image, False)
2387                log("memcpy_htod(cudaOutputBuffer=%s, inputBuffer=%s)", self.cudaOutputBuffer, self.inputBuffer)
2388                driver.memcpy_htod(self.cudaInputBuffer, self.inputBuffer)
2389            self.exec_kernel(cuda_context, w, h, stride)
2390            input_size = self.inputPitch * self.input_height
2391        else:
2392            #go direct to the CUDA "output" buffer:
2393            if GPU_MEMCOPY and gpu_buffer and stride<=self.outputPitch:
2394                driver.memcpy_dtod(self.cudaOutputBuffer, int(gpu_buffer), stride*h)
2395                log("GPU memcopy %i bytes from %#x to %#x", stride*h, int(gpu_buffer), int(self.cudaOutputBuffer))
2396            else:
2397                stride = self.copy_image(image, True)
2398                driver.memcpy_htod(self.cudaOutputBuffer, self.inputBuffer)
2399            input_size = stride * self.encoder_height
2400        self.bytes_in += input_size
2401
2402        cdef NV_ENC_INPUT_PTR mappedResource = self.map_input_resource()
2403        assert mappedResource!=NULL
2404        try:
2405            return self.nvenc_compress(input_size, mappedResource, image.get_timestamp())
2406        finally:
2407            self.unmap_input_resource(mappedResource)
2408
2409    cdef unsigned int copy_image(self, image, int strict_stride) except -1:
2410        if DEBUG_API:
2411            log("copy_image(%s, %i)", image, strict_stride)
2412        cdef unsigned int image_stride = image.get_rowstride()
2413        #input_height may be smaller if we have rounded down:
2414        cdef unsigned int h = min(image.get_height(), self.input_height)
2415        cdef unsigned int i, stride, min_stride, x, y
2416        pixels = image.get_pixels()
2417        assert pixels is not None, "failed to get pixels from %s" % image
2418        #copy to input buffer:
2419        cdef object buf
2420        if isinstance(pixels, (bytearray, bytes)):
2421            pixels = memoryview(pixels)
2422        if isinstance(pixels, memoryview):
2423            #copy memoryview to inputBuffer directly:
2424            buf = self.inputBuffer
2425        else:
2426            #this is a numpy.ndarray type:
2427            buf = self.inputBuffer.data
2428        cdef double start = monotonic()
2429        cdef unsigned long copy_len
2430        cdef unsigned long pix_len = len(pixels)
2431        assert pix_len>=(h*image_stride), "image pixel buffer is too small: expected at least %ix%i=%i bytes but got %i bytes" % (h, image_stride, h*image_stride, pix_len)
2432        if image_stride==self.inputPitch or (image_stride<self.inputPitch and not strict_stride):
2433            stride = image_stride
2434            copy_len = h*image_stride
2435            #assert pix_len<=input_size, "too many pixels (expected %s max, got %s) image: %sx%s stride=%s, input buffer: stride=%s, height=%s" % (input_size, pix_len, w, h, stride, self.inputPitch, self.input_height)
2436            log("copying %s bytes from %s into %s (len=%i), in one shot",
2437                pix_len, type(pixels), type(self.inputBuffer), len(self.inputBuffer))
2438            #log("target: %s, %s, %s", buf.shape, buf.size, buf.dtype)
2439            if isinstance(pixels, memoryview):
2440                tmp = numpy.asarray(pixels, numpy.int8)
2441            else:
2442                tmp = numpy.frombuffer(pixels, numpy.int8)
2443            try:
2444                buf[:copy_len] = tmp[:copy_len]
2445            except Exception as e:
2446                log("copy_image%s", (image, strict_stride), exc_info=True)
2447                log.error("Error: numpy one shot buffer copy failed")
2448                log.error(" from %s to %s, length=%i", tmp, buf, copy_len)
2449                log.error(" original pixel buffer: %s", type(pixels))
2450                log.error(" for image %s", image)
2451                log.error(" input buffer: %i x %i", self.inputPitch, self.input_height)
2452        else:
2453            #ouch, we need to copy the source pixels into the smaller buffer
2454            #before uploading to the device... this is probably costly!
2455            stride = self.inputPitch
2456            min_stride = min(self.inputPitch, image_stride)
2457            log("copying %s bytes from %s into %s, %i stride at a time (from image stride=%i, target stride=%i)",
2458                stride*h, type(pixels), type(self.inputBuffer), min_stride, image_stride, self.inputPitch)
2459            try:
2460                for i in range(h):
2461                    x = i*self.inputPitch
2462                    y = i*image_stride
2463                    buf[x:x+min_stride] = pixels[y:y+min_stride]
2464            except Exception as e:
2465                log("copy_image%s", (image, strict_stride), exc_info=True)
2466                log.error("Error: numpy partial line buffer copy failed")
2467                log.error(" from %s to %s, length=%i", pixels, buf, min_stride)
2468                log.error(" for image %s", image)
2469                log.error(" original pixel buffer: %s", type(pixels))
2470                log.error(" input buffer: %i x %i", self.inputPitch, self.input_height)
2471                log.error(" at line %i of %i", i+1, h)
2472                raise
2473            copy_len = min_stride * h
2474        cdef double end = monotonic()
2475        cdef double elapsed = end-start
2476        if elapsed==0:
2477            #mswindows monotonic time minimum precision is 1ms...
2478            elapsed = 0.0001
2479        log("copy_image: %9i bytes uploaded in %3.1f ms: %5i MB/s", copy_len, 1000*elapsed, int(copy_len/elapsed)//1024//1024)
2480        return stride
2481
2482    cdef exec_kernel(self, cuda_context, unsigned int w, unsigned int h, unsigned int stride):
2483        cdef uint8_t dx, dy
2484        if self.pixel_format=="NV12":
2485            #(these values are derived from the kernel code - which we should know nothing about here..)
2486            #divide each dimension by 2 since we process 4 pixels at a time:
2487            dx, dy = 2, 2
2488        elif self.pixel_format=="YUV444P":
2489            #one pixel at a time:
2490            dx, dy = 1, 1
2491        else:
2492            raise Exception("bug: invalid pixel format '%s'" % self.pixel_format)
2493
2494        #FIXME: find better values and validate against max_block/max_grid:
2495        #calculate grids/blocks:
2496        #a block is a group of threads: (blockw * blockh) threads
2497        #a grid is a group of blocks: (gridw * gridh) blocks
2498        cdef uint32_t blockw = 32
2499        cdef uint32_t blockh = 32
2500        cdef uint32_t gridw = MAX(1, w//(blockw*dx))
2501        cdef uint32_t gridh = MAX(1, h//(blockh*dy))
2502        #if dx or dy made us round down, add one:
2503        if gridw*dx*blockw<w:
2504            gridw += 1
2505        if gridh*dy*blockh<h:
2506            gridh += 1
2507        cdef unsigned int in_w = self.input_width
2508        cdef unsigned int in_h = self.input_height
2509        if self.scaling:
2510            #scaling so scale exact dimensions, not padded input dimensions:
2511            in_w, in_h = w, h
2512
2513        cdef double start = monotonic()
2514        args = (self.cudaInputBuffer, numpy.int32(in_w), numpy.int32(in_h), numpy.int32(stride),
2515               self.cudaOutputBuffer, numpy.int32(self.encoder_width), numpy.int32(self.encoder_height), numpy.int32(self.outputPitch),
2516               numpy.int32(w), numpy.int32(h))
2517        if DEBUG_API:
2518            def lf(v):
2519                if isinstance(v, driver.DeviceAllocation):
2520                    return hex(int(v))
2521                return int(v)
2522            log_args = tuple(lf(v) for v in args)
2523            log("calling %s%s with block=%s, grid=%s", self.kernel_name, log_args, (blockw,blockh,1), (gridw, gridh))
2524        self.kernel(*args, block=(blockw,blockh,1), grid=(gridw, gridh))
2525        cuda_context.synchronize()
2526        cdef double end = monotonic()
2527        cdef elapsed = end-start
2528        if elapsed==0:
2529            #mswindows monotonic time minimum precision is 1ms...
2530            elapsed = 0.0001
2531        log("exec_kernel:  kernel %13s took %3.1f ms: %5i MPixels/s", self.kernel_name, elapsed*1000.0, (w*h)/elapsed//1024//1024)
2532
2533    cdef NV_ENC_INPUT_PTR map_input_resource(self):
2534        cdef NV_ENC_MAP_INPUT_RESOURCE mapInputResource
2535        #map buffer so nvenc can access it:
2536        memset(&mapInputResource, 0, sizeof(NV_ENC_MAP_INPUT_RESOURCE))
2537        mapInputResource.version = NV_ENC_MAP_INPUT_RESOURCE_VER
2538        mapInputResource.registeredResource  = self.inputHandle
2539        mapInputResource.mappedBufferFmt = self.bufferFmt
2540        if DEBUG_API:
2541            log("nvEncMapInputResource(%#x) inputHandle=%#x", <uintptr_t> &mapInputResource, <uintptr_t> self.inputHandle)
2542        cdef NVENCSTATUS r = self.functionList.nvEncMapInputResource(self.context, &mapInputResource)
2543        raiseNVENC(r, "mapping input resource")
2544        cdef NV_ENC_INPUT_PTR mappedResource = mapInputResource.mappedResource
2545        if DEBUG_API:
2546            log("compress_image(..) device buffer mapped to %#x", <uintptr_t> mappedResource)
2547        return mappedResource
2548
2549    cdef unmap_input_resource(self, NV_ENC_INPUT_PTR mappedResource):
2550        if DEBUG_API:
2551            log("nvEncUnmapInputResource(%#x)", <uintptr_t> mappedResource)
2552        cdef int r = self.functionList.nvEncUnmapInputResource(self.context, mappedResource)
2553        raiseNVENC(r, "unmapping input resource")
2554
    cdef nvenc_compress(self, int input_size, NV_ENC_INPUT_PTR input, timestamp=0):
        """
        Encode one picture from the given (already mapped) input buffer
        and return the compressed data.

        Steps:
        * fill in a NV_ENC_PIC_PARAMS structure and call nvEncEncodePicture
        * lock the output bitstream buffer, copy its contents into a python bytes object,
          then unlock it
        * update the encoder statistics (bytes_out, frames, time, last_frame_times)

        `input_size` is only used for validation and to log the compression ratio,
        `timestamp` (if positive) is converted to a value relative to `self.first_frame_timestamp`.

        Returns a tuple: (data, client_options).
        NVENC failures are reported via raiseNVENC.
        """
        cdef NV_ENC_PIC_PARAMS picParams
        cdef NV_ENC_LOCK_BITSTREAM lockOutputBuffer
        assert input_size>0, "invalid input size %i" % input_size

        cdef double start = monotonic()
        if DEBUG_API:
            log("nvEncEncodePicture(%#x)", <uintptr_t> &picParams)
        memset(&picParams, 0, sizeof(NV_ENC_PIC_PARAMS))
        picParams.version = NV_ENC_PIC_PARAMS_VER
        picParams.bufferFmt = self.bufferFmt
        picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME
        picParams.inputWidth = self.encoder_width
        picParams.inputHeight = self.encoder_height
        picParams.inputPitch = self.outputPitch
        picParams.inputBuffer = input
        picParams.outputBitstream = self.bitstreamBuffer
        #picParams.pictureType: required when enablePTD is disabled
        if self.frames==0:
            #only the first frame needs to be IDR (as we never lose frames)
            picParams.pictureType = NV_ENC_PIC_TYPE_IDR
            picParams.encodePicFlags = NV_ENC_PIC_FLAG_OUTPUT_SPSPPS
        else:
            picParams.pictureType = NV_ENC_PIC_TYPE_P
        #any encoding other than "h264" is handled as HEVC:
        if self.encoding=="h264":
            picParams.codecPicParams.h264PicParams.displayPOCSyntax = 2*self.frames
            picParams.codecPicParams.h264PicParams.refPicFlag = self.frames==0
            #this causes crashes with Pascal (ie GTX-1070):
            #picParams.codecPicParams.h264PicParams.sliceMode = 3            #sliceModeData specifies the number of slices
            #picParams.codecPicParams.h264PicParams.sliceModeData = 1        #1 slice!
        else:
            picParams.codecPicParams.hevcPicParams.displayPOCSyntax = 2*self.frames
            picParams.codecPicParams.hevcPicParams.refPicFlag = self.frames==0
        picParams.frameIdx = self.frames
        #the input timestamp is relative to the first frame,
        #it is left at zero (from the memset above) if no usable timestamp was given:
        if timestamp>0:
            if timestamp>=self.first_frame_timestamp:
                picParams.inputTimeStamp = timestamp-self.first_frame_timestamp
            else:
                log.warn("Warning: image timestamp is older than the first frame")
                log.warn(" %s vs %s", timestamp, self.first_frame_timestamp)
        #inputDuration = 0      #FIXME: use frame delay?
        #picParams.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR     #FIXME: check NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES caps
        #picParams.rcParams.enableMinQP = 1
        #picParams.rcParams.enableMaxQP = 1
        #0=max quality, 63 lowest quality
        #qmin = QP_MAX_VALUE-min(QP_MAX_VALUE, int(QP_MAX_VALUE*(self.quality+20)/100))
        #qmax = QP_MAX_VALUE-max(0, int(QP_MAX_VALUE*(self.quality-20)/100))
        #picParams.rcParams.minQP.qpInterB = qmin
        #picParams.rcParams.minQP.qpInterP = qmin
        #picParams.rcParams.minQP.qpIntra = qmin
        #picParams.rcParams.maxQP.qpInterB = qmax
        #picParams.rcParams.maxQP.qpInterP = qmax
        #picParams.rcParams.maxQP.qpIntra = qmax
        #picParams.rcParams.averageBitRate = self.target_bitrate
        #picParams.rcParams.maxBitRate = self.max_bitrate
        cdef NVENCSTATUS r
        #the GIL is released for the duration of the encode call:
        with nogil:
            r = self.functionList.nvEncEncodePicture(self.context, &picParams)
        raiseNVENC(r, "error during picture encoding")

        memset(&lockOutputBuffer, 0, sizeof(NV_ENC_LOCK_BITSTREAM))
        #lock output buffer:
        lockOutputBuffer.version = NV_ENC_LOCK_BITSTREAM_VER
        lockOutputBuffer.doNotWait = 0
        lockOutputBuffer.outputBitstream = self.bitstreamBuffer
        if DEBUG_API:
            log("nvEncLockBitstream(%#x) bitstreamBuffer=%#x", <uintptr_t> &lockOutputBuffer, <uintptr_t> self.bitstreamBuffer)
        with nogil:
            r = self.functionList.nvEncLockBitstream(self.context, &lockOutputBuffer)
        raiseNVENC(r, "locking output buffer")
        assert lockOutputBuffer.bitstreamBufferPtr!=NULL
        #copy to python buffer:
        #(slicing the char pointer creates a new bytes object,
        # so the data remains valid after the buffer is unlocked below)
        size = lockOutputBuffer.bitstreamSizeInBytes
        self.bytes_out += size
        data = (<char *> lockOutputBuffer.bitstreamBufferPtr)[:size]
        if DEBUG_API:
            log("nvEncUnlockBitstream(%#x)", <uintptr_t> self.bitstreamBuffer)
        r = self.functionList.nvEncUnlockBitstream(self.context, self.bitstreamBuffer)
        raiseNVENC(r, "unlocking output buffer")

        #update info:
        self.free_memory, self.total_memory = driver.mem_get_info()

        #metadata returned alongside the compressed data:
        client_options = {
                    "csc"       : self.src_format,
                    "frame"     : int(self.frames),
                    "pts"       : int(timestamp-self.first_frame_timestamp),
                    }
        if self.lossless:
            client_options["quality"] = 100
        else:
            client_options["quality"] = min(99, self.quality)   #ensure we cap it at 99 because this is lossy
        if self.scaling:
            client_options["scaled_size"] = self.encoder_width, self.encoder_height
        cdef double end = monotonic()
        #note: the frame counter is incremented before the log message below,
        #so the frame number logged is one more than the "frame" client option
        self.frames += 1
        self.last_frame_times.append((start, end))
        cdef double elapsed = end-start
        self.time += elapsed
        #log("memory: %iMB free, %iMB total", self.free_memory//1024//1024, self.total_memory//1024//1024)
        log("compress_image(..) %5s %3s returning %9s bytes (%.1f%%) for %4s %s-frame no %6i took %3.1fms",
            get_type(), get_version(),
            size, 100.0*size/input_size, self.encoding, PIC_TYPES.get(picParams.pictureType, picParams.pictureType), self.frames, 1000.0*elapsed)
        return data, client_options
2659
2660
2661    cdef NV_ENC_PRESET_CONFIG *get_preset_config(self, name, GUID encode_GUID, GUID preset_GUID) except *:
2662        """ you must free it after use! """
2663        cdef NV_ENC_PRESET_CONFIG *presetConfig
2664        cdef NVENCSTATUS r
2665        assert self.context, "context is not initialized"
2666        presetConfig = <NV_ENC_PRESET_CONFIG*> cmalloc(sizeof(NV_ENC_PRESET_CONFIG), "preset config")
2667        memset(presetConfig, 0, sizeof(NV_ENC_PRESET_CONFIG))
2668        presetConfig.version = NV_ENC_PRESET_CONFIG_VER
2669        presetConfig.presetCfg.version = NV_ENC_CONFIG_VER
2670        if DEBUG_API:
2671            log("nvEncGetEncodePresetConfig(%s, %s)", codecstr(encode_GUID), presetstr(preset_GUID))
2672        if len(name)==2 and name[0]=="P":
2673            tuning = self.get_tuning()
2674            log("tuning=%s (%i)", TUNING_STR.get(tuning, "unknown"), tuning)
2675            r = self.functionList.nvEncGetEncodePresetConfigEx(self.context, encode_GUID, preset_GUID, tuning, presetConfig)
2676        else:
2677            r = self.functionList.nvEncGetEncodePresetConfig(self.context, encode_GUID, preset_GUID, presetConfig)
2678        if r!=0:
2679            log.warn("failed to get preset config for %s (%s / %s): %s", name, guidstr(encode_GUID), guidstr(preset_GUID), NV_ENC_STATUS_TXT.get(r, r))
2680            return NULL
2681        return presetConfig
2682
2683    def get_tuning(self):
2684        if self.lossless:
2685            return NV_ENC_TUNING_INFO_LOSSLESS
2686        if self.speed>80:
2687            return NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY
2688        if self.speed>50:
2689            return NV_ENC_TUNING_INFO_LOW_LATENCY
2690        return NV_ENC_TUNING_INFO_HIGH_QUALITY
2691
2692    cdef object query_presets(self, GUID encode_GUID):
2693        cdef uint32_t presetCount
2694        cdef uint32_t presetsRetCount
2695        cdef GUID* preset_GUIDs
2696        cdef GUID preset_GUID
2697        cdef NV_ENC_PRESET_CONFIG *presetConfig
2698        cdef NV_ENC_CONFIG *encConfig
2699        cdef NVENCSTATUS r
2700        assert self.context, "context is not initialized"
2701        presets = {}
2702        if DEBUG_API:
2703            log("nvEncGetEncodePresetCount(%s, %#x)", codecstr(encode_GUID), <uintptr_t> &presetCount)
2704        with nogil:
2705            r = self.functionList.nvEncGetEncodePresetCount(self.context, encode_GUID, &presetCount)
2706        raiseNVENC(r, "getting preset count for %s" % guidstr(encode_GUID))
2707        log("found %s preset%s:", presetCount, engs(presetCount))
2708        assert presetCount<2**8
2709        preset_GUIDs = <GUID*> cmalloc(sizeof(GUID) * presetCount, "preset GUIDs")
2710        try:
2711            if DEBUG_API:
2712                log("nvEncGetEncodePresetGUIDs(%s, %#x)", codecstr(encode_GUID), <uintptr_t> &presetCount)
2713            with nogil:
2714                r = self.functionList.nvEncGetEncodePresetGUIDs(self.context, encode_GUID, preset_GUIDs, presetCount, &presetsRetCount)
2715            raiseNVENC(r, "getting encode presets")
2716            assert presetsRetCount==presetCount
2717            unknowns = []
2718            for x in range(presetCount):
2719                preset_GUID = preset_GUIDs[x]
2720                preset_name = CODEC_PRESETS_GUIDS.get(guidstr(preset_GUID))
2721                if DEBUG_API:
2722                    log("* %s : %s", guidstr(preset_GUID), preset_name or "unknown!")
2723                if preset_name is None:
2724                    global UNKNOWN_PRESETS
2725                    if preset_name not in UNKNOWN_PRESETS:
2726                        UNKNOWN_PRESETS.append(guidstr(preset_GUID))
2727                        unknowns.append(guidstr(preset_GUID))
2728                else:
2729                    presetConfig = self.get_preset_config(preset_name, encode_GUID, preset_GUID)
2730                    if presetConfig!=NULL:
2731                        try:
2732                            encConfig = &presetConfig.presetCfg
2733                            if DEBUG_API:
2734                                log("presetConfig.presetCfg=%s", <uintptr_t> encConfig)
2735                            gop = {NVENC_INFINITE_GOPLENGTH : "infinite"}.get(encConfig.gopLength, encConfig.gopLength)
2736                            log("* %-20s P frame interval=%i, gop length=%-10s", preset_name or "unknown!", encConfig.frameIntervalP, gop)
2737                        finally:
2738                            free(presetConfig)
2739                    presets[preset_name] = guidstr(preset_GUID)
2740            if len(unknowns)>0:
2741                log.warn("Warning: found some unknown NVENC presets:")
2742                for x in unknowns:
2743                    log.warn(" * %s", x)
2744        finally:
2745            free(preset_GUIDs)
2746        if DEBUG_API:
2747            log("query_presets(%s)=%s", codecstr(encode_GUID), presets)
2748        return presets
2749
2750    cdef object query_profiles(self, GUID encode_GUID):
2751        cdef uint32_t profileCount
2752        cdef uint32_t profilesRetCount
2753        cdef GUID profile_GUID
2754        assert self.context, "context is not initialized"
2755        profiles = {}
2756        if DEBUG_API:
2757            log("nvEncGetEncodeProfileGUIDCount(%s, %#x)", codecstr(encode_GUID), <uintptr_t> &profileCount)
2758        cdef NVENCSTATUS r
2759        with nogil:
2760            r = self.functionList.nvEncGetEncodeProfileGUIDCount(self.context, encode_GUID, &profileCount)
2761        raiseNVENC(r, "getting profile count")
2762        log("%s profiles:", profileCount)
2763        assert profileCount<2**8
2764        cdef GUID* profile_GUIDs = <GUID*> cmalloc(sizeof(GUID) * profileCount, "profile GUIDs")
2765        PROFILES_GUIDS = CODEC_PROFILES_GUIDS.get(guidstr(encode_GUID), {})
2766        try:
2767            if DEBUG_API:
2768                log("nvEncGetEncodeProfileGUIDs(%s, %#x, %#x)", codecstr(encode_GUID), <uintptr_t> profile_GUIDs, <uintptr_t> &profileCount)
2769            with nogil:
2770                r = self.functionList.nvEncGetEncodeProfileGUIDs(self.context, encode_GUID, profile_GUIDs, profileCount, &profilesRetCount)
2771            raiseNVENC(r, "getting encode profiles")
2772            #(void* encoder, GUID encodeGUID, GUID* profileGUIDs, uint32_t guidArraySize, uint32_t* GUIDCount)
2773            assert profilesRetCount==profileCount
2774            for x in range(profileCount):
2775                profile_GUID = profile_GUIDs[x]
2776                profile_name = PROFILES_GUIDS.get(guidstr(profile_GUID))
2777                log("* %s : %s", guidstr(profile_GUID), profile_name)
2778                profiles[profile_name] = guidstr(profile_GUID)
2779        finally:
2780            free(profile_GUIDs)
2781        return profiles
2782
2783    cdef object query_input_formats(self, GUID encode_GUID):
2784        cdef uint32_t inputFmtCount
2785        cdef uint32_t inputFmtsRetCount
2786        cdef NV_ENC_BUFFER_FORMAT inputFmt
2787        assert self.context, "context is not initialized"
2788        input_formats = {}
2789        if DEBUG_API:
2790            log("nvEncGetInputFormatCount(%s, %#x)", codecstr(encode_GUID), <uintptr_t> &inputFmtCount)
2791        cdef NVENCSTATUS r
2792        with nogil:
2793            r = self.functionList.nvEncGetInputFormatCount(self.context, encode_GUID, &inputFmtCount)
2794        raiseNVENC(r, "getting input format count")
2795        log("%s input format type%s:", inputFmtCount, engs(inputFmtCount))
2796        assert inputFmtCount>0 and inputFmtCount<2**8
2797        cdef NV_ENC_BUFFER_FORMAT* inputFmts = <NV_ENC_BUFFER_FORMAT*> cmalloc(sizeof(int) * inputFmtCount, "input formats")
2798        try:
2799            if DEBUG_API:
2800                log("nvEncGetInputFormats(%s, %#x, %i, %#x)", codecstr(encode_GUID), <uintptr_t> inputFmts, inputFmtCount, <uintptr_t> &inputFmtsRetCount)
2801            with nogil:
2802                r = self.functionList.nvEncGetInputFormats(self.context, encode_GUID, inputFmts, inputFmtCount, &inputFmtsRetCount)
2803            raiseNVENC(r, "getting input formats")
2804            assert inputFmtsRetCount==inputFmtCount
2805            for x in range(inputFmtCount):
2806                inputFmt = inputFmts[x]
2807                log("* %#x", inputFmt)
2808                for format_mask in sorted(BUFFER_FORMAT.keys()):
2809                    if format_mask>0 and (format_mask & inputFmt)>0:
2810                        format_name = BUFFER_FORMAT.get(format_mask)
2811                        log(" + %#x : %s", format_mask, format_name)
2812                        input_formats[format_name] = hex(format_mask)
2813        finally:
2814            free(inputFmts)
2815        return input_formats
2816
2817    cdef int query_encoder_caps(self, GUID encode_GUID, NV_ENC_CAPS caps_type) except *:
2818        cdef int val
2819        cdef NV_ENC_CAPS_PARAM encCaps
2820        cdef NVENCSTATUS r
2821        assert self.context, "context is not initialized"
2822        memset(&encCaps, 0, sizeof(NV_ENC_CAPS_PARAM))
2823        encCaps.version = NV_ENC_CAPS_PARAM_VER
2824        encCaps.capsToQuery = caps_type
2825        with nogil:
2826            r = self.functionList.nvEncGetEncodeCaps(self.context, encode_GUID, &encCaps, &val)
2827        raiseNVENC(r, "getting encode caps for %s" % CAPS_NAMES.get(caps_type, caps_type))
2828        if DEBUG_API:
2829            log("query_encoder_caps(%s, %s) %s=%s", codecstr(encode_GUID), caps_type, CAPS_NAMES.get(caps_type, caps_type), val)
2830        return val
2831
2832    def query_codecs(self, full_query=False):
2833        cdef uint32_t GUIDCount
2834        cdef uint32_t GUIDRetCount
2835        cdef GUID* encode_GUIDs
2836        cdef GUID encode_GUID
2837        cdef NVENCSTATUS r
2838        assert self.context, "context is not initialized"
2839        if DEBUG_API:
2840            log("nvEncGetEncodeGUIDCount(%#x, %#x)", <uintptr_t> self.context, <uintptr_t> &GUIDCount)
2841        with nogil:
2842            r = self.functionList.nvEncGetEncodeGUIDCount(self.context, &GUIDCount)
2843        raiseNVENC(r, "getting encoder count")
2844        log("found %i encoder%s:", GUIDCount, engs(GUIDCount))
2845        assert GUIDCount<2**8
2846        encode_GUIDs = <GUID*> cmalloc(sizeof(GUID) * GUIDCount, "encode GUIDs")
2847        codecs = {}
2848        try:
2849            if DEBUG_API:
2850                log("nvEncGetEncodeGUIDs(%#x, %i, %#x)", <uintptr_t> encode_GUIDs, GUIDCount, <uintptr_t> &GUIDRetCount)
2851            with nogil:
2852                r = self.functionList.nvEncGetEncodeGUIDs(self.context, encode_GUIDs, GUIDCount, &GUIDRetCount)
2853            raiseNVENC(r, "getting list of encode GUIDs")
2854            assert GUIDRetCount==GUIDCount, "expected %s items but got %s" % (GUIDCount, GUIDRetCount)
2855            for x in range(GUIDRetCount):
2856                encode_GUID = encode_GUIDs[x]
2857                codec_name = CODEC_GUIDS.get(guidstr(encode_GUID))
2858                if not codec_name:
2859                    log("[%s] unknown codec GUID: %s", x, guidstr(encode_GUID))
2860                else:
2861                    log("[%s] %s", x, codec_name)
2862
2863                maxw = self.query_encoder_caps(encode_GUID, NV_ENC_CAPS_WIDTH_MAX)
2864                maxh = self.query_encoder_caps(encode_GUID, NV_ENC_CAPS_HEIGHT_MAX)
2865                async = self.query_encoder_caps(encode_GUID, NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT)
2866                rate_control = self.query_encoder_caps(encode_GUID, NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES)
2867                codec = {
2868                         "guid"         : guidstr(encode_GUID),
2869                         "name"         : codec_name,
2870                         "max-size"     : (maxw, maxh),
2871                         "async"        : async,
2872                         "rate-control" : rate_control
2873                         }
2874                if full_query:
2875                    presets = self.query_presets(encode_GUID)
2876                    profiles = self.query_profiles(encode_GUID)
2877                    input_formats = self.query_input_formats(encode_GUID)
2878                    codec.update({
2879                                  "presets"         : presets,
2880                                  "profiles"        : profiles,
2881                                  "input-formats"   : input_formats,
2882                                  })
2883                codecs[codec_name] = codec
2884        finally:
2885            free(encode_GUIDs)
2886        log("codecs=%s", csv(codecs.keys()))
2887        return codecs
2888
2889
2890    def open_encode_session(self):
2891        global context_counter, context_gen_counter, last_context_failure
2892        cdef NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params
2893
2894        assert self.functionList is NULL, "session already active"
2895        assert self.context is NULL, "context already set"
2896        assert self.cuda_context_ptr!=NULL, "cuda context is not set"
2897        #params = <NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS*> malloc(sizeof(NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS))
2898        log("open_encode_session() cuda_context=%s, cuda_context_ptr=%#x", self.cuda_device_context, <uintptr_t> self.cuda_context_ptr)
2899
2900        self.functionList = <NV_ENCODE_API_FUNCTION_LIST*> cmalloc(sizeof(NV_ENCODE_API_FUNCTION_LIST), "function list")
2901        assert memset(self.functionList, 0, sizeof(NV_ENCODE_API_FUNCTION_LIST))!=NULL
2902        log("open_encode_session() functionList=%#x", <uintptr_t> self.functionList)
2903
2904        #get NVENC function pointers:
2905        memset(self.functionList, 0, sizeof(NV_ENCODE_API_FUNCTION_LIST))
2906        self.functionList.version = NV_ENCODE_API_FUNCTION_LIST_VER
2907        if DEBUG_API:
2908            log("NvEncodeAPICreateInstance(%#x)", <uintptr_t> self.functionList)
2909        cdef NVENCSTATUS r = NvEncodeAPICreateInstance(<uintptr_t> self.functionList)
2910        raiseNVENC(r, "getting API function list")
2911        assert self.functionList.nvEncOpenEncodeSessionEx!=NULL, "looks like NvEncodeAPICreateInstance failed!"
2912
2913        #NVENC init:
2914        memset(&params, 0, sizeof(NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS))
2915        params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER
2916        params.deviceType = NV_ENC_DEVICE_TYPE_CUDA
2917        params.device = self.cuda_context_ptr
2918        params.reserved = &CLIENT_KEY_GUID
2919        params.apiVersion = NVENCAPI_VERSION
2920        if DEBUG_API:
2921            cstr = <unsigned char*> &params
2922            pstr = cstr[:sizeof(NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS)]
2923            log("calling nvEncOpenEncodeSessionEx @ %#x, NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS=%s", <uintptr_t> self.functionList.nvEncOpenEncodeSessionEx, pstr)
2924        self.context = NULL
2925        with nogil:
2926            r = self.functionList.nvEncOpenEncodeSessionEx(&params, &self.context)
2927        if DEBUG_API:
2928            log("nvEncOpenEncodeSessionEx(..)=%s", r)
2929        if r in OPEN_TRANSIENT_ERROR:
2930            last_context_failure = monotonic()
2931            msg = "could not open encode session: %s" % (nvencStatusInfo(r) or r)
2932            log(msg)
2933            raise TransientCodecException(msg)
2934        if self.context==NULL:
2935            if r!=0:
2936                msg = nvencStatusInfo(r) or str(r)
2937            else:
2938                msg = "context is NULL"
2939            last_context_failure = monotonic()
2940            raise Exception("cannot open encoding session: %s, %i contexts are in use" % (msg, context_counter.get()))
2941        raiseNVENC(r, "opening session")
2942        context_counter.increase()
2943        context_gen_counter.increase()
2944        log("success, encoder context=%#x (%s context%s in use)", <uintptr_t> self.context, context_counter, engs(context_counter))
2945
2946
#set once the "successfully initialized" message has been logged by init_module():
_init_message = False
def init_module():
    """
    Initialize the NVENC module: load the library, then probe every CUDA device
    with every license key to find out which encodings can be used.

    For each license key (or just once without a key if there are none)
    and for each device:
    * a test encoder is used to open an encode session and query the codecs
      (devices that fail this test are removed from the list of devices)
    * each encoding in `TEST_ENCODINGS` supported by the device is tested
      with a real encoder context, also recording the maximum dimensions
      and the YUV444 / lossless support for this encoding

    Updates the globals `ENCODINGS`, `MAX_SIZE`, `YUV444_CODEC_SUPPORT`,
    `LOSSLESS_CODEC_SUPPORT`, `YUV444_ENABLED`, `LOSSLESS_ENABLED` and `CLIENT_KEY_GUID`.

    Raises an `Exception` if the NVENC API version is unsupported, on version mismatch,
    or if no encoder context could be created (probably a license key problem).
    """
    log("nvenc.init_module()")
    #TODO: this should be a build time check:
    if NVENCAPI_MAJOR_VERSION<0x7:
        raise Exception("unsupported version of NVENC: %#x" % NVENCAPI_VERSION)
    log("NVENC encoder API version %s", ".".join([str(x) for x in PRETTY_VERSION]))

    from xpra.codecs.cuda_common.cuda_context import cuda_device_context, load_device

    cdef Encoder test_encoder
    #cdef uint32_t max_version
    #cdef NVENCSTATUS r = NvEncodeAPIGetMaxSupportedVersion(&max_version)
    #raiseNVENC(r, "querying max version")
    #log(" maximum supported version: %s", max_version)

    #load the library / DLL:
    init_nvencode_library()

    #make sure we have devices we can use:
    devices = init_all_devices()
    if len(devices)==0:
        log("nvenc: no compatible devices found")
        return

    success = False
    valid_keys = []
    failed_keys = []
    try_keys = CLIENT_KEYS_STR or [None]
    FAILED_ENCODINGS = set()
    global YUV444_ENABLED, YUV444_CODEC_SUPPORT, LOSSLESS_ENABLED, ENCODINGS, MAX_SIZE
    if not validate_driver_yuv444lossless():
        if YUV444_ENABLED:
            YUV444_ENABLED = False
        if LOSSLESS_ENABLED:
            LOSSLESS_ENABLED = False
    #check NVENC availability by creating a context:
    device_warnings = {}
    log("init_module() will try keys: %s", try_keys)
    for client_key in try_keys:
        if client_key:
            #this will set the global key object used by all encoder contexts:
            log("init_module() testing with key '%s'", client_key)
            global CLIENT_KEY_GUID
            CLIENT_KEY_GUID = c_parseguid(client_key)

        #iterate over a copy: devices that fail the test are removed from the list
        for device_id in tuple(devices):
            log("testing encoder with device %s", device_id)
            device = load_device(device_id)
            cdc = cuda_device_context(device_id, device)
            with cdc as device_context:
                options = typedict({
                    "cuda_device"   : device_id,
                    "cuda-device-context" : cdc,
                    "threaded-init" : False,
                    })
                #make sure that the cleanup code below never sees
                #the encoder used for testing a previous device:
                test_encoder = None
                try:
                    test_encoder = Encoder(cdc)
                    test_encoder.init_cuda(device_context)
                    log("test encoder=%s", test_encoder)
                    test_encoder.open_encode_session()
                    log("init_encoder() %s", test_encoder)
                    codecs = test_encoder.query_codecs()
                    log("device %i supports: %s", device_id, codecs)
                except Exception as e:
                    log("failed to test encoder with %s", cdc, exc_info=True)
                    log.warn(" device %s is not supported: %s", get_device_name(device_id) or device_id, e)
                    devices.remove(device_id)
                    continue
                finally:
                    #the encoder is not set if its constructor failed,
                    #calling clean() on None would hide the real error
                    #and prevent us from moving on to the next device:
                    if test_encoder is not None:
                        test_encoder.clean()
                    test_encoder = None

                test_encodings = []
                for e in TEST_ENCODINGS:
                    if e in FAILED_ENCODINGS:
                        continue
                    nvenc_encoding_name = {
                                           "h264"   : "H264",
                                           "h265"   : "HEVC",
                                           }.get(e, e)
                    codec_query = codecs.get(nvenc_encoding_name)
                    if not codec_query:
                        wkey = "nvenc:%s-%s" % (device_id, nvenc_encoding_name)
                        if first_time(wkey):
                            log.warn("Warning: NVENC on device %s:", get_device_name(device_id) or device_id)
                            log.warn(" does not support %s", nvenc_encoding_name)
                        FAILED_ENCODINGS.add(e)
                        continue
                    #ensure MAX_SIZE is set:
                    cmax = MAX_SIZE.get(e)
                    qmax = codec_query.get("max-size")
                    if qmax:
                        #minimum of current value and value for this device:
                        qmx, qmy = qmax
                        cmx, cmy = cmax or qmax
                        v = min(qmx, cmx), min(qmy, cmy)
                        log("max-size(%s)=%s", e, v)
                        MAX_SIZE[e] = v
                    test_encodings.append(e)

                log("will test: %s", test_encodings)
                for encoding in test_encodings:
                    colorspaces = get_input_colorspaces(encoding)
                    assert colorspaces, "cannot use NVENC: no colorspaces available"
                    src_format = colorspaces[0]
                    options["dst-formats"] = get_output_colorspaces(encoding, src_format)
                    test_encoder = None
                    try:
                        test_encoder = Encoder()
                        test_encoder.init_context(encoding, 1920, 1080, src_format, options)
                        success = True
                        if client_key:
                            log("the license key '%s' is valid", client_key)
                            valid_keys.append(client_key)
                        #check for YUV444 support
                        yuv444_support = YUV444_ENABLED and test_encoder.query_encoder_caps(test_encoder.get_codec(), <NV_ENC_CAPS> NV_ENC_CAPS_SUPPORT_YUV444_ENCODE)
                        YUV444_CODEC_SUPPORT[encoding] = bool(yuv444_support)
                        if YUV444_ENABLED and not yuv444_support:
                            wkey = "nvenc:%s-%s-%s" % (device_id, encoding, "YUV444")
                            if first_time(wkey):
                                device_warnings.setdefault(device_id, {}).setdefault(encoding, []).append("YUV444")
                            log("no support for YUV444 with %s", encoding)
                        log("%s YUV444 support: %s", encoding, YUV444_CODEC_SUPPORT.get(encoding, YUV444_ENABLED))
                        #check for lossless:
                        lossless_support = yuv444_support and LOSSLESS_ENABLED and test_encoder.query_encoder_caps(test_encoder.get_codec(), <NV_ENC_CAPS> NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE)
                        #store a real boolean, as we do for YUV444 above:
                        LOSSLESS_CODEC_SUPPORT[encoding] = bool(lossless_support)
                        if LOSSLESS_ENABLED and not lossless_support:
                            wkey = "nvenc:%s-%s-%s" % (device_id, encoding, "lossless")
                            if first_time(wkey):
                                device_warnings.setdefault(device_id, {}).setdefault(encoding, []).append("lossless")
                            log("no support for lossless mode with %s", encoding)
                        log("%s lossless support: %s", encoding, LOSSLESS_CODEC_SUPPORT.get(encoding, LOSSLESS_ENABLED))
                    except NVENCException as e:
                        log("encoder %s failed: %s", test_encoder, e)
                        #special handling for license key issues:
                        if e.code==NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY:
                            if client_key:
                                log("invalid license key '%s' (skipped)", client_key)
                                failed_keys.append(client_key)
                            else:
                                log("a license key is required")
                        elif e.code==NV_ENC_ERR_INVALID_VERSION:
                            #we can bail out already:
                            raise Exception("version mismatch, you need a newer/older codec build or newer/older drivers")
                        else:
                            #it seems that newer version will fail with
                            #seemingly random errors when we supply the wrong key
                            log.warn("error during NVENC encoder test: %s", e)
                            if client_key:
                                log(" license key '%s' may not be valid (skipped)", client_key)
                                failed_keys.append(client_key)
                            else:
                                log(" a license key may be required")
                    finally:
                        if test_encoder:
                            test_encoder.clean()
    #summarize the features found missing during the tests:
    if device_warnings:
        for device_id, encoding_warnings in device_warnings.items():
            log.info("NVENC on device %s:", get_device_name(device_id) or device_id)
            for encoding, warnings in encoding_warnings.items():
                log.info(" %s encoding does not support %s mode", encoding, " or ".join(warnings))
    if not devices:
        ENCODINGS[:] = []
        log.warn("no valid NVENC devices found")
        return
    if success:
        #pick the first valid license key:
        if len(valid_keys)>0:
            x = valid_keys[0]
            log("using the license key '%s'", x)
            CLIENT_KEY_GUID = c_parseguid(x)
        else:
            log("no license keys are required")
        ENCODINGS[:] = [x for x in TEST_ENCODINGS if x not in FAILED_ENCODINGS]
    else:
        #we got license key error(s)
        if len(failed_keys)>0:
            raise Exception("the license %s specified may be invalid" % (["key", "keys"][len(failed_keys)>1]))
        else:
            raise Exception("you may need to provide a license key")
    global _init_message
    if ENCODINGS and not _init_message:
        log.info("NVENC v%i successfully initialized with codecs: %s", NVENCAPI_MAJOR_VERSION, csv(ENCODINGS))
        _init_message = True
3132
3133
def cleanup_module():
    """
    Module teardown hook: logs the call,
    then invokes `reset_state()` from the shared cuda_context helpers.
    """
    log("nvenc.cleanup_module()")
    #the CUDA state is owned by cuda_context, we just ask it to reset:
    reset_state()
3137
def selftest(full=False):
    """
    Sanity check this codec module.

    Asserts that the NVENC API major version is at least 7 and,
    when a driver version is reported by `get_nvidia_module_version`,
    raises `ImportError` for drivers older than 375.0 unless
    the `XPRA_NVENC_UNSUPPORTED_DRIVER_VERSION` override is enabled.

    If `full` is set, the module is also initialized
    and the encoder's maximum dimensions are probed and logged.
    """
    driver_version = get_nvidia_module_version(True)
    assert NVENCAPI_MAJOR_VERSION>=7, "unsupported NVENC version %i" % NVENCAPI_MAJOR_VERSION
    #SDK 7.0 requires version 367 or later
    #SDK 7.1 requires version 375 or later
    if driver_version:
        #the override is only looked up when we actually have a version to check:
        allow_unsupported = envbool("XPRA_NVENC_UNSUPPORTED_DRIVER_VERSION", False)
        if driver_version<(375, 0) and not allow_unsupported:
            raise ImportError("unsupported NVidia driver version %s\nuse XPRA_NVENC_UNSUPPORTED_DRIVER_VERSION=1 to force enable it" % pver(driver_version))
    if not full:
        #the remaining checks are expensive, so they are opt-in
        return
    from xpra.codecs.codec_checks import get_encoder_max_sizes
    from xpra.codecs.nvenc import encoder
    init_module()
    max_sizes = get_encoder_max_sizes(encoder)
    log.info("%s max dimensions: %s", encoder, max_sizes)
3154