1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2017-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #ifndef __NVKMS_HEADSURFACE_PRIV_H__
25 #define __NVKMS_HEADSURFACE_PRIV_H__
26 
27 #include "nvkms-types.h"
28 #include "nvkms-headsurface.h"
29 #include "nvkms-headsurface-config.h"
30 #include "nvkms-surface.h"
31 #include "nvkms-utils.h"
32 
33 #include "nvidia-push-init.h"
34 #include "nvidia-3d.h"
35 
36 #include "nv_list.h"
37 
38 /*
39  * This header file defines structures shared by the nvkms-headsurface*.c source
40  * files.  To the rest of nvkms, these structures should be opaque.
41  */
42 
/*
 * Sizing of the per-head notifier storage: each head gets room for
 * NVKMS_HEAD_SURFACE_MAX_NOTIFIERS_PER_HEAD notifiers of at most
 * NVKMS_HEAD_SURFACE_MAX_NOTIFIER_SIZE bytes each (4 * 16 = 64 bytes).
 */
43 #define NVKMS_HEAD_SURFACE_MAX_NOTIFIERS_PER_HEAD 4
44 #define NVKMS_HEAD_SURFACE_MAX_NOTIFIER_SIZE 16
45 #define NVKMS_HEAD_SURFACE_NOTIFIER_BYTES_PER_HEAD \
46     (NVKMS_HEAD_SURFACE_MAX_NOTIFIERS_PER_HEAD *   \
47      NVKMS_HEAD_SURFACE_MAX_NOTIFIER_SIZE)
48 
/*
 * Number of frame semaphores per head; NVHsChannelEvoRec::frameSemaphoreIndex
 * cycles through this many entries.
 */
49 #define NVKMS_HEAD_SURFACE_MAX_FRAME_SEMAPHORES 2
50 
/* Bytes of semaphore storage per head: one NvGpuSemaphore per frame semaphore. */
51 #define NVKMS_HEAD_SURFACE_SEMAPHORE_BYTES_PER_HEAD \
52     (sizeof(NvGpuSemaphore) * NVKMS_HEAD_SURFACE_MAX_FRAME_SEMAPHORES)
53 
/*
 * Frame semaphore payload values.
 *
 * NOTE(review): presumably RENDERABLE marks a frame that may still be rendered
 * to, and DISPLAYABLE marks a frame whose rendering has completed so the flip
 * may proceed -- confirm against the users of these values.
 */
54 #define NVKMS_HEAD_SURFACE_FRAME_SEMAPHORE_DISPLAYABLE    0xFFFFFFFF
55 #define NVKMS_HEAD_SURFACE_FRAME_SEMAPHORE_RENDERABLE     0x11111111
56 
57 /*
58  * XXX NVKMS HEADSURFACE TODO: HeadSurface uses both notifiers and semaphores
59  * for synchronization:
60  *
61  * - Notifiers to ensure the CPU waits until after the previous frame's flip
62  *   completes before starting the next frame.
63  *
64  * - Semaphores to ensure the flip to the next frame is not started until the
65  *   rendering for the next frame completes.
66  *
67  * We should simplify things by using semaphores for both.
68  */
/*
 * Layout of the notifier/semaphore memory for one subdevice: a fixed-size byte
 * region per head for notifiers, followed by a fixed-size byte region per head
 * for frame semaphores.  HsGetFrameSemaphoreOffsetInWords() computes offsets
 * into semaphore[][], so the order and sizes of these members must not change
 * casually.
 */
69 typedef struct _NVHsNotifiersOneSdRec {
    /* Raw notifier bytes, indexed by [head][byte within the head's region]. */
70     NvU8 notifier
71         [NVKMS_MAX_HEADS_PER_DISP][NVKMS_HEAD_SURFACE_NOTIFIER_BYTES_PER_HEAD];
    /* Raw frame semaphore bytes, indexed by [head][byte within the region]. */
72     NvU8 semaphore
73         [NVKMS_MAX_HEADS_PER_DISP][NVKMS_HEAD_SURFACE_SEMAPHORE_BYTES_PER_HEAD];
74 } NVHsNotifiersOneSdRec;
75 
/*
 * Byte budget for one NVHsNotifiersOneSdRec.  The ct_assert below checks at
 * compile time that the structure fits within this budget.
 *
 * NOTE(review): presumably this is the size of the backing memory allocated
 * for the notifiers -- confirm against the allocation code.
 */
76 #define NVKMS_HEAD_SURFACE_NOTIFIERS_SIZE_IN_BYTES 4096
77 
78 ct_assert(NVKMS_HEAD_SURFACE_NOTIFIERS_SIZE_IN_BYTES >=
79           sizeof(NVHsNotifiersOneSdRec));
80 
/*
 * Bookkeeping for the headSurface notifier/semaphore memory: the handles and
 * surface describing it, plus per-subdevice access state.
 */
81 typedef struct _NVHsNotifiersRec {
82 
    /*
     * NOTE(review): presumably the RM handle, NVKMS surface handle, and NVKMS
     * surface object that all refer to the same notifier memory -- confirm.
     */
83     NvU32 rmHandle;
84     NvKmsSurfaceHandle nvKmsHandle;
85     const NVSurfaceEvoRec *pSurfaceEvo;
86 
87     struct {
        /* This subdevice's NVHsNotifiersOneSdRec. */
88         NVHsNotifiersOneSdRec *ptr;
89         struct {
            /*
             * NOTE(review): presumably the index of the next notifier to use
             * for this head, in
             * [0, NVKMS_HEAD_SURFACE_MAX_NOTIFIERS_PER_HEAD) -- confirm.
             */
90             NvU8 nextSlot;
91         } apiHead[NVKMS_MAX_HEADS_PER_DISP];
92     } sd[NVKMS_MAX_SUBDEVICES];
93 
    /* NOTE(review): presumably the format used for the notifiers; confirm. */
94     enum NvKmsNIsoFormat nIsoFormat;
95 
96 } NVHsNotifiersRec;
97 
/*
 * Description of one headSurface surface: its NVKMS and RM handles, its
 * block-linear log2(GOBs per block) setting for the 3D engine, and the
 * underlying NVKMS surface object.
 */
98 typedef struct _NVHsSurfaceRec {
99 
100     NvKmsSurfaceHandle nvKmsHandle;
101     NvU32 rmHandle;
102 
103     Nv3dBlockLinearLog2GobsPerBlock gobsPerBlock;
104 
105     const NVSurfaceEvoRec *pSurfaceEvo;
106 
107 } NVHsSurfaceRec;
108 
/*
 * Per-device headSurface state: a back pointer to the NVKMS device, the 3D
 * device object, and the notifier/semaphore bookkeeping.
 */
109 typedef struct _NVHsDeviceEvoRec {
110 
    /* The NVKMS device this headSurface device belongs to. */
111     NVDevEvoRec *pDevEvo;
112 
    /* NOTE(review): presumably a handle to the GPU virtual address space. */
113     NvU32 gpuVASpace;
114 
115     struct {
116         Nv3dDeviceRec device;
117     } nv3d;
118 
119     NVHsNotifiersRec notifiers;
120 
121 } NVHsDeviceEvoRec;
122 
/*
 * Indices into NVHsChannelEvoRec::nv3d.texInfo[].
 * NVKMS_HEADSURFACE_TEXINFO_NUM is the number of entries in that array; it
 * must remain the last enumerator.
 */
123 enum NVHsChannelTexInfoEnum {
124     NVKMS_HEADSURFACE_TEXINFO_SRC     = 0,
125     /* XXX NVKMS HEADSURFACE TODO: enable all the below  */
126     NVKMS_HEADSURFACE_TEXINFO_CURSOR  = 1,
127     NVKMS_HEADSURFACE_TEXINFO_BLEND   = 2,
128     NVKMS_HEADSURFACE_TEXINFO_OFFSET  = 3,
129     NVKMS_HEADSURFACE_TEXINFO_OVERLAY = 4,
130     /* NVKMS_HEADSURFACE_TEXINFO_LUT     = 5, */
131     NVKMS_HEADSURFACE_TEXINFO_NUM,
132 };
133 
/*
 * Rendering statistics for one stereo eye and one statistics "slot";
 * NVHsChannelEvoRec::statistics.perEye[][] holds one of these per
 * (eye, slot) pair.
 */
134 typedef struct _NVHsChannelStatisticsOneEyeRec {
135     /* Running total of the number of frames rendered by headSurface. */
136     NvU64 nFrames;
137 
138     /* Running total of the GPU time spent rendering, in nanoseconds. */
139     NvU64 gpuTimeSpent;
140 
141     /* We compute the FPS for 5 second periods. */
142     struct {
143         /*
144          * Running total of the number of frames rendered by headSurface; reset
145          * every 5 seconds.
146          */
147         NvU64 nFrames;
148         /*
149          * The time, in nanoseconds, when this FPS period started, so we know
150          * when the 5 second period is done.
151          */
152         NvU64 startTime;
153         /*
154          * Most recently computed FPS for the last 5 second period.
         *
         * NOTE(review): the field name suggests a per-millisecond unit while
         * this comment says FPS; confirm the actual unit where it is computed.
155          */
156         NvU64 framesPerMs;
157     } fps;
158 } NVHsChannelStatisticsOneEyeRec;
159 
/*
 * One entry in a per-layer headSurface flip queue
 * (NVHsChannelEvoRec::flipQueue[layer].queue).
 */
160 typedef struct _NVHsChannelFlipQueueEntry {
    /* List linkage; this is the member name passed to the nvList macros. */
161     NVListRec flipQueueEntry;
    /* The requested flip state carried by this queue entry. */
162     NVHsLayerRequestedFlipState hwState;
163 } NVHsChannelFlipQueueEntry;
164 
/*
 * State for one headSurface channel.  It is tied to a disp (pDispEvo) and an
 * API head (apiHead), and bundles the push channel, the 3D and 2D object
 * handles, the indices used to rotate through surfaces, offsets and frame
 * semaphores, the per-layer flip queues, and (when NVKMS_PROCFS_ENABLE is
 * set) rendering statistics.
 */
165 typedef struct _NVHsChannelEvoRec {
166 
167     NVDispEvoRec *pDispEvo;
168 
    /*
     * API head of this channel; used in this header to index
     * NVHsNotifiersOneSdRec::semaphore[] and
     * NVDevEvoRec::apiHeadSurfaceAllDisps[].
     */
169     NvU32 apiHead;
170 
171     struct {
172         NvPushChannelRec channel;
173         NvU32 handlePool[NV_PUSH_CHANNEL_HANDLE_POOL_NUM];
174     } nvPush;
175 
176     struct {
177         NvU32 handle;
178         Nv3dChannelRec channel;
        /* Indexed by enum NVHsChannelTexInfoEnum. */
179         Nv3dRenderTexInfo texInfo[NVKMS_HEADSURFACE_TEXINFO_NUM];
180     } nv3d;
181 
182     struct {
183         NvU32 handle;
184     } nv2d;
185 
186     /*
187      * Flip request parameters are too large to declare on the stack.  We
188      * preallocate them here so that we don't have to allocate and free them on
189      * every headSurface flip.
190      */
191     struct NvKmsFlipRequestOneHead scratchParams;
192 
193     /*
194      * The index into NVDevEvoRec::apiHeadSurfaceAllDisps[apiHead]::surface[] to use
195      * for the next frame of headSurface.
196      */
197     NvU8 nextIndex;
198 
199     /*
200      * When neededForSwapGroup is true, frames of headSurface are rendered to
201      * alternating offsets within double-sized headSurface surfaces.  nextOffset
202      * is either 0 or 1, to select the offset of the next headSurface frame.
203      */
204     NvU8 nextOffset;
205 
206     /*
207      * HeadSurface flips are semaphore interlocked with headSurface rendering.
208      * We need to use a different semaphore offset for subsequent flips.
209      * frameSemaphoreIndex is used to alternate between
210      * NVKMS_HEAD_SURFACE_MAX_FRAME_SEMAPHORES offsets.
211      */
212     NvU8 frameSemaphoreIndex;
213 
214     NVHsChannelConfig config;
215 
216     NVVBlankCallbackPtr vBlankCallback;
217 
218     /*
219      * NVHsChannelEvoRec keeps a list of flip queue entries, and the "current"
220      * entry.  NVHsChannelFlipQueueEntry is a single entry in the flip queue.
221      *
222      * Each entry describes a validated flip request.  When NVKMS is called to
223      * build the next frame of headSurface, it inspects if the next entry in the
224      * queue is ready to flip (e.g., any semaphore acquires have been
225      * satisfied).  If the next flip queue entry is ready, we use it to replace
226      * the current entry.  Otherwise, we continue to use the existing current
227      * entry.
228      *
229      * Surfaces within an NVHsChannelFlipQueueEntry have their reference counts:
230      *
231      * - incremented when the NVHsChannelFlipQueueEntry is added to the flip
232      *   queue.
233      *
234      * - decremented when the NVHsChannelFlipQueueEntry is removed from current
235      *   (i.e., when we do the equivalent of "flip away").
236      *
237      * To simulate EVO/NVDisplay semaphore behavior, if an
238      * NVHsChannelFlipQueueEntry specifies a semaphore:
239      *
240      * - We wait for the semaphore's acquire value to be reached before
241      *   promoting the entry from the flip queue to current.
242      *
243      * - We write the semaphore's release value when the
244      *   NVHsChannelFlipQueueEntry is removed from current (i.e., when we do the
245      *   equivalent of "flip away").
246      */
247 
248     struct {
249         NVHsLayerRequestedFlipState current;
250         NVListRec queue;
251     } flipQueue[NVKMS_MAX_LAYERS_PER_HEAD];
252 
253     /*
254      * The cached main layer surface, needed when the main layer transitions out
255      * of headSurface due to exiting a swapgroup, i.e., in this path:
256      *     nvHsConfigStop() => HsConfigRestoreMainLayerSurface()
257      */
258     struct {
259         NVSurfaceEvoPtr pSurfaceEvo[NVKMS_MAX_EYES];
260     } flipQueueMainLayerState;
261 
262     NvU64 lastCallbackUSec;
263 
264     /*
265      * For NVKMS headsurface swap groups, at some point after the flip has been
266      * issued, NVKMS needs to check the notifier associated with that flip to
267      * see if the flip has been completed and release the deferred request
268      * fifo entry associated with that flip.  This bool reflects whether that
269      * check is done during the headsurface vblank interrupt callback or later
270      * during the RG line 1 interrupt callback.
271      */
272     NvBool usingRgIntrForSwapGroups;
273 
274     /*
275      * Pointer to the RG line interrupt callback object. This is needed to
276      * enable and disable the RG interrupt callback.
277      */
278     NVRgLine1CallbackPtr pRgIntrCallback;
279 
280 #if NVKMS_PROCFS_ENABLE
281 
282     /*
283      * We track statistics differently for SwapGroup and non-SwapGroup
284      * headSurface; abstract the grouping into "slots".  For non-SwapGroup there
285      * is only one rendered frame (one "slot").  For SwapGroup, there are three
286      * different rendered frames (so three "slots").
287      */
288 #define NVKMS_HEADSURFACE_STATS_MAX_SLOTS 3
289 
290 #define NVKMS_HEADSURFACE_STATS_SEMAPHORE_BEFORE 0
291 #define NVKMS_HEADSURFACE_STATS_SEMAPHORE_AFTER  1
292 
293     /*
294      * One semaphore before the frame, and one semaphore after the frame.
295      */
296 #define NVKMS_HEAD_SURFACE_STATS_SEMAPHORE_STAGE_COUNT 2
297 
298     /*
299      * We need semaphores for each stereo eye for each "slot".
300      */
301 #define NVKMS_HEADSURFACE_STATS_MAX_SEMAPHORES        \
302     (NVKMS_HEAD_SURFACE_STATS_SEMAPHORE_STAGE_COUNT * \
303      NVKMS_MAX_EYES *                                 \
304      NVKMS_HEADSURFACE_STATS_MAX_SLOTS)
305 
306     struct {
307 
308         NVHsChannelStatisticsOneEyeRec
309             perEye[NVKMS_MAX_EYES][NVKMS_HEADSURFACE_STATS_MAX_SLOTS];
310 
311         /* How often we were called back before the previous frame was done. */
312         NvU64 nPreviousFrameNotDone;
313 
314         /* How often we did not update HS backbuffer with non-sg content. */
315         NvU64 nOmittedNonSgHsUpdates;
316 
317         /* How often we had a fullscreen swapgroup, and how often we didn't. */
318         NvU64 nFullscreenSgFrames;
319         NvU64 nNonFullscreenSgFrames;
320 
321         /*
322          * Statistics on which Display Memory Interface (DMI) scanline we are on
323          * when headSurface is called.
324          *
325          * pHistogram is a dynamically allocated array of counts.  The array has
326          * vVisible + 1 elements (the +1 is because the hardware-reported
327          * scanline values are in the inclusive range [0,vVisible]).  Each
328          * element contains how many times we've been called back while on that
329          * scanline.
330          *
331          * When in the blanking region, there isn't a DMI scanline.  We
332          * increment n{,Not}InBlankingPeriod to keep track of how often we are
333          * called back while in the blanking region.
334          */
335         struct {
336             NvU64 *pHistogram; /* array with vVisible + 1 elements */
337             NvU16 vVisible;
338             NvU64 nInBlankingPeriod;
339             NvU64 nNotInBlankingPeriod;
340         } scanLine;
341 
342     } statistics;
343 #else
    /* Without procfs support, no statistics semaphores are reserved. */
344 #define NVKMS_HEADSURFACE_STATS_MAX_SEMAPHORES 0
345 #endif /* NVKMS_PROCFS_ENABLE */
346 
347     /*
348      * We need one semaphore for the non-stall interrupt following rendering to
349      * the next viewport offset with swapgroups enabled.
350      */
351 #define NVKMS_HEADSURFACE_VIEWPORT_OFFSET_SEMAPHORE_INDEX \
352     NVKMS_HEADSURFACE_STATS_MAX_SEMAPHORES
353 
    /*
     * Total semaphore count: the statistics semaphores (if any) followed by
     * the single viewport offset semaphore.
     */
354 #define NVKMS_HEADSURFACE_MAX_SEMAPHORES \
355     (NVKMS_HEADSURFACE_VIEWPORT_OFFSET_SEMAPHORE_INDEX + 1)
356 
357     /*
358      * Whether this channel has kicked off rendering to a new viewport offset
359      * for non-swapgroup content updates, but hasn't yet kicked off the
360      * viewport flip to the new offset.  Used to prevent rendering a new
361      * frame if rendering the previous frame took longer than a full frame of
362      * scanout.
363      */
364     NvBool viewportFlipPending;
365 
366     /*
367      * Recorded timestamp of the last headsurface flip. Used for deciding if
368      * certain blits to the headsurface can be omitted.
369      */
370     NvU64 lastHsClientFlipTimeUs;
371 
372     /*
373      * If this channel has kicked off a real flip while swapgroups were active,
374      * mark this channel as using real flips instead of blits for swapgroups,
375      * don't fast forward through headsurface flips (since every flip needs to
376      * be kicked off with every swapgroup ready event), and skip the part of
377      * the RG interrupt that would update non-swapgroup content.
378      */
379     NvBool swapGroupFlipping;
380 
381 } NVHsChannelEvoRec;
382 
Hs3dStatisticsGetSlot(const NVHsChannelEvoRec * pHsChannel,const NvHsNextFrameRequestType requestType,const NvU8 dstBufferIndex,const NvBool honorSwapGroupClipList)383 static inline NvU8 Hs3dStatisticsGetSlot(
384     const NVHsChannelEvoRec *pHsChannel,
385     const NvHsNextFrameRequestType requestType,
386     const NvU8 dstBufferIndex,
387     const NvBool honorSwapGroupClipList)
388 {
389     if (pHsChannel->config.neededForSwapGroup) {
390         switch (requestType) {
391         case NV_HS_NEXT_FRAME_REQUEST_TYPE_FIRST_FRAME:
392             /*
393              * SwapGroup FIRST_FRAME will render to pHsChannel->nextIndex with
394              * honorSwapGroupClipList==false.
395              */
396             nvAssert(dstBufferIndex < 2);
397             return dstBufferIndex;
398         case NV_HS_NEXT_FRAME_REQUEST_TYPE_VBLANK:
399             /*
400              * SwapGroup VBLANK fully populates the nextIndex buffer
401              * (honorSwapGroupClipList==false), and only populates the
402              * non-swapgroup regions of the current index.
403              */
404             return honorSwapGroupClipList ? 0 : 1;
405         case NV_HS_NEXT_FRAME_REQUEST_TYPE_SWAP_GROUP_READY:
406             return 2;
407         }
408     }
409 
410     return 0; /* non-SwapGroup always only uses slot 0 */
411 }
412 
413 /*!
414  * Get the offset, in words, of the frame semaphore within NVHsNotifiersOneSdRec
415  * that corresponds to (head, frameSemaphoreIndex).
416  */
HsGetFrameSemaphoreOffsetInWords(const NVHsChannelEvoRec * pHsChannel)417 static inline NvU16 HsGetFrameSemaphoreOffsetInWords(
418     const NVHsChannelEvoRec *pHsChannel)
419 {
420     const NvU16 semBase =
421         offsetof(NVHsNotifiersOneSdRec, semaphore[pHsChannel->apiHead]);
422     const NvU16 semOffset = sizeof(NvGpuSemaphore) *
423         pHsChannel->frameSemaphoreIndex;
424 
425     const NvU16 offsetInBytes = semBase + semOffset;
426 
427     /*
428      * NVHsNotifiersOneSdRec::semaphore should be word-aligned, and
429      * sizeof(NvGpuSemaphore) is a multiple of words, so the offset to any
430      * NvGpuSemaphore within the array should be word-aligned.
431      */
432     nvAssert((offsetInBytes % 4) == 0);
433 
434     return offsetInBytes / 4;
435 }
436 
/*!
 * Advance pHsChannel->frameSemaphoreIndex to the next frame semaphore,
 * wrapping around after NVKMS_HEAD_SURFACE_MAX_FRAME_SEMAPHORES entries.
 *
 * \param[in,out]  pHsChannel  The headSurface channel to update.
 */
static inline void HsIncrementFrameSemaphoreIndex(
    NVHsChannelEvoRec *pHsChannel)
{
    /* Truncate to NvU8 first, matching an in-place increment of the field. */
    const NvU8 bumped = (NvU8)(pHsChannel->frameSemaphoreIndex + 1);

    pHsChannel->frameSemaphoreIndex =
        bumped % NVKMS_HEAD_SURFACE_MAX_FRAME_SEMAPHORES;
}
443 
HsGetPreviousOffset(const NVHsChannelEvoRec * pHsChannel)444 static inline NvU8 HsGetPreviousOffset(
445     const NVHsChannelEvoRec *pHsChannel)
446 {
447     nvAssert(pHsChannel->config.neededForSwapGroup);
448 
449     nvAssert(pHsChannel->config.surfaceSize.height ==
450              (pHsChannel->config.frameSize.height * 2));
451 
452     return A_minus_b_with_wrap_U8(pHsChannel->nextOffset, 1,
453                                   NVKMS_HEAD_SURFACE_MAX_BUFFERS);
454 }
455 
/*!
 * Toggle pHsChannel->nextOffset between 0 and 1.
 *
 * Only meaningful when headSurface is needed for a SwapGroup, in which case
 * the headSurface surface is expected to be twice the frame height.
 *
 * \param[in]      pHsDevice   The headSurface device (not used here).
 * \param[in,out]  pHsChannel  The headSurface channel to update.
 */
static inline void HsIncrementNextOffset(
    const NVHsDeviceEvoRec *pHsDevice,
    NVHsChannelEvoRec *pHsChannel)
{
    nvAssert(pHsChannel->config.neededForSwapGroup);

    nvAssert(pHsChannel->config.surfaceSize.height ==
             (pHsChannel->config.frameSize.height * 2));

    /* Truncate to NvU8 first, matching an in-place increment of the field. */
    pHsChannel->nextOffset = (NvU8)(pHsChannel->nextOffset + 1) % 2;
}
468 
/*!
 * Advance pHsChannel->nextIndex to the next headSurface surface for this
 * channel's API head, wrapping around at the head's surfaceCount.
 *
 * surfaceCount is expected to be non-zero.  If it is zero, nextIndex is reset
 * to 0 rather than evaluating a modulo by zero, which is undefined behavior
 * and would otherwise be reachable in builds where nvAssert() is compiled out.
 *
 * \param[in]      pHsDevice   The headSurface device; its pDevEvo supplies
 *                             the per-API-head surface count.
 * \param[in,out]  pHsChannel  The headSurface channel to update.
 */
static inline void HsIncrementNextIndex(
    const NVHsDeviceEvoRec *pHsDevice,
    NVHsChannelEvoRec *pHsChannel)
{
    const NVDevEvoRec *pDevEvo = pHsDevice->pDevEvo;
    const NvU32 surfaceCount =
        pDevEvo->apiHeadSurfaceAllDisps[pHsChannel->apiHead].surfaceCount;

    nvAssert(surfaceCount > 0);

    /* Defensive: never divide by zero, even if the assert above is a no-op. */
    if (surfaceCount == 0) {
        pHsChannel->nextIndex = 0;
        return;
    }

    pHsChannel->nextIndex++;
    pHsChannel->nextIndex %= surfaceCount;
}
482 
/*!
 * Take or drop a reference on a surface used by a headSurface flip.
 *
 * A NULL pSurfaceEvo is ignored.
 *
 * \param[in]  pDevEvo      The device; only used when dropping a reference.
 * \param[in]  pSurfaceEvo  The surface whose reference counts change, or NULL.
 * \param[in]  increase     NV_TRUE to increment the reference counts,
 *                          NV_FALSE to decrement them.
 */
static inline void HsChangeSurfaceFlipRefCount(
    NVDevEvoPtr pDevEvo,
    NVSurfaceEvoPtr pSurfaceEvo,
    NvBool increase)
{
    if (pSurfaceEvo == NULL) {
        return;
    }

    if (!increase) {
        nvEvoDecrementSurfaceRefCnts(pDevEvo, pSurfaceEvo);
        return;
    }

    nvEvoIncrementSurfaceRefCnts(pSurfaceEvo);
}
496 
497 /*!
498  * Get the last NVHsLayerRequestedFlipState entry in the pHsChannel's flip queue for
499  * the specified layer.
500  *
501  * If the flip queue is empty, return the 'current' entry.  Otherwise, return
502  * the most recently queued entry.
503  *
504  * This function cannot fail.
505  */
HsGetLastFlipQueueEntry(const NVHsChannelEvoRec * pHsChannel,const NvU8 layer)506 static inline const NVHsLayerRequestedFlipState *HsGetLastFlipQueueEntry(
507     const NVHsChannelEvoRec *pHsChannel,
508     const NvU8 layer)
509 {
510     const NVListRec *pFlipQueue = &pHsChannel->flipQueue[layer].queue;
511     const NVHsChannelFlipQueueEntry *pEntry;
512 
513     /*
514      * XXX NVKMS HEADSURFACE TODO: use nvListIsEmpty() once bugfix_main is
515      * updated to make nvListIsEmpty()'s argument const; see changelist
516      * 23614050.
517      *
518      * if (nvListIsEmpty(pFlipQueue)) {
519      */
520     if (pFlipQueue->next == pFlipQueue) {
521         return &pHsChannel->flipQueue[layer].current;
522     }
523 
524     pEntry = nvListLastEntry(pFlipQueue,
525                              NVHsChannelFlipQueueEntry,
526                              flipQueueEntry);
527 
528     return &pEntry->hwState;
529 }
530 
531 #endif /* __NVKMS_HEADSURFACE_PRIV_H__ */
532