/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/mtl/GrMtlResourceProvider.h"

#include "include/gpu/GrContextOptions.h"
#include "src/gpu/GrContextPriv.h"
#include "src/gpu/mtl/GrMtlCommandBuffer.h"
#include "src/gpu/mtl/GrMtlGpu.h"
#include "src/gpu/mtl/GrMtlPipelineState.h"
#include "src/gpu/mtl/GrMtlUtil.h"

#include "src/sksl/SkSLCompiler.h"

#if !__has_feature(objc_arc)
#error This file must be compiled with ARC. Use the -fobjc-arc flag.
#endif

GrMtlResourceProvider::GrMtlResourceProvider(GrMtlGpu* gpu)
    : fGpu(gpu) {
    fPipelineStateCache.reset(new PipelineStateCache(gpu));
    fBufferSuballocator.reset(new BufferSuballocator(gpu->device(), kBufferSuballocatorStartSize));
    // TODO: maxBufferLength seems like a reasonable metric to determine fBufferSuballocatorMaxSize
    // but may need tuning. Might also need a GrContextOption to let the client set this.
#ifdef SK_BUILD_FOR_MAC
    int64_t maxBufferLength = 1024*1024*1024;
#else
    int64_t maxBufferLength = 256*1024*1024;
#endif
    if (@available(iOS 12, macOS 10.14, *)) {
        maxBufferLength = gpu->device().maxBufferLength;
    }
    fBufferSuballocatorMaxSize = maxBufferLength/16;
}

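// Returns a pipeline state compatible with the given program info and primitive type,
// pulling from the pipeline state cache when possible.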
GrMtlPipelineState* GrMtlResourceProvider::findOrCreateCompatiblePipelineState(
        GrRenderTarget* renderTarget,
        const GrProgramInfo& programInfo,
        GrPrimitiveType primitiveType) {
    return fPipelineStateCache->refPipelineState(renderTarget, programInfo, primitiveType);
}

////////////////////////////////////////////////////////////////////////////////////////////////

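// Depth-stencil states are keyed on the stencil settings and surface origin; matching states
// are shared through the fDepthStencilStates hash.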
GrMtlDepthStencil* GrMtlResourceProvider::findOrCreateCompatibleDepthStencilState(
        const GrStencilSettings& stencil, GrSurfaceOrigin origin) {
    GrMtlDepthStencil* depthStencilState;
    GrMtlDepthStencil::Key key = GrMtlDepthStencil::GenerateKey(stencil, origin);
    depthStencilState = fDepthStencilStates.find(key);
    if (!depthStencilState) {
        depthStencilState = GrMtlDepthStencil::Create(fGpu, stencil, origin);
        fDepthStencilStates.add(depthStencilState);
    }
    SkASSERT(depthStencilState);
    return depthStencilState;
}

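// Samplers are keyed on the GrSamplerState and shared through the fSamplers hash.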
GrMtlSampler* GrMtlResourceProvider::findOrCreateCompatibleSampler(const GrSamplerState& params) {
    GrMtlSampler* sampler;
    sampler = fSamplers.find(GrMtlSampler::GenerateKey(params));
    if (!sampler) {
        sampler = GrMtlSampler::Create(fGpu, params);
        fSamplers.add(sampler);
    }
    SkASSERT(sampler);
    return sampler;
}

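// Releases all cached resources. The sampler and depth-stencil hashes hold a ref on each of
// their entries, so those must be unreffed before the hashes are reset.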
void GrMtlResourceProvider::destroyResources() {
    // Iterate through all stored GrMtlSamplers and unref them before resetting the hash.
    SkTDynamicHash<GrMtlSampler, GrMtlSampler::Key>::Iter samplerIter(&fSamplers);
    for (; !samplerIter.done(); ++samplerIter) {
        (*samplerIter).unref();
    }
    fSamplers.reset();

    // Iterate through all stored GrMtlDepthStencils and unref them before resetting the hash.
    SkTDynamicHash<GrMtlDepthStencil, GrMtlDepthStencil::Key>::Iter dsIter(&fDepthStencilStates);
    for (; !dsIter.done(); ++dsIter) {
        (*dsIter).unref();
    }
    fDepthStencilStates.reset();

    fPipelineStateCache->release();
}

////////////////////////////////////////////////////////////////////////////////////////////////

#ifdef GR_PIPELINE_STATE_CACHE_STATS
// Display pipeline state cache usage
static const bool c_DisplayMtlPipelineCache{false};
#endif

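// A cache entry owns the compiled GrMtlPipelineState for one program descriptor.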
struct GrMtlResourceProvider::PipelineStateCache::Entry {
    Entry(GrMtlGpu* gpu, GrMtlPipelineState* pipelineState)
    : fGpu(gpu)
    , fPipelineState(pipelineState) {}

    GrMtlGpu* fGpu;
    std::unique_ptr<GrMtlPipelineState> fPipelineState;
};

GrMtlResourceProvider::PipelineStateCache::PipelineStateCache(GrMtlGpu* gpu)
    : fMap(gpu->getContext()->priv().options().fRuntimeProgramCacheSize)
    , fGpu(gpu)
#ifdef GR_PIPELINE_STATE_CACHE_STATS
    , fTotalRequests(0)
    , fCacheMisses(0)
#endif
{}

GrMtlResourceProvider::PipelineStateCache::~PipelineStateCache() {
    SkASSERT(0 == fMap.count());
    // dump stats
#ifdef GR_PIPELINE_STATE_CACHE_STATS
    if (c_DisplayMtlPipelineCache) {
        SkDebugf("--- Pipeline State Cache ---\n");
        SkDebugf("Total requests: %d\n", fTotalRequests);
        SkDebugf("Cache misses: %d\n", fCacheMisses);
        SkDebugf("Cache miss %%: %f\n", (fTotalRequests > 0) ?
                 100.f * fCacheMisses / fTotalRequests :
                 0.f);
        SkDebugf("---------------------\n");
    }
#endif
}

void GrMtlResourceProvider::PipelineStateCache::release() {
    fMap.reset();
}

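// Looks up a pipeline state by its program descriptor, building and caching a new one on a
// miss. Returns nullptr if either the descriptor or the pipeline state cannot be built.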
GrMtlPipelineState* GrMtlResourceProvider::PipelineStateCache::refPipelineState(
        GrRenderTarget* renderTarget,
        const GrProgramInfo& programInfo,
        GrPrimitiveType primType) {
#ifdef GR_PIPELINE_STATE_CACHE_STATS
    ++fTotalRequests;
#endif

    // TODO: unify GL, VK and Mtl
    // Get GrMtlProgramDesc
    GrMtlPipelineStateBuilder::Desc desc;
    if (!GrMtlPipelineStateBuilder::Desc::Build(&desc, renderTarget, programInfo, primType, fGpu)) {
        GrCapsDebugf(fGpu->caps(), "Failed to build mtl program descriptor!\n");
        return nullptr;
    }

    std::unique_ptr<Entry>* entry = fMap.find(desc);
    if (!entry) {
#ifdef GR_PIPELINE_STATE_CACHE_STATS
        ++fCacheMisses;
#endif
        GrMtlPipelineState* pipelineState(GrMtlPipelineStateBuilder::CreatePipelineState(
            fGpu, renderTarget, programInfo, &desc));
        if (!pipelineState) {
            return nullptr;
        }
        entry = fMap.insert(desc, std::unique_ptr<Entry>(new Entry(fGpu, pipelineState)));
        return (*entry)->fPipelineState.get();
    }
    return (*entry)->fPipelineState.get();
}

////////////////////////////////////////////////////////////////////////////////////////////////

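// Allocates a new CPU-visible MTLBuffer: managed storage on macOS, shared storage on iOS.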
static id<MTLBuffer> alloc_dynamic_buffer(id<MTLDevice> device, size_t size) {
    NSUInteger options = 0;
    if (@available(macOS 10.11, iOS 9.0, *)) {
#ifdef SK_BUILD_FOR_MAC
        options |= MTLResourceStorageModeManaged;
#else
        options |= MTLResourceStorageModeShared;
#endif
    }
    return [device newBufferWithLength: size
                               options: options];
}

// The idea here is that we create a ring buffer which is used for all dynamic allocations
// below a certain size. When a dynamic GrMtlBuffer is mapped, it grabs a portion of this
// buffer and uses it. On a subsequent map it will grab a different portion of the buffer.
// This prevents the buffer from overwriting itself before it's submitted to the command
// stream.

GrMtlResourceProvider::BufferSuballocator::BufferSuballocator(id<MTLDevice> device, size_t size)
        : fBuffer(alloc_dynamic_buffer(device, size))
        , fTotalSize(size)
        , fHead(0)
        , fTail(0) {
    // We increment fHead and fTail without bound and let overflow handle any wrapping.
    // Because of this, size needs to be a power of two.
    SkASSERT(SkIsPow2(size));
}

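// Reserves a suballocation from the ring buffer. On success returns the backing MTLBuffer and
// writes the suballocation's byte offset to *offset; returns nil if the request doesn't fit,
// in which case the caller falls back to a dedicated buffer.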
id<MTLBuffer> GrMtlResourceProvider::BufferSuballocator::getAllocation(size_t size,
                                                                       size_t* offset) {
    // capture current state locally (because fTail could be overwritten by the completion handler)
    size_t head, tail;
    SkAutoSpinlock lock(fMutex);
    head = fHead;
    tail = fTail;

    // The head and tail indices increment without bound, wrapping with overflow,
    // so we need to mod them down to the actual bounds of the allocation to determine
    // which blocks are available.
    size_t modHead = head & (fTotalSize - 1);
    size_t modTail = tail & (fTotalSize - 1);

    bool full = (head != tail && modHead == modTail);

    // We don't want large allocations to eat up this buffer, so we allocate them separately.
    if (full || size > fTotalSize/2) {
        return nil;
    }

    // case 1: free space lies at the beginning and/or the end of the buffer
    if (modHead >= modTail) {
        // check for room at the end
        if (fTotalSize - modHead < size) {
            // no room at the end, check the beginning
            if (modTail < size) {
                // no room at the beginning
                return nil;
            }
            // we are going to allocate from the beginning, adjust head to '0' position
            head += fTotalSize - modHead;
            modHead = 0;
        }
    // case 2: free space lies in the middle of the buffer, check for room there
    } else if (modTail - modHead < size) {
        // no room in the middle
        return nil;
    }

    *offset = modHead;
    // We're not sure what the usage of the next allocation will be --
    // to be safe we'll use 16 byte alignment.
    fHead = GrSizeAlignUp(head + size, 16);
    return fBuffer;
}

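// Registers a handler on the command buffer that, once the GPU has finished with the current
// set of suballocations, advances fTail to the captured fHead so that space can be reused.
// The extra ref keeps this BufferSuballocator alive until the handler has run.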
void GrMtlResourceProvider::BufferSuballocator::addCompletionHandler(
        GrMtlCommandBuffer* cmdBuffer) {
    this->ref();
    SkAutoSpinlock lock(fMutex);
    size_t newTail = fHead;
    cmdBuffer->addCompletedHandler(^(id <MTLCommandBuffer>commandBuffer) {
        // Make sure SkAutoSpinlock goes out of scope before
        // the BufferSuballocator is potentially deleted.
        {
            SkAutoSpinlock lock(fMutex);
            fTail = newTail;
        }
        this->unref();
    });
}

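// Returns a buffer backing a dynamic allocation of the given size, writing its byte offset to
// *offset. Small allocations come from the suballocator's ring buffer, which may be doubled in
// size when it runs out of room; anything that still doesn't fit gets its own dedicated
// MTLBuffer at offset 0.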
id<MTLBuffer> GrMtlResourceProvider::getDynamicBuffer(size_t size, size_t* offset) {
    id<MTLBuffer> buffer = fBufferSuballocator->getAllocation(size, offset);
    if (buffer) {
        return buffer;
    }

    // Try to grow allocation (old allocation will age out).
    // We grow up to a maximum size, and only grow if the requested allocation will
    // fit into half of the new buffer (to prevent very large transient buffers forcing
    // growth when they'll never fit anyway).
    if (fBufferSuballocator->size() < fBufferSuballocatorMaxSize &&
        size <= fBufferSuballocator->size()) {
        fBufferSuballocator.reset(new BufferSuballocator(fGpu->device(),
                                                         2*fBufferSuballocator->size()));
        id<MTLBuffer> buffer = fBufferSuballocator->getAllocation(size, offset);
        if (buffer) {
            return buffer;
        }
    }

    *offset = 0;
    return alloc_dynamic_buffer(fGpu->device(), size);
}

void GrMtlResourceProvider::addBufferCompletionHandler(GrMtlCommandBuffer* cmdBuffer) {
    fBufferSuballocator->addCompletionHandler(cmdBuffer);
}
