/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/mtl/GrMtlResourceProvider.h"

#include "include/gpu/GrContextOptions.h"
#include "src/gpu/GrContextPriv.h"
#include "src/gpu/mtl/GrMtlCommandBuffer.h"
#include "src/gpu/mtl/GrMtlGpu.h"
#include "src/gpu/mtl/GrMtlPipelineState.h"
#include "src/gpu/mtl/GrMtlUtil.h"

#include "src/sksl/SkSLCompiler.h"

#if !__has_feature(objc_arc)
#error This file must be compiled with ARC. Use the -fobjc-arc flag.
#endif

GrMtlResourceProvider::GrMtlResourceProvider(GrMtlGpu* gpu)
        : fGpu(gpu) {
    fPipelineStateCache.reset(new PipelineStateCache(gpu));
    fBufferSuballocator.reset(new BufferSuballocator(gpu->device(), kBufferSuballocatorStartSize));
    // TODO: maxBufferLength seems like a reasonable metric to determine fBufferSuballocatorMaxSize
    // but may need tuning. Might also need a GrContextOption to let the client set this.
#ifdef SK_BUILD_FOR_MAC
    int64_t maxBufferLength = 1024*1024*1024;
#else
    int64_t maxBufferLength = 256*1024*1024;
#endif
    if (@available(iOS 12, macOS 10.14, *)) {
        maxBufferLength = gpu->device().maxBufferLength;
    }
    fBufferSuballocatorMaxSize = maxBufferLength/16;
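    // For example (illustrative numbers only): the 1 GB Mac fallback above yields a
    // 64 MB ceiling, and an iOS device reporting a 256 MB maxBufferLength yields 16 MB.
    // The divisor is a heuristic, per the TODO above.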
}

GrMtlPipelineState* GrMtlResourceProvider::findOrCreateCompatiblePipelineState(
        GrRenderTarget* renderTarget,
        const GrProgramInfo& programInfo,
        GrPrimitiveType primitiveType) {
    return fPipelineStateCache->refPipelineState(renderTarget, programInfo, primitiveType);
}

////////////////////////////////////////////////////////////////////////////////////////////////

GrMtlDepthStencil* GrMtlResourceProvider::findOrCreateCompatibleDepthStencilState(
        const GrStencilSettings& stencil, GrSurfaceOrigin origin) {
    GrMtlDepthStencil* depthStencilState;
    GrMtlDepthStencil::Key key = GrMtlDepthStencil::GenerateKey(stencil, origin);
    depthStencilState = fDepthStencilStates.find(key);
    if (!depthStencilState) {
        depthStencilState = GrMtlDepthStencil::Create(fGpu, stencil, origin);
        fDepthStencilStates.add(depthStencilState);
    }
    SkASSERT(depthStencilState);
    return depthStencilState;
}

GrMtlSampler* GrMtlResourceProvider::findOrCreateCompatibleSampler(const GrSamplerState& params) {
    GrMtlSampler* sampler;
    sampler = fSamplers.find(GrMtlSampler::GenerateKey(params));
    if (!sampler) {
        sampler = GrMtlSampler::Create(fGpu, params);
        fSamplers.add(sampler);
    }
    SkASSERT(sampler);
    return sampler;
}

void GrMtlResourceProvider::destroyResources() {
    // Iterate through all stored GrMtlSamplers and unref them before resetting the hash.
    SkTDynamicHash<GrMtlSampler, GrMtlSampler::Key>::Iter samplerIter(&fSamplers);
    for (; !samplerIter.done(); ++samplerIter) {
        (*samplerIter).unref();
    }
    fSamplers.reset();

    // Iterate through all stored GrMtlDepthStencils and unref them before resetting the hash.
    SkTDynamicHash<GrMtlDepthStencil, GrMtlDepthStencil::Key>::Iter dsIter(&fDepthStencilStates);
    for (; !dsIter.done(); ++dsIter) {
        (*dsIter).unref();
    }
    fDepthStencilStates.reset();

    fPipelineStateCache->release();
}

////////////////////////////////////////////////////////////////////////////////////////////////

#ifdef GR_PIPELINE_STATE_CACHE_STATS
// Display pipeline state cache usage
static const bool c_DisplayMtlPipelineCache{false};
#endif

struct GrMtlResourceProvider::PipelineStateCache::Entry {
    Entry(GrMtlGpu* gpu, GrMtlPipelineState* pipelineState)
            : fGpu(gpu)
            , fPipelineState(pipelineState) {}

    GrMtlGpu* fGpu;
    std::unique_ptr<GrMtlPipelineState> fPipelineState;
};

GrMtlResourceProvider::PipelineStateCache::PipelineStateCache(GrMtlGpu* gpu)
        : fMap(gpu->getContext()->priv().options().fRuntimeProgramCacheSize)
        , fGpu(gpu)
#ifdef GR_PIPELINE_STATE_CACHE_STATS
        , fTotalRequests(0)
        , fCacheMisses(0)
#endif
{}

GrMtlResourceProvider::PipelineStateCache::~PipelineStateCache() {
    SkASSERT(0 == fMap.count());
    // dump stats
#ifdef GR_PIPELINE_STATE_CACHE_STATS
    if (c_DisplayMtlPipelineCache) {
        SkDebugf("--- Pipeline State Cache ---\n");
        SkDebugf("Total requests: %d\n", fTotalRequests);
        SkDebugf("Cache misses: %d\n", fCacheMisses);
        SkDebugf("Cache miss %%: %f\n", (fTotalRequests > 0) ?
                                        100.f * fCacheMisses / fTotalRequests :
                                        0.f);
        SkDebugf("---------------------\n");
    }
#endif
}

void GrMtlResourceProvider::PipelineStateCache::release() {
    fMap.reset();
}

GrMtlPipelineState* GrMtlResourceProvider::PipelineStateCache::refPipelineState(
        GrRenderTarget* renderTarget,
        const GrProgramInfo& programInfo,
        GrPrimitiveType primType) {
#ifdef GR_PIPELINE_STATE_CACHE_STATS
    ++fTotalRequests;
#endif

    // TODO: unify GL, VK and Mtl
    // Get GrMtlProgramDesc
    GrMtlPipelineStateBuilder::Desc desc;
    if (!GrMtlPipelineStateBuilder::Desc::Build(&desc, renderTarget, programInfo, primType, fGpu)) {
        GrCapsDebugf(fGpu->caps(), "Failed to build mtl program descriptor!\n");
        return nullptr;
    }

    std::unique_ptr<Entry>* entry = fMap.find(desc);
    if (!entry) {
#ifdef GR_PIPELINE_STATE_CACHE_STATS
        ++fCacheMisses;
#endif
        GrMtlPipelineState* pipelineState(GrMtlPipelineStateBuilder::CreatePipelineState(
                fGpu, renderTarget, programInfo, &desc));
        if (!pipelineState) {
            return nullptr;
        }
        entry = fMap.insert(desc, std::unique_ptr<Entry>(new Entry(fGpu, pipelineState)));
    }
    return (*entry)->fPipelineState.get();
}

////////////////////////////////////////////////////////////////////////////////////////////////

static id<MTLBuffer> alloc_dynamic_buffer(id<MTLDevice> device, size_t size) {
    NSUInteger options = 0;
    if (@available(macOS 10.11, iOS 9.0, *)) {
#ifdef SK_BUILD_FOR_MAC
        options |= MTLResourceStorageModeManaged;
#else
        options |= MTLResourceStorageModeShared;
#endif
    }
    return [device newBufferWithLength: size
                               options: options];
}

// The idea here is that we create a ring buffer which is used for all dynamic allocations
// below a certain size. When a dynamic GrMtlBuffer is mapped, it grabs a portion of this
// buffer and uses it. On a subsequent map it will grab a different portion of the buffer.
// This prevents the buffer from overwriting itself before it's submitted to the command
// stream.
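//
// A worked example of the arithmetic in getAllocation() below (illustrative numbers only):
// with fTotalSize == 256, fHead == 272, and fTail == 48, the masked indices are
// modHead == 16 and modTail == 48, so the free space is the middle region [16, 48):
// a 24-byte request succeeds at offset 16, while a 40-byte request returns nil. If
// instead fHead == 240 (modHead == 240), the 16 bytes at the end are checked first;
// a 32-byte request doesn't fit there, so it wraps to offset 0, which works because
// modTail == 48 leaves room at the beginning.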

GrMtlResourceProvider::BufferSuballocator::BufferSuballocator(id<MTLDevice> device, size_t size)
        : fBuffer(alloc_dynamic_buffer(device, size))
        , fTotalSize(size)
        , fHead(0)
        , fTail(0) {
    // We increment fHead and fTail without bound and let overflow handle any wrapping.
    // Because of this, size needs to be a power of two.
    SkASSERT(SkIsPow2(size));
}

id<MTLBuffer> GrMtlResourceProvider::BufferSuballocator::getAllocation(size_t size,
                                                                       size_t* offset) {
    // capture current state locally (because fTail could be overwritten by the completion handler)
    size_t head, tail;
    SkAutoSpinlock lock(fMutex);
    head = fHead;
    tail = fTail;

    // The head and tail indices increment without bound, wrapping with overflow,
    // so we need to mod them down to the actual bounds of the allocation to determine
    // which blocks are available.
    size_t modHead = head & (fTotalSize - 1);
    size_t modTail = tail & (fTotalSize - 1);

    bool full = (head != tail && modHead == modTail);

    // We don't want large allocations to eat up this buffer, so we allocate them separately.
    if (full || size > fTotalSize/2) {
        return nil;
    }

    // case 1: free space lies at the beginning and/or the end of the buffer
    if (modHead >= modTail) {
        // check for room at the end
        if (fTotalSize - modHead < size) {
            // no room at the end, check the beginning
            if (modTail < size) {
                // no room at the beginning
                return nil;
            }
            // we are going to allocate from the beginning, adjust head to '0' position
            head += fTotalSize - modHead;
            modHead = 0;
        }
    // case 2: free space lies in the middle of the buffer, check for room there
    } else if (modTail - modHead < size) {
        // no room in the middle
        return nil;
    }

    *offset = modHead;
    // We're not sure what the usage of the next allocation will be --
    // to be safe we'll use 16 byte alignment.
    fHead = GrSizeAlignUp(head + size, 16);
    return fBuffer;
}

void GrMtlResourceProvider::BufferSuballocator::addCompletionHandler(
        GrMtlCommandBuffer* cmdBuffer) {
    this->ref();
    SkAutoSpinlock lock(fMutex);
    size_t newTail = fHead;
    cmdBuffer->addCompletedHandler(^(id<MTLCommandBuffer> commandBuffer) {
        // Make sure SkAutoSpinlock goes out of scope before
        // the BufferSuballocator is potentially deleted.
        {
            SkAutoSpinlock lock(fMutex);
            fTail = newTail;
        }
        this->unref();
    });
}

id<MTLBuffer> GrMtlResourceProvider::getDynamicBuffer(size_t size, size_t* offset) {
    id<MTLBuffer> buffer = fBufferSuballocator->getAllocation(size, offset);
    if (buffer) {
        return buffer;
    }

    // Try to grow the allocation (the old allocation will age out).
    // We grow up to a maximum size, and only grow if the requested allocation will
    // fit into half of the new buffer (to prevent very large transient buffers forcing
    // growth when they'll never fit anyway).
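    // For example (illustrative numbers only, assuming the max size hasn't been reached):
    // with a 1 MB suballocator, a 512 KB request that missed above doubles the buffer to
    // 2 MB, while a 1.5 MB request skips growth and gets a one-off buffer below.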
    if (fBufferSuballocator->size() < fBufferSuballocatorMaxSize &&
        size <= fBufferSuballocator->size()) {
        fBufferSuballocator.reset(new BufferSuballocator(fGpu->device(),
                                                         2*fBufferSuballocator->size()));
        id<MTLBuffer> buffer = fBufferSuballocator->getAllocation(size, offset);
        if (buffer) {
            return buffer;
        }
    }

    *offset = 0;
    return alloc_dynamic_buffer(fGpu->device(), size);
}

void GrMtlResourceProvider::addBufferCompletionHandler(GrMtlCommandBuffer* cmdBuffer) {
    fBufferSuballocator->addCompletionHandler(cmdBuffer);
}
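
// A rough sketch of the expected flow per command buffer (hypothetical caller code, for
// illustration only; in practice the dynamic GrMtlBuffer map path drives this, per the
// comment above BufferSuballocator):
//
//     size_t offset;
//     id<MTLBuffer> mtlBuffer = resourceProvider->getDynamicBuffer(bytes, &offset);
//     memcpy(static_cast<char*>(mtlBuffer.contents) + offset, src, bytes);
//     // (a managed buffer on macOS would also need [mtlBuffer didModifyRange:...])
//     // ... encode draws that read from (mtlBuffer, offset) ...
//     resourceProvider->addBufferCompletionHandler(cmdBuffer);  // advances fTail on completion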