// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
15 #include "SpirvShader.hpp"
16 
17 #include "System/Types.hpp"
18 
19 #include "Vulkan/VkDescriptorSetLayout.hpp"
20 #include "Vulkan/VkPipelineLayout.hpp"
21 
22 #include <spirv/unified1/spirv.hpp>
23 
24 namespace {
25 
SpirvFormatToVulkanFormat(spv::ImageFormat format)26 VkFormat SpirvFormatToVulkanFormat(spv::ImageFormat format)
27 {
28 	switch(format)
29 	{
30 		case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
31 		case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
32 		case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
33 		case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
34 		case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
35 		case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
36 		case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
37 		case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
38 		case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
39 		case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
40 		case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
41 		case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
42 		case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
43 		case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
44 		case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
45 		case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;
46 		case spv::ImageFormatRg16f: return VK_FORMAT_R16G16_SFLOAT;
47 		case spv::ImageFormatRg16i: return VK_FORMAT_R16G16_SINT;
48 		case spv::ImageFormatRg16ui: return VK_FORMAT_R16G16_UINT;
49 
50 		default:
51 			UNSUPPORTED("SPIR-V ImageFormat %u", format);
52 			return VK_FORMAT_UNDEFINED;
53 	}
54 }
55 
sRGBtoLinear(sw::SIMD::Float c)56 sw::SIMD::Float sRGBtoLinear(sw::SIMD::Float c)
57 {
58 	sw::SIMD::Float lc = c * sw::SIMD::Float(1.0f / 12.92f);
59 	sw::SIMD::Float ec = sw::power((c + sw::SIMD::Float(0.055f)) * sw::SIMD::Float(1.0f / 1.055f), sw::SIMD::Float(2.4f));
60 
61 	sw::SIMD::Int linear = CmpLT(c, sw::SIMD::Float(0.04045f));
62 
63 	return rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec)));  // TODO: IfThenElse()
64 }
65 
66 }  // anonymous namespace
67 
68 namespace sw {
69 
EmitImageSampleImplicitLod(Variant variant,InsnIterator insn,EmitState * state) const70 SpirvShader::EmitResult SpirvShader::EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
71 {
72 	return EmitImageSample({ variant, Implicit }, insn, state);
73 }
74 
EmitImageGather(Variant variant,InsnIterator insn,EmitState * state) const75 SpirvShader::EmitResult SpirvShader::EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const
76 {
77 	ImageInstruction instruction = { variant, Gather };
78 	instruction.gatherComponent = !instruction.isDref() ? getObject(insn.word(5)).constantValue[0] : 0;
79 
80 	return EmitImageSample(instruction, insn, state);
81 }
82 
EmitImageSampleExplicitLod(Variant variant,InsnIterator insn,EmitState * state) const83 SpirvShader::EmitResult SpirvShader::EmitImageSampleExplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
84 {
85 	auto isDref = (variant == Dref) || (variant == ProjDref);
86 	uint32_t imageOperands = static_cast<spv::ImageOperandsMask>(insn.word(isDref ? 6 : 5));
87 	imageOperands &= ~spv::ImageOperandsConstOffsetMask;  // Dealt with later.
88 
89 	if((imageOperands & spv::ImageOperandsLodMask) == imageOperands)
90 	{
91 		return EmitImageSample({ variant, Lod }, insn, state);
92 	}
93 	else if((imageOperands & spv::ImageOperandsGradMask) == imageOperands)
94 	{
95 		return EmitImageSample({ variant, Grad }, insn, state);
96 	}
97 	else
98 		UNSUPPORTED("Image Operands %x", imageOperands);
99 
100 	return EmitResult::Continue;
101 }
102 
EmitImageFetch(InsnIterator insn,EmitState * state) const103 SpirvShader::EmitResult SpirvShader::EmitImageFetch(InsnIterator insn, EmitState *state) const
104 {
105 	return EmitImageSample({ None, Fetch }, insn, state);
106 }
107 
EmitImageSample(ImageInstruction instruction,InsnIterator insn,EmitState * state) const108 SpirvShader::EmitResult SpirvShader::EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const
109 {
110 	Type::ID resultTypeId = insn.word(1);
111 	Object::ID resultId = insn.word(2);
112 	Object::ID sampledImageId = insn.word(3);  // For OpImageFetch this is just an Image, not a SampledImage.
113 	Object::ID coordinateId = insn.word(4);
114 	auto &resultType = getType(resultTypeId);
115 
116 	auto &result = state->createIntermediate(resultId, resultType.sizeInComponents);
117 	auto imageDescriptor = state->getPointer(sampledImageId).base;  // vk::SampledImageDescriptor*
118 
119 	// If using a separate sampler, look through the OpSampledImage instruction to find the sampler descriptor
120 	auto &sampledImage = getObject(sampledImageId);
121 	auto samplerDescriptor = (sampledImage.opcode() == spv::OpSampledImage) ? state->getPointer(sampledImage.definition.word(4)).base : imageDescriptor;
122 
123 	auto coordinate = GenericValue(this, state, coordinateId);
124 	auto &coordinateType = getType(coordinate.type);
125 
126 	Pointer<Byte> sampler = samplerDescriptor + OFFSET(vk::SampledImageDescriptor, sampler);  // vk::Sampler*
127 	Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture);    // sw::Texture*
128 
129 	// Above we assumed that if the SampledImage operand is not the result of an OpSampledImage,
130 	// it must be a combined image sampler loaded straight from the descriptor set. For OpImageFetch
131 	// it's just an Image operand, so there's no sampler descriptor data.
132 	if(getType(sampledImage.type).opcode() != spv::OpTypeSampledImage)
133 	{
134 		sampler = Pointer<Byte>(nullptr);
135 	}
136 
137 	uint32_t imageOperands = spv::ImageOperandsMaskNone;
138 	bool lodOrBias = false;
139 	Object::ID lodOrBiasId = 0;
140 	bool grad = false;
141 	Object::ID gradDxId = 0;
142 	Object::ID gradDyId = 0;
143 	bool constOffset = false;
144 	Object::ID offsetId = 0;
145 	bool sample = false;
146 	Object::ID sampleId = 0;
147 
148 	uint32_t operand = (instruction.isDref() || instruction.samplerMethod == Gather) ? 6 : 5;
149 
150 	if(insn.wordCount() > operand)
151 	{
152 		imageOperands = static_cast<spv::ImageOperandsMask>(insn.word(operand++));
153 
154 		if(imageOperands & spv::ImageOperandsBiasMask)
155 		{
156 			lodOrBias = true;
157 			lodOrBiasId = insn.word(operand);
158 			operand++;
159 			imageOperands &= ~spv::ImageOperandsBiasMask;
160 
161 			ASSERT(instruction.samplerMethod == Implicit);
162 			instruction.samplerMethod = Bias;
163 		}
164 
165 		if(imageOperands & spv::ImageOperandsLodMask)
166 		{
167 			lodOrBias = true;
168 			lodOrBiasId = insn.word(operand);
169 			operand++;
170 			imageOperands &= ~spv::ImageOperandsLodMask;
171 		}
172 
173 		if(imageOperands & spv::ImageOperandsGradMask)
174 		{
175 			ASSERT(!lodOrBias);  // SPIR-V 1.3: "It is invalid to set both the Lod and Grad bits." Bias is for ImplicitLod, Grad for ExplicitLod.
176 			grad = true;
177 			gradDxId = insn.word(operand + 0);
178 			gradDyId = insn.word(operand + 1);
179 			operand += 2;
180 			imageOperands &= ~spv::ImageOperandsGradMask;
181 		}
182 
183 		if(imageOperands & spv::ImageOperandsConstOffsetMask)
184 		{
185 			constOffset = true;
186 			offsetId = insn.word(operand);
187 			operand++;
188 			imageOperands &= ~spv::ImageOperandsConstOffsetMask;
189 		}
190 
191 		if(imageOperands & spv::ImageOperandsSampleMask)
192 		{
193 			sample = true;
194 			sampleId = insn.word(operand);
195 			imageOperands &= ~spv::ImageOperandsSampleMask;
196 
197 			ASSERT(instruction.samplerMethod == Fetch);
198 			instruction.sample = true;
199 		}
200 
201 		if(imageOperands != 0)
202 		{
203 			UNSUPPORTED("Image operand %x", imageOperands);
204 		}
205 	}
206 
207 	Array<SIMD::Float> in(16);  // Maximum 16 input parameter components.
208 
209 	uint32_t coordinates = coordinateType.sizeInComponents - instruction.isProj();
210 	instruction.coordinates = coordinates;
211 
212 	uint32_t i = 0;
213 	for(; i < coordinates; i++)
214 	{
215 		if(instruction.isProj())
216 		{
217 			in[i] = coordinate.Float(i) / coordinate.Float(coordinates);  // TODO(b/129523279): Optimize using reciprocal.
218 		}
219 		else
220 		{
221 			in[i] = coordinate.Float(i);
222 		}
223 	}
224 
225 	if(instruction.isDref())
226 	{
227 		auto drefValue = GenericValue(this, state, insn.word(5));
228 
229 		if(instruction.isProj())
230 		{
231 			in[i] = drefValue.Float(0) / coordinate.Float(coordinates);  // TODO(b/129523279): Optimize using reciprocal.
232 		}
233 		else
234 		{
235 			in[i] = drefValue.Float(0);
236 		}
237 
238 		i++;
239 	}
240 
241 	if(lodOrBias)
242 	{
243 		auto lodValue = GenericValue(this, state, lodOrBiasId);
244 		in[i] = lodValue.Float(0);
245 		i++;
246 	}
247 	else if(grad)
248 	{
249 		auto dxValue = GenericValue(this, state, gradDxId);
250 		auto dyValue = GenericValue(this, state, gradDyId);
251 		auto &dxyType = getType(dxValue.type);
252 		ASSERT(dxyType.sizeInComponents == getType(dyValue.type).sizeInComponents);
253 
254 		instruction.grad = dxyType.sizeInComponents;
255 
256 		for(uint32_t j = 0; j < dxyType.sizeInComponents; j++, i++)
257 		{
258 			in[i] = dxValue.Float(j);
259 		}
260 
261 		for(uint32_t j = 0; j < dxyType.sizeInComponents; j++, i++)
262 		{
263 			in[i] = dyValue.Float(j);
264 		}
265 	}
266 	else if(instruction.samplerMethod == Fetch)
267 	{
268 		// The instruction didn't provide a lod operand, but the sampler's Fetch
269 		// function requires one to be present. If no lod is supplied, the default
270 		// is zero.
271 		in[i] = As<SIMD::Float>(SIMD::Int(0));
272 		i++;
273 	}
274 
275 	if(constOffset)
276 	{
277 		auto offsetValue = GenericValue(this, state, offsetId);
278 		auto &offsetType = getType(offsetValue.type);
279 
280 		instruction.offset = offsetType.sizeInComponents;
281 
282 		for(uint32_t j = 0; j < offsetType.sizeInComponents; j++, i++)
283 		{
284 			in[i] = As<SIMD::Float>(offsetValue.Int(j));  // Integer values, but transfered as float.
285 		}
286 	}
287 
288 	if(sample)
289 	{
290 		auto sampleValue = GenericValue(this, state, sampleId);
291 		in[i] = As<SIMD::Float>(sampleValue.Int(0));
292 	}
293 
294 	auto cacheIt = state->routine->samplerCache.find(resultId);
295 	ASSERT(cacheIt != state->routine->samplerCache.end());
296 	auto &cache = cacheIt->second;
297 	auto cacheHit = cache.imageDescriptor == imageDescriptor && cache.sampler == sampler;
298 
299 	If(!cacheHit)
300 	{
301 		cache.function = Call(getImageSampler, instruction.parameters, imageDescriptor, sampler);
302 		cache.imageDescriptor = imageDescriptor;
303 		cache.sampler = sampler;
304 	}
305 
306 	Array<SIMD::Float> out(4);
307 	Call<ImageSampler>(cache.function, texture, &in[0], &out[0], state->routine->constants);
308 
309 	for(auto i = 0u; i < resultType.sizeInComponents; i++) { result.move(i, out[i]); }
310 
311 	return EmitResult::Continue;
312 }
313 
EmitImageQuerySizeLod(InsnIterator insn,EmitState * state) const314 SpirvShader::EmitResult SpirvShader::EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const
315 {
316 	auto &resultTy = getType(Type::ID(insn.word(1)));
317 	auto resultId = Object::ID(insn.word(2));
318 	auto imageId = Object::ID(insn.word(3));
319 	auto lodId = Object::ID(insn.word(4));
320 
321 	auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents);
322 	GetImageDimensions(state, resultTy, imageId, lodId, dst);
323 
324 	return EmitResult::Continue;
325 }
326 
EmitImageQuerySize(InsnIterator insn,EmitState * state) const327 SpirvShader::EmitResult SpirvShader::EmitImageQuerySize(InsnIterator insn, EmitState *state) const
328 {
329 	auto &resultTy = getType(Type::ID(insn.word(1)));
330 	auto resultId = Object::ID(insn.word(2));
331 	auto imageId = Object::ID(insn.word(3));
332 	auto lodId = Object::ID(0);
333 
334 	auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents);
335 	GetImageDimensions(state, resultTy, imageId, lodId, dst);
336 
337 	return EmitResult::Continue;
338 }
339 
EmitImageQueryLod(InsnIterator insn,EmitState * state) const340 SpirvShader::EmitResult SpirvShader::EmitImageQueryLod(InsnIterator insn, EmitState *state) const
341 {
342 	return EmitImageSample({ None, Query }, insn, state);
343 }
344 
GetImageDimensions(EmitState const * state,Type const & resultTy,Object::ID imageId,Object::ID lodId,Intermediate & dst) const345 void SpirvShader::GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
346 {
347 	auto routine = state->routine;
348 	auto &image = getObject(imageId);
349 	auto &imageType = getType(image.type);
350 
351 	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
352 	bool isArrayed = imageType.definition.word(5) != 0;
353 	bool isCubeMap = imageType.definition.word(3) == spv::DimCube;
354 
355 	const DescriptorDecorations &d = descriptorDecorations.at(imageId);
356 	auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
357 	auto &bindingLayout = setLayout->getBindingLayout(d.Binding);
358 
359 	Pointer<Byte> descriptor = state->getPointer(imageId).base;
360 
361 	Pointer<Int> extent;
362 	Int arrayLayers;
363 
364 	switch(bindingLayout.descriptorType)
365 	{
366 		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
367 		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
368 		{
369 			extent = descriptor + OFFSET(vk::StorageImageDescriptor, extent);                           // int[3]*
370 			arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, arrayLayers));  // uint32_t
371 			break;
372 		}
373 		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
374 		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
375 		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
376 		{
377 			extent = descriptor + OFFSET(vk::SampledImageDescriptor, extent);                           // int[3]*
378 			arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, arrayLayers));  // uint32_t
379 			break;
380 		}
381 		default:
382 			UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
383 	}
384 
385 	auto dimensions = resultTy.sizeInComponents - (isArrayed ? 1 : 0);
386 	std::vector<Int> out;
387 	if(lodId != 0)
388 	{
389 		auto lodVal = GenericValue(this, state, lodId);
390 		ASSERT(getType(lodVal.type).sizeInComponents == 1);
391 		auto lod = lodVal.Int(0);
392 		auto one = SIMD::Int(1);
393 		for(uint32_t i = 0; i < dimensions; i++)
394 		{
395 			dst.move(i, Max(SIMD::Int(extent[i]) >> lod, one));
396 		}
397 	}
398 	else
399 	{
400 		for(uint32_t i = 0; i < dimensions; i++)
401 		{
402 			dst.move(i, SIMD::Int(extent[i]));
403 		}
404 	}
405 
406 	if(isArrayed)
407 	{
408 		auto numElements = isCubeMap ? (arrayLayers / 6) : RValue<Int>(arrayLayers);
409 		dst.move(dimensions, SIMD::Int(numElements));
410 	}
411 }
412 
EmitImageQueryLevels(InsnIterator insn,EmitState * state) const413 SpirvShader::EmitResult SpirvShader::EmitImageQueryLevels(InsnIterator insn, EmitState *state) const
414 {
415 	auto &resultTy = getType(Type::ID(insn.word(1)));
416 	ASSERT(resultTy.sizeInComponents == 1);
417 	auto resultId = Object::ID(insn.word(2));
418 	auto imageId = Object::ID(insn.word(3));
419 
420 	const DescriptorDecorations &d = descriptorDecorations.at(imageId);
421 	auto setLayout = state->routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
422 	auto &bindingLayout = setLayout->getBindingLayout(d.Binding);
423 
424 	Pointer<Byte> descriptor = state->getPointer(imageId).base;
425 	Int mipLevels = 0;
426 	switch(bindingLayout.descriptorType)
427 	{
428 		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
429 		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
430 		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
431 			mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels));  // uint32_t
432 			break;
433 		default:
434 			UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
435 	}
436 
437 	auto &dst = state->createIntermediate(resultId, 1);
438 	dst.move(0, SIMD::Int(mipLevels));
439 
440 	return EmitResult::Continue;
441 }
442 
EmitImageQuerySamples(InsnIterator insn,EmitState * state) const443 SpirvShader::EmitResult SpirvShader::EmitImageQuerySamples(InsnIterator insn, EmitState *state) const
444 {
445 	auto &resultTy = getType(Type::ID(insn.word(1)));
446 	ASSERT(resultTy.sizeInComponents == 1);
447 	auto resultId = Object::ID(insn.word(2));
448 	auto imageId = Object::ID(insn.word(3));
449 	auto imageTy = getType(getObject(imageId).type);
450 	ASSERT(imageTy.definition.opcode() == spv::OpTypeImage);
451 	ASSERT(imageTy.definition.word(3) == spv::Dim2D);
452 	ASSERT(imageTy.definition.word(6 /* MS */) == 1);
453 
454 	const DescriptorDecorations &d = descriptorDecorations.at(imageId);
455 	auto setLayout = state->routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
456 	auto &bindingLayout = setLayout->getBindingLayout(d.Binding);
457 
458 	Pointer<Byte> descriptor = state->getPointer(imageId).base;
459 	Int sampleCount = 0;
460 	switch(bindingLayout.descriptorType)
461 	{
462 		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
463 			sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));  // uint32_t
464 			break;
465 		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
466 		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
467 		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
468 			sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount));  // uint32_t
469 			break;
470 		default:
471 			UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
472 	}
473 
474 	auto &dst = state->createIntermediate(resultId, 1);
475 	dst.move(0, SIMD::Int(sampleCount));
476 
477 	return EmitResult::Continue;
478 }
479 
GetTexelAddress(EmitState const * state,SIMD::Pointer ptr,GenericValue const & coordinate,Type const & imageType,Pointer<Byte> descriptor,int texelSize,Object::ID sampleId,bool useStencilAspect) const480 SIMD::Pointer SpirvShader::GetTexelAddress(EmitState const *state, SIMD::Pointer ptr, GenericValue const &coordinate, Type const &imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const
481 {
482 	auto routine = state->routine;
483 	bool isArrayed = imageType.definition.word(5) != 0;
484 	auto dim = static_cast<spv::Dim>(imageType.definition.word(3));
485 	int dims = getType(coordinate.type).sizeInComponents - (isArrayed ? 1 : 0);
486 
487 	SIMD::Int u = coordinate.Int(0);
488 	SIMD::Int v = SIMD::Int(0);
489 
490 	if(getType(coordinate.type).sizeInComponents > 1)
491 	{
492 		v = coordinate.Int(1);
493 	}
494 
495 	if(dim == spv::DimSubpassData)
496 	{
497 		u += routine->windowSpacePosition[0];
498 		v += routine->windowSpacePosition[1];
499 	}
500 
501 	auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
502 	                                                          ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
503 	                                                          : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
504 	auto slicePitch = SIMD::Int(
505 	    *Pointer<Int>(descriptor + (useStencilAspect
506 	                                    ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
507 	                                    : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
508 	auto samplePitch = SIMD::Int(
509 	    *Pointer<Int>(descriptor + (useStencilAspect
510 	                                    ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
511 	                                    : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));
512 
513 	ptr += u * SIMD::Int(texelSize);
514 	if(dims > 1)
515 	{
516 		ptr += v * rowPitch;
517 	}
518 	if(dims > 2)
519 	{
520 		ptr += coordinate.Int(2) * slicePitch;
521 	}
522 	if(isArrayed)
523 	{
524 		ptr += coordinate.Int(dims) * slicePitch;
525 	}
526 
527 	if(dim == spv::DimSubpassData)
528 	{
529 		// Multiview input attachment access is to the layer corresponding to the current view
530 		ptr += SIMD::Int(routine->viewID) * slicePitch;
531 	}
532 
533 	if(sampleId.value())
534 	{
535 		GenericValue sample(this, state, sampleId);
536 		ptr += sample.Int(0) * samplePitch;
537 	}
538 
539 	return ptr;
540 }
541 
EmitImageRead(InsnIterator insn,EmitState * state) const542 SpirvShader::EmitResult SpirvShader::EmitImageRead(InsnIterator insn, EmitState *state) const
543 {
544 	auto &resultType = getType(Type::ID(insn.word(1)));
545 	auto imageId = Object::ID(insn.word(3));
546 	auto &image = getObject(imageId);
547 	auto &imageType = getType(image.type);
548 	Object::ID resultId = insn.word(2);
549 
550 	Object::ID sampleId = 0;
551 
552 	if(insn.wordCount() > 5)
553 	{
554 		int operand = 6;
555 		auto imageOperands = insn.word(5);
556 		if(imageOperands & spv::ImageOperandsSampleMask)
557 		{
558 			sampleId = insn.word(operand++);
559 			imageOperands &= ~spv::ImageOperandsSampleMask;
560 		}
561 
562 		// Should be no remaining image operands.
563 		ASSERT(!imageOperands);
564 	}
565 
566 	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
567 	auto dim = static_cast<spv::Dim>(imageType.definition.word(3));
568 
569 	auto coordinate = GenericValue(this, state, insn.word(4));
570 	const DescriptorDecorations &d = descriptorDecorations.at(imageId);
571 
572 	// For subpass data, format in the instruction is spv::ImageFormatUnknown. Get it from
573 	// the renderpass data instead. In all other cases, we can use the format in the instruction.
574 	auto vkFormat = (dim == spv::DimSubpassData)
575 	                    ? inputAttachmentFormats[d.InputAttachmentIndex]
576 	                    : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(imageType.definition.word(8)));
577 
578 	// Depth+Stencil image attachments select aspect based on the Sampled Type of the
579 	// OpTypeImage. If float, then we want the depth aspect. If int, we want the stencil aspect.
580 	auto useStencilAspect = (vkFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
581 	                         getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);
582 
583 	if(useStencilAspect)
584 	{
585 		vkFormat = VK_FORMAT_S8_UINT;
586 	}
587 
588 	auto pointer = state->getPointer(imageId);
589 	Pointer<Byte> binding = pointer.base;
590 	Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + (useStencilAspect
591 	                                                                 ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
592 	                                                                 : OFFSET(vk::StorageImageDescriptor, ptr)));
593 
594 	auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
595 
596 	auto &dst = state->createIntermediate(resultId, resultType.sizeInComponents);
597 
598 	auto texelSize = vk::Format(vkFormat).bytes();
599 	auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
600 	auto texelPtr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, texelSize, sampleId, useStencilAspect);
601 
602 	// "The value returned by a read of an invalid texel is undefined,
603 	//  unless that read operation is from a buffer resource and the robustBufferAccess feature is enabled."
604 	// TODO: Don't always assume a buffer resource.
605 	auto robustness = OutOfBoundsBehavior::RobustBufferAccess;
606 
607 	SIMD::Int packed[4];
608 	// Round up texel size: for formats smaller than 32 bits per texel, we will emit a bunch
609 	// of (overlapping) 32b loads here, and each lane will pick out what it needs from the low bits.
610 	// TODO: specialize for small formats?
611 	for(auto i = 0; i < (texelSize + 3) / 4; i++)
612 	{
613 		packed[i] = texelPtr.Load<SIMD::Int>(robustness, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
614 		texelPtr += sizeof(float);
615 	}
616 
617 	// Format support requirements here come from two sources:
618 	// - Minimum required set of formats for loads from storage images
619 	// - Any format supported as a color or depth/stencil attachment, for input attachments
620 	switch(vkFormat)
621 	{
622 		case VK_FORMAT_R32G32B32A32_SFLOAT:
623 		case VK_FORMAT_R32G32B32A32_SINT:
624 		case VK_FORMAT_R32G32B32A32_UINT:
625 			dst.move(0, packed[0]);
626 			dst.move(1, packed[1]);
627 			dst.move(2, packed[2]);
628 			dst.move(3, packed[3]);
629 			break;
630 		case VK_FORMAT_R32_SINT:
631 		case VK_FORMAT_R32_UINT:
632 			dst.move(0, packed[0]);
633 			// Fill remaining channels with 0,0,1 (of the correct type)
634 			dst.move(1, SIMD::Int(0));
635 			dst.move(2, SIMD::Int(0));
636 			dst.move(3, SIMD::Int(1));
637 			break;
638 		case VK_FORMAT_R32_SFLOAT:
639 		case VK_FORMAT_D32_SFLOAT:
640 		case VK_FORMAT_D32_SFLOAT_S8_UINT:
641 			dst.move(0, packed[0]);
642 			// Fill remaining channels with 0,0,1 (of the correct type)
643 			dst.move(1, SIMD::Float(0));
644 			dst.move(2, SIMD::Float(0));
645 			dst.move(3, SIMD::Float(1));
646 			break;
647 		case VK_FORMAT_D16_UNORM:
648 			dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xffff)) * SIMD::Float(1.0f / 65535.0f));
649 			dst.move(1, SIMD::Float(0));
650 			dst.move(2, SIMD::Float(0));
651 			dst.move(3, SIMD::Float(1));
652 			break;
653 		case VK_FORMAT_R16G16B16A16_SINT:
654 			dst.move(0, (packed[0] << 16) >> 16);
655 			dst.move(1, (packed[0]) >> 16);
656 			dst.move(2, (packed[1] << 16) >> 16);
657 			dst.move(3, (packed[1]) >> 16);
658 			break;
659 		case VK_FORMAT_R16G16B16A16_UINT:
660 			dst.move(0, packed[0] & SIMD::Int(0xffff));
661 			dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
662 			dst.move(2, packed[1] & SIMD::Int(0xffff));
663 			dst.move(3, (packed[1] >> 16) & SIMD::Int(0xffff));
664 			break;
665 		case VK_FORMAT_R16G16B16A16_SFLOAT:
666 			dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
667 			dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
668 			dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
669 			dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
670 			break;
671 		case VK_FORMAT_R8G8B8A8_SNORM:
672 			dst.move(0, Min(Max(SIMD::Float(((packed[0] << 24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
673 			dst.move(1, Min(Max(SIMD::Float(((packed[0] << 16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
674 			dst.move(2, Min(Max(SIMD::Float(((packed[0] << 8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
675 			dst.move(3, Min(Max(SIMD::Float(((packed[0]) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
676 			break;
677 		case VK_FORMAT_R8G8B8A8_UNORM:
678 		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
679 			dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
680 			dst.move(1, SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
681 			dst.move(2, SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
682 			dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
683 			break;
684 		case VK_FORMAT_R8G8B8A8_SRGB:
685 		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
686 			dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
687 			dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
688 			dst.move(2, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
689 			dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
690 			break;
691 		case VK_FORMAT_B8G8R8A8_UNORM:
692 			dst.move(0, SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
693 			dst.move(1, SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
694 			dst.move(2, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
695 			dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
696 			break;
697 		case VK_FORMAT_B8G8R8A8_SRGB:
698 			dst.move(0, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
699 			dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
700 			dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
701 			dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
702 			break;
703 		case VK_FORMAT_R8G8B8A8_UINT:
704 		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
705 			dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
706 			dst.move(1, ((As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF)));
707 			dst.move(2, ((As<SIMD::UInt>(packed[0]) >> 16) & SIMD::UInt(0xFF)));
708 			dst.move(3, ((As<SIMD::UInt>(packed[0]) >> 24) & SIMD::UInt(0xFF)));
709 			break;
710 		case VK_FORMAT_R8G8B8A8_SINT:
711 		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
712 			dst.move(0, (packed[0] << 24) >> 24);
713 			dst.move(1, (packed[0] << 16) >> 24);
714 			dst.move(2, (packed[0] << 8) >> 24);
715 			dst.move(3, (packed[0]) >> 24);
716 			break;
717 		case VK_FORMAT_R8_UNORM:
718 			dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
719 			dst.move(1, SIMD::Float(0));
720 			dst.move(2, SIMD::Float(0));
721 			dst.move(3, SIMD::Float(1));
722 			break;
723 		case VK_FORMAT_R8_UINT:
724 		case VK_FORMAT_S8_UINT:
725 			dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
726 			dst.move(1, SIMD::UInt(0));
727 			dst.move(2, SIMD::UInt(0));
728 			dst.move(3, SIMD::UInt(1));
729 			break;
730 		case VK_FORMAT_R8_SINT:
731 			dst.move(0, (packed[0] << 24) >> 24);
732 			dst.move(1, SIMD::Int(0));
733 			dst.move(2, SIMD::Int(0));
734 			dst.move(3, SIMD::Int(1));
735 			break;
736 		case VK_FORMAT_R8G8_UNORM:
737 			dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
738 			dst.move(1, SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
739 			dst.move(2, SIMD::Float(0));
740 			dst.move(3, SIMD::Float(1));
741 			break;
742 		case VK_FORMAT_R8G8_UINT:
743 			dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
744 			dst.move(1, ((As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF)));
745 			dst.move(2, SIMD::UInt(0));
746 			dst.move(3, SIMD::UInt(1));
747 			break;
748 		case VK_FORMAT_R8G8_SINT:
749 			dst.move(0, (packed[0] << 24) >> 24);
750 			dst.move(1, (packed[0] << 16) >> 24);
751 			dst.move(2, SIMD::Int(0));
752 			dst.move(3, SIMD::Int(1));
753 			break;
754 		case VK_FORMAT_R16_SFLOAT:
755 			dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
756 			dst.move(1, SIMD::Float(0));
757 			dst.move(2, SIMD::Float(0));
758 			dst.move(3, SIMD::Float(1));
759 			break;
760 		case VK_FORMAT_R16_UINT:
761 			dst.move(0, packed[0] & SIMD::Int(0xffff));
762 			dst.move(1, SIMD::UInt(0));
763 			dst.move(2, SIMD::UInt(0));
764 			dst.move(3, SIMD::UInt(1));
765 			break;
766 		case VK_FORMAT_R16_SINT:
767 			dst.move(0, (packed[0] << 16) >> 16);
768 			dst.move(1, SIMD::Int(0));
769 			dst.move(2, SIMD::Int(0));
770 			dst.move(3, SIMD::Int(1));
771 			break;
772 		case VK_FORMAT_R16G16_SFLOAT:
773 			dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
774 			dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
775 			dst.move(2, SIMD::Float(0));
776 			dst.move(3, SIMD::Float(1));
777 			break;
778 		case VK_FORMAT_R16G16_UINT:
779 			dst.move(0, packed[0] & SIMD::Int(0xffff));
780 			dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
781 			dst.move(2, SIMD::UInt(0));
782 			dst.move(3, SIMD::UInt(1));
783 			break;
784 		case VK_FORMAT_R16G16_SINT:
785 			dst.move(0, (packed[0] << 16) >> 16);
786 			dst.move(1, (packed[0]) >> 16);
787 			dst.move(2, SIMD::Int(0));
788 			dst.move(3, SIMD::Int(1));
789 			break;
790 		case VK_FORMAT_R32G32_SINT:
791 		case VK_FORMAT_R32G32_UINT:
792 			dst.move(0, packed[0]);
793 			dst.move(1, packed[1]);
794 			dst.move(2, SIMD::Int(0));
795 			dst.move(3, SIMD::Int(1));
796 			break;
797 		case VK_FORMAT_R32G32_SFLOAT:
798 			dst.move(0, packed[0]);
799 			dst.move(1, packed[1]);
800 			dst.move(2, SIMD::Float(0));
801 			dst.move(3, SIMD::Float(1));
802 			break;
803 		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
804 			dst.move(0, (packed[0]) & SIMD::Int(0x3FF));
805 			dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
806 			dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
807 			dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
808 			break;
809 		case VK_FORMAT_A2R10G10B10_UINT_PACK32:
810 			dst.move(2, (packed[0]) & SIMD::Int(0x3FF));
811 			dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
812 			dst.move(0, (packed[0] >> 20) & SIMD::Int(0x3FF));
813 			dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
814 			break;
815 		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
816 			dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
817 			dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
818 			dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
819 			dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
820 			break;
821 		case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
822 			dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
823 			dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
824 			dst.move(0, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
825 			dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
826 			break;
827 		case VK_FORMAT_R5G6B5_UNORM_PACK16:
828 			dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
829 			dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
830 			dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
831 			dst.move(3, SIMD::Float(1));
832 			break;
833 		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
834 			dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
835 			dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
836 			dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
837 			dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
838 			break;
839 		default:
840 			UNSUPPORTED("VkFormat %d", int(vkFormat));
841 			break;
842 	}
843 
844 	return EmitResult::Continue;
845 }
846 
EmitImageWrite(InsnIterator insn,EmitState * state) const847 SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState *state) const
848 {
849 	auto imageId = Object::ID(insn.word(1));
850 	auto &image = getObject(imageId);
851 	auto &imageType = getType(image.type);
852 
853 	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
854 
855 	// TODO(b/131171141): Not handling any image operands yet.
856 	ASSERT(insn.wordCount() == 4);
857 
858 	auto coordinate = GenericValue(this, state, insn.word(2));
859 	auto texel = GenericValue(this, state, insn.word(3));
860 
861 	Pointer<Byte> binding = state->getPointer(imageId).base;
862 	Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
863 	auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
864 
865 	SIMD::Int packed[4];
866 	auto numPackedElements = 0u;
867 	int texelSize = 0;
868 	auto format = static_cast<spv::ImageFormat>(imageType.definition.word(8));
869 	switch(format)
870 	{
871 		case spv::ImageFormatRgba32f:
872 		case spv::ImageFormatRgba32i:
873 		case spv::ImageFormatRgba32ui:
874 			texelSize = 16;
875 			packed[0] = texel.Int(0);
876 			packed[1] = texel.Int(1);
877 			packed[2] = texel.Int(2);
878 			packed[3] = texel.Int(3);
879 			numPackedElements = 4;
880 			break;
881 		case spv::ImageFormatR32f:
882 		case spv::ImageFormatR32i:
883 		case spv::ImageFormatR32ui:
884 			texelSize = 4;
885 			packed[0] = texel.Int(0);
886 			numPackedElements = 1;
887 			break;
888 		case spv::ImageFormatRgba8:
889 			texelSize = 4;
890 			packed[0] = (SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
891 			            ((SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
892 			            ((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
893 			            ((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
894 			numPackedElements = 1;
895 			break;
896 		case spv::ImageFormatRgba8Snorm:
897 			texelSize = 4;
898 			packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
899 			             SIMD::Int(0xFF)) |
900 			            ((SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
901 			              SIMD::Int(0xFF))
902 			             << 8) |
903 			            ((SIMD::Int(Round(Min(Max(texel.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
904 			              SIMD::Int(0xFF))
905 			             << 16) |
906 			            ((SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
907 			              SIMD::Int(0xFF))
908 			             << 24);
909 			numPackedElements = 1;
910 			break;
911 		case spv::ImageFormatRgba8i:
912 		case spv::ImageFormatRgba8ui:
913 			texelSize = 4;
914 			packed[0] = (SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xff))) |
915 			            (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xff)) << 8) |
916 			            (SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xff)) << 16) |
917 			            (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xff)) << 24);
918 			numPackedElements = 1;
919 			break;
920 		case spv::ImageFormatRgba16f:
921 			texelSize = 8;
922 			packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true);
923 			packed[1] = floatToHalfBits(texel.UInt(2), false) | floatToHalfBits(texel.UInt(3), true);
924 			numPackedElements = 2;
925 			break;
926 		case spv::ImageFormatRgba16i:
927 		case spv::ImageFormatRgba16ui:
928 			texelSize = 8;
929 			packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xffff)) << 16);
930 			packed[1] = SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xffff)) << 16);
931 			numPackedElements = 2;
932 			break;
933 		case spv::ImageFormatRg32f:
934 		case spv::ImageFormatRg32i:
935 		case spv::ImageFormatRg32ui:
936 			texelSize = 8;
937 			packed[0] = texel.Int(0);
938 			packed[1] = texel.Int(1);
939 			numPackedElements = 2;
940 			break;
941 		case spv::ImageFormatRg16f:
942 			texelSize = 4;
943 			packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true);
944 			numPackedElements = 1;
945 			break;
946 		case spv::ImageFormatRg16i:
947 		case spv::ImageFormatRg16ui:
948 			texelSize = 4;
949 			packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xffff)) << 16);
950 			numPackedElements = 1;
951 			break;
952 
953 		case spv::ImageFormatR11fG11fB10f:
954 		case spv::ImageFormatR16f:
955 		case spv::ImageFormatRgba16:
956 		case spv::ImageFormatRgb10A2:
957 		case spv::ImageFormatRg16:
958 		case spv::ImageFormatRg8:
959 		case spv::ImageFormatR16:
960 		case spv::ImageFormatR8:
961 		case spv::ImageFormatRgba16Snorm:
962 		case spv::ImageFormatRg16Snorm:
963 		case spv::ImageFormatRg8Snorm:
964 		case spv::ImageFormatR16Snorm:
965 		case spv::ImageFormatR8Snorm:
966 		case spv::ImageFormatRg8i:
967 		case spv::ImageFormatR16i:
968 		case spv::ImageFormatR8i:
969 		case spv::ImageFormatRgb10a2ui:
970 		case spv::ImageFormatRg8ui:
971 		case spv::ImageFormatR16ui:
972 		case spv::ImageFormatR8ui:
973 			UNSUPPORTED("spv::ImageFormat %d", int(format));
974 			break;
975 
976 		default:
977 			UNREACHABLE("spv::ImageFormat %d", int(format));
978 			break;
979 	}
980 
981 	auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
982 	auto texelPtr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, texelSize, 0, false);
983 
984 	// SPIR-V 1.4: "If the coordinates are outside the image, the memory location that is accessed is undefined."
985 	auto robustness = OutOfBoundsBehavior::UndefinedValue;
986 
987 	for(auto i = 0u; i < numPackedElements; i++)
988 	{
989 		texelPtr.Store(packed[i], robustness, state->activeLaneMask());
990 		texelPtr += sizeof(float);
991 	}
992 
993 	return EmitResult::Continue;
994 }
995 
EmitImageTexelPointer(InsnIterator insn,EmitState * state) const996 SpirvShader::EmitResult SpirvShader::EmitImageTexelPointer(InsnIterator insn, EmitState *state) const
997 {
998 	auto &resultType = getType(Type::ID(insn.word(1)));
999 	auto imageId = Object::ID(insn.word(3));
1000 	auto &image = getObject(imageId);
1001 	// Note: OpImageTexelPointer is unusual in that the image is passed by pointer.
1002 	// Look through to get the actual image type.
1003 	auto &imageType = getType(getType(image.type).element);
1004 	Object::ID resultId = insn.word(2);
1005 
1006 	ASSERT(imageType.opcode() == spv::OpTypeImage);
1007 	ASSERT(resultType.storageClass == spv::StorageClassImage);
1008 	ASSERT(getType(resultType.element).opcode() == spv::OpTypeInt);
1009 
1010 	auto coordinate = GenericValue(this, state, insn.word(4));
1011 
1012 	Pointer<Byte> binding = state->getPointer(imageId).base;
1013 	Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
1014 	auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
1015 
1016 	auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
1017 	auto ptr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, sizeof(uint32_t), 0, false);
1018 
1019 	state->createPointer(resultId, ptr);
1020 
1021 	return EmitResult::Continue;
1022 }
1023 
EmitSampledImageCombineOrSplit(InsnIterator insn,EmitState * state) const1024 SpirvShader::EmitResult SpirvShader::EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const
1025 {
1026 	// Propagate the image pointer in both cases.
1027 	// Consumers of OpSampledImage will look through to find the sampler pointer.
1028 
1029 	Object::ID resultId = insn.word(2);
1030 	Object::ID imageId = insn.word(3);
1031 
1032 	state->createPointer(resultId, state->getPointer(imageId));
1033 
1034 	return EmitResult::Continue;
1035 }
1036 
1037 }  // namespace sw