1 //
2 // Copyright 2015 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // VaryingPacking:
7 //   Class which describes a mapping from varyings to registers, according
8 //   to the spec, or using custom packing algorithms. We also keep a register
9 //   allocation list for the D3D renderer.
10 //
11 
12 #include "libANGLE/VaryingPacking.h"
13 
14 #include "common/utilities.h"
15 #include "libANGLE/Program.h"
16 #include "libANGLE/Shader.h"
17 
18 namespace gl
19 {
20 
21 namespace
22 {
23 
24 // true if varying x has a higher priority in packing than y
ComparePackedVarying(const PackedVarying & x,const PackedVarying & y)25 bool ComparePackedVarying(const PackedVarying &x, const PackedVarying &y)
26 {
27     // If the PackedVarying 'x' or 'y' to be compared is an array element, this clones an equivalent
28     // non-array shader variable 'vx' or 'vy' for actual comparison instead.
29     sh::ShaderVariable vx, vy;
30     const sh::ShaderVariable *px, *py;
31     if (x.isArrayElement())
32     {
33         vx           = *x.varying;
34         vx.arraySizes.clear();
35         px           = &vx;
36     }
37     else
38     {
39         px = x.varying;
40     }
41 
42     if (y.isArrayElement())
43     {
44         vy           = *y.varying;
45         vy.arraySizes.clear();
46         py           = &vy;
47     }
48     else
49     {
50         py = y.varying;
51     }
52 
53     return gl::CompareShaderVar(*px, *py);
54 }
55 
56 }  // anonymous namespace
57 
58 // Implementation of VaryingPacking
VaryingPacking(GLuint maxVaryingVectors,PackMode packMode)59 VaryingPacking::VaryingPacking(GLuint maxVaryingVectors, PackMode packMode)
60     : mRegisterMap(maxVaryingVectors), mPackMode(packMode)
61 {
62 }
63 
64 VaryingPacking::~VaryingPacking() = default;
65 
66 // Packs varyings into generic varying registers, using the algorithm from
67 // See [OpenGL ES Shading Language 1.00 rev. 17] appendix A section 7 page 111
68 // Also [OpenGL ES Shading Language 3.00 rev. 4] Section 11 page 119
69 // Returns false if unsuccessful.
packVarying(const PackedVarying & packedVarying)70 bool VaryingPacking::packVarying(const PackedVarying &packedVarying)
71 {
72     const auto &varying = *packedVarying.varying;
73 
74     // "Non - square matrices of type matCxR consume the same space as a square matrix of type matN
75     // where N is the greater of C and R."
76     // Here we are a bit more conservative and allow packing non-square matrices more tightly.
77     // Make sure we use transposed matrix types to count registers correctly.
78     ASSERT(!varying.isStruct());
79     GLenum transposedType       = gl::TransposeMatrixType(varying.type);
80     unsigned int varyingRows    = gl::VariableRowCount(transposedType);
81     unsigned int varyingColumns = gl::VariableColumnCount(transposedType);
82 
83     // "Variables of type mat2 occupies 2 complete rows."
84     // For non-WebGL contexts, we allow mat2 to occupy only two columns per row.
85     if (mPackMode == PackMode::WEBGL_STRICT && varying.type == GL_FLOAT_MAT2)
86     {
87         varyingColumns = 4;
88     }
89 
90     // "Arrays of size N are assumed to take N times the size of the base type"
91     // GLSL ES 3.10 section 4.3.6: Output variables cannot be arrays of arrays or arrays of
92     // structures, so we may use getBasicTypeElementCount().
93     const unsigned int elementCount = varying.getBasicTypeElementCount();
94     varyingRows *= (packedVarying.isArrayElement() ? 1 : elementCount);
95 
96     unsigned int maxVaryingVectors = static_cast<unsigned int>(mRegisterMap.size());
97 
98     // Fail if we are packing a single over-large varying.
99     if (varyingRows > maxVaryingVectors)
100     {
101         return false;
102     }
103 
104     // "For 2, 3 and 4 component variables packing is started using the 1st column of the 1st row.
105     // Variables are then allocated to successive rows, aligning them to the 1st column."
106     if (varyingColumns >= 2 && varyingColumns <= 4)
107     {
108         for (unsigned int row = 0; row <= maxVaryingVectors - varyingRows; ++row)
109         {
110             if (isFree(row, 0, varyingRows, varyingColumns))
111             {
112                 insert(row, 0, packedVarying);
113                 return true;
114             }
115         }
116 
117         // "For 2 component variables, when there are no spare rows, the strategy is switched to
118         // using the highest numbered row and the lowest numbered column where the variable will
119         // fit."
120         if (varyingColumns == 2)
121         {
122             for (unsigned int r = maxVaryingVectors - varyingRows + 1; r-- >= 1;)
123             {
124                 if (isFree(r, 2, varyingRows, 2))
125                 {
126                     insert(r, 2, packedVarying);
127                     return true;
128                 }
129             }
130         }
131 
132         return false;
133     }
134 
135     // "1 component variables have their own packing rule. They are packed in order of size, largest
136     // first. Each variable is placed in the column that leaves the least amount of space in the
137     // column and aligned to the lowest available rows within that column."
138     ASSERT(varyingColumns == 1);
139     unsigned int contiguousSpace[4]     = {0};
140     unsigned int bestContiguousSpace[4] = {0};
141     unsigned int totalSpace[4]          = {0};
142 
143     for (unsigned int row = 0; row < maxVaryingVectors; ++row)
144     {
145         for (unsigned int column = 0; column < 4; ++column)
146         {
147             if (mRegisterMap[row][column])
148             {
149                 contiguousSpace[column] = 0;
150             }
151             else
152             {
153                 contiguousSpace[column]++;
154                 totalSpace[column]++;
155 
156                 if (contiguousSpace[column] > bestContiguousSpace[column])
157                 {
158                     bestContiguousSpace[column] = contiguousSpace[column];
159                 }
160             }
161         }
162     }
163 
164     unsigned int bestColumn = 0;
165     for (unsigned int column = 1; column < 4; ++column)
166     {
167         if (bestContiguousSpace[column] >= varyingRows &&
168             (bestContiguousSpace[bestColumn] < varyingRows ||
169              totalSpace[column] < totalSpace[bestColumn]))
170         {
171             bestColumn = column;
172         }
173     }
174 
175     if (bestContiguousSpace[bestColumn] >= varyingRows)
176     {
177         for (unsigned int row = 0; row < maxVaryingVectors; row++)
178         {
179             if (isFree(row, bestColumn, varyingRows, 1))
180             {
181                 for (unsigned int arrayIndex = 0; arrayIndex < varyingRows; ++arrayIndex)
182                 {
183                     // If varyingRows > 1, it must be an array.
184                     PackedVaryingRegister registerInfo;
185                     registerInfo.packedVarying     = &packedVarying;
186                     registerInfo.registerRow       = row + arrayIndex;
187                     registerInfo.registerColumn    = bestColumn;
188                     registerInfo.varyingArrayIndex =
189                         (packedVarying.isArrayElement() ? packedVarying.arrayIndex : arrayIndex);
190                     registerInfo.varyingRowIndex   = 0;
191                     // Do not record register info for builtins.
192                     // TODO(jmadill): Clean this up.
193                     if (!packedVarying.varying->isBuiltIn())
194                     {
195                         mRegisterList.push_back(registerInfo);
196                     }
197                     mRegisterMap[row + arrayIndex][bestColumn] = true;
198                 }
199                 break;
200             }
201         }
202         return true;
203     }
204 
205     return false;
206 }
207 
isFree(unsigned int registerRow,unsigned int registerColumn,unsigned int varyingRows,unsigned int varyingColumns) const208 bool VaryingPacking::isFree(unsigned int registerRow,
209                             unsigned int registerColumn,
210                             unsigned int varyingRows,
211                             unsigned int varyingColumns) const
212 {
213     for (unsigned int row = 0; row < varyingRows; ++row)
214     {
215         ASSERT(registerRow + row < mRegisterMap.size());
216         for (unsigned int column = 0; column < varyingColumns; ++column)
217         {
218             ASSERT(registerColumn + column < 4);
219             if (mRegisterMap[registerRow + row][registerColumn + column])
220             {
221                 return false;
222             }
223         }
224     }
225 
226     return true;
227 }
228 
insert(unsigned int registerRow,unsigned int registerColumn,const PackedVarying & packedVarying)229 void VaryingPacking::insert(unsigned int registerRow,
230                             unsigned int registerColumn,
231                             const PackedVarying &packedVarying)
232 {
233     unsigned int varyingRows    = 0;
234     unsigned int varyingColumns = 0;
235 
236     const auto &varying = *packedVarying.varying;
237     ASSERT(!varying.isStruct());
238     GLenum transposedType = gl::TransposeMatrixType(varying.type);
239     varyingRows           = gl::VariableRowCount(transposedType);
240     varyingColumns        = gl::VariableColumnCount(transposedType);
241 
242     PackedVaryingRegister registerInfo;
243     registerInfo.packedVarying  = &packedVarying;
244     registerInfo.registerColumn = registerColumn;
245 
246     // GLSL ES 3.10 section 4.3.6: Output variables cannot be arrays of arrays or arrays of
247     // structures, so we may use getBasicTypeElementCount().
248     const unsigned int arrayElementCount = varying.getBasicTypeElementCount();
249     for (unsigned int arrayElement = 0; arrayElement < arrayElementCount; ++arrayElement)
250     {
251         if (packedVarying.isArrayElement() && arrayElement != packedVarying.arrayIndex)
252         {
253             continue;
254         }
255         for (unsigned int varyingRow = 0; varyingRow < varyingRows; ++varyingRow)
256         {
257             registerInfo.registerRow     = registerRow + (arrayElement * varyingRows) + varyingRow;
258             registerInfo.varyingRowIndex = varyingRow;
259             registerInfo.varyingArrayIndex = arrayElement;
260             // Do not record register info for builtins.
261             // TODO(jmadill): Clean this up.
262             if (!packedVarying.varying->isBuiltIn())
263             {
264                 mRegisterList.push_back(registerInfo);
265             }
266 
267             for (unsigned int columnIndex = 0; columnIndex < varyingColumns; ++columnIndex)
268             {
269                 mRegisterMap[registerInfo.registerRow][registerColumn + columnIndex] = true;
270             }
271         }
272     }
273 }
274 
collectAndPackUserVaryings(gl::InfoLog & infoLog,const Program::MergedVaryings & mergedVaryings,const std::vector<std::string> & tfVaryings)275 bool VaryingPacking::collectAndPackUserVaryings(gl::InfoLog &infoLog,
276                                                 const Program::MergedVaryings &mergedVaryings,
277                                                 const std::vector<std::string> &tfVaryings)
278 {
279     std::set<std::string> uniqueFullNames;
280     mPackedVaryings.clear();
281 
282     for (const auto &ref : mergedVaryings)
283     {
284         const sh::Varying *input  = ref.second.vertex;
285         const sh::Varying *output = ref.second.fragment;
286 
287         // Only pack statically used varyings that have a matched input or output, plus special
288         // builtins.
289         if (((input && output) || (output && output->isBuiltIn())) && output->staticUse)
290         {
291             // Will get the vertex shader interpolation by default.
292             auto interpolation = ref.second.get()->interpolation;
293 
294             // Note that we lose the vertex shader static use information here. The data for the
295             // variable is taken from the fragment shader.
296             if (output->isStruct())
297             {
298                 ASSERT(!output->isArray());
299                 for (const auto &field : output->fields)
300                 {
301                     ASSERT(!field.isStruct() && !field.isArray());
302                     mPackedVaryings.push_back(PackedVarying(field, interpolation, output->name));
303                     uniqueFullNames.insert(mPackedVaryings.back().nameWithArrayIndex());
304                 }
305             }
306             else
307             {
308                 mPackedVaryings.push_back(PackedVarying(*output, interpolation));
309                 uniqueFullNames.insert(mPackedVaryings.back().nameWithArrayIndex());
310             }
311             continue;
312         }
313 
314         // Keep Transform FB varyings in the merged list always.
315         if (!input)
316         {
317             continue;
318         }
319 
320         for (const std::string &tfVarying : tfVaryings)
321         {
322             std::vector<unsigned int> subscripts;
323             std::string baseName = ParseResourceName(tfVarying, &subscripts);
324             size_t subscript     = GL_INVALID_INDEX;
325             if (!subscripts.empty())
326             {
327                 subscript = subscripts.back();
328             }
329             // Already packed for fragment shader.
330             if (uniqueFullNames.count(tfVarying) > 0 || uniqueFullNames.count(baseName) > 0)
331             {
332                 continue;
333             }
334             // Array as a whole and array element conflict has already been checked in
335             // linkValidateTransformFeedback.
336             if (baseName == input->name)
337             {
338                 // Transform feedback for varying structs is underspecified.
339                 // See Khronos bug 9856.
340                 // TODO(jmadill): Figure out how to be spec-compliant here.
341                 if (!input->isStruct() && tfVarying.compare(0, 3, "gl_") != 0)
342                 {
343                     mPackedVaryings.push_back(PackedVarying(*input, input->interpolation));
344                     mPackedVaryings.back().vertexOnly = true;
345                     mPackedVaryings.back().arrayIndex = static_cast<GLuint>(subscript);
346                     uniqueFullNames.insert(tfVarying);
347                 }
348                 // Continue to match next array element for 'input' if the current match is array
349                 // element.
350                 if (subscript == GL_INVALID_INDEX)
351                 {
352                     break;
353                 }
354             }
355         }
356     }
357 
358     std::sort(mPackedVaryings.begin(), mPackedVaryings.end(), ComparePackedVarying);
359 
360     return packUserVaryings(infoLog, mPackedVaryings, tfVaryings);
361 }
362 
363 // See comment on packVarying.
packUserVaryings(gl::InfoLog & infoLog,const std::vector<PackedVarying> & packedVaryings,const std::vector<std::string> & transformFeedbackVaryings)364 bool VaryingPacking::packUserVaryings(gl::InfoLog &infoLog,
365                                       const std::vector<PackedVarying> &packedVaryings,
366                                       const std::vector<std::string> &transformFeedbackVaryings)
367 {
368 
369     // "Variables are packed into the registers one at a time so that they each occupy a contiguous
370     // subrectangle. No splitting of variables is permitted."
371     for (const PackedVarying &packedVarying : packedVaryings)
372     {
373         if (!packVarying(packedVarying))
374         {
375             infoLog << "Could not pack varying " << packedVarying.nameWithArrayIndex();
376             return false;
377         }
378     }
379 
380     // Sort the packed register list
381     std::sort(mRegisterList.begin(), mRegisterList.end());
382 
383     // Assign semantic indices
384     for (unsigned int semanticIndex = 0;
385          semanticIndex < static_cast<unsigned int>(mRegisterList.size()); ++semanticIndex)
386     {
387         mRegisterList[semanticIndex].semanticIndex = semanticIndex;
388     }
389 
390     return true;
391 }
392 
getRegisterCount() const393 unsigned int VaryingPacking::getRegisterCount() const
394 {
395     unsigned int count = 0;
396 
397     for (const Register &reg : mRegisterMap)
398     {
399         if (reg.data[0] || reg.data[1] || reg.data[2] || reg.data[3])
400         {
401             ++count;
402         }
403     }
404 
405     return count;
406 }
407 
}  // namespace gl
409