1 /*
2 * Copyright 2020 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "include/private/GrTypesPriv.h" // GrAlignTo
9 #include "src/core/SkUtils.h" // sk_unaligned_load
10 #include "src/sksl/SkSLByteCode.h"
11 #include "src/sksl/SkSLExternalValue.h"
12
13 #include <stack>
14
15 #ifndef SKSL_INTERPRETER
16 #define SKSL_INTERPRETER
17
18 namespace SkSL {
19
// GCC and Clang support the "labels as values" extension which we need to implement the interpreter
// using threaded code. Otherwise, we fall back to using a switch statement in a for loop.
#if defined(__GNUC__) || defined(__clang__)
    #define SKSL_THREADED_CODE
#endif

#ifdef SKSL_THREADED_CODE
    // In threaded mode, each opcode handler is introduced by a plain goto label.
    #define LABEL(name) name:
    #ifdef TRACE
        // Tracing build: print a disassembly of the next instruction before dispatching to it.
        // trace_ip is a local copy so that disassemble() does not advance the real ip.
        #define NEXT() \
            {                                                \
                const uint8_t* trace_ip = ip;                \
                printf("%d: ", (int) (trace_ip - code));     \
                disassemble(&trace_ip);                      \
            }                                                \
            goto *labels[(int) read<ByteCode::Instruction>(&ip)]
    #else
        // Dispatch: jump straight to the handler for the next opcode ("threaded code").
        #define NEXT() goto *labels[(int) read<ByteCode::Instruction>(&ip)]
    #endif
#else
    // Portable fallback: handlers are cases of a switch inside a loop.
    #define LABEL(name) case ByteCode::Instruction::name:
    #define NEXT() continue
#endif

// If you trip this assert, it means that the order of the opcodes listed in ByteCodeInstruction
// does not match the order of the opcodes listed in the 'labels' array in innerRun().
#define CHECK_LABEL(name) \
    SkASSERT(labels[(int) ByteCode::Instruction::name] == &&name)
48
49 template<typename T>
read(const uint8_t ** ip)50 static T read(const uint8_t** ip) {
51 *ip += sizeof(T);
52 return sk_unaligned_load<T>(*ip - sizeof(T));
53 }
54
// Emits a handler for a binary opcode: reads a target register and two source
// registers, then applies 'op' to the full vector (all lanes, unmasked).
#define BINARY_OP(inst, src, result, op)                                  \
    LABEL(inst) {                                                         \
        ByteCode::Register target = read<ByteCode::Register>(&ip);        \
        ByteCode::Register src1 = read<ByteCode::Register>(&ip);          \
        ByteCode::Register src2 = read<ByteCode::Register>(&ip);          \
        fRegisters[target.fIndex].result = fRegisters[src1.fIndex].src op \
                                           fRegisters[src2.fIndex].src;   \
        NEXT();                                                           \
    }

// As BINARY_OP, but only lanes whose bit in the current execution mask is set
// are written; inactive lanes keep their previous register contents.
#define MASKED_BINARY_OP(inst, src, result, op)                                       \
    LABEL(inst) {                                                                     \
        ByteCode::Register target = read<ByteCode::Register>(&ip);                    \
        ByteCode::Register src1 = read<ByteCode::Register>(&ip);                      \
        ByteCode::Register src2 = read<ByteCode::Register>(&ip);                      \
        auto m = mask();                                                              \
        for (int i = 0; i < width; ++i) {                                             \
            if (m[i]) {                                                               \
                fRegisters[target.fIndex].result[i] = fRegisters[src1.fIndex].src[i] op \
                                                      fRegisters[src2.fIndex].src[i]; \
            }                                                                         \
        }                                                                             \
        NEXT();                                                                       \
    }

// As MASKED_BINARY_OP, and additionally emits an <inst>N variant which applies the
// operation to 'count' consecutive registers (a multi-slot value such as a float4).
#define MASKED_VECTOR_BINARY_OP(inst, src, result, op)                                  \
    LABEL(inst) {                                                                       \
        ByteCode::Register target = read<ByteCode::Register>(&ip);                      \
        ByteCode::Register src1 = read<ByteCode::Register>(&ip);                        \
        ByteCode::Register src2 = read<ByteCode::Register>(&ip);                        \
        auto m = mask();                                                                \
        for (int i = 0; i < width; ++i) {                                               \
            if (m[i]) {                                                                 \
                fRegisters[target.fIndex].result[i] = fRegisters[src1.fIndex].src[i] op \
                                                      fRegisters[src2.fIndex].src[i];   \
            }                                                                           \
        }                                                                               \
        NEXT();                                                                         \
    }                                                                                   \
    LABEL(inst ## N) {                                                                  \
        uint8_t count = read<uint8_t>(&ip);                                             \
        ByteCode::Register target = read<ByteCode::Register>(&ip);                      \
        ByteCode::Register src1 = read<ByteCode::Register>(&ip);                        \
        ByteCode::Register src2 = read<ByteCode::Register>(&ip);                        \
        auto m = mask();                                                                \
        for (int i = 0; i < width; ++i) {                                               \
            if (m[i]) {                                                                 \
                for (int j = 0; j < count; ++j) {                                       \
                    fRegisters[target.fIndex + j].result[i] = fRegisters[src1.fIndex + j].src[i] \
                                                           op fRegisters[src2.fIndex + j].src[i]; \
                }                                                                       \
            }                                                                           \
        }                                                                               \
        NEXT();                                                                         \
    }

// Unmasked binary opcode plus an <inst>N variant over 'count' consecutive registers.
#define VECTOR_BINARY_OP(inst, src, result, op)                                     \
    LABEL(inst) {                                                                   \
        ByteCode::Register target = read<ByteCode::Register>(&ip);                  \
        ByteCode::Register src1 = read<ByteCode::Register>(&ip);                    \
        ByteCode::Register src2 = read<ByteCode::Register>(&ip);                    \
        fRegisters[target.fIndex].result = fRegisters[src1.fIndex].src op           \
                                           fRegisters[src2.fIndex].src;             \
        NEXT();                                                                     \
    }                                                                               \
    LABEL(inst ## N) {                                                              \
        uint8_t count = read<uint8_t>(&ip);                                         \
        ByteCode::Register target = read<ByteCode::Register>(&ip);                  \
        ByteCode::Register src1 = read<ByteCode::Register>(&ip);                    \
        ByteCode::Register src2 = read<ByteCode::Register>(&ip);                    \
        for (int i = 0; i < count; ++i) {                                           \
            fRegisters[target.fIndex + i].result = fRegisters[src1.fIndex + i].src op \
                                                   fRegisters[src2.fIndex + i].src; \
        }                                                                           \
        NEXT();                                                                     \
    }

// Unary opcode applying scalar function 'fn' to each float lane of the source register.
#define VECTOR_UNARY_FN(inst, fn)                                                \
    LABEL(inst) {                                                                \
        ByteCode::Register target = read<ByteCode::Register>(&ip);               \
        ByteCode::Register src = read<ByteCode::Register>(&ip);                  \
        for (int i = 0; i < width; ++i) {                                        \
            fRegisters[target.fIndex].fFloat[i] = fn(fRegisters[src.fIndex].fFloat[i]); \
        }                                                                        \
        NEXT();                                                                  \
    }
141
// Disassembler helpers: each macro expands to switch case(s) for disassemble().

// Opcode with no operands.
#define DISASSEMBLE_0(inst, name) \
    case ByteCode::Instruction::inst: printf(name "\n"); break;

// Opcode with a single register operand.
#define DISASSEMBLE_1(inst, name)                                  \
    case ByteCode::Instruction::inst:                              \
        printf(name " $%d\n", read<ByteCode::Register>(ip).fIndex); \
        break;

// Unary opcode: source register -> target register.
#define DISASSEMBLE_UNARY(inst, name)                              \
    case ByteCode::Instruction::inst: {                            \
        ByteCode::Register target = read<ByteCode::Register>(ip);  \
        ByteCode::Register src = read<ByteCode::Register>(ip);     \
        printf(name " $%d -> $%d\n", src.fIndex, target.fIndex);   \
        break;                                                     \
    }

// Unary opcode plus its <inst>N multi-register variant (count byte comes first).
#define DISASSEMBLE_VECTOR_UNARY(inst, name)                           \
    case ByteCode::Instruction::inst: {                                \
        ByteCode::Register target = read<ByteCode::Register>(ip);      \
        ByteCode::Register src = read<ByteCode::Register>(ip);         \
        printf(name " $%d -> $%d\n", src.fIndex, target.fIndex);       \
        break;                                                         \
    }                                                                  \
    case ByteCode::Instruction::inst ## N: {                           \
        uint8_t count = read<uint8_t>(ip);                             \
        ByteCode::Register target = read<ByteCode::Register>(ip);      \
        ByteCode::Register src = read<ByteCode::Register>(ip);         \
        printf(name "%d $%d -> $%d\n", count, src.fIndex, target.fIndex); \
        break;                                                         \
    }

// Binary opcode: two source registers -> target register.
#define DISASSEMBLE_BINARY(inst, name)                                             \
    case ByteCode::Instruction::inst: {                                            \
        ByteCode::Register target = read<ByteCode::Register>(ip);                  \
        ByteCode::Register src1 = read<ByteCode::Register>(ip);                    \
        ByteCode::Register src2 = read<ByteCode::Register>(ip);                    \
        printf(name " $%d, $%d -> $%d\n", src1.fIndex, src2.fIndex, target.fIndex); \
        break;                                                                     \
    }

// Binary opcode plus its <inst>N multi-register variant (count byte comes first).
#define DISASSEMBLE_VECTOR_BINARY(inst, name)                                      \
    case ByteCode::Instruction::inst: {                                            \
        ByteCode::Register target = read<ByteCode::Register>(ip);                  \
        ByteCode::Register src1 = read<ByteCode::Register>(ip);                    \
        ByteCode::Register src2 = read<ByteCode::Register>(ip);                    \
        printf(name " $%d, $%d -> $%d\n", src1.fIndex, src2.fIndex, target.fIndex); \
        break;                                                                     \
    }                                                                              \
    case ByteCode::Instruction::inst ## N: {                                       \
        uint8_t count = read<uint8_t>(ip);                                         \
        ByteCode::Register target = read<ByteCode::Register>(ip);                  \
        ByteCode::Register src1 = read<ByteCode::Register>(ip);                    \
        ByteCode::Register src2 = read<ByteCode::Register>(ip);                    \
        printf(name "%d $%d, $%d -> $%d\n", count, src1.fIndex, src2.fIndex, target.fIndex); \
        break;                                                                     \
    }
198
199 /**
200 * Operates on vectors of the specified width, so creating an Interpreter<16> means that all inputs,
201 * outputs, and internal calculations will be 16-wide vectors.
202 */
203 template<int width>
204 class Interpreter {
205 public:
206 using Vector = ByteCode::Vector<width>;
207 using VectorI = skvx::Vec<width, int32_t>;
208 using VectorF = skvx::Vec<width, float>;
209
    /** Takes ownership of 'code' and allocates the interpreter's memory/register store. */
    Interpreter(std::unique_ptr<ByteCode> code)
        : fCode(std::move(code)) {
        // C++ doesn't guarantee proper alignment of naively-allocated vectors, so we can't have the
        // registers and memory directly as fields of this object without jumping through some hoops
        // during Interpreter allocation and deallocation. We simplify this by having the backing
        // store be a separate allocation, jumping through the hoops ourselves rather than require
        // Interpreter's clients to be aware of alignment.
        // Ideally, we could use std::aligned_alloc here, but as of this writing it is not available
        // on some compilers despite claiming to support C++17.
        // The "+ 1" reserves one extra Vector of slack so the base pointer can be rounded up
        // to alignof(Vector) without running off the end of the allocation.
        // NOTE(review): the calloc result is not null-checked — confirm OOM is acceptable here.
        fBackingStore = calloc(sizeof(Vector), MEMORY_SIZE + REGISTER_COUNT + 1);
        fMemory = (Vector*) GrAlignTo((size_t) fBackingStore, alignof(Vector));
        fRegisters = fMemory + MEMORY_SIZE;
    }
223
~Interpreter()224 ~Interpreter() {
225 free(fBackingStore);
226 }
227
setUniforms(const float uniforms[])228 void setUniforms(const float uniforms[]) {
229 for (int i = 0; i < fCode->getUniformSlotCount(); ++i) {
230 fMemory[fCode->getGlobalSlotCount() + i].fFloat = VectorF(uniforms[i]);
231 }
232 }
233
234 /**
235 * Returns true on success and stores a pointer to the first slot of the result into outResult.
236 * This pointer is only guaranteed to be valid until the next run() call.
237 */
    bool run(const ByteCodeFunction* f, Vector args[], Vector** outResult) {
        SkASSERT(f);
        // Mask stacks: 255 in every byte makes each lane's mask all-ones ("lane active").
        // Only slot 0 needs priming here; deeper entries are pushed during execution.
        VectorI condStack[MASK_STACK_SIZE];
        memset(&condStack[0], 255, sizeof(condStack[0]));
        VectorI maskStack[MASK_STACK_SIZE];
        memset(&maskStack[0], 255, sizeof(maskStack[0]));
        VectorI loopStack[LOOP_STACK_SIZE];
        memset(&loopStack[0], 255, sizeof(loopStack[0]));
        // The continue mask starts with no lanes flagged.
        VectorI continueStack[LOOP_STACK_SIZE];
        memset(&continueStack[0], 0, sizeof(continueStack[0]));
        // The function's frame is carved downward from the top of interpreter memory,
        // with parameters occupying the lowest slots of the frame.
        Vector* stack = fMemory + MEMORY_SIZE;
        int stackCount = f->fStackSlotCount + f->fParameterSlotCount;
        stack -= stackCount;
        if (f->fParameterSlotCount) {
            memcpy(stack, args, f->fParameterSlotCount * sizeof(Vector));
        }
        Context context(fMemory, stack, condStack, maskStack, loopStack, continueStack);
        if (this->innerRun(f, context, 0, outResult)) {
            // Copy 'out' parameters back into the caller's args array, slot by slot.
            int slot = 0;
            for (const auto& p : f->fParameters) {
                if (p.fIsOutParameter) {
                    memcpy(&args[slot], &stack[slot], p.fSlotCount * sizeof(Vector));
                }
                slot += p.fSlotCount;
            }
            return true;
        }
        return false;
    }
267
268 /**
269 * Invokes the specified function with the given arguments, 'count' times. 'args' and
270 * 'outResult' are accepted and returned in structure-of-arrays form:
271 * args[0] points to an array of N values, the first argument for each invocation
272 * ...
273 * args[argCount - 1] points to an array of N values, the last argument for each invocation
274 *
275 * All values in 'args', 'outResult', and 'uniforms' are 32-bit values (typically floats,
276 * but possibly int32_t or uint32_t, depending on the types used in the SkSL).
277 * Any 'out' or 'inout' parameters will result in the 'args' array being modified.
278 */
    bool runStriped(const ByteCodeFunction* f, int count, float* args[],
                    float* outResult[] = nullptr) {
        SkASSERT(f);
        // The function's frame is carved downward from the top of interpreter memory.
        Vector* stack = fMemory + MEMORY_SIZE;
        int stackCount = f->fStackSlotCount + f->fParameterSlotCount;
        stack -= stackCount;
        VectorI condStack[MASK_STACK_SIZE];
        VectorI maskStack[MASK_STACK_SIZE];
        VectorI loopStack[LOOP_STACK_SIZE];
        VectorI continueStack[LOOP_STACK_SIZE];
        // NOTE(review): unlike run(), condStack is never initialized here — confirm that
        // execution always writes a condition mask before reading one.
        Vector* innerResult = nullptr;
        Context context(fMemory, stack, condStack, maskStack, loopStack, continueStack);
        // Process the 'count' invocations in stripes of 'width' lanes at a time.
        for (int i = 0; i < count; i += width) {
            int lanes = std::min(width, count - i);
            size_t size = lanes * sizeof(float);
            // Enable every lane, then switch off the tail lanes past 'count'.
            memset(&maskStack[0], 255, sizeof(maskStack[0]));
            memset(&loopStack[0], 255, sizeof(loopStack[0]));
            for (int j = lanes; j < width; ++j) {
                maskStack[0][j] = 0;
                loopStack[0][j] = 0;
            }
            memset(&continueStack[0], 0, sizeof(continueStack[0]));
            // Gather this stripe's arguments (structure-of-arrays layout) onto the stack.
            for (int j = 0; j < f->fParameterSlotCount; ++j) {
                memcpy(stack + j, &args[j][i], size);
            }
            if (!this->innerRun(f, context, i, &innerResult)) {
                return false;
            }
            // Scatter 'out'/'inout' parameters back into the caller's arrays.
            int slot = 0;
            for (const auto& p : f->fParameters) {
                if (p.fIsOutParameter) {
                    for (int j = 0; j < p.fSlotCount; ++j) {
                        memcpy(&args[slot + j][i], stack + slot + j, size);
                    }
                }
                slot += p.fSlotCount;
            }
            // Scatter the return value, one slot per output array.
            if (outResult) {
                for (int j = 0; j < f->fReturnSlotCount; ++j) {
                    memcpy(&outResult[j][i], &innerResult[j], size);
                }
            }
        }
        return true;
    }
324
getCode()325 const ByteCode& getCode() {
326 return *fCode;
327 }
328
329 private:
    // Number of Vector slots reserved for registers in the backing store.
    static constexpr size_t REGISTER_COUNT = 1024;

    // Number of Vector slots of general memory (globals, uniforms, and the stack).
    static constexpr size_t MEMORY_SIZE = 1024;

    // Maximum depth of the condition and execution mask stacks.
    static constexpr size_t MASK_STACK_SIZE = 64;

    // Maximum depth of the loop and continue mask stacks (loop nesting limit).
    static constexpr size_t LOOP_STACK_SIZE = 16;
337
    // Saved caller state for a function call, kept on Context::fCallStack so execution
    // can resume in the caller when the callee returns.
    // NOTE(review): field usage (restore order, who owns fReturnValue) is defined by
    // innerRun(), which is not visible here — verify against that implementation.
    struct StackFrame {
        StackFrame(const ByteCodeFunction* function, const uint8_t* ip, const int stackSlotCount,
                   Vector* parameters, Vector* returnValue)
            : fFunction(function)
            , fIP(ip)
            , fStackSlotCount(stackSlotCount)
            , fParameters(parameters)
            , fReturnValue(returnValue) {}

        const ByteCodeFunction* fFunction;   // function to resume when the callee returns
        const uint8_t* fIP;                  // caller's saved instruction pointer
        const int fStackSlotCount;           // caller's frame size, in Vector slots
        Vector* fParameters;                 // parameter slots passed to the callee
        Vector* fReturnValue;                // destination for the callee's return value
    };
353
    // Bundles all mutable per-invocation state so it can be handed to innerRun as a unit.
    struct Context {
        Context(Vector* memory, Vector* stack, VectorI* condStack, VectorI* maskStack,
                VectorI* loopStack, VectorI* continueStack)
            : fMemory(memory)
            , fStack(stack)
            , fCondStack(condStack)
            , fMaskStack(maskStack)
            , fLoopStack(loopStack)
            , fContinueStack(continueStack) {}

        Vector* fMemory;          // interpreter memory (globals + uniforms + stack region)
        Vector* fStack;           // base of the current function's stack frame
        VectorI* fCondStack;      // per-lane condition masks
        VectorI* fMaskStack;      // per-lane execution masks
        VectorI* fLoopStack;      // per-lane loop masks
        VectorI* fContinueStack;  // per-lane continue masks (cleared at frame setup)
        std::stack<StackFrame> fCallStack;  // frames of enclosing, in-progress calls
    };
372
373 // $x = register
374 // @x = memory cell
375 // &x = parameter
disassemble(const uint8_t ** ip)376 void disassemble(const uint8_t** ip) {
377 ByteCode::Instruction inst = read<ByteCode::Instruction>(ip);
378 switch (inst) {
379 DISASSEMBLE_VECTOR_BINARY(kAddF, "addF")
380 DISASSEMBLE_VECTOR_BINARY(kAddI, "addI")
381 DISASSEMBLE_BINARY(kAnd, "and")
382 DISASSEMBLE_BINARY(kCompareEQF, "compare eqF")
383 DISASSEMBLE_BINARY(kCompareEQI, "compare eqI")
384 DISASSEMBLE_BINARY(kCompareNEQF, "compare neqF")
385 DISASSEMBLE_BINARY(kCompareNEQI, "compare neqI")
386 DISASSEMBLE_BINARY(kCompareGTF, "compare gtF")
387 DISASSEMBLE_BINARY(kCompareGTS, "compare gtS")
388 DISASSEMBLE_BINARY(kCompareGTU, "compare gtU")
389 DISASSEMBLE_BINARY(kCompareGTEQF, "compare gteqF")
390 DISASSEMBLE_BINARY(kCompareGTEQS, "compare gteqS")
391 DISASSEMBLE_BINARY(kCompareGTEQU, "compare gteqU")
392 DISASSEMBLE_BINARY(kCompareLTF, "compare ltF")
393 DISASSEMBLE_BINARY(kCompareLTS, "compare ltS")
394 DISASSEMBLE_BINARY(kCompareLTU, "compare ltU")
395 DISASSEMBLE_BINARY(kCompareLTEQF, "compare lteqF")
396 DISASSEMBLE_BINARY(kCompareLTEQS, "compare lteqS")
397 DISASSEMBLE_BINARY(kCompareLTEQU, "compare lteqU")
398 DISASSEMBLE_VECTOR_BINARY(kSubtractF, "subF")
399 DISASSEMBLE_VECTOR_BINARY(kSubtractI, "subI")
400 DISASSEMBLE_VECTOR_BINARY(kDivideF, "divF")
401 DISASSEMBLE_VECTOR_BINARY(kDivideS, "divS")
402 DISASSEMBLE_VECTOR_BINARY(kDivideU, "divU")
403 DISASSEMBLE_VECTOR_BINARY(kRemainderS, "remS")
404 DISASSEMBLE_VECTOR_BINARY(kRemainderU, "remU")
405 DISASSEMBLE_VECTOR_BINARY(kRemainderF, "remF")
406 DISASSEMBLE_VECTOR_BINARY(kMultiplyF, "mulF")
407 DISASSEMBLE_VECTOR_BINARY(kMultiplyI, "mulI")
408 DISASSEMBLE_BINARY(kOr, "or")
409 DISASSEMBLE_BINARY(kXor, "xor")
410 DISASSEMBLE_0(kNop, "nop")
411 case ByteCode::Instruction::kBoundsCheck: {
412 ByteCode::Register r = read<ByteCode::Register>(ip);
413 int length = read<int>(ip);
414 printf("boundsCheck 0 <= $%d < %d\n", r.fIndex, length);
415 break;
416 }
417 case ByteCode::Instruction::kBranch:
418 printf("branch %d\n", read<ByteCode::Pointer>(ip).fAddress);
419 break;
420 case ByteCode::Instruction::kBranchIfAllFalse:
421 printf("branchIfAllFalse %d\n", read<ByteCode::Pointer>(ip).fAddress);
422 break;
423 DISASSEMBLE_0(kBreak, "break")
424 case ByteCode::Instruction::kCall: {
425 ByteCode::Register target = read<ByteCode::Register>(ip);
426 uint8_t idx = read<uint8_t>(ip);
427 ByteCode::Register args = read<ByteCode::Register>(ip);
428 ByteCodeFunction* f = fCode->fFunctions[idx].get();
429 printf("call %s($%d...) -> $%d", f->fName.c_str(), args.fIndex, target.fIndex);
430 printf("\n");
431 break;
432 }
433 case ByteCode::Instruction::kCallExternal: {
434 ByteCode::Register target = read<ByteCode::Register>(ip);
435 uint8_t idx = read<uint8_t>(ip);
436 uint8_t targetCount = read<uint8_t>(ip);
437 ByteCode::Register args = read<ByteCode::Register>(ip);
438 uint8_t argCount = read<uint8_t>(ip);
439 ExternalValue* ev = fCode->fExternalValues[idx];
440 printf("callExternal %s($%d(%d)...) -> $%d(%d)", String(ev->fName).c_str(),
441 args.fIndex, argCount, target.fIndex, targetCount);
442 printf("\n");
443 break;
444 }
445 DISASSEMBLE_0(kContinue, "continue")
446 DISASSEMBLE_UNARY(kCopy, "copy")
447 DISASSEMBLE_UNARY(kCos, "cos")
448 DISASSEMBLE_UNARY(kFloatToSigned, "FtoS")
449 DISASSEMBLE_UNARY(kFloatToUnsigned, "FtoU")
450 case ByteCode::Instruction::kImmediate: {
451 ByteCode::Register target = read<ByteCode::Register>(ip);
452 ByteCode::Immediate src = read<ByteCode::Immediate>(ip);
453 printf("immediate (%d | %f) -> $%d\n", src.fInt, src.fFloat, target.fIndex);
454 break;
455 }
456 DISASSEMBLE_UNARY(kInverse2x2, "inverse2x2")
457 DISASSEMBLE_UNARY(kInverse3x3, "inverse3x3")
458 DISASSEMBLE_UNARY(kInverse4x4, "inverse4x4")
459 DISASSEMBLE_VECTOR_UNARY(kLoad, "load")
460 case ByteCode::Instruction::kLoadDirect: {
461 ByteCode::Register target = read<ByteCode::Register>(ip);
462 ByteCode::Pointer src = read<ByteCode::Pointer>(ip);
463 printf("loadDirect @%d -> $%d\n", src.fAddress, target.fIndex);
464 break;
465 }
466 case ByteCode::Instruction::kLoadDirectN: {
467 uint8_t count = read<uint8_t>(ip);
468 ByteCode::Register target = read<ByteCode::Register>(ip);
469 ByteCode::Pointer src = read<ByteCode::Pointer>(ip);
470 printf("loadDirect%d @%d -> $%d\n", count, src.fAddress, target.fIndex);
471 break;
472 }
473 DISASSEMBLE_VECTOR_UNARY(kLoadParameter, "loadParameter")
474 case ByteCode::Instruction::kLoadParameterDirect: {
475 ByteCode::Register target = read<ByteCode::Register>(ip);
476 ByteCode::Pointer src = read<ByteCode::Pointer>(ip);
477 printf("loadParameterDirect &%d -> $%d\n", src.fAddress, target.fIndex);
478 break;
479 }
480 case ByteCode::Instruction::kLoadParameterDirectN: {
481 uint8_t count = read<uint8_t>(ip);
482 ByteCode::Register target = read<ByteCode::Register>(ip);
483 ByteCode::Pointer src = read<ByteCode::Pointer>(ip);
484 printf("loadParameterDirect%d &%d -> $%d\n", count, src.fAddress, target.fIndex);
485 break;
486 }
487 DISASSEMBLE_VECTOR_UNARY(kLoadStack, "loadStack")
488 case ByteCode::Instruction::kLoadStackDirect: {
489 ByteCode::Register target = read<ByteCode::Register>(ip);
490 ByteCode::Pointer src = read<ByteCode::Pointer>(ip);
491 printf("loadStackDirect @%d -> $%d\n", src.fAddress, target.fIndex);
492 break;
493 }
494 case ByteCode::Instruction::kLoadStackDirectN: {
495 uint8_t count = read<uint8_t>(ip);
496 ByteCode::Register target = read<ByteCode::Register>(ip);
497 ByteCode::Pointer src = read<ByteCode::Pointer>(ip);
498 printf("loadStackDirect%d @%d -> $%d\n", count, src.fAddress, target.fIndex);
499 break;
500 }
501 DISASSEMBLE_0(kLoopBegin, "loopBegin")
502 DISASSEMBLE_0(kLoopEnd, "loopEnd")
503 DISASSEMBLE_1(kLoopMask, "loopMask")
504 DISASSEMBLE_0(kLoopNext, "loopNext")
505 DISASSEMBLE_0(kMaskNegate, "maskNegate")
506 DISASSEMBLE_0(kMaskPop, "maskPop")
507 DISASSEMBLE_1(kMaskPush, "maskPush")
508 case ByteCode::Instruction::kMatrixMultiply: {
509 ByteCode::Register target = read<ByteCode::Register>(ip);
510 ByteCode::Register left = read<ByteCode::Register>(ip);
511 ByteCode::Register right = read<ByteCode::Register>(ip);
512 uint8_t leftColsAndRightRows = read<uint8_t>(ip);
513 uint8_t leftRows = read<uint8_t>(ip);
514 uint8_t rightColumns = read<uint8_t>(ip);
515 printf("matrixMultiply $%d, $%d, %d, %d, %d -> $%d\n", left.fIndex, right.fIndex,
516 leftColsAndRightRows, leftRows, rightColumns, target.fIndex);
517 break;
518 }
519 case ByteCode::Instruction::kMatrixToMatrix: {
520 ByteCode::Register target = read<ByteCode::Register>(ip);
521 ByteCode::Register src = read<ByteCode::Register>(ip);
522 uint8_t srcColumns = read<uint8_t>(ip);
523 uint8_t srcRows = read<uint8_t>(ip);
524 uint8_t dstColumns = read<uint8_t>(ip);
525 uint8_t dstRows = read<uint8_t>(ip);
526 printf("matrixToMatrix $%d, %dx%d to %dx%d -> $%d\n", src.fIndex, srcColumns,
527 srcRows, dstColumns, dstRows, target.fIndex);
528 break;
529 }
530 DISASSEMBLE_UNARY(kNegateF, "negateF")
531 DISASSEMBLE_UNARY(kNegateS, "negateS")
532 DISASSEMBLE_UNARY(kNot, "not")
533 case ByteCode::Instruction::kReadExternal: {
534 ByteCode::Register target = read<ByteCode::Register>(ip);
535 uint8_t count = read<uint8_t>(ip);
536 uint8_t index = read<uint8_t>(ip);
537 printf("readExternal %d, %d -> $%d\n", count, index, target.fIndex);
538 break;
539 }
540 DISASSEMBLE_1(kPrint, "print")
541 DISASSEMBLE_0(kReturn, "return")
542 DISASSEMBLE_1(kReturnValue, "returnValue")
543 case ByteCode::Instruction::kScalarToMatrix: {
544 ByteCode::Register target = read<ByteCode::Register>(ip);
545 ByteCode::Register src = read<ByteCode::Register>(ip);
546 uint8_t columns = read<uint8_t>(ip);
547 uint8_t rows = read<uint8_t>(ip);
548 printf("scalarToMatrix $%d, %dx%d -> $%d\n", src.fIndex, columns, rows,
549 target.fIndex);
550 break;
551 }
552 case ByteCode::Instruction::kSelect: {
553 ByteCode::Register target = read<ByteCode::Register>(ip);
554 ByteCode::Register test = read<ByteCode::Register>(ip);
555 ByteCode::Register src1 = read<ByteCode::Register>(ip);
556 ByteCode::Register src2 = read<ByteCode::Register>(ip);
557 printf("select $%d, $%d, $%d -> %d\n", test.fIndex, src1.fIndex, src2.fIndex,
558 target.fIndex);
559 break;
560 }
561 DISASSEMBLE_BINARY(kShiftLeft, "shiftLeft")
562 DISASSEMBLE_BINARY(kShiftRightS, "shiftRightS")
563 DISASSEMBLE_BINARY(kShiftRightU, "shiftRightU")
564 DISASSEMBLE_UNARY(kSignedToFloat, "signedToFloat")
565 DISASSEMBLE_UNARY(kSin, "sin")
566 case ByteCode::Instruction::kSplat: {
567 uint8_t count = read<uint8_t>(ip);
568 ByteCode::Pointer target = read<ByteCode::Pointer>(ip);
569 ByteCode::Register src = read<ByteCode::Register>(ip);
570 printf("splat%d $%d -> @%d\n", count, src.fIndex, target.fAddress);
571 break;
572 }
573 DISASSEMBLE_UNARY(kSqrt, "sqrt")
574 DISASSEMBLE_VECTOR_UNARY(kStore, "store")
575 case ByteCode::Instruction::kStoreDirect: {
576 ByteCode::Pointer target = read<ByteCode::Pointer>(ip);
577 ByteCode::Register src = read<ByteCode::Register>(ip);
578 printf("store $%d -> @%d\n", src.fIndex, target.fAddress);
579 break;
580 }
581 case ByteCode::Instruction::kStoreDirectN: {
582 uint8_t count = read<uint8_t>(ip);
583 ByteCode::Pointer target = read<ByteCode::Pointer>(ip);
584 ByteCode::Register src = read<ByteCode::Register>(ip);
585 printf("store%d $%d -> @%d\n", count, src.fIndex, target.fAddress);
586 break;
587 }
588 DISASSEMBLE_VECTOR_UNARY(kStoreParameter, "storeParameter")
589 case ByteCode::Instruction::kStoreParameterDirect: {
590 ByteCode::Pointer target = read<ByteCode::Pointer>(ip);
591 ByteCode::Register src = read<ByteCode::Register>(ip);
592 printf("storeParameterDirect $%d -> &%d\n", src.fIndex, target.fAddress);
593 break;
594 }
595 case ByteCode::Instruction::kStoreParameterDirectN: {
596 uint8_t count = read<uint8_t>(ip);
597 ByteCode::Pointer target = read<ByteCode::Pointer>(ip);
598 ByteCode::Register src = read<ByteCode::Register>(ip);
599 printf("storeParameterDirect%d $%d -> &%d\n", count, src.fIndex, target.fAddress);
600 break;
601 }
602 DISASSEMBLE_VECTOR_UNARY(kStoreStack, "storeStack")
603 case ByteCode::Instruction::kStoreStackDirect: {
604 ByteCode::Pointer target = read<ByteCode::Pointer>(ip);
605 ByteCode::Register src = read<ByteCode::Register>(ip);
606 printf("storeStackDirect $%d -> @%d\n", src.fIndex, target.fAddress);
607 break;
608 }
609 case ByteCode::Instruction::kStoreStackDirectN: {
610 uint8_t count = read<uint8_t>(ip);
611 ByteCode::Pointer target = read<ByteCode::Pointer>(ip);
612 ByteCode::Register src = read<ByteCode::Register>(ip);
613 printf("storeStackDirect%d $%d -> @%d\n", count, src.fIndex, target.fAddress);
614 break;
615 }
616 DISASSEMBLE_UNARY(kTan, "tan")
617 DISASSEMBLE_UNARY(kUnsignedToFloat, "unsignedToFloat")
618 case ByteCode::Instruction::kWriteExternal: {
619 uint8_t index = read<uint8_t>(ip);
620 uint8_t count = read<uint8_t>(ip);
621 ByteCode::Register src = read<ByteCode::Register>(ip);
622 printf("writeExternal $%d, %d -> %d\n", src.fIndex, count, index);
623 break;
624 }
625 default:
626 printf("unsupported: %d\n", (int) inst);
627 SkASSERT(false);
628 }
629 }
630
VecMod(Vector x,Vector y)631 static Vector VecMod(Vector x, Vector y) {
632 return Vector(x.fFloat - skvx::trunc(x.fFloat / y.fFloat) * y.fFloat);
633 }
634
// Debug guard: asserts that a stack-relative slot address stays within fMemory.
#define CHECK_STACK_BOUNDS(address) \
    SkASSERT(context.fStack + address >= fMemory && \
             context.fStack + address <= fMemory + MEMORY_SIZE)
638
Inverse2x2(Vector * in,Vector * out)639 static void Inverse2x2(Vector* in, Vector* out) {
640 VectorF a = in[0].fFloat,
641 b = in[1].fFloat,
642 c = in[2].fFloat,
643 d = in[3].fFloat;
644 VectorF idet = VectorF(1) / (a*d - b*c);
645 out[0].fFloat = d * idet;
646 out[1].fFloat = -b * idet;
647 out[2].fFloat = -c * idet;
648 out[3].fFloat = a * idet;
649 }
650
Inverse3x3(Vector * in,Vector * out)651 static void Inverse3x3(Vector* in, Vector* out) {
652 VectorF a11 = in[0].fFloat, a12 = in[3].fFloat, a13 = in[6].fFloat,
653 a21 = in[1].fFloat, a22 = in[4].fFloat, a23 = in[7].fFloat,
654 a31 = in[2].fFloat, a32 = in[5].fFloat, a33 = in[8].fFloat;
655 VectorF idet = VectorF(1) / (a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 -
656 a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31);
657 out[0].fFloat = (a22 * a33 - a23 * a32) * idet;
658 out[1].fFloat = (a23 * a31 - a21 * a33) * idet;
659 out[2].fFloat = (a21 * a32 - a22 * a31) * idet;
660 out[3].fFloat = (a13 * a32 - a12 * a33) * idet;
661 out[4].fFloat = (a11 * a33 - a13 * a31) * idet;
662 out[5].fFloat = (a12 * a31 - a11 * a32) * idet;
663 out[6].fFloat = (a12 * a23 - a13 * a22) * idet;
664 out[7].fFloat = (a13 * a21 - a11 * a23) * idet;
665 out[8].fFloat = (a11 * a22 - a12 * a21) * idet;
666 }
667
668
Inverse4x4(Vector * in,Vector * out)669 static void Inverse4x4(Vector* in, Vector* out) {
670 #define inf(index) in[index].fFloat
671 #define outf(index) out[index].fFloat
672 VectorF a00 = inf(0), a10 = inf(4), a20 = inf( 8), a30 = inf(12),
673 a01 = inf(1), a11 = inf(5), a21 = inf( 9), a31 = inf(13),
674 a02 = inf(2), a12 = inf(6), a22 = inf(10), a32 = inf(14),
675 a03 = inf(3), a13 = inf(7), a23 = inf(11), a33 = inf(15);
676
677 VectorF b00 = a00 * a11 - a01 * a10,
678 b01 = a00 * a12 - a02 * a10,
679 b02 = a00 * a13 - a03 * a10,
680 b03 = a01 * a12 - a02 * a11,
681 b04 = a01 * a13 - a03 * a11,
682 b05 = a02 * a13 - a03 * a12,
683 b06 = a20 * a31 - a21 * a30,
684 b07 = a20 * a32 - a22 * a30,
685 b08 = a20 * a33 - a23 * a30,
686 b09 = a21 * a32 - a22 * a31,
687 b10 = a21 * a33 - a23 * a31,
688 b11 = a22 * a33 - a23 * a32;
689
690 VectorF idet = VectorF(1) /
691 (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06);
692
693 b00 *= idet;
694 b01 *= idet;
695 b02 *= idet;
696 b03 *= idet;
697 b04 *= idet;
698 b05 *= idet;
699 b06 *= idet;
700 b07 *= idet;
701 b08 *= idet;
702 b09 *= idet;
703 b10 *= idet;
704 b11 *= idet;
705
706 outf( 0) = a11 * b11 - a12 * b10 + a13 * b09;
707 outf( 1) = a02 * b10 - a01 * b11 - a03 * b09;
708 outf( 2) = a31 * b05 - a32 * b04 + a33 * b03;
709 outf( 3) = a22 * b04 - a21 * b05 - a23 * b03;
710 outf( 4) = a12 * b08 - a10 * b11 - a13 * b07;
711 outf( 5) = a00 * b11 - a02 * b08 + a03 * b07;
712 outf( 6) = a32 * b02 - a30 * b05 - a33 * b01;
713 outf( 7) = a20 * b05 - a22 * b02 + a23 * b01;
714 outf( 8) = a10 * b10 - a11 * b08 + a13 * b06;
715 outf( 9) = a01 * b08 - a00 * b10 - a03 * b06;
716 outf(10) = a30 * b04 - a31 * b02 + a33 * b00;
717 outf(11) = a21 * b02 - a20 * b04 - a23 * b00;
718 outf(12) = a11 * b07 - a10 * b09 - a12 * b06;
719 outf(13) = a00 * b09 - a01 * b07 + a02 * b06;
720 outf(14) = a31 * b01 - a30 * b03 - a32 * b00;
721 outf(15) = a20 * b03 - a21 * b01 + a22 * b00;
722 #undef inf
723 #undef outf
724 }
725
innerRun(const ByteCodeFunction * f,Context context,int baseIndex,Vector ** outResult)726 bool innerRun(const ByteCodeFunction* f, Context context, int baseIndex, Vector** outResult) {
727 #ifdef SKSL_THREADED_CODE
728 static const void* labels[] = {
729 // If you aren't familiar with it, the &&label syntax is the GCC / Clang "labels as
730 // values" extension. If you add anything to this array, be sure to add the
731 // corresponding CHECK_LABEL() assert below.
732 &&kNop,
733 &&kAbort,
734 &&kAddF,
735 &&kAddFN,
736 &&kAddI,
737 &&kAddIN,
738 &&kAnd,
739 &&kBoundsCheck,
740 &&kBranch,
741 &&kBranchIfAllFalse,
742 &&kBreak,
743 &&kCall,
744 &&kCallExternal,
745 &&kCompareEQF,
746 &&kCompareEQI,
747 &&kCompareNEQF,
748 &&kCompareNEQI,
749 &&kCompareGTF,
750 &&kCompareGTS,
751 &&kCompareGTU,
752 &&kCompareGTEQF,
753 &&kCompareGTEQS,
754 &&kCompareGTEQU,
755 &&kCompareLTF,
756 &&kCompareLTS,
757 &&kCompareLTU,
758 &&kCompareLTEQF,
759 &&kCompareLTEQS,
760 &&kCompareLTEQU,
761 &&kContinue,
762 &&kCopy,
763 &&kCos,
764 &&kDivideF,
765 &&kDivideFN,
766 &&kDivideS,
767 &&kDivideSN,
768 &&kDivideU,
769 &&kDivideUN,
770 &&kFloatToSigned,
771 &&kFloatToUnsigned,
772 &&kImmediate,
773 &&kInverse2x2,
774 &&kInverse3x3,
775 &&kInverse4x4,
776 &&kLoad,
777 &&kLoadN,
778 &&kLoadDirect,
779 &&kLoadDirectN,
780 &&kLoadParameter,
781 &&kLoadParameterN,
782 &&kLoadParameterDirect,
783 &&kLoadParameterDirectN,
784 &&kLoadStack,
785 &&kLoadStackN,
786 &&kLoadStackDirect,
787 &&kLoadStackDirectN,
788 &&kLoopBegin,
789 &&kLoopEnd,
790 &&kLoopMask,
791 &&kLoopNext,
792 &&kMaskNegate,
793 &&kMaskPop,
794 &&kMaskPush,
795 &&kMatrixMultiply,
796 &&kMatrixToMatrix,
797 &&kMultiplyF,
798 &&kMultiplyFN,
799 &&kMultiplyI,
800 &&kMultiplyIN,
801 &&kNegateF,
802 &&kNegateS,
803 &&kNot,
804 &&kOr,
805 &&kPrint,
806 &&kReadExternal,
807 &&kRemainderF,
808 &&kRemainderFN,
809 &&kRemainderS,
810 &&kRemainderSN,
811 &&kRemainderU,
812 &&kRemainderUN,
813 &&kReturn,
814 &&kReturnValue,
815 &&kScalarToMatrix,
816 &&kSelect,
817 &&kShiftLeft,
818 &&kShiftRightS,
819 &&kShiftRightU,
820 &&kSignedToFloat,
821 &&kSin,
822 &&kSplat,
823 &&kSqrt,
824 &&kStore,
825 &&kStoreN,
826 &&kStoreDirect,
827 &&kStoreDirectN,
828 &&kStoreParameter,
829 &&kStoreParameterN,
830 &&kStoreParameterDirect,
831 &&kStoreParameterDirectN,
832 &&kStoreStack,
833 &&kStoreStackN,
834 &&kStoreStackDirect,
835 &&kStoreStackDirectN,
836 &&kSubtractF,
837 &&kSubtractFN,
838 &&kSubtractI,
839 &&kSubtractIN,
840 &&kTan,
841 &&kUnsignedToFloat,
842 &&kWriteExternal,
843 &&kXor
844 };
845 CHECK_LABEL(kNop);
846 CHECK_LABEL(kAbort);
847 CHECK_LABEL(kAddF);
848 CHECK_LABEL(kAddI);
849 CHECK_LABEL(kAnd);
850 CHECK_LABEL(kBoundsCheck);
851 CHECK_LABEL(kBranch);
852 CHECK_LABEL(kBranchIfAllFalse);
853 CHECK_LABEL(kBreak);
854 CHECK_LABEL(kCall);
855 CHECK_LABEL(kCallExternal);
856 CHECK_LABEL(kCompareEQF);
857 CHECK_LABEL(kCompareEQI);
858 CHECK_LABEL(kCompareNEQF);
859 CHECK_LABEL(kCompareNEQI);
860 CHECK_LABEL(kCompareGTF);
861 CHECK_LABEL(kCompareGTS);
862 CHECK_LABEL(kCompareGTU);
863 CHECK_LABEL(kCompareGTEQF);
864 CHECK_LABEL(kCompareGTEQS);
865 CHECK_LABEL(kCompareGTEQU);
866 CHECK_LABEL(kCompareLTF);
867 CHECK_LABEL(kCompareLTS);
868 CHECK_LABEL(kCompareLTU);
869 CHECK_LABEL(kCompareLTEQF);
870 CHECK_LABEL(kCompareLTEQS);
871 CHECK_LABEL(kCompareLTEQU);
872 CHECK_LABEL(kContinue);
873 CHECK_LABEL(kCopy);
874 CHECK_LABEL(kCos);
875 CHECK_LABEL(kDivideF);
876 CHECK_LABEL(kDivideFN);
877 CHECK_LABEL(kDivideS);
878 CHECK_LABEL(kDivideSN);
879 CHECK_LABEL(kDivideU);
880 CHECK_LABEL(kDivideUN);
881 CHECK_LABEL(kFloatToSigned);
882 CHECK_LABEL(kFloatToUnsigned);
883 CHECK_LABEL(kImmediate);
884 CHECK_LABEL(kInverse2x2);
885 CHECK_LABEL(kInverse3x3);
886 CHECK_LABEL(kInverse4x4);
887 CHECK_LABEL(kLoad);
888 CHECK_LABEL(kLoadN);
889 CHECK_LABEL(kLoadDirect);
890 CHECK_LABEL(kLoadDirectN);
891 CHECK_LABEL(kLoadParameter);
892 CHECK_LABEL(kLoadParameterN);
893 CHECK_LABEL(kLoadParameterDirect);
894 CHECK_LABEL(kLoadParameterDirectN);
895 CHECK_LABEL(kLoadStack);
896 CHECK_LABEL(kLoadStackN);
897 CHECK_LABEL(kLoadStackDirect);
898 CHECK_LABEL(kLoadStackDirectN);
899 CHECK_LABEL(kLoopBegin);
900 CHECK_LABEL(kLoopEnd);
901 CHECK_LABEL(kLoopMask);
902 CHECK_LABEL(kLoopNext);
903 CHECK_LABEL(kMaskNegate);
904 CHECK_LABEL(kMaskPop);
905 CHECK_LABEL(kMaskPush);
906 CHECK_LABEL(kMatrixMultiply);
907 CHECK_LABEL(kMatrixToMatrix);
908 CHECK_LABEL(kMultiplyF);
909 CHECK_LABEL(kMultiplyFN);
910 CHECK_LABEL(kMultiplyI);
911 CHECK_LABEL(kMultiplyIN);
912 CHECK_LABEL(kNegateF);
913 CHECK_LABEL(kNegateS);
914 CHECK_LABEL(kNot);
915 CHECK_LABEL(kOr);
916 CHECK_LABEL(kPrint);
917 CHECK_LABEL(kReadExternal);
918 CHECK_LABEL(kRemainderF);
919 CHECK_LABEL(kRemainderFN);
920 CHECK_LABEL(kRemainderS);
921 CHECK_LABEL(kRemainderSN);
922 CHECK_LABEL(kRemainderU);
923 CHECK_LABEL(kRemainderUN);
924 CHECK_LABEL(kReturn);
925 CHECK_LABEL(kReturnValue);
926 CHECK_LABEL(kScalarToMatrix);
927 CHECK_LABEL(kSelect);
928 CHECK_LABEL(kShiftLeft);
929 CHECK_LABEL(kShiftRightS);
930 CHECK_LABEL(kShiftRightU);
931 CHECK_LABEL(kSignedToFloat);
932 CHECK_LABEL(kSin);
933 CHECK_LABEL(kSplat);
934 CHECK_LABEL(kSqrt);
935 CHECK_LABEL(kStore);
936 CHECK_LABEL(kStoreN);
937 CHECK_LABEL(kStoreDirect);
938 CHECK_LABEL(kStoreDirectN);
939 CHECK_LABEL(kStoreParameter);
940 CHECK_LABEL(kStoreParameterN);
941 CHECK_LABEL(kStoreParameterDirect);
942 CHECK_LABEL(kStoreParameterDirectN);
943 CHECK_LABEL(kStoreStack);
944 CHECK_LABEL(kStoreStackN);
945 CHECK_LABEL(kStoreStackDirect);
946 CHECK_LABEL(kStoreStackDirectN);
947 CHECK_LABEL(kSubtractF);
948 CHECK_LABEL(kSubtractFN);
949 CHECK_LABEL(kSubtractI);
950 CHECK_LABEL(kSubtractIN);
951 CHECK_LABEL(kTan);
952 CHECK_LABEL(kUnsignedToFloat);
953 CHECK_LABEL(kWriteExternal);
954 CHECK_LABEL(kXor);
955 #endif
956 auto mask = [&]() { return *context.fMaskStack & *context.fLoopStack; };
957 auto parameterBase = [&]() {
958 return context.fCallStack.empty() ? context.fStack
959 : context.fCallStack.top().fParameters;
960 };
961 const uint8_t* code = f->fCode.data();
962 const uint8_t* ip = code;
963 #ifdef SKSL_THREADED_CODE
964 #ifdef TRACE
965 const uint8_t* trace_ip = ip;
966 printf("0: ");
967 disassemble(&trace_ip);
968 #endif
969 goto *labels[(int) read<ByteCode::Instruction>(&ip)];
970 #else
971 for (;;) {
972 #ifdef TRACE
973 const uint8_t* trace_ip = ip;
974 disassemble(&trace_ip);
975 #endif
976 ByteCode::Instruction inst = read<ByteCode::Instruction>(&ip);
977 switch (inst) {
978 #endif
979 VECTOR_BINARY_OP(kAddF, fFloat, fFloat, +)
980 VECTOR_BINARY_OP(kAddI, fInt, fInt, +)
981 BINARY_OP(kAnd, fInt, fInt, &)
982 BINARY_OP(kCompareEQF, fFloat, fInt, ==)
983 BINARY_OP(kCompareEQI, fInt, fInt, ==)
984 BINARY_OP(kCompareNEQF, fFloat, fInt, !=)
985 BINARY_OP(kCompareNEQI, fInt, fInt, !=)
986 BINARY_OP(kCompareGTF, fFloat, fInt, >)
987 BINARY_OP(kCompareGTS, fInt, fInt, >)
988 BINARY_OP(kCompareGTU, fUInt, fUInt, >)
989 BINARY_OP(kCompareGTEQF, fFloat, fInt, >=)
990 BINARY_OP(kCompareGTEQS, fInt, fInt, >=)
991 BINARY_OP(kCompareGTEQU, fUInt, fUInt, >=)
992 BINARY_OP(kCompareLTF, fFloat, fInt, <)
993 BINARY_OP(kCompareLTS, fInt, fInt, <)
994 BINARY_OP(kCompareLTU, fUInt, fUInt, <)
995 BINARY_OP(kCompareLTEQF, fFloat, fInt, <=)
996 BINARY_OP(kCompareLTEQS, fInt, fInt, <=)
997 BINARY_OP(kCompareLTEQU, fUInt, fUInt, <=)
998 VECTOR_BINARY_OP(kSubtractF, fFloat, fFloat, -)
999 VECTOR_BINARY_OP(kSubtractI, fInt, fInt, -)
1000 VECTOR_BINARY_OP(kDivideF, fFloat, fFloat, /)
1001 MASKED_VECTOR_BINARY_OP(kDivideS, fInt, fInt, /)
1002 MASKED_VECTOR_BINARY_OP(kDivideU, fUInt, fUInt, /)
1003 MASKED_VECTOR_BINARY_OP(kRemainderS, fInt, fInt, %)
1004 MASKED_VECTOR_BINARY_OP(kRemainderU, fUInt, fUInt, %)
1005 VECTOR_BINARY_OP(kMultiplyF, fFloat, fFloat, *)
1006 VECTOR_BINARY_OP(kMultiplyI, fInt, fInt, *)
1007 BINARY_OP(kOr, fInt, fInt, |)
1008 BINARY_OP(kXor, fInt, fInt, ^)
1009 LABEL(kAbort)
1010 SkASSERT(false);
1011 return false;
1012 LABEL(kBoundsCheck) {
1013 ByteCode::Register r = read<ByteCode::Register>(&ip);
1014 int length = read<int>(&ip);
1015 if (skvx::any(mask() & ((fRegisters[r.fIndex].fInt < 0) |
1016 (fRegisters[r.fIndex].fInt >= length)))) {
1017 return false;
1018 }
1019 NEXT();
1020 }
1021 LABEL(kBranch) {
1022 ByteCode::Pointer target = read<ByteCode::Pointer>(&ip);
1023 ip = code + target.fAddress;
1024 NEXT();
1025 }
1026 LABEL(kBranchIfAllFalse) {
1027 ByteCode::Pointer target = read<ByteCode::Pointer>(&ip);
1028 if (!skvx::any(mask())) {
1029 ip = code + target.fAddress;
1030 }
1031 NEXT();
1032 }
1033 LABEL(kBreak)
1034 *context.fLoopStack &= ~mask();
1035 NEXT();
1036 LABEL(kCall) {
1037 ByteCode::Register returnValue = read<ByteCode::Register>(&ip);
1038 uint8_t idx = read<uint8_t>(&ip);
1039 ByteCode::Register args = read<ByteCode::Register>(&ip);
1040 const ByteCodeFunction* target = fCode->fFunctions[idx].get();
1041 int stackSlotCount = target->fStackSlotCount + target->fParameterSlotCount;
1042 context.fCallStack.push(StackFrame(f, ip, stackSlotCount,
1043 &fRegisters[args.fIndex],
1044 &fRegisters[returnValue.fIndex]));
1045 f = target;
1046 code = f->fCode.data();
1047 ip = code;
1048 context.fStack -= stackSlotCount;
1049 memcpy(context.fStack, &fRegisters[args.fIndex],
1050 f->fParameterSlotCount * sizeof(Vector));
1051 NEXT();
1052 }
1053 LABEL(kCallExternal) {
1054 ByteCode::Register target = read<ByteCode::Register>(&ip);
1055 uint8_t index = read<uint8_t>(&ip);
1056 uint8_t targetSize = read<uint8_t>(&ip);
1057 ByteCode::Register arguments = read<ByteCode::Register>(&ip);
1058 uint8_t argumentSize = read<uint8_t>(&ip);
1059 ExternalValue* v = fCode->fExternalValues[index];
1060 float tmpReturn[64];
1061 SkASSERT(targetSize < 64);
1062 float tmpArgs[64];
1063 SkASSERT(argumentSize < 64);
1064 VectorI m = mask();
1065 for (int i = 0; i < width; ++i) {
1066 if (m[i]) {
1067 for (int j = 0; j < argumentSize; j++) {
1068 tmpArgs[j] = fRegisters[arguments.fIndex + j].fFloat[i];
1069 }
1070 v->call(baseIndex + i, tmpArgs, tmpReturn);
1071 for (int j = 0; j < targetSize; j++) {
1072 fRegisters[target.fIndex + j].fFloat[i] = tmpReturn[j];
1073 }
1074 }
1075 }
1076 NEXT();
1077 }
1078 LABEL(kContinue) {
1079 VectorI m = mask();
1080 *context.fContinueStack |= m;
1081 *context.fLoopStack &= ~m;
1082 NEXT();
1083 }
1084 LABEL(kCopy) {
1085 ByteCode::Register target = read<ByteCode::Register>(&ip);
1086 ByteCode::Register src = read<ByteCode::Register>(&ip);
1087 fRegisters[target.fIndex].fInt = fRegisters[src.fIndex].fInt;
1088 NEXT();
1089 }
1090 VECTOR_UNARY_FN(kCos, cosf)
1091 LABEL(kFloatToSigned) {
1092 ByteCode::Register target = read<ByteCode::Register>(&ip);
1093 ByteCode::Register src = read<ByteCode::Register>(&ip);
1094 fRegisters[target.fIndex] = Vector(skvx::cast<int32_t>(
1095 fRegisters[src.fIndex].fFloat));
1096 NEXT();
1097 }
1098 LABEL(kFloatToUnsigned) {
1099 ByteCode::Register target = read<ByteCode::Register>(&ip);
1100 ByteCode::Register src = read<ByteCode::Register>(&ip);
1101 fRegisters[target.fIndex] = Vector(skvx::cast<uint32_t>(
1102 fRegisters[src.fIndex].fFloat));
1103 NEXT();
1104 }
1105 LABEL(kImmediate) {
1106 ByteCode::Register target = read<ByteCode::Register>(&ip);
1107 ByteCode::Immediate src = read<ByteCode::Immediate>(&ip);
1108 fRegisters[target.fIndex].fInt = src.fInt;
1109 NEXT();
1110 }
1111 LABEL(kInverse2x2) {
1112 ByteCode::Register target = read<ByteCode::Register>(&ip);
1113 ByteCode::Register src = read<ByteCode::Register>(&ip);
1114 Inverse2x2(&fRegisters[src.fIndex], &fRegisters[target.fIndex]);
1115 NEXT();
1116 }
1117 LABEL(kInverse3x3) {
1118 ByteCode::Register target = read<ByteCode::Register>(&ip);
1119 ByteCode::Register src = read<ByteCode::Register>(&ip);
1120 Inverse3x3(&fRegisters[src.fIndex], &fRegisters[target.fIndex]);
1121 NEXT();
1122 }
1123 LABEL(kInverse4x4) {
1124 ByteCode::Register target = read<ByteCode::Register>(&ip);
1125 ByteCode::Register src = read<ByteCode::Register>(&ip);
1126 Inverse4x4(&fRegisters[src.fIndex], &fRegisters[target.fIndex]);
1127 NEXT();
1128 }
1129 LABEL(kLoad) {
1130 ByteCode::Register target = read<ByteCode::Register>(&ip);
1131 ByteCode::Register src = read<ByteCode::Register>(&ip);
1132 VectorI m = mask();
1133 for (int i = 0; i < width; ++i) {
1134 if (m[i]) {
1135 fRegisters[target.fIndex].fInt[i] =
1136 fMemory[fRegisters[src.fIndex].fInt[i]].fInt[i];
1137 }
1138 }
1139 NEXT();
1140 }
1141 LABEL(kLoadN) {
1142 uint8_t count = read<uint8_t>(&ip);
1143 ByteCode::Register target = read<ByteCode::Register>(&ip);
1144 ByteCode::Register src = read<ByteCode::Register>(&ip);
1145 VectorI m = mask();
1146 for (int i = 0; i < width; ++i) {
1147 if (m[i]) {
1148 for (int j = 0; j < count; ++j) {
1149 fRegisters[target.fIndex + j].fInt[i] =
1150 fMemory[fRegisters[src.fIndex].fInt[i] + j].fInt[i];
1151 }
1152 }
1153 }
1154 NEXT();
1155 }
1156 LABEL(kLoadDirect) {
1157 ByteCode::Register target = read<ByteCode::Register>(&ip);
1158 ByteCode::Pointer src = read<ByteCode::Pointer>(&ip);
1159 fRegisters[target.fIndex].fInt = fMemory[src.fAddress].fInt;
1160 NEXT();
1161 }
1162 LABEL(kLoadDirectN) {
1163 uint8_t count = read<uint8_t>(&ip);
1164 ByteCode::Register target = read<ByteCode::Register>(&ip);
1165 ByteCode::Pointer src = read<ByteCode::Pointer>(&ip);
1166 for (int i = 0; i < count; ++i) {
1167 fRegisters[target.fIndex + i].fInt = fMemory[src.fAddress + i].fInt;
1168 }
1169 NEXT();
1170 }
1171 LABEL(kLoadParameter) {
1172 ByteCode::Register target = read<ByteCode::Register>(&ip);
1173 ByteCode::Register src = read<ByteCode::Register>(&ip);
1174 Vector* base = parameterBase();
1175 VectorI m = mask();
1176 for (int i = 0; i < width; ++i) {
1177 if (m[i]) {
1178 fRegisters[target.fIndex].fInt[i] =
1179 base[fRegisters[src.fIndex].fInt[i]].fInt[i];
1180 }
1181 }
1182 NEXT();
1183 }
1184 LABEL(kLoadParameterN) {
1185 uint8_t count = read<uint8_t>(&ip);
1186 ByteCode::Register target = read<ByteCode::Register>(&ip);
1187 ByteCode::Register src = read<ByteCode::Register>(&ip);
1188 Vector* base = parameterBase();
1189 VectorI m = mask();
1190 for (int i = 0; i < width; ++i) {
1191 if (m[i]) {
1192 for (int j = 0; j < count; ++j) {
1193 fRegisters[target.fIndex + j].fInt[i] =
1194 base[fRegisters[src.fIndex].fInt[i] + j].fInt[i];
1195 }
1196 }
1197 }
1198 NEXT();
1199 }
1200 LABEL(kLoadParameterDirect) {
1201 ByteCode::Register target = read<ByteCode::Register>(&ip);
1202 ByteCode::Pointer src = read<ByteCode::Pointer>(&ip);
1203 Vector* base = parameterBase();
1204 fRegisters[target.fIndex].fInt = base[src.fAddress].fInt;
1205 NEXT();
1206 }
1207 LABEL(kLoadParameterDirectN) {
1208 uint8_t count = read<uint8_t>(&ip);
1209 ByteCode::Register target = read<ByteCode::Register>(&ip);
1210 ByteCode::Pointer src = read<ByteCode::Pointer>(&ip);
1211 Vector* base = parameterBase();
1212 for (int i = 0; i < count; ++i) {
1213 fRegisters[target.fIndex + i].fInt = base[src.fAddress + i].fInt;
1214 }
1215 NEXT();
1216 }
1217 LABEL(kLoadStack) {
1218 ByteCode::Register target = read<ByteCode::Register>(&ip);
1219 ByteCode::Register src = read<ByteCode::Register>(&ip);
1220 VectorI m = mask();
1221 for (int i = 0; i < width; ++i) {
1222 if (m[i]) {
1223 fRegisters[target.fIndex].fInt[i] =
1224 context.fStack[fRegisters[src.fIndex].fInt[i]].fInt[i];
1225 }
1226 }
1227 NEXT();
1228 }
1229 LABEL(kLoadStackN) {
1230 uint8_t count = read<uint8_t>(&ip);
1231 ByteCode::Register target = read<ByteCode::Register>(&ip);
1232 ByteCode::Register src = read<ByteCode::Register>(&ip);
1233 VectorI m = mask();
1234 for (int i = 0; i < width; ++i) {
1235 if (m[i]) {
1236 for (int j = 0; j < count; ++j) {
1237 fRegisters[target.fIndex + j].fInt[i] =
1238 context.fStack[fRegisters[src.fIndex].fInt[i] + j].fInt[i];
1239 }
1240 }
1241 }
1242 NEXT();
1243 }
1244 LABEL(kLoadStackDirect) {
1245 ByteCode::Register target = read<ByteCode::Register>(&ip);
1246 ByteCode::Pointer src = read<ByteCode::Pointer>(&ip);
1247 CHECK_STACK_BOUNDS(src.fAddress);
1248 fRegisters[target.fIndex].fInt = context.fStack[src.fAddress].fInt;
1249 NEXT();
1250 }
1251 LABEL(kLoadStackDirectN) {
1252 uint8_t count = read<uint8_t>(&ip);
1253 ByteCode::Register target = read<ByteCode::Register>(&ip);
1254 ByteCode::Pointer src = read<ByteCode::Pointer>(&ip);
1255 CHECK_STACK_BOUNDS(src.fAddress);
1256 for (int i = 0; i < count; ++i) {
1257 fRegisters[target.fIndex + i].fInt = context.fStack[src.fAddress + i].fInt;
1258 }
1259 NEXT();
1260 }
1261 LABEL(kLoopBegin) {
1262 context.fLoopStack[1] = context.fLoopStack[0];
1263 ++context.fLoopStack;
1264 context.fContinueStack[1] = 0;
1265 ++context.fContinueStack;
1266 NEXT();
1267 }
1268 LABEL(kLoopEnd) {
1269 --context.fLoopStack;
1270 --context.fContinueStack;
1271 NEXT();
1272 }
1273 LABEL(kLoopMask) {
1274 ByteCode::Register value = read<ByteCode::Register>(&ip);
1275 *context.fLoopStack &= fRegisters[value.fIndex].fInt;
1276 NEXT();
1277 }
1278 LABEL(kLoopNext) {
1279 *context.fLoopStack |= *context.fContinueStack;
1280 *context.fContinueStack = 0;
1281 NEXT();
1282 }
1283 LABEL(kMaskNegate) {
1284 *context.fMaskStack = context.fMaskStack[-1] & ~context.fCondStack[0];
1285 NEXT();
1286 }
1287 LABEL(kMaskPop) {
1288 --context.fMaskStack;
1289 --context.fCondStack;
1290 NEXT();
1291 }
1292 LABEL(kMaskPush) {
1293 ByteCode::Register value = read<ByteCode::Register>(&ip);
1294 context.fCondStack[1] = fRegisters[value.fIndex].fInt;
1295 context.fMaskStack[1] = context.fMaskStack[0] & context.fCondStack[1];
1296 ++context.fCondStack;
1297 ++context.fMaskStack;
1298 NEXT();
1299 }
1300 LABEL(kMatrixMultiply) {
1301 ByteCode::Register target = read<ByteCode::Register>(&ip);
1302 ByteCode::Register left = read<ByteCode::Register>(&ip);
1303 ByteCode::Register right = read<ByteCode::Register>(&ip);
1304 uint8_t lCols = read<uint8_t>(&ip);
1305 uint8_t lRows = read<uint8_t>(&ip);
1306 uint8_t rCols = read<uint8_t>(&ip);
1307 uint8_t rRows = lCols;
1308 memset(&fRegisters[target.fIndex], 0, sizeof(Vector) * rCols * lRows);
1309 for (int c = 0; c < rCols; ++c) {
1310 for (int r = 0; r < lRows; ++r) {
1311 for (int j = 0; j < lCols; ++j) {
1312 fRegisters[target.fIndex + c * lRows + r].fFloat +=
1313 fRegisters[left.fIndex + j * lRows + r].fFloat *
1314 fRegisters[right.fIndex + c * rRows + j].fFloat;
1315 }
1316 }
1317 }
1318 NEXT();
1319 }
1320 LABEL(kMatrixToMatrix) {
1321 ByteCode::Register target = read<ByteCode::Register>(&ip);
1322 ByteCode::Register src = read<ByteCode::Register>(&ip);
1323 uint8_t srcColumns = read<uint8_t>(&ip);
1324 uint8_t srcRows = read<uint8_t>(&ip);
1325 uint8_t dstColumns = read<uint8_t>(&ip);
1326 uint8_t dstRows = read<uint8_t>(&ip);
1327 int offset = 0;
1328 for (int i = 0; i < dstColumns; ++i) {
1329 for (int j = 0; j < dstRows; ++j) {
1330 if (i < srcColumns && j < srcRows) {
1331 fRegisters[target.fIndex + offset] =
1332 fRegisters[src.fIndex + (srcRows * i) + j];
1333 } else {
1334 if (i == j) {
1335 fRegisters[target.fIndex + offset].fFloat = 1;
1336 } else {
1337 fRegisters[target.fIndex + offset].fFloat = 0;
1338 }
1339 }
1340 ++offset;
1341 }
1342 }
1343 NEXT();
1344 }
1345 LABEL(kNegateF) {
1346 ByteCode::Register target = read<ByteCode::Register>(&ip);
1347 ByteCode::Register src = read<ByteCode::Register>(&ip);
1348 fRegisters[target.fIndex].fFloat = -fRegisters[src.fIndex].fFloat;
1349 NEXT();
1350 }
1351 LABEL(kNegateS) {
1352 ByteCode::Register target = read<ByteCode::Register>(&ip);
1353 ByteCode::Register src = read<ByteCode::Register>(&ip);
1354 fRegisters[target.fIndex].fInt = -fRegisters[src.fIndex].fInt;
1355 NEXT();
1356 }
1357 LABEL(kNop)
1358 NEXT();
1359 LABEL(kNot) {
1360 ByteCode::Register target = read<ByteCode::Register>(&ip);
1361 ByteCode::Register src = read<ByteCode::Register>(&ip);
1362 fRegisters[target.fIndex].fInt = ~fRegisters[src.fIndex].fInt;
1363 NEXT();
1364 }
1365 LABEL(kPrint) {
1366 ByteCode::Register src = read<ByteCode::Register>(&ip);
1367 if (skvx::any(mask())) {
1368 printf("[");
1369 const char* separator = "";
1370 for (int i = 0; i < width; ++i) {
1371 if (mask()[i]) {
1372 printf("%s%f", separator, fRegisters[src.fIndex].fFloat[i]);
1373 }
1374 else {
1375 printf("%s-", separator);
1376 }
1377 separator = ", ";
1378 }
1379 printf("]\n");
1380 }
1381 NEXT();
1382 }
1383 LABEL(kReadExternal) {
1384 ByteCode::Register target = read<ByteCode::Register>(&ip);
1385 uint8_t count = read<uint8_t>(&ip);
1386 uint8_t index = read<uint8_t>(&ip);
1387 SkASSERT(count <= 4);
1388 SkASSERT(fCode->fExternalValues.size() > index);
1389 float tmp[4];
1390 VectorI m = mask();
1391 for (int i = 0; i < width; ++i) {
1392 if (m[i]) {
1393 fCode->fExternalValues[index]->read(baseIndex + i, tmp);
1394 for (int j = 0; j < count; ++j) {
1395 fRegisters[target.fIndex + j].fFloat[i] = tmp[j];
1396 }
1397 }
1398 }
1399 NEXT();
1400 }
1401 LABEL(kRemainderF) {
1402 ByteCode::Register target = read<ByteCode::Register>(&ip);
1403 ByteCode::Register src1 = read<ByteCode::Register>(&ip);
1404 ByteCode::Register src2 = read<ByteCode::Register>(&ip);
1405 fRegisters[target.fIndex] = VecMod(fRegisters[src1.fIndex],
1406 fRegisters[src2.fIndex]);
1407 NEXT();
1408 }
1409 LABEL(kRemainderFN) {
1410 uint8_t count = read<uint8_t>(&ip);
1411 ByteCode::Register target = read<ByteCode::Register>(&ip);
1412 ByteCode::Register src1 = read<ByteCode::Register>(&ip);
1413 ByteCode::Register src2 = read<ByteCode::Register>(&ip);
1414 for (int i = 0; i < count; ++i) {
1415 fRegisters[target.fIndex + i] = VecMod(fRegisters[src1.fIndex + i],
1416 fRegisters[src2.fIndex + i]);
1417 }
1418 NEXT();
1419 }
1420 LABEL(kReturn) {
1421 if (context.fCallStack.empty()) {
1422 return true;
1423 }
1424 StackFrame frame = context.fCallStack.top();
1425 f = frame.fFunction;
1426 code = f->fCode.data();
1427 ip = frame.fIP;
1428 context.fStack += frame.fStackSlotCount;
1429 context.fCallStack.pop();
1430 NEXT();
1431 }
1432 LABEL(kReturnValue) {
1433 ByteCode::Register returnValue = read<ByteCode::Register>(&ip);
1434 if (context.fCallStack.empty()) {
1435 if (outResult) {
1436 *outResult = &fRegisters[returnValue.fIndex];
1437 }
1438 return true;
1439 }
1440 StackFrame frame = context.fCallStack.top();
1441 ip = frame.fIP;
1442 context.fStack += frame.fStackSlotCount;
1443 memcpy(frame.fReturnValue, &fRegisters[returnValue.fIndex],
1444 sizeof(Vector) * f->fReturnSlotCount);
1445 f = frame.fFunction;
1446 code = f->fCode.data();
1447 context.fCallStack.pop();
1448 NEXT();
1449 }
1450 LABEL(kScalarToMatrix) {
1451 ByteCode::Register target = read<ByteCode::Register>(&ip);
1452 ByteCode::Register src = read<ByteCode::Register>(&ip);
1453 uint8_t columns = read<uint8_t>(&ip);
1454 uint8_t rows = read<uint8_t>(&ip);
1455 int offset = 0;
1456 for (int i = 0; i < columns; ++i) {
1457 for (int j = 0; j < rows; ++j) {
1458 if (i == j) {
1459 fRegisters[target.fIndex + offset] = fRegisters[src.fIndex];
1460 } else {
1461 fRegisters[target.fIndex + offset].fFloat = 0;
1462 }
1463 ++offset;
1464 }
1465 }
1466 NEXT();
1467 }
1468 LABEL(kSelect) {
1469 ByteCode::Register target = read<ByteCode::Register>(&ip);
1470 ByteCode::Register test = read<ByteCode::Register>(&ip);
1471 ByteCode::Register src1 = read<ByteCode::Register>(&ip);
1472 ByteCode::Register src2 = read<ByteCode::Register>(&ip);
1473 fRegisters[target.fIndex] = skvx::if_then_else(fRegisters[test.fIndex].fInt,
1474 fRegisters[src1.fIndex].fFloat,
1475 fRegisters[src2.fIndex].fFloat);
1476 NEXT();
1477 }
1478 LABEL(kShiftLeft) {
1479 ByteCode::Register target = read<ByteCode::Register>(&ip);
1480 ByteCode::Register src = read<ByteCode::Register>(&ip);
1481 uint8_t count = read<uint8_t>(&ip);
1482 fRegisters[target.fIndex].fInt = fRegisters[src.fIndex].fInt << count;
1483 NEXT();
1484 }
1485 LABEL(kShiftRightS) {
1486 ByteCode::Register target = read<ByteCode::Register>(&ip);
1487 ByteCode::Register src = read<ByteCode::Register>(&ip);
1488 int8_t count = read<int8_t>(&ip);
1489 fRegisters[target.fIndex].fInt = fRegisters[src.fIndex].fInt >> count;
1490 NEXT();
1491 }
1492 LABEL(kShiftRightU) {
1493 ByteCode::Register target = read<ByteCode::Register>(&ip);
1494 ByteCode::Register src = read<ByteCode::Register>(&ip);
1495 uint8_t count = read<uint8_t>(&ip);
1496 fRegisters[target.fIndex].fUInt = fRegisters[src.fIndex].fUInt >> count;
1497 NEXT();
1498 }
1499 LABEL(kSignedToFloat) {
1500 ByteCode::Register target = read<ByteCode::Register>(&ip);
1501 ByteCode::Register src = read<ByteCode::Register>(&ip);
1502 fRegisters[target.fIndex] = Vector(skvx::cast<float>(
1503 fRegisters[src.fIndex].fInt));
1504 NEXT();
1505 }
1506 VECTOR_UNARY_FN(kSin, sinf)
1507 LABEL(kSplat) {
1508 uint8_t count = read<uint8_t>(&ip);
1509 ByteCode::Register target = read<ByteCode::Register>(&ip);
1510 ByteCode::Register src = read<ByteCode::Register>(&ip);
1511 for (int i = 0; i < count; ++i) {
1512 fRegisters[target.fIndex + i] = fRegisters[src.fIndex];
1513 }
1514 NEXT();
1515 }
1516 LABEL(kSqrt) {
1517 ByteCode::Register target = read<ByteCode::Register>(&ip);
1518 ByteCode::Register src = read<ByteCode::Register>(&ip);
1519 fRegisters[target.fIndex].fFloat = skvx::sqrt(fRegisters[src.fIndex].fFloat);
1520 NEXT();
1521 }
1522 LABEL(kStore) {
1523 ByteCode::Register target = read<ByteCode::Register>(&ip);
1524 ByteCode::Register src = read<ByteCode::Register>(&ip);
1525 VectorI m = mask();
1526 for (int i = 0; i < width; ++i) {
1527 if (m[i]) {
1528 fMemory[fRegisters[target.fIndex].fInt[i]].fInt[i] =
1529 fRegisters[src.fIndex].fInt[i];
1530 }
1531 }
1532 NEXT();
1533 }
1534 LABEL(kStoreN) {
1535 uint8_t count = read<uint8_t>(&ip);
1536 ByteCode::Register target = read<ByteCode::Register>(&ip);
1537 ByteCode::Register src = read<ByteCode::Register>(&ip);
1538 VectorI m = mask();
1539 for (int i = 0; i < width; ++i) {
1540 if (m[i]) {
1541 for (int j = 0; j < count; ++j) {
1542 fMemory[fRegisters[target.fIndex].fInt[i] + j].fInt[i] =
1543 fRegisters[src.fIndex + j].fInt[i];
1544 }
1545 }
1546 }
1547 NEXT();
1548 }
1549 LABEL(kStoreDirect) {
1550 ByteCode::Pointer target = read<ByteCode::Pointer>(&ip);
1551 ByteCode::Register src = read<ByteCode::Register>(&ip);
1552 fMemory[target.fAddress] = skvx::if_then_else(mask(),
1553 fRegisters[src.fIndex].fFloat,
1554 fMemory[target.fAddress].fFloat);
1555 NEXT();
1556 }
1557 LABEL(kStoreDirectN) {
1558 uint8_t count = read<uint8_t>(&ip);
1559 ByteCode::Pointer target = read<ByteCode::Pointer>(&ip);
1560 ByteCode::Register src = read<ByteCode::Register>(&ip);
1561 for (int i = 0; i < count; ++i) {
1562 fMemory[target.fAddress + i] = skvx::if_then_else(
1563 mask(),
1564 fRegisters[src.fIndex + i].fFloat,
1565 fMemory[target.fAddress + i].fFloat);
1566 }
1567 NEXT();
1568 }
1569 LABEL(kStoreParameter) {
1570 ByteCode::Register target = read<ByteCode::Register>(&ip);
1571 ByteCode::Register src = read<ByteCode::Register>(&ip);
1572 Vector* base = parameterBase();
1573 VectorI m = mask();
1574 for (int i = 0; i < width; ++i) {
1575 if (m[i]) {
1576 base[fRegisters[target.fIndex].fInt[i]].fInt[i] =
1577 fRegisters[src.fIndex].fInt[i];
1578 }
1579 }
1580 NEXT();
1581 }
1582 LABEL(kStoreParameterN) {
1583 uint8_t count = read<uint8_t>(&ip);
1584 ByteCode::Register target = read<ByteCode::Register>(&ip);
1585 ByteCode::Register src = read<ByteCode::Register>(&ip);
1586 Vector* base = parameterBase();
1587 VectorI m = mask();
1588 for (int i = 0; i < width; ++i) {
1589 if (m[i]) {
1590 for (int j = 0; j < count; ++j) {
1591 base[fRegisters[target.fIndex].fInt[i] + j].fInt[i] =
1592 fRegisters[src.fIndex + j].fInt[i];
1593 }
1594 }
1595 }
1596 NEXT();
1597 }
1598 LABEL(kStoreParameterDirect) {
1599 ByteCode::Pointer target = read<ByteCode::Pointer>(&ip);
1600 ByteCode::Register src = read<ByteCode::Register>(&ip);
1601 Vector* base = parameterBase();
1602 base[target.fAddress].fFloat = skvx::if_then_else(mask(),
1603 fRegisters[src.fIndex].fFloat,
1604 base[target.fAddress].fFloat);
1605 NEXT();
1606 }
1607 LABEL(kStoreParameterDirectN) {
1608 uint8_t count = read<uint8_t>(&ip);
1609 ByteCode::Pointer target = read<ByteCode::Pointer>(&ip);
1610 ByteCode::Register src = read<ByteCode::Register>(&ip);
1611 Vector* base = parameterBase();
1612 for (int i = 0; i < count; ++i) {
1613 base[target.fAddress + i].fFloat = skvx::if_then_else(
1614 mask(),
1615 fRegisters[src.fIndex + i].fFloat,
1616 base[target.fAddress + i].fFloat);
1617 }
1618 NEXT();
1619 }
1620 LABEL(kStoreStack) {
1621 ByteCode::Register target = read<ByteCode::Register>(&ip);
1622 ByteCode::Register src = read<ByteCode::Register>(&ip);
1623 VectorI m = mask();
1624 for (int i = 0; i < width; ++i) {
1625 if (m[i]) {
1626 context.fStack[fRegisters[target.fIndex].fInt[i]].fInt[i] =
1627 fRegisters[src.fIndex].fInt[i];
1628 }
1629 }
1630 NEXT();
1631 }
1632 LABEL(kStoreStackN) {
1633 uint8_t count = read<uint8_t>(&ip);
1634 ByteCode::Register target = read<ByteCode::Register>(&ip);
1635 ByteCode::Register src = read<ByteCode::Register>(&ip);
1636 VectorI m = mask();
1637 for (int i = 0; i < width; ++i) {
1638 if (m[i]) {
1639 for (int j = 0; j < count; ++j) {
1640 context.fStack[fRegisters[target.fIndex].fInt[i] + j].fInt[i] =
1641 fRegisters[src.fIndex + j].fInt[i];
1642 }
1643 }
1644 }
1645 NEXT();
1646 }
1647 LABEL(kStoreStackDirect) {
1648 ByteCode::Pointer target = read<ByteCode::Pointer>(&ip);
1649 CHECK_STACK_BOUNDS(target.fAddress);
1650 ByteCode::Register src = read<ByteCode::Register>(&ip);
1651 context.fStack[target.fAddress] = skvx::if_then_else(
1652 mask(),
1653 fRegisters[src.fIndex].fFloat,
1654 context.fStack[target.fAddress].fFloat);
1655 NEXT();
1656 }
1657 LABEL(kStoreStackDirectN) {
1658 uint8_t count = read<uint8_t>(&ip);
1659 ByteCode::Pointer target = read<ByteCode::Pointer>(&ip);
1660 CHECK_STACK_BOUNDS(target.fAddress);
1661 ByteCode::Register src = read<ByteCode::Register>(&ip);
1662 for (int i = 0; i < count; ++i) {
1663 context.fStack[target.fAddress + i] = skvx::if_then_else(
1664 mask(),
1665 fRegisters[src.fIndex + i].fFloat,
1666 context.fStack[target.fAddress + i].fFloat);
1667 }
1668 NEXT();
1669 }
1670 VECTOR_UNARY_FN(kTan, tanf)
1671 LABEL(kUnsignedToFloat) {
1672 ByteCode::Register target = read<ByteCode::Register>(&ip);
1673 ByteCode::Register src = read<ByteCode::Register>(&ip);
1674 fRegisters[target.fIndex] = Vector(skvx::cast<float>(
1675 fRegisters[src.fIndex].fUInt));
1676 NEXT();
1677 }
1678 LABEL(kWriteExternal) {
1679 uint8_t index = read<uint8_t>(&ip);
1680 uint8_t count = read<uint8_t>(&ip);
1681 SkASSERT(count <= 4);
1682 SkASSERT(fCode->fExternalValues.size() > index);
1683 ByteCode::Register src = read<ByteCode::Register>(&ip);
1684 float tmp[4];
1685 VectorI m = mask();
1686 for (int i = 0; i < width; ++i) {
1687 if (m[i]) {
1688 for (int j = 0; j < count; ++j) {
1689 tmp[j] = fRegisters[src.fIndex + j].fFloat[i];
1690 }
1691 fCode->fExternalValues[index]->write(baseIndex + i, tmp);
1692 }
1693 }
1694 NEXT();
1695 }
1696 #ifndef SKSL_THREADED_CODE
1697 }
1698 }
1699 #endif
1700 }
1701
    // The bytecode program this interpreter executes. Owned for the lifetime of
    // the interpreter; consulted at runtime for called functions
    // (fCode->fFunctions, see kCall) and for external values
    // (fCode->fExternalValues, see kCallExternal / kReadExternal / kWriteExternal).
    const std::unique_ptr<ByteCode> fCode;

    // NOTE(review): not referenced anywhere in this portion of the file;
    // presumably the raw allocation that fRegisters / fMemory point into --
    // confirm against the constructor/destructor.
    void* fBackingStore;

    // Vector register file: indexed by ByteCode::Register::fIndex, one Vector
    // (a full group of SIMD lanes) per register slot.
    Vector* fRegisters;

    // Interpreter memory: addressed either directly by
    // ByteCode::Pointer::fAddress (kLoadDirect / kStoreDirect) or per-lane via
    // indices held in registers (kLoad / kStore), with lanes gated by the
    // current execution mask.
    Vector* fMemory;
1713 };
1714
1715 #undef BINARY_OP
1716 #undef CHECK_STACK_BOUNDS
1717
1718 } // namespace
1719
1720 #endif
1721