1 // AsmJit - Machine code generation for C++
2 //
3 //  * Official AsmJit Home Page: https://asmjit.com
4 //  * Official Github Repository: https://github.com/asmjit/asmjit
5 //
6 // Copyright (c) 2008-2020 The AsmJit Authors
7 //
8 // This software is provided 'as-is', without any express or implied
9 // warranty. In no event will the authors be held liable for any damages
10 // arising from the use of this software.
11 //
12 // Permission is granted to anyone to use this software for any purpose,
13 // including commercial applications, and to alter it and redistribute it
14 // freely, subject to the following restrictions:
15 //
16 // 1. The origin of this software must not be misrepresented; you must not
17 //    claim that you wrote the original software. If you use this software
18 //    in a product, an acknowledgment in the product documentation would be
19 //    appreciated but is not required.
20 // 2. Altered source versions must be plainly marked as such, and must not be
21 //    misrepresented as being the original software.
22 // 3. This notice may not be removed or altered from any source distribution.
23 
24 #include "../core/api-build_p.h"
25 #ifndef ASMJIT_NO_COMPILER
26 
27 #include "../core/ralocal_p.h"
28 #include "../core/support.h"
29 
30 ASMJIT_BEGIN_NAMESPACE
31 
32 // ============================================================================
33 // [asmjit::RALocalAllocator - Utilities]
34 // ============================================================================
35 
36 static ASMJIT_INLINE RATiedReg* RALocal_findTiedRegByWorkId(RATiedReg* tiedRegs, size_t count, uint32_t workId) noexcept {
37   for (size_t i = 0; i < count; i++)
38     if (tiedRegs[i].workId() == workId)
39       return &tiedRegs[i];
40   return nullptr;
41 }
42 
43 // ============================================================================
44 // [asmjit::RALocalAllocator - Init / Reset]
45 // ============================================================================
46 
47 Error RALocalAllocator::init() noexcept {
48   PhysToWorkMap* physToWorkMap;
49   WorkToPhysMap* workToPhysMap;
50 
51   physToWorkMap = _pass->newPhysToWorkMap();
52   workToPhysMap = _pass->newWorkToPhysMap();
53   if (!physToWorkMap || !workToPhysMap)
54     return DebugUtils::errored(kErrorOutOfMemory);
55 
56   _curAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
57   _curAssignment.initMaps(physToWorkMap, workToPhysMap);
58 
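  // `_tmpAssignment` is a scratch copy used by `allocBranch()` so that a
  // switch-to-entry sequence can be emitted without losing the current
  // state needed by the fall-through successor.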
59   physToWorkMap = _pass->newPhysToWorkMap();
60   workToPhysMap = _pass->newWorkToPhysMap();
61   if (!physToWorkMap || !workToPhysMap)
62     return DebugUtils::errored(kErrorOutOfMemory);
63 
64   _tmpAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
65   _tmpAssignment.initMaps(physToWorkMap, workToPhysMap);
66 
67   return kErrorOk;
68 }
69 
70 // ============================================================================
71 // [asmjit::RALocalAllocator - Assignment]
72 // ============================================================================
73 
74 Error RALocalAllocator::makeInitialAssignment() noexcept {
75   FuncNode* func = _pass->func();
76   RABlock* entry = _pass->entryBlock();
77 
78   ZoneBitVector& liveIn = entry->liveIn();
79   uint32_t argCount = func->argCount();
80   uint32_t numIter = 1;
81 
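  // Two-pass scheme: the first iteration only assigns arguments whose preferred
  // home register is still free. If any live argument remains unassigned,
  // `numIter` is bumped to 2 and the second iteration places the remaining
  // arguments into any free register, falling back to a stack slot when the
  // group runs out of registers.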
82   for (uint32_t iter = 0; iter < numIter; iter++) {
83     for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
84       for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
85         // Unassigned argument.
86         VirtReg* virtReg = func->argPack(argIndex)[valueIndex];
87         if (!virtReg)
88           continue;
89 
90         // Unreferenced argument.
91         RAWorkReg* workReg = virtReg->workReg();
92         if (!workReg)
93           continue;
94 
95         // Overwritten argument.
96         uint32_t workId = workReg->workId();
97         if (!liveIn.bitAt(workId))
98           continue;
99 
100         uint32_t group = workReg->group();
101         if (_curAssignment.workToPhysId(group, workId) != RAAssignment::kPhysNone)
102           continue;
103 
104         uint32_t allocableRegs = _availableRegs[group] & ~_curAssignment.assigned(group);
105         if (iter == 0) {
106           // First iteration: Try to allocate to home RegId.
107           if (workReg->hasHomeRegId()) {
108             uint32_t physId = workReg->homeRegId();
109             if (Support::bitTest(allocableRegs, physId)) {
110               _curAssignment.assign(group, workId, physId, true);
111               _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->info().type(), physId, workReg->typeId());
112               continue;
113             }
114           }
115 
116           numIter = 2;
117         }
118         else {
119           // Second iteration: Pick any other unassigned register if one is available, or assign to stack.
120           if (allocableRegs) {
121             uint32_t physId = Support::ctz(allocableRegs);
122             _curAssignment.assign(group, workId, physId, true);
123             _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->info().type(), physId, workReg->typeId());
124           }
125           else {
126             // This register will definitely need a stack slot, so create it now and also assign
127             // `argIndex` to it. We will patch `_argsAssignment` later, after RAStackAllocator finishes.
128             RAStackSlot* slot = _pass->getOrCreateStackSlot(workReg);
129             if (ASMJIT_UNLIKELY(!slot))
130               return DebugUtils::errored(kErrorOutOfMemory);
131 
132             // This means STACK_ARG may be moved to STACK.
133             workReg->addFlags(RAWorkReg::kFlagStackArgToStack);
134             _pass->_numStackArgsToStackSlots++;
135           }
136         }
137       }
138     }
139   }
140 
141   return kErrorOk;
142 }
143 
144 Error RALocalAllocator::replaceAssignment(
145   const PhysToWorkMap* physToWorkMap,
146   const WorkToPhysMap* workToPhysMap) noexcept {
147 
148   _curAssignment.copyFrom(physToWorkMap, workToPhysMap);
149   return kErrorOk;
150 }
151 
152 Error RALocalAllocator::switchToAssignment(
153   PhysToWorkMap* dstPhysToWorkMap,
154   WorkToPhysMap* dstWorkToPhysMap,
155   const ZoneBitVector& liveIn,
156   bool dstReadOnly,
157   bool tryMode) noexcept {
158 
159   RAAssignment dst;
160   RAAssignment& cur = _curAssignment;
161 
162   dst.initLayout(_pass->_physRegCount, _pass->workRegs());
163   dst.initMaps(dstPhysToWorkMap, dstWorkToPhysMap);
164 
165   if (tryMode)
166     return kErrorOk;
167 
168   for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
169     // ------------------------------------------------------------------------
170     // STEP 1:
171     //   - KILL all registers that are not live at `dst`,
172     //   - SPILL all registers that are not assigned at `dst`.
173     // ------------------------------------------------------------------------
174 
175     if (!tryMode) {
176       Support::BitWordIterator<uint32_t> it(cur.assigned(group));
177       while (it.hasNext()) {
178         uint32_t physId = it.next();
179         uint32_t workId = cur.physToWorkId(group, physId);
180 
181         // Must be true as we iterate over assigned registers.
182         ASMJIT_ASSERT(workId != RAAssignment::kWorkNone);
183 
184         // KILL if it's not live on entry.
185         if (!liveIn.bitAt(workId)) {
186           onKillReg(group, workId, physId);
187           continue;
188         }
189 
190         // SPILL if it's not assigned on entry.
191         uint32_t altId = dst.workToPhysId(group, workId);
192         if (altId == RAAssignment::kPhysNone) {
193           ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
194         }
195       }
196     }
197 
198     // ------------------------------------------------------------------------
199     // STEP 2:
200     //   - MOVE and SWAP registers from their current assignments into their
201     //     DST assignments.
202     //   - Build `willLoadRegs` mask of registers scheduled for `onLoadReg()`.
203     // ------------------------------------------------------------------------
204 
205     // Current run-id (1 means more aggressive decisions).
206     int32_t runId = -1;
207     // Remaining registers scheduled for `onLoadReg()`.
208     uint32_t willLoadRegs = 0;
209     // Remaining registers to be allocated in this loop.
210     uint32_t affectedRegs = dst.assigned(group);
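    // Illustrative example (hypothetical state): CUR = {v1->r0, v2->r1} and
    // DST = {v2->r0, v1->r1} form a cycle. The first pass (runId 0) defers both
    // registers; the next pass breaks the cycle either with a SWAP (if the
    // architecture supports it for this group) or with a MOVE to a free
    // register / a SPILL. Any progress resets `runId` back to -1.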
211 
212     while (affectedRegs) {
213       if (++runId == 2) {
214         if (!tryMode)
215           return DebugUtils::errored(kErrorInvalidState);
216 
217         // Stop in `tryMode` if we haven't done anything in the past two rounds.
218         break;
219       }
220 
221       Support::BitWordIterator<uint32_t> it(affectedRegs);
222       while (it.hasNext()) {
223         uint32_t physId = it.next();
224         uint32_t physMask = Support::bitMask(physId);
225 
226         uint32_t curWorkId = cur.physToWorkId(group, physId);
227         uint32_t dstWorkId = dst.physToWorkId(group, physId);
228 
229         // The register must have assigned `dstWorkId` as we only iterate over assigned regs.
230         ASMJIT_ASSERT(dstWorkId != RAAssignment::kWorkNone);
231 
232         if (curWorkId != RAAssignment::kWorkNone) {
233           // Both assigned.
234           if (curWorkId != dstWorkId) {
235             // Wait a bit if this is the first run; we may avoid this move if `curWorkId` moves out.
236             if (runId <= 0)
237               continue;
238 
239             uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
240             if (altPhysId == RAAssignment::kPhysNone)
241               continue;
242 
243             // Reset as we will do some changes to the current assignment.
244             runId = -1;
245 
246             if (_archTraits->hasSwap(group)) {
247               ASMJIT_PROPAGATE(onSwapReg(group, curWorkId, physId, dstWorkId, altPhysId));
248             }
249             else {
250               // SPILL the reg if it's not dirty in DST, otherwise try to MOVE.
251               if (!cur.isPhysDirty(group, physId)) {
252                 ASMJIT_PROPAGATE(onKillReg(group, curWorkId, physId));
253               }
254               else {
255                 uint32_t allocableRegs = _pass->_availableRegs[group] & ~cur.assigned(group);
256 
257                 // If possible don't conflict with assigned regs at DST.
258                 if (allocableRegs & ~dst.assigned(group))
259                   allocableRegs &= ~dst.assigned(group);
260 
261                 if (allocableRegs) {
262                   // MOVE is possible, thus preferred.
263                   uint32_t tmpPhysId = Support::ctz(allocableRegs);
264 
265                   ASMJIT_PROPAGATE(onMoveReg(group, curWorkId, tmpPhysId, physId));
266                   _pass->_clobberedRegs[group] |= Support::bitMask(tmpPhysId);
267                 }
268                 else {
269                   // MOVE is impossible, must SPILL.
270                   ASMJIT_PROPAGATE(onSpillReg(group, curWorkId, physId));
271                 }
272               }
273 
274               goto Cleared;
275             }
276           }
277         }
278         else {
279 Cleared:
280           // DST assigned, CUR unassigned.
281           uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
282           if (altPhysId == RAAssignment::kPhysNone) {
283             if (liveIn.bitAt(dstWorkId))
284               willLoadRegs |= physMask; // Scheduled for `onLoadReg()`.
285             affectedRegs &= ~physMask;  // Unaffected from now.
286             continue;
287           }
288           ASMJIT_PROPAGATE(onMoveReg(group, dstWorkId, physId, altPhysId));
289         }
290 
291         // Both DST and CUR assigned to the same reg or CUR just moved to DST.
292         if ((dst.dirty(group) & physMask) != (cur.dirty(group) & physMask)) {
293           if ((dst.dirty(group) & physMask) == 0) {
294             // CUR dirty, DST not dirty (the assert is just to visualize the condition).
295             ASMJIT_ASSERT(!dst.isPhysDirty(group, physId) && cur.isPhysDirty(group, physId));
296 
297             // If `dstReadOnly` is true it means that the block was already
298             // processed and we cannot change from CLEAN to DIRTY. In that case
299             // the register has to be saved as it cannot enter the block DIRTY.
300             if (dstReadOnly)
301               ASMJIT_PROPAGATE(onSaveReg(group, dstWorkId, physId));
302             else
303               dst.makeDirty(group, dstWorkId, physId);
304           }
305           else {
306             // DST dirty, CUR not dirty (the assert is just to visualize the condition).
307             ASMJIT_ASSERT(dst.isPhysDirty(group, physId) && !cur.isPhysDirty(group, physId));
308 
309             cur.makeDirty(group, dstWorkId, physId);
310           }
311         }
312 
313         // Must match now...
314         ASMJIT_ASSERT(dst.physToWorkId(group, physId) == cur.physToWorkId(group, physId));
315         ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));
316 
317         runId = -1;
318         affectedRegs &= ~physMask;
319       }
320     }
321 
322     // ------------------------------------------------------------------------
323     // STEP 3:
324     //   - Load registers specified by `willLoadRegs`.
325     // ------------------------------------------------------------------------
326 
327     {
328       Support::BitWordIterator<uint32_t> it(willLoadRegs);
329       while (it.hasNext()) {
330         uint32_t physId = it.next();
331 
332         if (!cur.isPhysAssigned(group, physId)) {
333           uint32_t workId = dst.physToWorkId(group, physId);
334 
335           // The algorithm is broken if it tries to load a register that is not in LIVE-IN.
336           ASMJIT_ASSERT(liveIn.bitAt(workId) == true);
337 
338           ASMJIT_PROPAGATE(onLoadReg(group, workId, physId));
339           if (dst.isPhysDirty(group, physId))
340             cur.makeDirty(group, workId, physId);
341           ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));
342         }
343         else {
344           // Not possible otherwise.
345           ASMJIT_ASSERT(tryMode == true);
346         }
347       }
348     }
349   }
350 
351   if (!tryMode) {
352     // Here is code that dumps the conflicting part if something fails here:
353     //   if (!dst.equals(cur)) {
354     //     uint32_t physTotal = dst._layout.physTotal;
355     //     uint32_t workCount = dst._layout.workCount;
356     //
357     //     for (uint32_t physId = 0; physId < physTotal; physId++) {
358     //       uint32_t dstWorkId = dst._physToWorkMap->workIds[physId];
359     //       uint32_t curWorkId = cur._physToWorkMap->workIds[physId];
360     //       if (dstWorkId != curWorkId)
361     //         fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId);
362     //     }
363     //
364     //     for (uint32_t workId = 0; workId < workCount; workId++) {
365     //       uint32_t dstPhysId = dst._workToPhysMap->physIds[workId];
366     //       uint32_t curPhysId = cur._workToPhysMap->physIds[workId];
367     //       if (dstPhysId != curPhysId)
368     //         fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId);
369     //     }
370     //   }
371     ASMJIT_ASSERT(dst.equals(cur));
372   }
373 
374   return kErrorOk;
375 }
376 
377 Error RALocalAllocator::spillScratchGpRegsBeforeEntry(uint32_t scratchRegs) noexcept {
378   uint32_t group = BaseReg::kGroupGp;
379   Support::BitWordIterator<uint32_t> it(scratchRegs);
380 
381   while (it.hasNext()) {
382     uint32_t physId = it.next();
383     if (_curAssignment.isPhysAssigned(group, physId)) {
384       uint32_t workId = _curAssignment.physToWorkId(group, physId);
385       ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
386     }
387   }
388 
389   return kErrorOk;
390 }
391 
392 // ============================================================================
393 // [asmjit::RALocalAllocator - Allocation]
394 // ============================================================================
395 
396 Error RALocalAllocator::allocInst(InstNode* node) noexcept {
397   RAInst* raInst = node->passData<RAInst>();
398 
399   RATiedReg* outTiedRegs[Globals::kMaxPhysRegs];
400   RATiedReg* dupTiedRegs[Globals::kMaxPhysRegs];
401 
402   // The cursor must point to the previous instruction for a possible instruction insertion.
403   _cc->_setCursor(node->prev());
404 
405   _node = node;
406   _raInst = raInst;
407   _tiedTotal = raInst->_tiedTotal;
408   _tiedCount = raInst->_tiedCount;
409 
410   // Whether we already replaced register operand with memory operand.
411   bool rmAllocated = false;
412 
413   for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
414     uint32_t i, count = this->tiedCount(group);
415     RATiedReg* tiedRegs = this->tiedRegs(group);
416 
417     uint32_t willUse = _raInst->_usedRegs[group];
418     uint32_t willOut = _raInst->_clobberedRegs[group];
419     uint32_t willFree = 0;
420     uint32_t usePending = count;
421 
422     uint32_t outTiedCount = 0;
423     uint32_t dupTiedCount = 0;
424 
425     // ------------------------------------------------------------------------
426     // STEP 1:
427     //
428     // Calculate `willUse` and `willFree` masks based on tied registers we have.
429     //
430     // We don't do any assignment decisions at this stage as we just need to
431     // collect some information first. Then, after we populate all masks needed
432     // we can finally make some decisions in the second loop. The main reason
433     // for this is that we really need `willFree` to make assignment decisions
434     // for `willUse`, because if we mark some registers that will be freed, we
435     // can consider them in decision making afterwards.
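    //
    // Illustrative example (hypothetical): if the instruction fixes a USE to
    // physical id 2 while the virtual register currently lives in id 5, bit 2
    // is already part of `willUse` (collected by RAInstBuilder) and, if id 2 is
    // occupied by an unrelated register, bit 2 also lands in `willFree` so the
    // occupant can be evacuated before the USE is satisfied.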
436     // ------------------------------------------------------------------------
437 
438     for (i = 0; i < count; i++) {
439       RATiedReg* tiedReg = &tiedRegs[i];
440 
441       // Add OUT and KILL to `outTiedRegs` for CLOBBERing and/or OUT assignment.
442       if (tiedReg->isOutOrKill())
443         outTiedRegs[outTiedCount++] = tiedReg;
444 
445       if (tiedReg->isDuplicate())
446         dupTiedRegs[dupTiedCount++] = tiedReg;
447 
448       if (!tiedReg->isUse()) {
449         tiedReg->markUseDone();
450         usePending--;
451         continue;
452       }
453 
454       uint32_t workId = tiedReg->workId();
455       uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
456 
457       if (tiedReg->hasUseId()) {
458         // If the register has `useId` it means it can only be allocated in that register.
459         uint32_t useMask = Support::bitMask(tiedReg->useId());
460 
461         // RAInstBuilder must have collected `usedRegs` on-the-fly.
462         ASMJIT_ASSERT((willUse & useMask) != 0);
463 
464         if (assignedId == tiedReg->useId()) {
465           // If the register is already allocated in this one, mark it done and continue.
466           tiedReg->markUseDone();
467           if (tiedReg->isWrite())
468             _curAssignment.makeDirty(group, workId, assignedId);
469           usePending--;
470           willUse |= useMask;
471         }
472         else {
473           willFree |= useMask & _curAssignment.assigned(group);
474         }
475       }
476       else {
477         // Check if the register must be moved to `allocableRegs`.
478         uint32_t allocableRegs = tiedReg->allocableRegs();
479         if (assignedId != RAAssignment::kPhysNone) {
480           uint32_t assignedMask = Support::bitMask(assignedId);
481           if ((allocableRegs & ~willUse) & assignedMask) {
482             tiedReg->setUseId(assignedId);
483             tiedReg->markUseDone();
484             if (tiedReg->isWrite())
485               _curAssignment.makeDirty(group, workId, assignedId);
486             usePending--;
487             willUse |= assignedMask;
488           }
489           else {
490             willFree |= assignedMask;
491           }
492         }
493       }
494     }
495 
496     // ------------------------------------------------------------------------
497     // STEP 2:
498     //
499     // Do some decision making to find the best candidate registers that
500     // need to be assigned, moved, and/or spilled. Only USE registers are
501     // considered here; OUT registers will be decided later, after all
502     // CLOBBERed and OUT registers are unassigned.
503     // ------------------------------------------------------------------------
504 
505     if (usePending) {
506       // TODO: Not sure `liveRegs` should be used, maybe willUse and willFree would be enough and much more clear.
507 
508       // All registers that are currently live, excluding registers that will be freed.
509       uint32_t liveRegs = _curAssignment.assigned(group) & ~willFree;
510 
511       for (i = 0; i < count; i++) {
512         RATiedReg* tiedReg = &tiedRegs[i];
513         if (tiedReg->isUseDone()) continue;
514 
515         uint32_t workId = tiedReg->workId();
516         uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
517 
518         // REG/MEM: Patch register operand to memory operand if not allocated.
519         if (!rmAllocated && tiedReg->hasUseRM()) {
520           if (assignedId == RAAssignment::kPhysNone && Support::isPowerOf2(tiedReg->useRewriteMask())) {
521             RAWorkReg* workReg = workRegById(tiedReg->workId());
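            // The use-rewrite mask has one bit per 32-bit word of the node's
            // operand array, so dividing the bit index by the number of words
            // per Operand yields the index of the operand to patch.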
522             uint32_t opIndex = Support::ctz(tiedReg->useRewriteMask()) / uint32_t(sizeof(Operand) / sizeof(uint32_t));
523             uint32_t rmSize = tiedReg->rmSize();
524 
525             if (rmSize <= workReg->virtReg()->virtSize()) {
526               Operand& op = node->operands()[opIndex];
527               op = _pass->workRegAsMem(workReg);
528               op.as<BaseMem>().setSize(rmSize);
529               tiedReg->_useRewriteMask = 0;
530 
531               tiedReg->markUseDone();
532               usePending--;
533 
534               rmAllocated = true;
535               continue;
536             }
537           }
538         }
539 
540         if (!tiedReg->hasUseId()) {
541           uint32_t allocableRegs = tiedReg->allocableRegs() & ~(willFree | willUse);
542 
543           // DECIDE where to assign the USE register.
544           uint32_t useId = decideOnAssignment(group, workId, assignedId, allocableRegs);
545           uint32_t useMask = Support::bitMask(useId);
546 
547           willUse |= useMask;
548           willFree |= useMask & liveRegs;
549           tiedReg->setUseId(useId);
550 
551           if (assignedId != RAAssignment::kPhysNone) {
552             uint32_t assignedMask = Support::bitMask(assignedId);
553 
554             willFree |= assignedMask;
555             liveRegs &= ~assignedMask;
556 
557             // OPTIMIZATION: Assign the USE register here if it's possible.
558             if (!(liveRegs & useMask)) {
559               ASMJIT_PROPAGATE(onMoveReg(group, workId, useId, assignedId));
560               tiedReg->markUseDone();
561               if (tiedReg->isWrite())
562                 _curAssignment.makeDirty(group, workId, useId);
563               usePending--;
564             }
565           }
566           else {
567             // OPTIMIZATION: Assign the USE register here if it's possible.
568             if (!(liveRegs & useMask)) {
569               ASMJIT_PROPAGATE(onLoadReg(group, workId, useId));
570               tiedReg->markUseDone();
571               if (tiedReg->isWrite())
572                 _curAssignment.makeDirty(group, workId, useId);
573               usePending--;
574             }
575           }
576 
577           liveRegs |= useMask;
578         }
579       }
580     }
581 
582     // Initially all used regs will be marked clobbered.
583     uint32_t clobberedByInst = willUse | willOut;
584 
585     // ------------------------------------------------------------------------
586     // STEP 3:
587     //
588     // Free all registers that we marked as `willFree`. Only registers that are not
589     // USEd by the instruction are considered as we don't want to free regs we need.
590     // ------------------------------------------------------------------------
591 
592     if (willFree) {
593       uint32_t allocableRegs = _availableRegs[group] & ~(_curAssignment.assigned(group) | willFree | willUse | willOut);
594       Support::BitWordIterator<uint32_t> it(willFree);
595 
596       do {
597         uint32_t assignedId = it.next();
598         if (_curAssignment.isPhysAssigned(group, assignedId)) {
599           uint32_t workId = _curAssignment.physToWorkId(group, assignedId);
600 
601           // DECIDE whether to MOVE or SPILL.
602           if (allocableRegs) {
603             uint32_t reassignedId = decideOnReassignment(group, workId, assignedId, allocableRegs);
604             if (reassignedId != RAAssignment::kPhysNone) {
605               ASMJIT_PROPAGATE(onMoveReg(group, workId, reassignedId, assignedId));
606               allocableRegs ^= Support::bitMask(reassignedId);
607               continue;
608             }
609           }
610 
611           ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
612         }
613       } while (it.hasNext());
614     }
615 
616     // ------------------------------------------------------------------------
617     // STEP 4:
618     //
619     // ALLOCATE / SHUFFLE all registers that we marked as `willUse` and weren't
620     // allocated yet. This is a bit complicated as the allocation is iterative.
621     // In some cases we have to wait before allocating a particular physical
622     // register as it's still occupied by some other one, which we need to move
623     // before we can use it. In this case we skip it and allocate some other
624     // register instead (making the occupied one free for another iteration).
625     //
626     // NOTE: Iterations are mostly important for complicated allocations like
627     // function calls, where there can be up to N registers used at once. Asm
628     // instructions won't run the loop more than once in 99.9% of cases as they
629     // use 2..3 registers on average.
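    //
    // Illustrative example (hypothetical, a group without SWAP support such as
    // vector registers): v1 must go to the register currently holding v2 and
    // vice versa. The first pass makes no progress, `mustSwap` becomes true,
    // and the next pass SPILLs one of the targets so the other can be MOVEd
    // into place.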
630     // ------------------------------------------------------------------------
631 
632     if (usePending) {
633       bool mustSwap = false;
634       do {
635         uint32_t oldPending = usePending;
636 
637         for (i = 0; i < count; i++) {
638           RATiedReg* thisTiedReg = &tiedRegs[i];
639           if (thisTiedReg->isUseDone()) continue;
640 
641           uint32_t thisWorkId = thisTiedReg->workId();
642           uint32_t thisPhysId = _curAssignment.workToPhysId(group, thisWorkId);
643 
644           // This would be a bug, a fatal one!
645           uint32_t targetPhysId = thisTiedReg->useId();
646           ASMJIT_ASSERT(targetPhysId != thisPhysId);
647 
648           uint32_t targetWorkId = _curAssignment.physToWorkId(group, targetPhysId);
649           if (targetWorkId != RAAssignment::kWorkNone) {
650             RAWorkReg* targetWorkReg = workRegById(targetWorkId);
651 
652             // Swapping two registers can solve two allocation tasks by emitting
653             // just a single instruction. However, swap is only available on a few
654             // architectures and it's definitely not available for every register
655             // group. Calling `onSwapReg()` before checking these would be fatal.
656             if (_archTraits->hasSwap(group) && thisPhysId != RAAssignment::kPhysNone) {
657               ASMJIT_PROPAGATE(onSwapReg(group, thisWorkId, thisPhysId, targetWorkId, targetPhysId));
658 
659               thisTiedReg->markUseDone();
660               if (thisTiedReg->isWrite())
661                 _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
662               usePending--;
663 
664               // Double-hit.
665               RATiedReg* targetTiedReg = RALocal_findTiedRegByWorkId(tiedRegs, count, targetWorkReg->workId());
666               if (targetTiedReg && targetTiedReg->useId() == thisPhysId) {
667                 targetTiedReg->markUseDone();
668                 if (targetTiedReg->isWrite())
669                   _curAssignment.makeDirty(group, targetWorkId, thisPhysId);
670                 usePending--;
671               }
672               continue;
673             }
674 
675             if (!mustSwap)
676               continue;
677 
678             // We only get here if the previous iteration did nothing. This is
679             // essentially a SWAP operation without having a dedicated instruction
680             // for that purpose (vector registers, etc). The simplest way to
681             // handle such a case is to SPILL the target register.
682             ASMJIT_PROPAGATE(onSpillReg(group, targetWorkId, targetPhysId));
683           }
684 
685           if (thisPhysId != RAAssignment::kPhysNone) {
686             ASMJIT_PROPAGATE(onMoveReg(group, thisWorkId, targetPhysId, thisPhysId));
687 
688             thisTiedReg->markUseDone();
689             if (thisTiedReg->isWrite())
690               _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
691             usePending--;
692           }
693           else {
694             ASMJIT_PROPAGATE(onLoadReg(group, thisWorkId, targetPhysId));
695 
696             thisTiedReg->markUseDone();
697             if (thisTiedReg->isWrite())
698               _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
699             usePending--;
700           }
701         }
702 
703         mustSwap = (oldPending == usePending);
704       } while (usePending);
705     }
706 
707     // ------------------------------------------------------------------------
708     // STEP 5:
709     //
710     // KILL registers marked as KILL/OUT.
711     // ------------------------------------------------------------------------
712 
713     uint32_t outPending = outTiedCount;
714     if (outTiedCount) {
715       for (i = 0; i < outTiedCount; i++) {
716         RATiedReg* tiedReg = outTiedRegs[i];
717 
718         uint32_t workId = tiedReg->workId();
719         uint32_t physId = _curAssignment.workToPhysId(group, workId);
720 
721         // Must check if it's allocated as KILL can be related to OUT (like KILL
722         // immediately after OUT, which could mean the register is not assigned).
723         if (physId != RAAssignment::kPhysNone) {
724           ASMJIT_PROPAGATE(onKillReg(group, workId, physId));
725           willOut &= ~Support::bitMask(physId);
726         }
727 
728         // We still maintain the number of pending registers for OUT assignment.
729         // So, if this is only KILL, not OUT, we can safely decrement it.
730         outPending -= !tiedReg->isOut();
731       }
732     }
733 
734     // ------------------------------------------------------------------------
735     // STEP 6:
736     //
737     // SPILL registers that will be CLOBBERed. Since OUT and KILL were
738     // already processed this is used mostly to handle function CALLs.
739     // ------------------------------------------------------------------------
740 
741     if (willOut) {
742       Support::BitWordIterator<uint32_t> it(willOut);
743       do {
744         uint32_t physId = it.next();
745         uint32_t workId = _curAssignment.physToWorkId(group, physId);
746 
747         if (workId == RAAssignment::kWorkNone)
748           continue;
749 
750         ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
751       } while (it.hasNext());
752     }
753 
754     // ------------------------------------------------------------------------
755     // STEP 7:
756     //
757     // Duplication.
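    //
    // A duplicated USE requires the same value in more than one physical
    // register, so the value is copied from its `useId` into every other
    // register present in the tied register's allocable mask.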
758     // ------------------------------------------------------------------------
759 
760     for (i = 0; i < dupTiedCount; i++) {
761       RATiedReg* tiedReg = dupTiedRegs[i];
762       uint32_t workId = tiedReg->workId();
763       uint32_t srcId = tiedReg->useId();
764 
765       Support::BitWordIterator<uint32_t> it(tiedReg->_allocableRegs);
766       while (it.hasNext()) {
767         uint32_t dstId = it.next();
768         if (dstId == srcId)
769           continue;
770         _pass->emitMove(workId, dstId, srcId);
771       }
772     }
773 
774     // ------------------------------------------------------------------------
775     // STEP 8:
776     //
777     // Assign OUT registers.
778     // ------------------------------------------------------------------------
779 
780     if (outPending) {
781       // Live registers - we need a separate variable (outside of `_curAssignment`)
782       // to hold these because of KILLed registers. If we KILL a register here it
783       // is removed from `_curAssignment`, but we still cannot assign to it here.
784       uint32_t liveRegs = _curAssignment.assigned(group);
785 
786       // Must avoid as they have been already OUTed (added during the loop).
787       uint32_t outRegs = 0;
788 
789       // Must avoid as they collide with already allocated ones.
790       uint32_t avoidRegs = willUse & ~clobberedByInst;
791 
792       for (i = 0; i < outTiedCount; i++) {
793         RATiedReg* tiedReg = outTiedRegs[i];
794         if (!tiedReg->isOut()) continue;
795 
796         uint32_t workId = tiedReg->workId();
797         uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
798 
799         if (assignedId != RAAssignment::kPhysNone)
800           ASMJIT_PROPAGATE(onKillReg(group, workId, assignedId));
801 
802         uint32_t physId = tiedReg->outId();
803         if (physId == RAAssignment::kPhysNone) {
804           uint32_t allocableRegs = _availableRegs[group] & ~(outRegs | avoidRegs);
805 
806           if (!(allocableRegs & ~liveRegs)) {
807             // There are no more registers, decide which one to spill.
808             uint32_t spillWorkId;
809             physId = decideOnSpillFor(group, workId, allocableRegs & liveRegs, &spillWorkId);
810             ASMJIT_PROPAGATE(onSpillReg(group, spillWorkId, physId));
811           }
812           else {
813             physId = decideOnAssignment(group, workId, RAAssignment::kPhysNone, allocableRegs & ~liveRegs);
814           }
815         }
816 
817         // OUTs are CLOBBERed thus cannot be ASSIGNed right now.
818         ASMJIT_ASSERT(!_curAssignment.isPhysAssigned(group, physId));
819 
820         if (!tiedReg->isKill())
821           ASMJIT_PROPAGATE(onAssignReg(group, workId, physId, true));
822 
823         tiedReg->setOutId(physId);
824         tiedReg->markOutDone();
825 
826         outRegs |= Support::bitMask(physId);
827         liveRegs &= ~Support::bitMask(physId);
828         outPending--;
829       }
830 
831       clobberedByInst |= outRegs;
832       ASMJIT_ASSERT(outPending == 0);
833     }
834 
835     _clobberedRegs[group] |= clobberedByInst;
836   }
837 
838   return kErrorOk;
839 }
840 
841 Error RALocalAllocator::spillAfterAllocation(InstNode* node) noexcept {
842   // This is an experimental feature that spills registers that don't have a
843   // home id and whose last use is in this basic block. This prevents saving these
844   // regs in other basic blocks and then restoring them (mostly relevant for loops).
845   RAInst* raInst = node->passData<RAInst>();
846   uint32_t count = raInst->tiedCount();
847 
848   for (uint32_t i = 0; i < count; i++) {
849     RATiedReg* tiedReg = raInst->tiedAt(i);
850     if (tiedReg->isLast()) {
851       uint32_t workId = tiedReg->workId();
852       RAWorkReg* workReg = workRegById(workId);
853       if (!workReg->hasHomeRegId()) {
854         uint32_t group = workReg->group();
855         uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
856         if (assignedId != RAAssignment::kPhysNone) {
857           _cc->_setCursor(node);
858           ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
859         }
860       }
861     }
862   }
863 
864   return kErrorOk;
865 }
866 
867 Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* cont) noexcept {
868   // TODO: This should be used to make the branch allocation better.
869   DebugUtils::unused(cont);
870 
871   // The cursor must point to the previous instruction for a possible instruction insertion.
872   _cc->_setCursor(node->prev());
873 
874   // Use TryMode of `switchToAssignment()` if possible.
875   if (target->hasEntryAssignment()) {
876     ASMJIT_PROPAGATE(switchToAssignment(
877       target->entryPhysToWorkMap(),
878       target->entryWorkToPhysMap(),
879       target->liveIn(),
880       target->isAllocated(),
881       true));
882   }
883 
884   ASMJIT_PROPAGATE(allocInst(node));
885   ASMJIT_PROPAGATE(spillRegsBeforeEntry(target));
886 
887   if (target->hasEntryAssignment()) {
888     BaseNode* injectionPoint = _pass->extraBlock()->prev();
889     BaseNode* prevCursor = _cc->setCursor(injectionPoint);
890 
891     _tmpAssignment.copyFrom(_curAssignment);
892     ASMJIT_PROPAGATE(switchToAssignment(
893       target->entryPhysToWorkMap(),
894       target->entryWorkToPhysMap(),
895       target->liveIn(),
896       target->isAllocated(),
897       false));
898 
899     BaseNode* curCursor = _cc->cursor();
900     if (curCursor != injectionPoint) {
901       // Additional instructions emitted to switch from the current state to
902       // the `target` state. This means that we have to move these instructions
903       // into an independent code block and patch the jump location.
904       Operand& targetOp = node->op(node->opCount() - 1);
905       if (ASMJIT_UNLIKELY(!targetOp.isLabel()))
906         return DebugUtils::errored(kErrorInvalidState);
907 
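      // The branch node is retargeted to `trampoline`, and the sequence
      // `trampoline: <switch moves>; jmp savedTarget` is emitted at the
      // injection point (just before the pass's extra block), so the register
      // shuffle only executes when the branch is taken.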
908       Label trampoline = _cc->newLabel();
909       Label savedTarget = targetOp.as<Label>();
910 
911       // Patch `target` to point to the `trampoline` we just created.
912       targetOp = trampoline;
913 
914       // Clear a possible SHORT form as we have no clue now if the SHORT form would
915       // be encodable after patching the target to `trampoline` (X86 specific).
916       node->clearInstOptions(BaseInst::kOptionShortForm);
917 
918       // Finalize the switch assignment sequence.
919       ASMJIT_PROPAGATE(_pass->emitJump(savedTarget));
920       _cc->_setCursor(injectionPoint);
921       _cc->bind(trampoline);
922     }
923 
924     _cc->_setCursor(prevCursor);
925     _curAssignment.swap(_tmpAssignment);
926   }
927   else {
928     ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(target, block(), _curAssignment));
929   }
930 
931   return kErrorOk;
932 }
933 
934 Error RALocalAllocator::allocJumpTable(InstNode* node, const RABlocks& targets, RABlock* cont) noexcept {
935   // TODO: Do we really need to use `cont`?
936   DebugUtils::unused(cont);
937 
938   if (targets.empty())
939     return DebugUtils::errored(kErrorInvalidState);
940 
941   // The cursor must point to the previous instruction for a possible instruction insertion.
942   _cc->_setCursor(node->prev());
943 
944   // All `targets` should have the same sharedAssignmentId, so we just read the first one.
945   RABlock* anyTarget = targets[0];
946   if (!anyTarget->hasSharedAssignmentId())
947     return DebugUtils::errored(kErrorInvalidState);
948 
949   RASharedAssignment& sharedAssignment = _pass->_sharedAssignments[anyTarget->sharedAssignmentId()];
950 
951   ASMJIT_PROPAGATE(allocInst(node));
952 
953   if (!sharedAssignment.empty()) {
954     ASMJIT_PROPAGATE(switchToAssignment(
955       sharedAssignment.physToWorkMap(),
956       sharedAssignment.workToPhysMap(),
957       sharedAssignment.liveIn(),
958       true,  // Read-only.
959       false  // Try-mode.
960     ));
961   }
962 
963   ASMJIT_PROPAGATE(spillRegsBeforeEntry(anyTarget));
964 
965   if (sharedAssignment.empty()) {
966     ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(anyTarget, block(), _curAssignment));
967   }
968 
969   return kErrorOk;
970 }
971 
972 // ============================================================================
973 // [asmjit::RALocalAllocator - Decision Making]
974 // ============================================================================
975 
976 uint32_t RALocalAllocator::decideOnAssignment(uint32_t group, uint32_t workId, uint32_t physId, uint32_t allocableRegs) const noexcept {
977   ASMJIT_ASSERT(allocableRegs != 0);
978   DebugUtils::unused(group, physId);
979 
980   RAWorkReg* workReg = workRegById(workId);
981 
982   // Prefer home register id, if possible.
983   if (workReg->hasHomeRegId()) {
984     uint32_t homeId = workReg->homeRegId();
985     if (Support::bitTest(allocableRegs, homeId))
986       return homeId;
987   }
988 
989   // Prefer registers used upon block entries.
990   uint32_t previouslyAssignedRegs = workReg->allocatedMask();
991   if (allocableRegs & previouslyAssignedRegs)
992     allocableRegs &= previouslyAssignedRegs;
993 
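  // Otherwise pick the lowest-indexed register from the remaining candidates.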
994   return Support::ctz(allocableRegs);
995 }
996 
997 uint32_t RALocalAllocator::decideOnReassignment(uint32_t group, uint32_t workId, uint32_t physId, uint32_t allocableRegs) const noexcept {
998   ASMJIT_ASSERT(allocableRegs != 0);
999   DebugUtils::unused(group, physId);
1000 
1001   RAWorkReg* workReg = workRegById(workId);
1002 
1003   // Prefer allocating back to HomeId, if possible.
1004   if (workReg->hasHomeRegId()) {
1005     if (Support::bitTest(allocableRegs, workReg->homeRegId()))
1006       return workReg->homeRegId();
1007   }
1008 
1009   // TODO: [Register Allocator] This could be improved.
1010 
1011   // Decided to SPILL.
1012   return RAAssignment::kPhysNone;
1013 }
1014 
1015 uint32_t RALocalAllocator::decideOnSpillFor(uint32_t group, uint32_t workId, uint32_t spillableRegs, uint32_t* spillWorkId) const noexcept {
1016   // May be used in the future to decide which register would be best to spill so `workId` can be assigned.
1017   DebugUtils::unused(workId);
1018   ASMJIT_ASSERT(spillableRegs != 0);
1019 
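  // Scan all spillable candidates and keep the one with the lowest spill cost
  // as computed by `calculateSpillCost()`; ties keep the earlier (lower) physId.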
1020   Support::BitWordIterator<uint32_t> it(spillableRegs);
1021   uint32_t bestPhysId = it.next();
1022   uint32_t bestWorkId = _curAssignment.physToWorkId(group, bestPhysId);
1023 
1024   // Avoid calculating the cost model if there is only one spillable register.
1025   if (it.hasNext()) {
1026     uint32_t bestCost = calculateSpillCost(group, bestWorkId, bestPhysId);
1027     do {
1028       uint32_t localPhysId = it.next();
1029       uint32_t localWorkId = _curAssignment.physToWorkId(group, localPhysId);
1030       uint32_t localCost = calculateSpillCost(group, localWorkId, localPhysId);
1031 
1032       if (localCost < bestCost) {
1033         bestCost = localCost;
1034         bestPhysId = localPhysId;
1035         bestWorkId = localWorkId;
1036       }
1037     } while (it.hasNext());
1038   }
1039 
1040   *spillWorkId = bestWorkId;
1041   return bestPhysId;
1042 }
1043 
1044 ASMJIT_END_NAMESPACE
1045 
1046 #endif // !ASMJIT_NO_COMPILER
1047