1 // AsmJit - Machine code generation for C++
2 //
3 // * Official AsmJit Home Page: https://asmjit.com
4 // * Official Github Repository: https://github.com/asmjit/asmjit
5 //
6 // Copyright (c) 2008-2020 The AsmJit Authors
7 //
8 // This software is provided 'as-is', without any express or implied
9 // warranty. In no event will the authors be held liable for any damages
10 // arising from the use of this software.
11 //
12 // Permission is granted to anyone to use this software for any purpose,
13 // including commercial applications, and to alter it and redistribute it
14 // freely, subject to the following restrictions:
15 //
16 // 1. The origin of this software must not be misrepresented; you must not
17 // claim that you wrote the original software. If you use this software
18 // in a product, an acknowledgment in the product documentation would be
19 // appreciated but is not required.
20 // 2. Altered source versions must be plainly marked as such, and must not be
21 // misrepresented as being the original software.
22 // 3. This notice may not be removed or altered from any source distribution.
23
24 #include "../core/api-build_p.h"
25 #ifndef ASMJIT_NO_COMPILER
26
27 #include "../core/ralocal_p.h"
28 #include "../core/support.h"
29
30 ASMJIT_BEGIN_NAMESPACE
31
32 // ============================================================================
33 // [asmjit::RALocalAllocator - Utilities]
34 // ============================================================================
35
RALocal_findTiedRegByWorkId(RATiedReg * tiedRegs,size_t count,uint32_t workId)36 static ASMJIT_INLINE RATiedReg* RALocal_findTiedRegByWorkId(RATiedReg* tiedRegs, size_t count, uint32_t workId) noexcept {
37 for (size_t i = 0; i < count; i++)
38 if (tiedRegs[i].workId() == workId)
39 return &tiedRegs[i];
40 return nullptr;
41 }
42
43 // ============================================================================
44 // [asmjit::RALocalAllocator - Init / Reset]
45 // ============================================================================
46
init()47 Error RALocalAllocator::init() noexcept {
48 PhysToWorkMap* physToWorkMap;
49 WorkToPhysMap* workToPhysMap;
50
51 physToWorkMap = _pass->newPhysToWorkMap();
52 workToPhysMap = _pass->newWorkToPhysMap();
53 if (!physToWorkMap || !workToPhysMap)
54 return DebugUtils::errored(kErrorOutOfMemory);
55
56 _curAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
57 _curAssignment.initMaps(physToWorkMap, workToPhysMap);
58
59 physToWorkMap = _pass->newPhysToWorkMap();
60 workToPhysMap = _pass->newWorkToPhysMap();
61 if (!physToWorkMap || !workToPhysMap)
62 return DebugUtils::errored(kErrorOutOfMemory);
63
64 _tmpAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
65 _tmpAssignment.initMaps(physToWorkMap, workToPhysMap);
66
67 return kErrorOk;
68 }
69
70 // ============================================================================
71 // [asmjit::RALocalAllocator - Assignment]
72 // ============================================================================
73
makeInitialAssignment()74 Error RALocalAllocator::makeInitialAssignment() noexcept {
75 FuncNode* func = _pass->func();
76 RABlock* entry = _pass->entryBlock();
77
78 ZoneBitVector& liveIn = entry->liveIn();
79 uint32_t argCount = func->argCount();
80 uint32_t numIter = 1;
81
82 for (uint32_t iter = 0; iter < numIter; iter++) {
83 for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
84 for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
85 // Unassigned argument.
86 VirtReg* virtReg = func->argPack(argIndex)[valueIndex];
87 if (!virtReg)
88 continue;
89
90 // Unreferenced argument.
91 RAWorkReg* workReg = virtReg->workReg();
92 if (!workReg)
93 continue;
94
95 // Overwritten argument.
96 uint32_t workId = workReg->workId();
97 if (!liveIn.bitAt(workId))
98 continue;
99
100 uint32_t group = workReg->group();
101 if (_curAssignment.workToPhysId(group, workId) != RAAssignment::kPhysNone)
102 continue;
103
104 uint32_t allocableRegs = _availableRegs[group] & ~_curAssignment.assigned(group);
105 if (iter == 0) {
106 // First iteration: Try to allocate to home RegId.
107 if (workReg->hasHomeRegId()) {
108 uint32_t physId = workReg->homeRegId();
109 if (Support::bitTest(allocableRegs, physId)) {
110 _curAssignment.assign(group, workId, physId, true);
111 _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->info().type(), physId, workReg->typeId());
112 continue;
113 }
114 }
115
116 numIter = 2;
117 }
118 else {
119 // Second iteration: Pick any other register if the is an unassigned one or assign to stack.
120 if (allocableRegs) {
121 uint32_t physId = Support::ctz(allocableRegs);
122 _curAssignment.assign(group, workId, physId, true);
123 _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->info().type(), physId, workReg->typeId());
124 }
125 else {
126 // This register will definitely need stack, create the slot now and assign also `argIndex`
127 // to it. We will patch `_argsAssignment` later after RAStackAllocator finishes.
128 RAStackSlot* slot = _pass->getOrCreateStackSlot(workReg);
129 if (ASMJIT_UNLIKELY(!slot))
130 return DebugUtils::errored(kErrorOutOfMemory);
131
132 // This means STACK_ARG may be moved to STACK.
133 workReg->addFlags(RAWorkReg::kFlagStackArgToStack);
134 _pass->_numStackArgsToStackSlots++;
135 }
136 }
137 }
138 }
139 }
140
141 return kErrorOk;
142 }
143
replaceAssignment(const PhysToWorkMap * physToWorkMap,const WorkToPhysMap * workToPhysMap)144 Error RALocalAllocator::replaceAssignment(
145 const PhysToWorkMap* physToWorkMap,
146 const WorkToPhysMap* workToPhysMap) noexcept {
147
148 _curAssignment.copyFrom(physToWorkMap, workToPhysMap);
149 return kErrorOk;
150 }
151
switchToAssignment(PhysToWorkMap * dstPhysToWorkMap,WorkToPhysMap * dstWorkToPhysMap,const ZoneBitVector & liveIn,bool dstReadOnly,bool tryMode)152 Error RALocalAllocator::switchToAssignment(
153 PhysToWorkMap* dstPhysToWorkMap,
154 WorkToPhysMap* dstWorkToPhysMap,
155 const ZoneBitVector& liveIn,
156 bool dstReadOnly,
157 bool tryMode) noexcept {
158
159 RAAssignment dst;
160 RAAssignment& cur = _curAssignment;
161
162 dst.initLayout(_pass->_physRegCount, _pass->workRegs());
163 dst.initMaps(dstPhysToWorkMap, dstWorkToPhysMap);
164
165 if (tryMode)
166 return kErrorOk;
167
168 for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
169 // ------------------------------------------------------------------------
170 // STEP 1:
171 // - KILL all registers that are not live at `dst`,
172 // - SPILL all registers that are not assigned at `dst`.
173 // ------------------------------------------------------------------------
174
175 if (!tryMode) {
176 Support::BitWordIterator<uint32_t> it(cur.assigned(group));
177 while (it.hasNext()) {
178 uint32_t physId = it.next();
179 uint32_t workId = cur.physToWorkId(group, physId);
180
181 // Must be true as we iterate over assigned registers.
182 ASMJIT_ASSERT(workId != RAAssignment::kWorkNone);
183
184 // KILL if it's not live on entry.
185 if (!liveIn.bitAt(workId)) {
186 onKillReg(group, workId, physId);
187 continue;
188 }
189
190 // SPILL if it's not assigned on entry.
191 uint32_t altId = dst.workToPhysId(group, workId);
192 if (altId == RAAssignment::kPhysNone) {
193 ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
194 }
195 }
196 }
197
198 // ------------------------------------------------------------------------
199 // STEP 2:
200 // - MOVE and SWAP registers from their current assignments into their
201 // DST assignments.
202 // - Build `willLoadRegs` mask of registers scheduled for `onLoadReg()`.
203 // ------------------------------------------------------------------------
204
205 // Current run-id (1 means more aggressive decisions).
206 int32_t runId = -1;
207 // Remaining registers scheduled for `onLoadReg()`.
208 uint32_t willLoadRegs = 0;
209 // Remaining registers to be allocated in this loop.
210 uint32_t affectedRegs = dst.assigned(group);
211
212 while (affectedRegs) {
213 if (++runId == 2) {
214 if (!tryMode)
215 return DebugUtils::errored(kErrorInvalidState);
216
217 // Stop in `tryMode` if we haven't done anything in past two rounds.
218 break;
219 }
220
221 Support::BitWordIterator<uint32_t> it(affectedRegs);
222 while (it.hasNext()) {
223 uint32_t physId = it.next();
224 uint32_t physMask = Support::bitMask(physId);
225
226 uint32_t curWorkId = cur.physToWorkId(group, physId);
227 uint32_t dstWorkId = dst.physToWorkId(group, physId);
228
229 // The register must have assigned `dstWorkId` as we only iterate over assigned regs.
230 ASMJIT_ASSERT(dstWorkId != RAAssignment::kWorkNone);
231
232 if (curWorkId != RAAssignment::kWorkNone) {
233 // Both assigned.
234 if (curWorkId != dstWorkId) {
235 // Wait a bit if this is the first run, we may avoid this if `curWorkId` moves out.
236 if (runId <= 0)
237 continue;
238
239 uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
240 if (altPhysId == RAAssignment::kPhysNone)
241 continue;
242
243 // Reset as we will do some changes to the current assignment.
244 runId = -1;
245
246 if (_archTraits->hasSwap(group)) {
247 ASMJIT_PROPAGATE(onSwapReg(group, curWorkId, physId, dstWorkId, altPhysId));
248 }
249 else {
250 // SPILL the reg if it's not dirty in DST, otherwise try to MOVE.
251 if (!cur.isPhysDirty(group, physId)) {
252 ASMJIT_PROPAGATE(onKillReg(group, curWorkId, physId));
253 }
254 else {
255 uint32_t allocableRegs = _pass->_availableRegs[group] & ~cur.assigned(group);
256
257 // If possible don't conflict with assigned regs at DST.
258 if (allocableRegs & ~dst.assigned(group))
259 allocableRegs &= ~dst.assigned(group);
260
261 if (allocableRegs) {
262 // MOVE is possible, thus preferred.
263 uint32_t tmpPhysId = Support::ctz(allocableRegs);
264
265 ASMJIT_PROPAGATE(onMoveReg(group, curWorkId, tmpPhysId, physId));
266 _pass->_clobberedRegs[group] |= Support::bitMask(tmpPhysId);
267 }
268 else {
269 // MOVE is impossible, must SPILL.
270 ASMJIT_PROPAGATE(onSpillReg(group, curWorkId, physId));
271 }
272 }
273
274 goto Cleared;
275 }
276 }
277 }
278 else {
279 Cleared:
280 // DST assigned, CUR unassigned.
281 uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
282 if (altPhysId == RAAssignment::kPhysNone) {
283 if (liveIn.bitAt(dstWorkId))
284 willLoadRegs |= physMask; // Scheduled for `onLoadReg()`.
285 affectedRegs &= ~physMask; // Unaffected from now.
286 continue;
287 }
288 ASMJIT_PROPAGATE(onMoveReg(group, dstWorkId, physId, altPhysId));
289 }
290
291 // Both DST and CUR assigned to the same reg or CUR just moved to DST.
292 if ((dst.dirty(group) & physMask) != (cur.dirty(group) & physMask)) {
293 if ((dst.dirty(group) & physMask) == 0) {
294 // CUR dirty, DST not dirty (the assert is just to visualize the condition).
295 ASMJIT_ASSERT(!dst.isPhysDirty(group, physId) && cur.isPhysDirty(group, physId));
296
297 // If `dstReadOnly` is true it means that that block was already
298 // processed and we cannot change from CLEAN to DIRTY. In that case
299 // the register has to be saved as it cannot enter the block DIRTY.
300 if (dstReadOnly)
301 ASMJIT_PROPAGATE(onSaveReg(group, dstWorkId, physId));
302 else
303 dst.makeDirty(group, dstWorkId, physId);
304 }
305 else {
306 // DST dirty, CUR not dirty (the assert is just to visualize the condition).
307 ASMJIT_ASSERT(dst.isPhysDirty(group, physId) && !cur.isPhysDirty(group, physId));
308
309 cur.makeDirty(group, dstWorkId, physId);
310 }
311 }
312
313 // Must match now...
314 ASMJIT_ASSERT(dst.physToWorkId(group, physId) == cur.physToWorkId(group, physId));
315 ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));
316
317 runId = -1;
318 affectedRegs &= ~physMask;
319 }
320 }
321
322 // ------------------------------------------------------------------------
323 // STEP 3:
324 // - Load registers specified by `willLoadRegs`.
325 // ------------------------------------------------------------------------
326
327 {
328 Support::BitWordIterator<uint32_t> it(willLoadRegs);
329 while (it.hasNext()) {
330 uint32_t physId = it.next();
331
332 if (!cur.isPhysAssigned(group, physId)) {
333 uint32_t workId = dst.physToWorkId(group, physId);
334
335 // The algorithm is broken if it tries to load a register that is not in LIVE-IN.
336 ASMJIT_ASSERT(liveIn.bitAt(workId) == true);
337
338 ASMJIT_PROPAGATE(onLoadReg(group, workId, physId));
339 if (dst.isPhysDirty(group, physId))
340 cur.makeDirty(group, workId, physId);
341 ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));
342 }
343 else {
344 // Not possible otherwise.
345 ASMJIT_ASSERT(tryMode == true);
346 }
347 }
348 }
349 }
350
351 if (!tryMode) {
352 // Hre is a code that dumps the conflicting part if something fails here:
353 // if (!dst.equals(cur)) {
354 // uint32_t physTotal = dst._layout.physTotal;
355 // uint32_t workCount = dst._layout.workCount;
356 //
357 // for (uint32_t physId = 0; physId < physTotal; physId++) {
358 // uint32_t dstWorkId = dst._physToWorkMap->workIds[physId];
359 // uint32_t curWorkId = cur._physToWorkMap->workIds[physId];
360 // if (dstWorkId != curWorkId)
361 // fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId);
362 // }
363 //
364 // for (uint32_t workId = 0; workId < workCount; workId++) {
365 // uint32_t dstPhysId = dst._workToPhysMap->physIds[workId];
366 // uint32_t curPhysId = cur._workToPhysMap->physIds[workId];
367 // if (dstPhysId != curPhysId)
368 // fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId);
369 // }
370 // }
371 ASMJIT_ASSERT(dst.equals(cur));
372 }
373
374 return kErrorOk;
375 }
376
spillScratchGpRegsBeforeEntry(uint32_t scratchRegs)377 Error RALocalAllocator::spillScratchGpRegsBeforeEntry(uint32_t scratchRegs) noexcept {
378 uint32_t group = BaseReg::kGroupGp;
379 Support::BitWordIterator<uint32_t> it(scratchRegs);
380
381 while (it.hasNext()) {
382 uint32_t physId = it.next();
383 if (_curAssignment.isPhysAssigned(group, physId)) {
384 uint32_t workId = _curAssignment.physToWorkId(group, physId);
385 ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
386 }
387 }
388
389 return kErrorOk;
390 }
391
392 // ============================================================================
393 // [asmjit::RALocalAllocator - Allocation]
394 // ============================================================================
395
allocInst(InstNode * node)396 Error RALocalAllocator::allocInst(InstNode* node) noexcept {
397 RAInst* raInst = node->passData<RAInst>();
398
399 RATiedReg* outTiedRegs[Globals::kMaxPhysRegs];
400 RATiedReg* dupTiedRegs[Globals::kMaxPhysRegs];
401
402 // The cursor must point to the previous instruction for a possible instruction insertion.
403 _cc->_setCursor(node->prev());
404
405 _node = node;
406 _raInst = raInst;
407 _tiedTotal = raInst->_tiedTotal;
408 _tiedCount = raInst->_tiedCount;
409
410 // Whether we already replaced register operand with memory operand.
411 bool rmAllocated = false;
412
413 for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
414 uint32_t i, count = this->tiedCount(group);
415 RATiedReg* tiedRegs = this->tiedRegs(group);
416
417 uint32_t willUse = _raInst->_usedRegs[group];
418 uint32_t willOut = _raInst->_clobberedRegs[group];
419 uint32_t willFree = 0;
420 uint32_t usePending = count;
421
422 uint32_t outTiedCount = 0;
423 uint32_t dupTiedCount = 0;
424
425 // ------------------------------------------------------------------------
426 // STEP 1:
427 //
428 // Calculate `willUse` and `willFree` masks based on tied registers we have.
429 //
430 // We don't do any assignment decisions at this stage as we just need to
431 // collect some information first. Then, after we populate all masks needed
432 // we can finally make some decisions in the second loop. The main reason
433 // for this is that we really need `willFree` to make assignment decisions
434 // for `willUse`, because if we mark some registers that will be freed, we
435 // can consider them in decision making afterwards.
436 // ------------------------------------------------------------------------
437
438 for (i = 0; i < count; i++) {
439 RATiedReg* tiedReg = &tiedRegs[i];
440
441 // Add OUT and KILL to `outPending` for CLOBBERing and/or OUT assignment.
442 if (tiedReg->isOutOrKill())
443 outTiedRegs[outTiedCount++] = tiedReg;
444
445 if (tiedReg->isDuplicate())
446 dupTiedRegs[dupTiedCount++] = tiedReg;
447
448 if (!tiedReg->isUse()) {
449 tiedReg->markUseDone();
450 usePending--;
451 continue;
452 }
453
454 uint32_t workId = tiedReg->workId();
455 uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
456
457 if (tiedReg->hasUseId()) {
458 // If the register has `useId` it means it can only be allocated in that register.
459 uint32_t useMask = Support::bitMask(tiedReg->useId());
460
461 // RAInstBuilder must have collected `usedRegs` on-the-fly.
462 ASMJIT_ASSERT((willUse & useMask) != 0);
463
464 if (assignedId == tiedReg->useId()) {
465 // If the register is already allocated in this one, mark it done and continue.
466 tiedReg->markUseDone();
467 if (tiedReg->isWrite())
468 _curAssignment.makeDirty(group, workId, assignedId);
469 usePending--;
470 willUse |= useMask;
471 }
472 else {
473 willFree |= useMask & _curAssignment.assigned(group);
474 }
475 }
476 else {
477 // Check if the register must be moved to `allocableRegs`.
478 uint32_t allocableRegs = tiedReg->allocableRegs();
479 if (assignedId != RAAssignment::kPhysNone) {
480 uint32_t assignedMask = Support::bitMask(assignedId);
481 if ((allocableRegs & ~willUse) & assignedMask) {
482 tiedReg->setUseId(assignedId);
483 tiedReg->markUseDone();
484 if (tiedReg->isWrite())
485 _curAssignment.makeDirty(group, workId, assignedId);
486 usePending--;
487 willUse |= assignedMask;
488 }
489 else {
490 willFree |= assignedMask;
491 }
492 }
493 }
494 }
495
496 // ------------------------------------------------------------------------
497 // STEP 2:
498 //
499 // Do some decision making to find the best candidates of registers that
500 // need to be assigned, moved, and/or spilled. Only USE registers are
501 // considered here, OUT will be decided later after all CLOBBERed and OUT
502 // registers are unassigned.
503 // ------------------------------------------------------------------------
504
505 if (usePending) {
506 // TODO: Not sure `liveRegs` should be used, maybe willUse and willFree would be enough and much more clear.
507
508 // All registers that are currently alive without registers that will be freed.
509 uint32_t liveRegs = _curAssignment.assigned(group) & ~willFree;
510
511 for (i = 0; i < count; i++) {
512 RATiedReg* tiedReg = &tiedRegs[i];
513 if (tiedReg->isUseDone()) continue;
514
515 uint32_t workId = tiedReg->workId();
516 uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
517
518 // REG/MEM: Patch register operand to memory operand if not allocated.
519 if (!rmAllocated && tiedReg->hasUseRM()) {
520 if (assignedId == RAAssignment::kPhysNone && Support::isPowerOf2(tiedReg->useRewriteMask())) {
521 RAWorkReg* workReg = workRegById(tiedReg->workId());
522 uint32_t opIndex = Support::ctz(tiedReg->useRewriteMask()) / uint32_t(sizeof(Operand) / sizeof(uint32_t));
523 uint32_t rmSize = tiedReg->rmSize();
524
525 if (rmSize <= workReg->virtReg()->virtSize()) {
526 Operand& op = node->operands()[opIndex];
527 op = _pass->workRegAsMem(workReg);
528 op.as<BaseMem>().setSize(rmSize);
529 tiedReg->_useRewriteMask = 0;
530
531 tiedReg->markUseDone();
532 usePending--;
533
534 rmAllocated = true;
535 continue;
536 }
537 }
538 }
539
540 if (!tiedReg->hasUseId()) {
541 uint32_t allocableRegs = tiedReg->allocableRegs() & ~(willFree | willUse);
542
543 // DECIDE where to assign the USE register.
544 uint32_t useId = decideOnAssignment(group, workId, assignedId, allocableRegs);
545 uint32_t useMask = Support::bitMask(useId);
546
547 willUse |= useMask;
548 willFree |= useMask & liveRegs;
549 tiedReg->setUseId(useId);
550
551 if (assignedId != RAAssignment::kPhysNone) {
552 uint32_t assignedMask = Support::bitMask(assignedId);
553
554 willFree |= assignedMask;
555 liveRegs &= ~assignedMask;
556
557 // OPTIMIZATION: Assign the USE register here if it's possible.
558 if (!(liveRegs & useMask)) {
559 ASMJIT_PROPAGATE(onMoveReg(group, workId, useId, assignedId));
560 tiedReg->markUseDone();
561 if (tiedReg->isWrite())
562 _curAssignment.makeDirty(group, workId, useId);
563 usePending--;
564 }
565 }
566 else {
567 // OPTIMIZATION: Assign the USE register here if it's possible.
568 if (!(liveRegs & useMask)) {
569 ASMJIT_PROPAGATE(onLoadReg(group, workId, useId));
570 tiedReg->markUseDone();
571 if (tiedReg->isWrite())
572 _curAssignment.makeDirty(group, workId, useId);
573 usePending--;
574 }
575 }
576
577 liveRegs |= useMask;
578 }
579 }
580 }
581
582 // Initially all used regs will be marked clobbered.
583 uint32_t clobberedByInst = willUse | willOut;
584
585 // ------------------------------------------------------------------------
586 // STEP 3:
587 //
588 // Free all registers that we marked as `willFree`. Only registers that are not
589 // USEd by the instruction are considered as we don't want to free regs we need.
590 // ------------------------------------------------------------------------
591
592 if (willFree) {
593 uint32_t allocableRegs = _availableRegs[group] & ~(_curAssignment.assigned(group) | willFree | willUse | willOut);
594 Support::BitWordIterator<uint32_t> it(willFree);
595
596 do {
597 uint32_t assignedId = it.next();
598 if (_curAssignment.isPhysAssigned(group, assignedId)) {
599 uint32_t workId = _curAssignment.physToWorkId(group, assignedId);
600
601 // DECIDE whether to MOVE or SPILL.
602 if (allocableRegs) {
603 uint32_t reassignedId = decideOnReassignment(group, workId, assignedId, allocableRegs);
604 if (reassignedId != RAAssignment::kPhysNone) {
605 ASMJIT_PROPAGATE(onMoveReg(group, workId, reassignedId, assignedId));
606 allocableRegs ^= Support::bitMask(reassignedId);
607 continue;
608 }
609 }
610
611 ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
612 }
613 } while (it.hasNext());
614 }
615
616 // ------------------------------------------------------------------------
617 // STEP 4:
618 //
619 // ALLOCATE / SHUFFLE all registers that we marked as `willUse` and weren't
620 // allocated yet. This is a bit complicated as the allocation is iterative.
621 // In some cases we have to wait before allocating a particual physical
622 // register as it's still occupied by some other one, which we need to move
623 // before we can use it. In this case we skip it and allocate another some
624 // other instead (making it free for another iteration).
625 //
626 // NOTE: Iterations are mostly important for complicated allocations like
627 // function calls, where there can be up to N registers used at once. Asm
628 // instructions won't run the loop more than once in 99.9% of cases as they
629 // use 2..3 registers in average.
630 // ------------------------------------------------------------------------
631
632 if (usePending) {
633 bool mustSwap = false;
634 do {
635 uint32_t oldPending = usePending;
636
637 for (i = 0; i < count; i++) {
638 RATiedReg* thisTiedReg = &tiedRegs[i];
639 if (thisTiedReg->isUseDone()) continue;
640
641 uint32_t thisWorkId = thisTiedReg->workId();
642 uint32_t thisPhysId = _curAssignment.workToPhysId(group, thisWorkId);
643
644 // This would be a bug, fatal one!
645 uint32_t targetPhysId = thisTiedReg->useId();
646 ASMJIT_ASSERT(targetPhysId != thisPhysId);
647
648 uint32_t targetWorkId = _curAssignment.physToWorkId(group, targetPhysId);
649 if (targetWorkId != RAAssignment::kWorkNone) {
650 RAWorkReg* targetWorkReg = workRegById(targetWorkId);
651
652 // Swapping two registers can solve two allocation tasks by emitting
653 // just a single instruction. However, swap is only available on few
654 // architectures and it's definitely not available for each register
655 // group. Calling `onSwapReg()` before checking these would be fatal.
656 if (_archTraits->hasSwap(group) && thisPhysId != RAAssignment::kPhysNone) {
657 ASMJIT_PROPAGATE(onSwapReg(group, thisWorkId, thisPhysId, targetWorkId, targetPhysId));
658
659 thisTiedReg->markUseDone();
660 if (thisTiedReg->isWrite())
661 _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
662 usePending--;
663
664 // Double-hit.
665 RATiedReg* targetTiedReg = RALocal_findTiedRegByWorkId(tiedRegs, count, targetWorkReg->workId());
666 if (targetTiedReg && targetTiedReg->useId() == thisPhysId) {
667 targetTiedReg->markUseDone();
668 if (targetTiedReg->isWrite())
669 _curAssignment.makeDirty(group, targetWorkId, thisPhysId);
670 usePending--;
671 }
672 continue;
673 }
674
675 if (!mustSwap)
676 continue;
677
678 // Only branched here if the previous iteration did nothing. This is
679 // essentially a SWAP operation without having a dedicated instruction
680 // for that purpose (vector registers, etc). The simplest way to
681 // handle such case is to SPILL the target register.
682 ASMJIT_PROPAGATE(onSpillReg(group, targetWorkId, targetPhysId));
683 }
684
685 if (thisPhysId != RAAssignment::kPhysNone) {
686 ASMJIT_PROPAGATE(onMoveReg(group, thisWorkId, targetPhysId, thisPhysId));
687
688 thisTiedReg->markUseDone();
689 if (thisTiedReg->isWrite())
690 _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
691 usePending--;
692 }
693 else {
694 ASMJIT_PROPAGATE(onLoadReg(group, thisWorkId, targetPhysId));
695
696 thisTiedReg->markUseDone();
697 if (thisTiedReg->isWrite())
698 _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
699 usePending--;
700 }
701 }
702
703 mustSwap = (oldPending == usePending);
704 } while (usePending);
705 }
706
707 // ------------------------------------------------------------------------
708 // STEP 5:
709 //
710 // KILL registers marked as KILL/OUT.
711 // ------------------------------------------------------------------------
712
713 uint32_t outPending = outTiedCount;
714 if (outTiedCount) {
715 for (i = 0; i < outTiedCount; i++) {
716 RATiedReg* tiedReg = outTiedRegs[i];
717
718 uint32_t workId = tiedReg->workId();
719 uint32_t physId = _curAssignment.workToPhysId(group, workId);
720
721 // Must check if it's allocated as KILL can be related to OUT (like KILL
722 // immediately after OUT, which could mean the register is not assigned).
723 if (physId != RAAssignment::kPhysNone) {
724 ASMJIT_PROPAGATE(onKillReg(group, workId, physId));
725 willOut &= ~Support::bitMask(physId);
726 }
727
728 // We still maintain number of pending registers for OUT assignment.
729 // So, if this is only KILL, not OUT, we can safely decrement it.
730 outPending -= !tiedReg->isOut();
731 }
732 }
733
734 // ------------------------------------------------------------------------
735 // STEP 6:
736 //
737 // SPILL registers that will be CLOBBERed. Since OUT and KILL were
738 // already processed this is used mostly to handle function CALLs.
739 // ------------------------------------------------------------------------
740
741 if (willOut) {
742 Support::BitWordIterator<uint32_t> it(willOut);
743 do {
744 uint32_t physId = it.next();
745 uint32_t workId = _curAssignment.physToWorkId(group, physId);
746
747 if (workId == RAAssignment::kWorkNone)
748 continue;
749
750 ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
751 } while (it.hasNext());
752 }
753
754 // ------------------------------------------------------------------------
755 // STEP 7:
756 //
757 // Duplication.
758 // ------------------------------------------------------------------------
759
760 for (i = 0; i < dupTiedCount; i++) {
761 RATiedReg* tiedReg = dupTiedRegs[i];
762 uint32_t workId = tiedReg->workId();
763 uint32_t srcId = tiedReg->useId();
764
765 Support::BitWordIterator<uint32_t> it(tiedReg->_allocableRegs);
766 while (it.hasNext()) {
767 uint32_t dstId = it.next();
768 if (dstId == srcId)
769 continue;
770 _pass->emitMove(workId, dstId, srcId);
771 }
772 }
773
774 // ------------------------------------------------------------------------
775 // STEP 8:
776 //
777 // Assign OUT registers.
778 // ------------------------------------------------------------------------
779
780 if (outPending) {
781 // Live registers, we need a separate variable (outside of `_curAssignment)
782 // to hold these because of KILLed registers. If we KILL a register here it
783 // will go out from `_curAssignment`, but we cannot assign to it in here.
784 uint32_t liveRegs = _curAssignment.assigned(group);
785
786 // Must avoid as they have been already OUTed (added during the loop).
787 uint32_t outRegs = 0;
788
789 // Must avoid as they collide with already allocated ones.
790 uint32_t avoidRegs = willUse & ~clobberedByInst;
791
792 for (i = 0; i < outTiedCount; i++) {
793 RATiedReg* tiedReg = outTiedRegs[i];
794 if (!tiedReg->isOut()) continue;
795
796 uint32_t workId = tiedReg->workId();
797 uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
798
799 if (assignedId != RAAssignment::kPhysNone)
800 ASMJIT_PROPAGATE(onKillReg(group, workId, assignedId));
801
802 uint32_t physId = tiedReg->outId();
803 if (physId == RAAssignment::kPhysNone) {
804 uint32_t allocableRegs = _availableRegs[group] & ~(outRegs | avoidRegs);
805
806 if (!(allocableRegs & ~liveRegs)) {
807 // There are no more registers, decide which one to spill.
808 uint32_t spillWorkId;
809 physId = decideOnSpillFor(group, workId, allocableRegs & liveRegs, &spillWorkId);
810 ASMJIT_PROPAGATE(onSpillReg(group, spillWorkId, physId));
811 }
812 else {
813 physId = decideOnAssignment(group, workId, RAAssignment::kPhysNone, allocableRegs & ~liveRegs);
814 }
815 }
816
817 // OUTs are CLOBBERed thus cannot be ASSIGNed right now.
818 ASMJIT_ASSERT(!_curAssignment.isPhysAssigned(group, physId));
819
820 if (!tiedReg->isKill())
821 ASMJIT_PROPAGATE(onAssignReg(group, workId, physId, true));
822
823 tiedReg->setOutId(physId);
824 tiedReg->markOutDone();
825
826 outRegs |= Support::bitMask(physId);
827 liveRegs &= ~Support::bitMask(physId);
828 outPending--;
829 }
830
831 clobberedByInst |= outRegs;
832 ASMJIT_ASSERT(outPending == 0);
833 }
834
835 _clobberedRegs[group] |= clobberedByInst;
836 }
837
838 return kErrorOk;
839 }
840
spillAfterAllocation(InstNode * node)841 Error RALocalAllocator::spillAfterAllocation(InstNode* node) noexcept {
842 // This is experimental feature that would spill registers that don't have
843 // home-id and are last in this basic block. This prevents saving these regs
844 // in other basic blocks and then restoring them (mostly relevant for loops).
845 RAInst* raInst = node->passData<RAInst>();
846 uint32_t count = raInst->tiedCount();
847
848 for (uint32_t i = 0; i < count; i++) {
849 RATiedReg* tiedReg = raInst->tiedAt(i);
850 if (tiedReg->isLast()) {
851 uint32_t workId = tiedReg->workId();
852 RAWorkReg* workReg = workRegById(workId);
853 if (!workReg->hasHomeRegId()) {
854 uint32_t group = workReg->group();
855 uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
856 if (assignedId != RAAssignment::kPhysNone) {
857 _cc->_setCursor(node);
858 ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
859 }
860 }
861 }
862 }
863
864 return kErrorOk;
865 }
866
allocBranch(InstNode * node,RABlock * target,RABlock * cont)867 Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* cont) noexcept {
868 // TODO: This should be used to make the branch allocation better.
869 DebugUtils::unused(cont);
870
871 // The cursor must point to the previous instruction for a possible instruction insertion.
872 _cc->_setCursor(node->prev());
873
874 // Use TryMode of `switchToAssignment()` if possible.
875 if (target->hasEntryAssignment()) {
876 ASMJIT_PROPAGATE(switchToAssignment(
877 target->entryPhysToWorkMap(),
878 target->entryWorkToPhysMap(),
879 target->liveIn(),
880 target->isAllocated(),
881 true));
882 }
883
884 ASMJIT_PROPAGATE(allocInst(node));
885 ASMJIT_PROPAGATE(spillRegsBeforeEntry(target));
886
887 if (target->hasEntryAssignment()) {
888 BaseNode* injectionPoint = _pass->extraBlock()->prev();
889 BaseNode* prevCursor = _cc->setCursor(injectionPoint);
890
891 _tmpAssignment.copyFrom(_curAssignment);
892 ASMJIT_PROPAGATE(switchToAssignment(
893 target->entryPhysToWorkMap(),
894 target->entryWorkToPhysMap(),
895 target->liveIn(),
896 target->isAllocated(),
897 false));
898
899 BaseNode* curCursor = _cc->cursor();
900 if (curCursor != injectionPoint) {
901 // Additional instructions emitted to switch from the current state to
902 // the `target` state. This means that we have to move these instructions
903 // into an independent code block and patch the jump location.
904 Operand& targetOp = node->op(node->opCount() - 1);
905 if (ASMJIT_UNLIKELY(!targetOp.isLabel()))
906 return DebugUtils::errored(kErrorInvalidState);
907
908 Label trampoline = _cc->newLabel();
909 Label savedTarget = targetOp.as<Label>();
910
911 // Patch `target` to point to the `trampoline` we just created.
912 targetOp = trampoline;
913
914 // Clear a possible SHORT form as we have no clue now if the SHORT form would
915 // be encodable after patching the target to `trampoline` (X86 specific).
916 node->clearInstOptions(BaseInst::kOptionShortForm);
917
918 // Finalize the switch assignment sequence.
919 ASMJIT_PROPAGATE(_pass->emitJump(savedTarget));
920 _cc->_setCursor(injectionPoint);
921 _cc->bind(trampoline);
922 }
923
924 _cc->_setCursor(prevCursor);
925 _curAssignment.swap(_tmpAssignment);
926 }
927 else {
928 ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(target, block(), _curAssignment));
929 }
930
931 return kErrorOk;
932 }
933
allocJumpTable(InstNode * node,const RABlocks & targets,RABlock * cont)934 Error RALocalAllocator::allocJumpTable(InstNode* node, const RABlocks& targets, RABlock* cont) noexcept {
935 // TODO: Do we really need to use `cont`?
936 DebugUtils::unused(cont);
937
938 if (targets.empty())
939 return DebugUtils::errored(kErrorInvalidState);
940
941 // The cursor must point to the previous instruction for a possible instruction insertion.
942 _cc->_setCursor(node->prev());
943
944 // All `targets` should have the same sharedAssignmentId, we just read the first.
945 RABlock* anyTarget = targets[0];
946 if (!anyTarget->hasSharedAssignmentId())
947 return DebugUtils::errored(kErrorInvalidState);
948
949 RASharedAssignment& sharedAssignment = _pass->_sharedAssignments[anyTarget->sharedAssignmentId()];
950
951 ASMJIT_PROPAGATE(allocInst(node));
952
953 if (!sharedAssignment.empty()) {
954 ASMJIT_PROPAGATE(switchToAssignment(
955 sharedAssignment.physToWorkMap(),
956 sharedAssignment.workToPhysMap(),
957 sharedAssignment.liveIn(),
958 true, // Read-only.
959 false // Try-mode.
960 ));
961 }
962
963 ASMJIT_PROPAGATE(spillRegsBeforeEntry(anyTarget));
964
965 if (sharedAssignment.empty()) {
966 ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(anyTarget, block(), _curAssignment));
967 }
968
969 return kErrorOk;
970 }
971
972 // ============================================================================
973 // [asmjit::RALocalAllocator - Decision Making]
974 // ============================================================================
975
decideOnAssignment(uint32_t group,uint32_t workId,uint32_t physId,uint32_t allocableRegs) const976 uint32_t RALocalAllocator::decideOnAssignment(uint32_t group, uint32_t workId, uint32_t physId, uint32_t allocableRegs) const noexcept {
977 ASMJIT_ASSERT(allocableRegs != 0);
978 DebugUtils::unused(group, physId);
979
980 RAWorkReg* workReg = workRegById(workId);
981
982 // Prefer home register id, if possible.
983 if (workReg->hasHomeRegId()) {
984 uint32_t homeId = workReg->homeRegId();
985 if (Support::bitTest(allocableRegs, homeId))
986 return homeId;
987 }
988
989 // Prefer registers used upon block entries.
990 uint32_t previouslyAssignedRegs = workReg->allocatedMask();
991 if (allocableRegs & previouslyAssignedRegs)
992 allocableRegs &= previouslyAssignedRegs;
993
994 return Support::ctz(allocableRegs);
995 }
996
decideOnReassignment(uint32_t group,uint32_t workId,uint32_t physId,uint32_t allocableRegs) const997 uint32_t RALocalAllocator::decideOnReassignment(uint32_t group, uint32_t workId, uint32_t physId, uint32_t allocableRegs) const noexcept {
998 ASMJIT_ASSERT(allocableRegs != 0);
999 DebugUtils::unused(group, physId);
1000
1001 RAWorkReg* workReg = workRegById(workId);
1002
1003 // Prefer allocating back to HomeId, if possible.
1004 if (workReg->hasHomeRegId()) {
1005 if (Support::bitTest(allocableRegs, workReg->homeRegId()))
1006 return workReg->homeRegId();
1007 }
1008
1009 // TODO: [Register Allocator] This could be improved.
1010
1011 // Decided to SPILL.
1012 return RAAssignment::kPhysNone;
1013 }
1014
decideOnSpillFor(uint32_t group,uint32_t workId,uint32_t spillableRegs,uint32_t * spillWorkId) const1015 uint32_t RALocalAllocator::decideOnSpillFor(uint32_t group, uint32_t workId, uint32_t spillableRegs, uint32_t* spillWorkId) const noexcept {
1016 // May be used in the future to decide which register would be best to spill so `workId` can be assigned.
1017 DebugUtils::unused(workId);
1018 ASMJIT_ASSERT(spillableRegs != 0);
1019
1020 Support::BitWordIterator<uint32_t> it(spillableRegs);
1021 uint32_t bestPhysId = it.next();
1022 uint32_t bestWorkId = _curAssignment.physToWorkId(group, bestPhysId);
1023
1024 // Avoid calculating the cost model if there is only one spillable register.
1025 if (it.hasNext()) {
1026 uint32_t bestCost = calculateSpillCost(group, bestWorkId, bestPhysId);
1027 do {
1028 uint32_t localPhysId = it.next();
1029 uint32_t localWorkId = _curAssignment.physToWorkId(group, localPhysId);
1030 uint32_t localCost = calculateSpillCost(group, localWorkId, localPhysId);
1031
1032 if (localCost < bestCost) {
1033 bestCost = localCost;
1034 bestPhysId = localPhysId;
1035 bestWorkId = localWorkId;
1036 }
1037 } while (it.hasNext());
1038 }
1039
1040 *spillWorkId = bestWorkId;
1041 return bestPhysId;
1042 }
1043
1044 ASMJIT_END_NAMESPACE
1045
1046 #endif // !ASMJIT_NO_COMPILER
1047