1(*
2    Copyright David C. J. Matthews 2016-21
3
4    This library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Lesser General Public
6    License version 2.1 as published by the Free Software Foundation.
7
8    This library is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11    Lesser General Public License for more details.
12
13    You should have received a copy of the GNU Lesser General Public
14    License along with this library; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16*)
17
18functor X86AllocateRegisters(
19    structure ICODE: ICodeSig
20    structure IDENTIFY: X86IDENTIFYREFSSIG
21    structure CONFLICTSETS: X86GETCONFLICTSETSIG
22    structure INTSET: INTSETSIG
23
24    sharing ICODE.Sharing = IDENTIFY.Sharing = CONFLICTSETS.Sharing = INTSET
25): X86ALLOCATEREGISTERSSIG =
26struct
27    open ICODE
28    open IDENTIFY
29    open CONFLICTSETS
30    open INTSET
31
32    open Address
33
34    exception InternalError = Misc.InternalError
35
36    local
37        val regs =
38            case targetArch of
39                Native32Bit     => [edi, esi, edx, ecx, ebx, eax]
40            |   Native64Bit     => [edi, esi, edx, ecx, ebx, eax, r14, r13, r12, r11, r10, r9, r8]
41            |   ObjectId32Bit   => [edi, esi, edx, ecx, eax, r14, r13, r12, r11, r10, r9, r8]
42    in
43        val generalRegisters = List.map GenReg regs
44    end
45
46    val floatingPtRegisters =
47        case fpMode of
48            (* XMM0-5 are the only volatile SSE2 registers in Windows X64. *)
49            FPModeSSE2 => List.map XMMReg [xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]
50            (* We can't include fp7 because we need one spare. *)
51            (* For the moment we only have FP0 here.  There are problems with using the
52               others because we need to ensure the stack is empty if we call any
53               non-ML function and we don't currently manage it properly. *)
54        |   FPModeX87 => List.map FPReg [fp0(*, fp1, fp2, fp3, fp4, fp5, fp6*)]
55
56    datatype allocateResult =
57        AllocateSuccess of reg vector
58    |   AllocateFailure of intSet list
59
60    fun allocateRegisters{blocks, regStates, regProps, ...} =
61    let
62        val maxPRegs = Vector.length regStates
63        and numBlocks = Vector.length blocks
64
65        (* Hint values.  The idea of hints is that by using a hinted register
66           we may avoid an unnecessary move instruction.  realHints is set when
67           a pseudo-register is going to be loaded from a specific register
68           e.g. a register argument, or moved into one e.g. ecx for a shift.
69           friends is set to the other pseudo-registers that may be associated
70           with the pseudo-register.  E.g. the argument and destination of
71           an arithmetic operation where choosing the same register for
72           each may avoid a move. *)
73        val realHints = Array.array(maxPRegs, NONE: reg option)
74
75        (* Sources and destinations.  These indicate the registers that are
76           the sources and destinations of the indexing register and are used
77           as hints.  If a register has been allocated for a source or destination
78           we may be able to reuse it.  *)
79        val sourceRegs = Array.array(maxPRegs, []: int list)
80        and destinationRegs = Array.array(maxPRegs, []: int list)
81
82        local
83            (* Turn cached locations into register arguments. *)
84            fun decache(StackLocation{cache=SOME r, ...}) = RegisterArgument r
85            |   decache(MemoryLocation{cache=SOME r, ...}) = RegisterArgument r
86            |   decache arg = arg
87
88            fun addRealHint(r, reg) =
89                case Array.sub(realHints, r) of
90                    NONE => Array.update(realHints, r, SOME reg)
91                |   SOME _ => ()
92
93            fun addSourceAndDestinationHint{src, dst} =
94            let
95                val {conflicts, ...} = Vector.sub(regStates, src)
96            in
97                (* If they conflict we can't add them. *)
98                if member(dst, conflicts)
99                then ()
100                else
101                let
102                    val currentDests = Array.sub(destinationRegs, src)
103                    val currentSources = Array.sub(sourceRegs, dst)
104                in
105                    if List.exists(fn i => i=dst) currentDests
106                    then ()
107                    else Array.update(destinationRegs, src, dst :: currentDests);
108                    if List.exists(fn i => i=src) currentSources
109                    then ()
110                    else Array.update(sourceRegs, dst, src :: currentSources)
111                end
112            end
113
114        in
115            (* Add the hints to steer the register allocation.  The idea is to avoid moves between
116               registers by getting values into the appropriate register in advance. *)
117            fun addHints{instr=LoadArgument{source, dest=PReg dreg, ...}, ...} =
118                (
119                    case decache source of
120                        RegisterArgument(PReg sreg) => addSourceAndDestinationHint {src=sreg, dst=dreg}
121                    |   _ => ()
122                )
123
124            |   addHints{instr=StoreArgument{ source, kind, ... }, ...} =
125                (
126                    case (decache source, kind, targetArch) of
127                        (* Special case for byte register on X86/32 *)
128                        (RegisterArgument(PReg sReg), MoveByte, Native32Bit) => addRealHint(sReg, GenReg ecx)
129                    |   _ => ()
130                )
131
132            |   addHints{instr=BeginFunction{regArgs, ...}, ...} =
133                    List.app (fn (PReg pr, reg) => addRealHint(pr, reg)) regArgs
134
135            |   addHints{instr=TailRecursiveCall{regArgs, ...}, ...} =
136                    List.app (fn (arg, reg) => case decache arg of RegisterArgument(PReg pr) => addRealHint(pr, reg) | _ => ()) regArgs
137
138            |   addHints{instr=FunctionCall{regArgs, dest=PReg dreg, realDest, ...}, ...} =
139                (
140                    addRealHint(dreg, realDest);
141                    List.app (fn (arg, reg) => case decache arg of RegisterArgument(PReg pr) => addRealHint(pr, reg) | _ => ()) regArgs
142                )
143
144            |   addHints{instr=InitialiseMem{size=PReg sReg, addr=PReg aReg, init=PReg iReg}, ...} =
145                    (addRealHint(aReg, GenReg edi); addRealHint(iReg, GenReg eax); addRealHint(sReg, GenReg ecx))
146
147            |   addHints{instr=JumpLoop{regArgs, ...}, ...} =
148                let
149                    fun addRegArg (arg, PReg resReg) =
150                        case decache arg of
151                            RegisterArgument(PReg argReg) => addSourceAndDestinationHint {dst=resReg, src=argReg}
152                        |   _ => ()
153                in
154                    List.app addRegArg regArgs
155                end
156
157            |   addHints{instr=RaiseExceptionPacket{ packetReg=PReg preg }, ...} = addRealHint(preg, GenReg eax)
158
159            |   addHints{instr=BeginHandler{packetReg=PReg pReg, workReg=_}, ...} =
160                    (* The exception packet is in rax. *) addRealHint(pReg, GenReg eax)
161
162            |   addHints{instr=ReturnResultFromFunction { resultReg=PReg resReg, realReg, ... }, ...} = addRealHint(resReg, realReg)
163
164            |   addHints{instr=ArithmeticFunction{oper=SUB, resultReg=PReg resReg, operand1=PReg op1Reg, ...}, ...} =
165                    (* Can only be one way round. *)
166                    addSourceAndDestinationHint {dst=resReg, src=op1Reg}
167
168            |   addHints{instr=ArithmeticFunction{resultReg=PReg resReg, operand1=PReg op1Reg, operand2, ...}, ...} =
169                (
170                    addSourceAndDestinationHint {dst=resReg, src=op1Reg};
171                    case decache operand2 of
172                        RegisterArgument(PReg op2Reg) =>
173                            addSourceAndDestinationHint {dst=resReg, src=op2Reg}
174                    |   _ => ()
175                )
176
177            |   addHints{instr=CopyToCache{source=PReg sreg, dest=PReg dreg, ...}, ...} =
178                    addSourceAndDestinationHint {src=sreg, dst=dreg}
179
180            |   addHints{instr=UntagValue{source=PReg sReg, dest=PReg dReg, ...}, ...} =
181                    addSourceAndDestinationHint{src=sReg, dst=dReg}
182
183            |   addHints{instr=ShiftOperation{resultReg=PReg resReg, operand=PReg operReg, shiftAmount=IntegerConstant _, ...}, ...} =
184                    addSourceAndDestinationHint{dst=resReg, src=operReg}
185
186            |   addHints{instr=ShiftOperation{resultReg=PReg resReg, operand=PReg operReg,
187                                            shiftAmount=RegisterArgument(PReg shiftReg), ...}, ...} =
188                    (addSourceAndDestinationHint{dst=resReg, src=operReg}; addRealHint(shiftReg, GenReg ecx))
189
190            |   addHints{instr=Multiplication{resultReg=PReg resReg, operand1=PReg op1Reg, operand2, ...}, ...} =
191                (
192                    addSourceAndDestinationHint{dst=resReg, src=op1Reg};
193                    case decache operand2 of
194                        RegisterArgument(PReg op2Reg) =>
195                            addSourceAndDestinationHint {dst=resReg, src=op2Reg}
196                    |   _ => ()
197                )
198
199            |   addHints{instr=Division{dividend=PReg regDivid, quotient=PReg regQuot, remainder=PReg regRem, ...}, ...} =
200                    (addRealHint(regDivid, GenReg eax); addRealHint(regQuot, GenReg eax); addRealHint(regRem, GenReg edx))
201
202            |   addHints{instr=CompareByteVectors{vec1Addr=PReg v1Reg, vec2Addr=PReg v2Reg, length=PReg lReg, ...}, ...} =
203                    (addRealHint(v1Reg, GenReg esi); addRealHint(v2Reg, GenReg edi); addRealHint(lReg, GenReg ecx))
204
205            |   addHints{instr=BlockMove{srcAddr=PReg sReg, destAddr=PReg dReg, length=PReg lReg, ...}, ...} =
206                    (addRealHint(sReg, GenReg esi); addRealHint(dReg, GenReg edi); addRealHint(lReg, GenReg ecx))
207
208            |   addHints{instr=X87FPGetCondition{dest=PReg dReg, ...}, ...} = addRealHint(dReg, GenReg eax)
209
210            |   addHints{instr=X87FPArith{resultReg=PReg resReg, arg1=PReg op1Reg, ...}, ...} =
211                    addSourceAndDestinationHint{dst=resReg, src=op1Reg}
212
213            |   addHints{instr=X87FPUnaryOps{dest=PReg resReg, source=PReg op1Reg, ...}, ...} =
214                    addSourceAndDestinationHint{dst=resReg, src=op1Reg}
215
216            |   addHints{instr=SSE2FPBinary{resultReg=PReg resReg, arg1=PReg op1Reg, ...}, ...} =
217                    addSourceAndDestinationHint{dst=resReg, src=op1Reg}
218
219            |   addHints{instr=AtomicExchangeAndAdd{resultReg=PReg resReg, source=PReg op1Reg, ...}, ...} =
220                    addSourceAndDestinationHint{dst=resReg, src=op1Reg}
221
222            |   addHints _ = ()
223
224        end
225
226        val allocatedRegs = Array.array(maxPRegs, NONE: reg option)
227        val failures = ref []: intSet list ref
228
229        (* Find a real register for a preg.
230           1.  If a register is already allocated use that.
231           2.  Try the "preferred" register if one has been given.
232           3.  Try the realHints value if there is one.
233           4.  See if there is a "friend" that has an appropriate register
234           5.  Look at all the registers and find one. *)
235        fun findRegister(r, pref, regSet) =
236        case Array.sub(allocatedRegs, r) of
237            SOME reg => reg
238        |   NONE =>
239            let
240                val {conflicts, realConflicts, ...} = Vector.sub(regStates, r)
241                (* Find the registers we've already allocated that may conflict. *)
242                val conflictingRegs =
243                    List.mapPartial(fn i => Array.sub(allocatedRegs, i)) (setToList conflicts) @
244                        realConflicts
245
246                fun isFree aReg = not (List.exists(fn i => i=aReg) conflictingRegs)
247
248                fun tryAReg NONE = NONE
249                |   tryAReg (somePref as SOME prefReg) =
250                        if isFree prefReg
251                        then (Array.update(allocatedRegs, r, somePref); somePref)
252                        else NONE
253
254                fun findAReg [] =
255                    (
256                        (* This failed.  We're going to have to spill something. *)
257                        failures := conflicts :: ! failures;
258                        hd regSet (* Return a register to satisfy everything. *)
259                    )
260                |   findAReg (reg::regs) =
261                        if isFree reg then (Array.update(allocatedRegs, r, SOME reg); reg)
262                        else findAReg regs
263
264
265                (* Search the sources and destinations to see if a register has
266                   already been allocated or there is a hint. *)
267                fun findAFriend([], [], _) = NONE
268
269                |   findAFriend(aDest :: otherDests, sources, alreadySeen) =
270                    let
271                        val possReg =
272                            case Array.sub(allocatedRegs, aDest) of
273                                v as SOME _ => tryAReg v
274                            |   NONE => tryAReg(Array.sub(realHints, aDest))
275                    in
276                        case possReg of
277                            reg as SOME _ => reg
278                        |   NONE =>
279                            let
280                                (* Add the destinations of the destinations to the list
281                                   if they don't conflict and haven't been seen. *)
282                                fun newFriend f =
283                                    not(List.exists (fn n => n=f) alreadySeen) andalso not(member(f, conflicts))
284                                val fOfF = List.filter newFriend (Array.sub(destinationRegs, aDest))
285                            in
286                                findAFriend(otherDests @ fOfF, sources, aDest :: alreadySeen)
287                            end
288                    end
289
290                |   findAFriend([], aSrc :: otherSrcs, alreadySeen) =
291                    let
292                        val possReg =
293                            case Array.sub(allocatedRegs, aSrc) of
294                                v as SOME _ => tryAReg v
295                            |   NONE => tryAReg(Array.sub(realHints, aSrc))
296                    in
297                        case possReg of
298                            reg as SOME _ => reg
299                        |   NONE =>
300                            let
301                                (* Add the sources of the sources to the list
302                                   if they don't conflict and haven't been seen. *)
303                                fun newFriend f =
304                                    not(List.exists (fn n => n=f) alreadySeen) andalso not(member(f, conflicts))
305                                val fOfF = List.filter newFriend (Array.sub(sourceRegs, aSrc))
306                            in
307                                findAFriend([], otherSrcs @ fOfF, aSrc :: alreadySeen)
308                            end
309                    end
310
311                (* See if there is a friend that has a register already or a
312                   hint.  Friends are registers that don't conflict and can
313                   possibly avoid an extra move. *)
314(*                    fun findAFriend([], _) = NONE
315                |   findAFriend(friend :: tail, old) =
316                    let
317                        val possReg =
318                            case Array.sub(allocatedRegs, friend) of
319                                v as SOME _ => tryAReg v
320                            |   NONE => tryAReg(Array.sub(realHints, friend))
321                    in
322                        case possReg of
323                            reg as SOME _ => reg
324                        |   NONE =>
325                            let
326                                (* Add a friend of a friend to the list if we haven't already
327                                   seen it and it doesn't conflict. *)
328                                fun newFriend f =
329                                    not(List.exists (fn n => n=f) old) andalso
330                                        not(List.exists (fn n => n=f) conflicts)
331                                val fOfF = List.filter newFriend (Array.sub(friends, friend))
332                            in
333                                findAFriend(tail @ fOfF, friend :: old)
334                            end
335                    end*)
336            in
337                case tryAReg pref of
338                    SOME r => r
339                |   NONE =>
340                    (
341                        case tryAReg (Array.sub(realHints, r)) of
342                            SOME r => r
343                        |   NONE =>
344                            (
345                                case findAFriend(Array.sub(destinationRegs, r), Array.sub(sourceRegs, r), []) of
346                                    SOME r => r
347                                    (* Look through the registers to find one that's free. *)
348                                |   NONE => findAReg regSet
349                            )
350                    )
351            end
352
353        fun allocateRegister args = ignore(findRegister args)
354
355        fun allocateGeneralReg r = allocateRegister(r, NONE, generalRegisters)
356        and allocateFloatReg r = allocateRegister(r, NONE, floatingPtRegisters)
357
358        fun allocateArgument(RegisterArgument(PReg r), regSet) = allocateRegister(r, NONE, regSet)
359        |   allocateArgument(MemoryLocation{base=PReg bReg, index, cache=NONE, ...}, _) = (allocateGeneralReg bReg; allocateArgIndex index)
360            (* Unfortunately we still have to allocate a register for the base even if we're going to use the cache.
361               That's because the conflict sets are based on the assumption that the registers are allocated at the
362               last occurrence (first when working from the end back) and it uses getInstructionRegisters which in turn
363               uses argRegs which returns both the base and the cache.  GetConflictSets could use a different version
364               but we also have to take account of save registers in e.g. AllocateMemoryOperation.  If we
365               don't allocate a register because it's not needed at some point it shouldn't be allocated
366               for the save set. *)
367        |   allocateArgument(MemoryLocation{cache=SOME(PReg r), base=PReg bReg, index, ...}, regSet) =
368                (allocateGeneralReg bReg; allocateArgIndex index; allocateRegister(r, NONE, regSet))
369        |   allocateArgument(StackLocation{cache=SOME(PReg r), ...}, regSet) = allocateRegister(r, NONE, regSet)
370        |   allocateArgument _ = ()
371
372        and allocateArgGeneral arg = allocateArgument(arg, generalRegisters)
373        and allocateArgFloat arg = allocateArgument(arg, floatingPtRegisters)
374
375        and allocateArgIndex NoMemIndex = ()
376        |   allocateArgIndex(MemIndex1(PReg r)) = allocateGeneralReg r
377        |   allocateArgIndex(MemIndex2(PReg r)) = allocateGeneralReg r
378        |   allocateArgIndex(MemIndex4(PReg r)) = allocateGeneralReg r
379        |   allocateArgIndex(MemIndex8(PReg r)) = allocateGeneralReg r
380        |   allocateArgIndex ObjectIndex = ()
381
382        (* Return the register part of a cached item.  We must still, unfortunately, ensure that a register is
383           allocated for base registers because we're assuming that a register is allocated on the last
384           occurrence and this might be it. *)
385        fun decache(StackLocation{cache=SOME r, ...}) = RegisterArgument r
386        |   decache(MemoryLocation{cache=SOME r, base=PReg bReg, ...}) =
387                (allocateGeneralReg bReg; RegisterArgument r)
388        |   decache arg = arg
389
390        val allocateFindRegister = findRegister
391
392        fun registerAllocate({instr=LoadArgument{source, dest=PReg dreg, kind}, ...}) =
393            let
394                val regSet =
395                    case kind of
396                        MoveFloat => floatingPtRegisters
397                    |   MoveDouble => floatingPtRegisters
398                    |   _ => generalRegisters
399                val realDestReg = findRegister(dreg, NONE, regSet)
400            in
401                (* We previously used decache here but that has the disadvantage that it
402                   may allocate the destination register as the base register resulting in
403                   it not being available as the cache register. *)
404                case source of
405                    RegisterArgument(PReg sreg) => allocateRegister(sreg, SOME realDestReg, regSet)
406
407                |   StackLocation{cache=SOME(PReg sreg), ...} =>
408                        allocateRegister(sreg, SOME realDestReg, regSet)
409
410                |   MemoryLocation{cache=SOME(PReg sreg), base=PReg bReg, ...} =>
411                    (
412                        (* Cached source.  Allocate this first. *)
413                        allocateRegister(sreg, SOME realDestReg, regSet);
414                        (* We need to allocate a register but do it afterwards. *)
415                        allocateGeneralReg bReg
416                    )
417
418                |   source => allocateArgument(source, regSet)
419            end
420
421        |   registerAllocate({instr=StoreArgument{ source, base=PReg bReg, index, kind, ... }, ...}) =
422            (
423                case (decache source, kind) of
424                    (RegisterArgument(PReg sReg), MoveByte) =>
425                    if targetArch <> Native32Bit
426                    then (allocateArgGeneral source; allocateGeneralReg bReg; allocateArgIndex index)
427                    else
428                    (* This is complicated on X86/32.  We can't use edi or esi for the store registers.  Instead
429                       we reserve ecx (see special case in "identify") and use that if we have to. *)
430                    (
431                        allocateRegister(sReg, SOME(GenReg ecx), generalRegisters);
432                        allocateGeneralReg bReg; allocateArgIndex index
433                    )
434
435                |   _ =>
436                    let
437                        val regSet =
438                            case kind of MoveFloat => floatingPtRegisters | MoveDouble => floatingPtRegisters | _ => generalRegisters
439                    in
440                        allocateArgument(source, regSet);
441                        allocateGeneralReg bReg;
442                        allocateArgIndex index
443                    end
444            )
445
446        |   registerAllocate{instr=LoadMemReg { dest=PReg pr, ...}, ...} = allocateGeneralReg pr
447
448        |   registerAllocate{instr=StoreMemReg { source=PReg pr, ...}, ...} = allocateGeneralReg pr
449
450        |   registerAllocate{instr=BeginFunction _, ...} = ()
451            (* Any registers that are referenced will have been allocated real registers. *)
452
453        |   registerAllocate({instr=TailRecursiveCall{regArgs=oRegArgs, stackArgs=oStackArgs, workReg=PReg wReg, ...}, ...}) =
454            let
455                val regArgs = List.map (fn (arg, reg) => (decache arg, reg)) oRegArgs
456                and stackArgs = List.map(fn {src, stack } => {src=decache src, stack=stack}) oStackArgs
457                fun allocateRegArg(argReg, GenReg _) = allocateArgGeneral argReg
458                |   allocateRegArg(argReg, XMMReg _) = allocateArgument(argReg, floatingPtRegisters)
459                |   allocateRegArg(_, FPReg _) = raise InternalError "allocateRegArg" (* Never used. *)
460            in
461                allocateGeneralReg wReg;
462                List.app (allocateArgGeneral o #src) stackArgs;
463                (* We've already hinted the arguments. *)
464                List.app allocateRegArg regArgs
465            end
466
467        |   registerAllocate({instr=FunctionCall{regArgs=oRegArgs, stackArgs=oStackArgs, dest=PReg dReg, realDest, saveRegs, ...}, ...}) =
468            let
469                val regArgs = List.map (fn (arg, reg) => (decache arg, reg)) oRegArgs
470                and stackArgs = List.map decache oStackArgs
471                fun allocateRegArg(argReg, GenReg _) = allocateArgGeneral argReg
472                |   allocateRegArg(argReg, XMMReg _) = allocateArgument(argReg, floatingPtRegisters)
473                |   allocateRegArg(_, FPReg _) = raise InternalError "allocateRegArg" (* Never used. *)
474            in
475                List.app(fn (PReg r) => allocateGeneralReg r) saveRegs;
476                (* Result will be in rax/fp0/xmm0. *)
477                allocateRegister(dReg, SOME realDest, [realDest]);
478                List.app allocateArgGeneral stackArgs;
479                (* We've already hinted the arguments. *)
480                List.app allocateRegArg regArgs
481            end
482
483        |   registerAllocate({instr=AllocateMemoryOperation{ dest=PReg dReg, saveRegs, ...}, ...}) =
484            (
485                List.app(fn (PReg r) => allocateGeneralReg r) saveRegs;
486                allocateGeneralReg dReg
487            )
488
489        |   registerAllocate({instr=AllocateMemoryVariable{size=PReg sReg, dest=PReg dReg, saveRegs}, ...}) =
490            (
491                List.app(fn (PReg r) => allocateGeneralReg r) saveRegs;
492                allocateGeneralReg dReg;
493                allocateGeneralReg sReg
494            )
495
496        |   registerAllocate({instr=InitialiseMem{size=PReg sReg, addr=PReg aReg, init=PReg iReg}, ...}) =
497            (
498                (* We are going to use rep stosl/q to set the memory.
499                   That requires the length to be in ecx, the initialiser to be in eax and
500                   the destination to be edi. *)
501                allocateRegister(aReg, SOME(GenReg edi), generalRegisters);
502                allocateRegister(iReg, SOME(GenReg eax), generalRegisters);
503                allocateRegister(sReg, SOME(GenReg ecx), generalRegisters)
504            )
505
506        |   registerAllocate{instr=InitialisationComplete, ...} = ()
507
508        |   registerAllocate{instr=BeginLoop, ...} = ()
509
510        |   registerAllocate({instr=JumpLoop{regArgs, stackArgs, checkInterrupt, workReg}, ...}) =
511            (
512                case workReg of SOME(PReg r) => allocateGeneralReg r | NONE => ();
513                List.app (fn (src, _, _) => allocateArgGeneral src) stackArgs;
514                List.app (fn (a, PReg r) => (allocateArgGeneral a; allocateGeneralReg r)) regArgs;
515                case checkInterrupt of SOME regs => List.app(fn PReg r => allocateGeneralReg r) regs | NONE => ()
516            )
517
518        |   registerAllocate({instr=RaiseExceptionPacket{ packetReg=PReg preg }, ...}) =
519                (* The argument must be put into rax. *)
520                allocateRegister(preg, SOME(GenReg eax), generalRegisters)
521
522        |   registerAllocate{instr=ReserveContainer _, ...} = ()
523
524        |   registerAllocate({instr=IndexedCaseOperation{testReg=PReg tReg, workReg=PReg wReg}, ...}) =
525            (
526                allocateRegister(tReg, NONE, generalRegisters);
527                allocateRegister(wReg, NONE, generalRegisters)
528            )
529
530        |   registerAllocate({instr=LockMutable{addr=PReg pr}, ...}) = allocateRegister(pr, NONE, generalRegisters)
531
532        |   registerAllocate({instr=WordComparison{ arg1=PReg arg1Reg, arg2, ... }, ...}) =
533            (
534                allocateRegister(arg1Reg, NONE, generalRegisters);
535                allocateArgGeneral arg2
536            )
537
538        |   registerAllocate({instr=CompareLiteral{ arg1, ... }, ...}) = allocateArgGeneral arg1
539
540        |   registerAllocate({instr=CompareByteMem{ arg1={base=PReg bReg, index, ...}, ...}, ...}) =
541                (allocateGeneralReg bReg; allocateArgIndex index)
542
543            (* Set up an exception handler. *)
544        |   registerAllocate({instr=PushExceptionHandler{workReg=PReg hReg}, ...}) = allocateGeneralReg hReg
545
546            (* Pop an exception handler at the end of a handled section.  Executed if no exception has been raised.
547               This removes items from the stack. *)
548        |   registerAllocate({instr=PopExceptionHandler{workReg=PReg wReg, ...}, ...}) = allocateGeneralReg wReg
549
550            (* Start of a handler.  Sets the address associated with PushExceptionHandler and
551               provides a register for the packet.*)
552        |   registerAllocate({instr=BeginHandler{packetReg=PReg pReg, workReg=PReg wReg}, ...}) =
553            (
554                (* The exception packet is in rax. *)
555                allocateRegister(pReg, SOME(GenReg eax), generalRegisters);
556                allocateGeneralReg wReg
557            )
558
559        |   registerAllocate({instr=ReturnResultFromFunction { resultReg=PReg resReg, realReg, ... }, ...}) =
560                allocateRegister(resReg, SOME realReg, [realReg] (* It MUST be in this register *))
561
562        |   registerAllocate{instr=ArithmeticFunction{oper=SUB, resultReg=PReg resReg, operand1=PReg op1Reg,
563                                            operand2, ...}, ...} =
564            (* Subtraction  - Unlike the other arithmetic operations we can't put the second
565               argument into the result register and then do the operation. *)
566            let
567                val realDestReg = findRegister(resReg, NONE, generalRegisters)
568                (* Try to put the argument into the same register as the result. *)
569            in
570                allocateRegister(op1Reg, SOME realDestReg, generalRegisters);
571                allocateArgGeneral operand2
572            end
573
574        |   registerAllocate({instr=ArithmeticFunction{resultReg=PReg resReg, operand1=PReg op1Reg, operand2, ...}, ...}) =
575            let
576                val realDestReg = findRegister(resReg, NONE, generalRegisters)
577                val () = allocateRegister(op1Reg, SOME realDestReg, generalRegisters)
578            in
579                case decache operand2 of
580                    RegisterArgument(PReg op2Reg) =>
581                    (* Arithmetic operation with both arguments as registers.  These operations are all symmetric so
582                       we can try to put either argument into the result reg and then do the operation on the other arg. *)
583                        allocateRegister(op2Reg, SOME realDestReg, generalRegisters)
584                |   operand2 => allocateArgGeneral operand2
585            end
586
587        |   registerAllocate({instr=TestTagBit{arg, ...}, ...}) = allocateArgGeneral arg
588
589        |   registerAllocate({instr=PushValue {arg, ...}, ...}) = allocateArgGeneral arg
590
591        |   registerAllocate({instr=CopyToCache{source=PReg sreg, dest=PReg dreg, kind}, ...}) =
592            let
593                val regSet =
594                    case kind of
595                        MoveFloat => floatingPtRegisters
596                    |   MoveDouble => floatingPtRegisters
597                    |   _ => generalRegisters
598                val realDestReg = findRegister(dreg, NONE, regSet)
599            in
600                (* Get the source register using the current destination as a preference. *)
601                allocateRegister(sreg, SOME realDestReg, regSet)
602            end
603
604        |   registerAllocate({instr=ResetStackPtr _, ...}) = ()
605
606        |   registerAllocate({instr=StoreToStack{ source, ... }, ...}) = allocateArgument(source, generalRegisters)
607
608        |   registerAllocate({instr=TagValue{source=PReg srcReg, dest=PReg dReg, ...}, ...}) =
609            (
610                (* Since we're using LEA to tag there's no cost to using a different reg. *)
611                allocateRegister(dReg, NONE, generalRegisters);
612                allocateRegister(srcReg, NONE, generalRegisters)
613            )
614
615        |   registerAllocate({instr=UntagValue{source=PReg sReg, dest=PReg dReg, cache, ...}, ...}) =
616            let
617                val regResult = findRegister(dReg, NONE, generalRegisters)
618                val () =
619                    case cache of
620                        SOME(PReg cReg) => allocateRegister(cReg, SOME regResult, generalRegisters)
621                    |   NONE => ()
622            in
623                allocateRegister(sReg, SOME regResult, generalRegisters)
624            end
625
626        |   registerAllocate({instr=LoadEffectiveAddress{base, index, dest=PReg dReg, ...}, ...}) =
627            (
628                allocateGeneralReg dReg;
629                case base of SOME(PReg br) => allocateGeneralReg br | _ => ();
630                allocateArgIndex index
631            )
632
633        |   registerAllocate({instr=ShiftOperation{resultReg=PReg resReg, operand=PReg operReg, shiftAmount=IntegerConstant _, ...}, ...}) =
634            let
635                val realDestReg = findRegister(resReg, NONE, generalRegisters)
636            in
637                allocateRegister(operReg, SOME realDestReg, generalRegisters)
638            end
639
640        |   registerAllocate({instr=ShiftOperation{resultReg=PReg resReg, operand=PReg operReg,
641                                        shiftAmount=RegisterArgument(PReg shiftReg), ...}, ...}) =
642            let
643                val realDestReg = findRegister(resReg, NONE, generalRegisters)
644            in
645                allocateRegister(shiftReg, SOME(GenReg ecx), generalRegisters);
646                allocateRegister(operReg, SOME realDestReg, generalRegisters)
647            end
648
649        |   registerAllocate{instr=ShiftOperation _, ...} = raise InternalError "registerAllocate - ShiftOperation"
650
651        |   registerAllocate({instr=
652                Multiplication{resultReg=PReg resReg, operand1=PReg op1Reg,
653                               operand2, ...}, ...}) =
654            let
655                val realDestReg = findRegister(resReg, NONE, generalRegisters)
656                val () = allocateRegister(op1Reg, SOME realDestReg, generalRegisters)
657            in
658                case decache operand2 of
659                    RegisterArgument(PReg op2Reg) =>
660                        (* Treat exactly the same as ArithmeticFunction. *)
661                        allocateRegister(op2Reg, SOME realDestReg, generalRegisters)
662                |   operand2 => allocateArgGeneral operand2
663            end
664
665        |   registerAllocate({instr=Division{dividend=PReg regDivid, divisor, quotient=PReg regQuot,
666                                  remainder=PReg regRem, ...}, ...}) =
667            (
668                (* Division is specific as to the registers.  The dividend must be eax, quotient is
669                   eax and the remainder is edx. *)
670                allocateRegister(regDivid, SOME(GenReg eax), generalRegisters);
671                allocateRegister(regQuot, SOME(GenReg eax), generalRegisters);
672                allocateRegister(regRem, SOME(GenReg edx), generalRegisters);
673                allocateArgGeneral divisor
674            )
675
676        |   registerAllocate({instr=AtomicExchangeAndAdd{base=PReg bReg, source=PReg sReg, resultReg=PReg rReg}, ...}) =
677                (allocateGeneralReg sReg; allocateGeneralReg bReg; allocateGeneralReg rReg)
678
679        |   registerAllocate({instr=BoxValue{boxKind, source=PReg sReg, dest=PReg dReg, saveRegs}, ...}) =
680            (
681                List.app(fn (PReg r) => allocateGeneralReg r) saveRegs;
682                case boxKind of
683                    BoxLargeWord => allocateGeneralReg sReg
684                |   BoxX87Double => allocateFloatReg sReg
685                |   BoxX87Float => allocateFloatReg sReg
686                |   BoxSSE2Float => allocateFloatReg sReg
687                |   BoxSSE2Double => allocateFloatReg sReg;
688                allocateGeneralReg dReg
689            )
690
691        |   registerAllocate({instr=CompareByteVectors{vec1Addr=PReg v1Reg, vec2Addr=PReg v2Reg, length=PReg lReg, ...}, ...}) =
692            (
693                allocateRegister(v1Reg, SOME(GenReg esi), generalRegisters);
694                allocateRegister(v2Reg, SOME(GenReg edi), generalRegisters);
695                allocateRegister(lReg, SOME(GenReg ecx), generalRegisters)
696            )
697
698        |   registerAllocate({instr=BlockMove{srcAddr=PReg sReg, destAddr=PReg dReg, length=PReg lReg, ...}, ...}) =
699            (
700                allocateRegister(sReg, SOME(GenReg esi), generalRegisters);
701                allocateRegister(dReg, SOME(GenReg edi), generalRegisters);
702                allocateRegister(lReg, SOME(GenReg ecx), generalRegisters)
703            )
704
705        |   registerAllocate{instr=X87Compare{arg1=PReg arg1Reg, arg2, ...}, ...} =
706                (allocateRegister(arg1Reg, NONE, floatingPtRegisters); allocateArgFloat arg2)
707
708        |   registerAllocate{instr=SSE2Compare{arg1=PReg arg1Reg, arg2, ...}, ...} =
709                (allocateRegister(arg1Reg, NONE, floatingPtRegisters); allocateArgFloat arg2)
710
711        |   registerAllocate({instr=X87FPGetCondition{dest=PReg dReg, ...}, ...}) =
712                (* We can only use RAX here. *)
713                allocateRegister(dReg, SOME(GenReg eax), generalRegisters)
714
715        |   registerAllocate({instr=X87FPArith{resultReg=PReg resReg, arg1=PReg op1Reg, arg2, ...}, ...}) =
716            let
717                val realDestReg = findRegister(resReg, NONE, floatingPtRegisters)
718            in
719                allocateRegister(op1Reg, SOME realDestReg, floatingPtRegisters);
720                allocateArgFloat arg2
721            end
722
723        |   registerAllocate({instr=X87FPUnaryOps{dest=PReg resReg, source=PReg op1Reg, ...}, ...}) =
724            let
725                val realDestReg = findRegister(resReg, NONE, floatingPtRegisters)
726            in
727                allocateRegister(op1Reg, SOME realDestReg, floatingPtRegisters)
728            end
729
730        |   registerAllocate({instr=X87Float{dest=PReg resReg, source}, ...}) =
731                (allocateArgGeneral source; allocateRegister(resReg, NONE, floatingPtRegisters))
732
733        |   registerAllocate({instr=SSE2Float{dest=PReg resReg, source}, ...}) =
734                (allocateArgGeneral source; allocateRegister(resReg, NONE, floatingPtRegisters))
735
736        |   registerAllocate({instr=SSE2FPUnary{resultReg=PReg resReg, source, ...}, ...}) =
737            (
738                allocateRegister(resReg, NONE, floatingPtRegisters);
739                allocateArgFloat source
740            )
741
742        |   registerAllocate({instr=SSE2FPBinary{resultReg=PReg resReg, arg1=PReg op1Reg, arg2, ...}, ...}) =
743            let
744                val realDestReg = findRegister(resReg, NONE, floatingPtRegisters)
745            in
746                allocateRegister(op1Reg, SOME realDestReg, floatingPtRegisters);
747                allocateArgFloat arg2
748            end
749
750        |   registerAllocate({instr=TagFloat{dest=PReg resReg, source=PReg sReg, ...}, ...}) =
751            (
752                allocateRegister(resReg, NONE, generalRegisters);
753                allocateRegister(sReg, NONE, floatingPtRegisters)
754            )
755
756        |   registerAllocate({instr=UntagFloat{source, dest=PReg dReg, cache, ...}, ...}) =
757            let
758                val regResult = findRegister(dReg, NONE, floatingPtRegisters)
759                val () =
760                    case cache of
761                        SOME(PReg cReg) => allocateRegister(cReg, SOME regResult, floatingPtRegisters)
762                    |   NONE => ()
763            in
764                allocateArgGeneral source
765            end
766
767        |   registerAllocate({instr=GetSSE2ControlReg{dest=PReg destReg}, ...}) =
768                allocateRegister(destReg, NONE, generalRegisters)
769
770        |   registerAllocate({instr=SetSSE2ControlReg{source=PReg srcReg}, ...}) =
771                allocateRegister(srcReg, NONE, generalRegisters)
772
773        |   registerAllocate({instr=GetX87ControlReg{dest=PReg destReg}, ...}) =
774                allocateRegister(destReg, NONE, generalRegisters)
775
776        |   registerAllocate({instr=SetX87ControlReg{source=PReg srcReg}, ...}) =
777                allocateRegister(srcReg, NONE, generalRegisters)
778
779        |   registerAllocate({instr=X87RealToInt{source=PReg srcReg, dest=PReg destReg}, ...}) =
780            (
781                allocateRegister(srcReg, NONE, floatingPtRegisters);
782                allocateRegister(destReg, NONE, generalRegisters)
783            )
784
785        |   registerAllocate({instr=SSE2RealToInt{source, dest=PReg destReg, ...}, ...}) =
786            (
787                allocateRegister(destReg, NONE, generalRegisters);
788                allocateArgFloat source
789            )
790
791        |   registerAllocate({instr=SignExtend32To64{source, dest=PReg destReg, ...}, ...}) =
792            (
793                allocateRegister(destReg, NONE, generalRegisters);
794                allocateArgGeneral source
795            )
796
797        |   registerAllocate({instr=TouchArgument{source=PReg srcReg}, ...}) =
798                allocateRegister(srcReg, NONE, generalRegisters)
799
800        |   registerAllocate({instr=PauseCPU, ...}) = ()
801
802        (* Depth-first scan. *)
803        val visited = Array.array(numBlocks, false)
804
805        fun processBlocks blockNo =
806        if Array.sub(visited, blockNo)
807        then ()  (* Done or currently being done. *)
808        else
809        let
810            val () = Array.update(visited, blockNo, true)
811            val ExtendedBasicBlock { flow, block, passThrough, exports, ...} =
812                Vector.sub(blocks, blockNo)
813            (* Add the hints for this block before the actual allocation of registers. *)
814            val _ = List.app addHints block
815            val () =
816                (* Process the dependencies first. *)
817                case flow of
818                    ExitCode => ()
819                |   Unconditional m => processBlocks m
820                |   Conditional {trueJump, falseJump, ...} =>
821                        (processBlocks trueJump; processBlocks falseJump)
822                |   IndexedBr cases => List.app processBlocks cases
823                |   SetHandler{ handler, continue } =>
824                        (processBlocks handler; processBlocks continue)
825                |   UnconditionalHandle _ => ()
826                |   ConditionalHandle { continue, ...} => processBlocks continue
827            (* Now this block. *)
828            local
829                (* We assume that anything used later will have been allocated a register.
830                   This is generally true except for a loop where the use may occur earlier. *)
831                val exported = setToList passThrough @ setToList exports
832                fun findAReg r =
833                    case Vector.sub(regProps, r) of
834                        RegPropStack _ => ()
835                    |   _ => ignore(allocateFindRegister(r, NONE, generalRegisters))
836            in
837                val () = List.app findAReg exported
838            end
839        in
840            List.foldr(fn (c, ()) => registerAllocate c) () block
841        end
842
843    in
844        processBlocks 0;
845        (* If the failures list is empty we succeeded. *)
846        case !failures of
847            [] =>  (* Return the allocation vector.  If a register isn't used replace it with rax. *)
848                AllocateSuccess(Vector.tabulate(maxPRegs, fn i => getOpt(Array.sub(allocatedRegs, i), GenReg eax)))
849            (* Else we'll have to spill something. *)
850        |   l => AllocateFailure l
851    end
852
853    structure Sharing =
854    struct
855        type intSet             = intSet
856        and extendedBasicBlock  = extendedBasicBlock
857        and regProperty         = regProperty
858        and reg                 = reg
859        and allocateResult      = allocateResult
860    end
861
862end;
863