1; This Source Code Form is subject to the terms of the Mozilla Public
2; License, v. 2.0. If a copy of the MPL was not distributed with this
3; file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5;
6; This code is converted from mpi_amd64_gas.asm for MASM for x64.
7;
8
9; ------------------------------------------------------------------------
10;
11;  Implementation of s_mpv_mul_set_vec which exploits
12;  the 64X64->128 bit  unsigned multiply instruction.
13;
14; ------------------------------------------------------------------------
15
16; r = a * digit, r and a are vectors of length len
17; returns the carry digit
18; r and a are 64 bit aligned.
19;
20; uint64_t
21; s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
22;
23
24.CODE
25
s_mpv_mul_set_vec64 PROC FRAME

        ; r[i] = low64(a[i] * digit + cy) for i in [0, len); returns final cy.
        ;
        ; Microsoft x64 entry registers:
        ;   rcx = r, rdx = a, r8d = len, r9 = digit
        ;
        ; The body was converted from a GAS routine that takes its arguments
        ; in rdi, rsi, rdx, rcx, so the parameters are shuffled into those
        ; registers first.  rdi and rsi are callee-saved in the Microsoft x64
        ; convention, hence the push/pop pair; FRAME/.pushreg describe those
        ; pushes so the stack can be unwound through this procedure.
        ;
        ; Register roles after the shuffle:
        ;   rdi = r cursor          rsi = a cursor
        ;   rcx = digit             r8  = digits still to process
        ;   r9  = carry (cy)        r11 = next a[] digit, loaded early
        ;   rdx:rax = 128-bit product of the current mul
        ;
        ; Clobbers: rax, rcx, rdx, r8, r9, r11, flags.

        push rdi
        .pushreg rdi
        push rsi
        .pushreg rsi
        .endprolog

        mov rdi, rcx
        mov rsi, rdx
        mov edx, r8d            ; 32-bit move zero-extends len into rdx
        mov rcx, r9

        ; cy must be cleared BEFORE the len == 0 exit: L17 returns r9, and
        ; r9 still holds digit at this point.
        xor r9, r9              ; cy = 0
        test rdx, rdx
        jz L17                  ; len == 0: return 0
        mov r8, rdx             ; keep len in r8; mul overwrites rdx

        ; ---- main loop: eight digits per iteration ----
L15:
        cmp r8, 8
        jb  L16                 ; 1..7 digits left: finish in the tail

        mov rax, [rsi]
        mov r11, [8+rsi]
        mul rcx                 ; rdx:rax = a[0] * digit
        add rax, r9
        adc rdx, 0              ; fold carry-out of the low half into the high half
        mov [0+rdi], rax
        mov r9, rdx             ; high half becomes the next cy

        mov rax, r11
        mov r11, [16+rsi]
        mul rcx
        add rax, r9
        adc rdx, 0
        mov [8+rdi], rax
        mov r9, rdx

        mov rax, r11
        mov r11, [24+rsi]
        mul rcx
        add rax, r9
        adc rdx, 0
        mov [16+rdi], rax
        mov r9, rdx

        mov rax, r11
        mov r11, [32+rsi]
        mul rcx
        add rax, r9
        adc rdx, 0
        mov [24+rdi], rax
        mov r9, rdx

        mov rax, r11
        mov r11, [40+rsi]
        mul rcx
        add rax, r9
        adc rdx, 0
        mov [32+rdi], rax
        mov r9, rdx

        mov rax, r11
        mov r11, [48+rsi]
        mul rcx
        add rax, r9
        adc rdx, 0
        mov [40+rdi], rax
        mov r9, rdx

        mov rax, r11
        mov r11, [56+rsi]
        mul rcx
        add rax, r9
        adc rdx, 0
        mov [48+rdi], rax
        mov r9, rdx

        mov rax, r11
        mul rcx
        add rax, r9
        adc rdx, 0
        mov [56+rdi], rax
        mov r9, rdx

        add rsi, 64             ; advance both cursors by eight digits
        add rdi, 64
        sub r8, 8
        jnz L15                 ; more digits: re-test against 8
        jmp L17                 ; exact multiple of 8: done

        ; ---- tail: here 1 <= r8 <= 7 ----
L16:
        mov rax, [0+rsi]
        mul rcx
        add rax, r9
        adc rdx, 0
        mov [0+rdi], rax
        mov r9, rdx
        dec r8
        jz L17

        mov rax, [8+rsi]
        mul rcx
        add rax, r9
        adc rdx, 0
        mov [8+rdi], rax
        mov r9, rdx
        dec r8
        jz L17

        mov rax, [16+rsi]
        mul rcx
        add rax, r9
        adc rdx, 0
        mov [16+rdi], rax
        mov r9, rdx
        dec r8
        jz L17

        mov rax, [24+rsi]
        mul rcx
        add rax, r9
        adc rdx, 0
        mov [24+rdi], rax
        mov r9, rdx
        dec r8
        jz L17

        mov rax, [32+rsi]
        mul rcx
        add rax, r9
        adc rdx, 0
        mov [32+rdi], rax
        mov r9, rdx
        dec r8
        jz L17

        mov rax, [40+rsi]
        mul rcx
        add rax, r9
        adc rdx, 0
        mov [40+rdi], rax
        mov r9, rdx
        dec r8
        jz L17

        ; Seventh tail digit: r8 was at most 7 on entry to L16, so this is
        ; always the last one and no further count check is needed.
        mov rax, [48+rsi]
        mul rcx
        add rax, r9
        adc rdx, 0
        mov [48+rdi], rax
        mov r9, rdx

L17:
        mov rax, r9             ; return cy
        pop rsi
        pop rdi
        ret

s_mpv_mul_set_vec64 ENDP
175
176
177;------------------------------------------------------------------------
178;
179; Implementation of s_mpv_mul_add_vec which exploits
180; the 64X64->128 bit  unsigned multiply instruction.
181;
182;------------------------------------------------------------------------
183
184; r += a * digit, r and a are vectors of length len
185; returns the carry digit
186; r and a are 64 bit aligned.
187;
188; uint64_t
189; s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
190;
191
s_mpv_mul_add_vec64 PROC FRAME

        ; r[i] = low64(r[i] + a[i] * digit + cy) for i in [0, len);
        ; returns final cy.
        ;
        ; Microsoft x64 entry registers:
        ;   rcx = r, rdx = a, r8d = len, r9 = digit
        ;
        ; The body was converted from a GAS routine that takes its arguments
        ; in rdi, rsi, rdx, rcx, so the parameters are shuffled into those
        ; registers first.  rdi and rsi are callee-saved in the Microsoft x64
        ; convention, hence the push/pop pair; FRAME/.pushreg describe those
        ; pushes so the stack can be unwound through this procedure.
        ;
        ; Register roles after the shuffle:
        ;   rdi = r cursor          rsi = a cursor
        ;   rcx = digit             r8  = digits still to process
        ;   r9  = carry (cy)        r10 = current r[] digit
        ;   r11 = next a[] digit, loaded early
        ;   rdx:rax = 128-bit product of the current mul
        ;
        ; Clobbers: rax, rcx, rdx, r8, r9, r10, r11, flags.

        push rdi
        .pushreg rdi
        push rsi
        .pushreg rsi
        .endprolog

        mov rdi, rcx
        mov rsi, rdx
        mov edx, r8d            ; 32-bit move zero-extends len into rdx
        mov rcx, r9

        ; cy must be cleared BEFORE the len == 0 exit: L27 returns r9, and
        ; r9 still holds digit at this point.
        xor r9, r9              ; cy = 0
        test rdx, rdx
        jz L27                  ; len == 0: return 0
        mov r8, rdx             ; keep len in r8; mul overwrites rdx

        ; ---- main loop: eight digits per iteration ----
L25:
        cmp r8, 8
        jb L26                  ; 1..7 digits left: finish in the tail

        mov rax, [0+rsi]
        mov r10, [0+rdi]
        mov r11, [8+rsi]
        mul rcx                 ; rdx:rax = a[0] * digit
        add rax, r10            ; + r[0]
        adc rdx, 0
        mov r10, [8+rdi]
        add rax, r9             ; + cy
        adc rdx, 0
        mov [0+rdi], rax
        mov r9, rdx             ; high half becomes the next cy

        mov rax, r11
        mov r11, [16+rsi]
        mul rcx
        add rax, r10
        adc rdx, 0
        mov r10, [16+rdi]
        add rax, r9
        adc rdx, 0
        mov [8+rdi], rax
        mov r9, rdx

        mov rax, r11
        mov r11, [24+rsi]
        mul rcx
        add rax, r10
        adc rdx, 0
        mov r10, [24+rdi]
        add rax, r9
        adc rdx, 0
        mov [16+rdi], rax
        mov r9, rdx

        mov rax, r11
        mov r11, [32+rsi]
        mul rcx
        add rax, r10
        adc rdx, 0
        mov r10, [32+rdi]
        add rax, r9
        adc rdx, 0
        mov [24+rdi], rax
        mov r9, rdx

        mov rax, r11
        mov r11, [40+rsi]
        mul rcx
        add rax, r10
        adc rdx, 0
        mov r10, [40+rdi]
        add rax, r9
        adc rdx, 0
        mov [32+rdi], rax
        mov r9, rdx

        mov rax, r11
        mov r11, [48+rsi]
        mul rcx
        add rax, r10
        adc rdx, 0
        mov r10, [48+rdi]
        add rax, r9
        adc rdx, 0
        mov [40+rdi], rax
        mov r9, rdx

        mov rax, r11
        mov r11, [56+rsi]
        mul rcx
        add rax, r10
        adc rdx, 0
        mov r10, [56+rdi]
        add rax, r9
        adc rdx, 0
        mov [48+rdi], rax
        mov r9, rdx

        mov rax, r11
        mul rcx
        add rax, r10
        adc rdx, 0
        add rax, r9
        adc rdx, 0
        mov [56+rdi], rax
        mov r9, rdx

        add rsi, 64             ; advance both cursors by eight digits
        add rdi, 64
        sub r8, 8
        jnz L25                 ; more digits: re-test against 8
        jmp L27                 ; exact multiple of 8: done

        ; ---- tail: here 1 <= r8 <= 7 ----
L26:
        mov rax, [0+rsi]
        mov r10, [0+rdi]
        mul rcx
        add rax, r10
        adc rdx, 0
        add rax, r9
        adc rdx, 0
        mov [0+rdi], rax
        mov r9, rdx
        dec r8
        jz L27

        mov rax, [8+rsi]
        mov r10, [8+rdi]
        mul rcx
        add rax, r10
        adc rdx, 0
        add rax, r9
        adc rdx, 0
        mov [8+rdi], rax
        mov r9, rdx
        dec r8
        jz L27

        mov rax, [16+rsi]
        mov r10, [16+rdi]
        mul rcx
        add rax, r10
        adc rdx, 0
        add rax, r9
        adc rdx, 0
        mov [16+rdi], rax
        mov r9, rdx
        dec r8
        jz L27

        mov rax, [24+rsi]
        mov r10, [24+rdi]
        mul rcx
        add rax, r10
        adc rdx, 0
        add rax, r9
        adc rdx, 0
        mov [24+rdi], rax
        mov r9, rdx
        dec r8
        jz L27

        mov rax, [32+rsi]
        mov r10, [32+rdi]
        mul rcx
        add rax, r10
        adc rdx, 0
        add rax, r9
        adc rdx, 0
        mov [32+rdi], rax
        mov r9, rdx
        dec r8
        jz L27

        mov rax, [40+rsi]
        mov r10, [40+rdi]
        mul rcx
        add rax, r10
        adc rdx, 0
        add rax, r9
        adc rdx, 0
        mov [40+rdi], rax
        mov r9, rdx
        dec r8
        jz L27

        ; Seventh tail digit: r8 was at most 7 on entry to L26, so this is
        ; always the last one and no further count check is needed.
        mov rax, [48+rsi]
        mov r10, [48+rdi]
        mul rcx
        add rax, r10
        adc rdx, 0
        add rax, r9
        adc rdx, 0
        mov [48+rdi], rax
        mov r9, rdx

L27:
        mov rax, r9             ; return cy

        pop rsi
        pop rdi
        ret

s_mpv_mul_add_vec64 ENDP
387
388END
389