Lines Matching defs:C
25 C mp_limb_t mulredc14(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, label
26 C const mp_limb_t *m, mp_limb_t inv_m); label
27 C label
28 C arguments: label
29 C r3 = ptr to result z least significant limb label
30 C r4 = ptr to input x least significant limb label
31 C r5 = ptr to input y least significant limb label
32 C r6 = ptr to modulus m least significant limb label
33 C r7 = -1/m mod 2^64 label
34 C label
35 C final carry returned in r3 label
51 C Implements multiplication and REDC for two input numbers of 14 words label
53 C The algorithm: label
54 C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) label
55 C label
56 C T1:T0 = x[i]*y[0] ; /* i = 0 for this first pass */ label
57 C u = (T0*invm) % 2^64 ; label
58 C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ label
59 C for (j = 1; j < len; j++) label
60 C { label
61 C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; label
62 C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ label
63 C tmp[j-1] = T0; label
64 C } label
65 C tmp[len-1] = T1 ; label
66 C tmp[len] = cy ; /* cy <= 1 (see note 2) */ label
67 C for (i = 1; i < len; i++) label
68 C { label
69 C cy:T1:T0 = x[i]*y[0] + tmp[1]:tmp[0] ; label
70 C u = (T0*invm) % 2^64 ; label
71 C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ label
72 C for (j = 1; j < len; j++) label
73 C { label
74 C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; label
76 C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3; for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ label
77 C tmp[j-1] = T0; label
78 C } label
79 C tmp[len-1] = T1 ; label
80 C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ label
81 C } label
82 C z[0 ... len-1] = tmp[0 ... len-1] ; label
83 C return (tmp[len]) ; label
84 C label
85 C notes: label
86 C label
87 C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 = 2*2^128 - 4*2^64 + 2, label
88 C so cy:T1 <= 2*2^64 - 4. label
89 C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 label
90 C = 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), label
91 C so cy:T1 <= 2*2^64 - 3. For j > 1, label
92 C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), label
93 C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. label
94 C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, label
95 C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) label
96 C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 label
97 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 label
98 C = 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), label
99 C so cy:T1 <= 3*2^64 - 3. For j > 1, label
100 C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), label
101 C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. label
102 C For j = len - 1, we know from note 2 that tmp[len] <= 1 for i = 0. label
103 C Assume this is true for index i-1. Then label
104 C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 label
105 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 label
106 C = 2*2^128 - 1 = 1:(2^64-1):(2^64-1), label
107 C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. label
108 C label
109 C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 label
110 C YP = r5, MP = r6, TP = r1 (stack ptr) label
111 C label
113 C local variables: tmp[0 ... 14] array, having 14+1 8-byte words label
114 C The tmp array needs 14+1 entries, but tmp[14] is stored in label
115 C r15, so only 14 entries are used on the stack. label
122 C ######################################################################## label
123 C # i = 0 pass label
124 C ######################################################################### label
126 C Pass for j = 0. We need to fetch x[i] from memory and compute the new u label
149 C Pass for j = 1 label
168 C Pass for j = 2 label
187 C Pass for j = 3 label
206 C Pass for j = 4 label
225 C Pass for j = 5 label
244 C Pass for j = 6 label
263 C Pass for j = 7 label
282 C Pass for j = 8 label
301 C Pass for j = 9 label
320 C Pass for j = 10 label
339 C Pass for j = 11 label
358 C Pass for j = 12 label
377 C Pass for j = 13. Don't fetch new data from y[j+1]. label
395 C ######################################################################### label
396 C # i > 0 passes label
397 C ######################################################################### label
405 C Pass for j = 0. We need to fetch x[i], tmp[0] and tmp[1] from memory label
406 C and compute the new u label
427 C Pass for j = 1 label
447 C Pass for j = 2 label
467 C Pass for j = 3 label
487 C Pass for j = 4 label
507 C Pass for j = 5 label
527 C Pass for j = 6 label
547 C Pass for j = 7 label
567 C Pass for j = 8 label
587 C Pass for j = 9 label
607 C Pass for j = 10 label
627 C Pass for j = 11 label
647 C Pass for j = 12 label
667 C Pass for j = 13. Don't fetch new data from y[j+1]. label
689 C Copy result from tmp memory to z label