1 #pragma once
2
3 #ifndef OPTIMIZE_FOR_LP64_INCLUDED
4 #define OPTIMIZE_FOR_LP64_INCLUDED
5
6 /* ========================================================================= */
7
8 /*
9
*****************************************************************************
*                               OBSERVATIONS                                *
*****************************************************************************
13
____________OBS 1:___________________________________________________________


if I need to perform TWO MULTIPLICATIONS of 13 bit * 8 bit, I can do both in a
single operation. For example:

let  X = xxxxxxxxxxxxx
     S = ssssssss
     Y = yyyyyyyyyyyyy
     T = tttttttt

and suppose I need to compute
     U = X * S
     V = Y * T
I can do it in one step by packing the bits like this:

 A = X 0 00000000 Y             = xxxxxxxxxxxxx 0 00000000 yyyyyyyyyyyyy
 B = 00000 S 0 00000000 00000 T = 00000ssssssss 0 00000000 00000tttttttt

now, computing C = A * B gives

 C = U ?????????????????????? V =
   = uuuuuuuuuuuuuuuuuuuuu ?????????????????????? vvvvvvvvvvvvvvvvvvvvv

where C is 64 bits wide; that is, the first 21 bits are X * S = U
and the last 21 bits are Y * T = V
40
____________OBS 2:___________________________________________________________


if I need to perform TWO MULTIPLICATIONS of 16 bit * 16 bit of the form
 X * S = U
 Y * S = V

with

 #X = 16,
 #Y = 16,
 #S = 16

(where the operator '#' yields the number of bits an integer is made of)

I can do both in a single operation by packing the bits like this:

 O = 0000000000000000, #O = 16
 A = X O Y           , #A = 48
 B = S               , #B = 16
 C = A * B           , #C = 64

where the first 32 bits are X * S and the second 32 bits are Y * S
65
____________OBS 3:___________________________________________________________


if I need to perform FOUR MULTIPLICATIONS of 8 bit * 8 bit of the form
 X * S = I      #X = 8, #S = 8, #I = 16
 Y * S = J      #Y = 8, #S = 8, #J = 16
 Z * S = K      #Z = 8, #S = 8, #K = 16
 W * S = L      #W = 8, #S = 8, #L = 16


I can do all four in a single operation by packing the bits like this:

 O = 00000000            #O = 8
 C = XOYOZOW * OOOOOOS   #C = 64

where
 I is the first 16 bits,
 J is the second 16 bits,
 K is the third 16 bits,
 L is the fourth 16 bits
86 _____________________________________________________________________________
87
88 *****************************************************************************
89 */
90
91 /* ========================================================================= */
92
/* Feature flag defined (with no value) whenever this header is included.
 * NOTE(review): presumably tested by client code with #ifdef to select the
 * packed-arithmetic code paths -- confirm against the callers. */
#define OPTIMIZE_FOR_LP64
94
95 /* ========================================================================= */
96
/*
 * Three values packed into one word, one 17-bit slot each:
 *   bits 34..50 -> first value
 *   bits 17..33 -> second value
 *   bits  0..16 -> third value
 */
#define MASK_FIRST_OF_3_X_16BIT 0x7FFFC00000000
#define MASK_SECOND_OF_3_X_16BIT 0x3FFFE0000
#define MASK_THIRD_OF_3_X_16BIT 0x1FFFF

/*
 * Extract one slot from a packed word x.
 *
 * Each expansion is a single, fully parenthesized expression without a
 * trailing semicolon, so the macros can be used inside larger expressions
 * (e.g. FIRST_OF_3_X_16BIT(x) + 1) without precedence surprises.
 *
 * FIRST_OF_3_X_16BIT only shifts and does not apply a mask: any bits of x
 * above bit 50 end up in the result.
 */
#define FIRST_OF_3_X_16BIT(x) ((x) >> 34)
#define SECOND_OF_3_X_16BIT(x) (((x) & MASK_SECOND_OF_3_X_16BIT) >> 17)
#define THIRD_OF_3_X_16BIT(x) ((x) & MASK_THIRD_OF_3_X_16BIT)
104
105 /* ========================================================================= */
106
/*
 * Two values packed into one word, one 25-bit slot each:
 *   bits 25..49 -> first value
 *   bits  0..24 -> second value
 */
#define MASK_FIRST_OF_2_X_24BIT 0x3FFFFFE000000
#define MASK_SECOND_OF_2_X_24BIT 0x1FFFFFF

/*
 * Extract one slot from a packed word x. The expansions are fully
 * parenthesized so that they compose safely with surrounding operators
 * (for instance, SECOND_OF_2_X_24BIT(x) == k compares the extracted value).
 *
 * FIRST_OF_2_X_24BIT only shifts and does not apply a mask: any bits of x
 * above bit 49 end up in the result.
 */
#define FIRST_OF_2_X_24BIT(x) ((x) >> 25)
#define SECOND_OF_2_X_24BIT(x) ((x) & MASK_SECOND_OF_2_X_24BIT)
112
113 /* ========================================================================= */
114
/*
 * Two values packed into one word, one 32-bit slot each:
 *   bits 32..63 -> first value
 *   bits  0..31 -> second value
 */
#define MASK_FIRST_OF_2_X_32BIT 0xFFFFFFFF00000000
#define MASK_SECOND_OF_2_X_32BIT 0xFFFFFFFF

/*
 * Extract one slot from a packed word x. The expansions are fully
 * parenthesized so that they compose safely with surrounding operators.
 *
 * FIRST_OF_2_X_32BIT only shifts and does not apply a mask.
 */
#define FIRST_OF_2_X_32BIT(x) ((x) >> 32)
#define SECOND_OF_2_X_32BIT(x) ((x) & MASK_SECOND_OF_2_X_32BIT)
120
121 /* ========================================================================= */
122
/*
 * Unsigned integer aliases named after the number of bits the value is meant
 * to carry; several names share the same underlying type.
 * NOTE(review): the 50/51/64-bit aliases rely on unsigned long being 64 bits
 * wide, which is what the LP64 in this header's name suggests -- confirm for
 * every target this header is built on.
 */
typedef unsigned char UINT8;
typedef unsigned short UINT16;
typedef unsigned int UINT24, UINT32;
typedef unsigned long UINT50, UINT51, UINT64;
130
131 /* ========================================================================= */
132
#if 0

/* Disabled (never compiled) prototypes of the packed-arithmetic helpers. */

/* performs a1+b1, a2+b2, a3+b3 in a single operation */
UINT64 add_3_x_16bit ( UINT16 a1, UINT16 a2, UINT16 a3,
UINT16 b1, UINT16 b2, UINT16 b3 );

/* performs a1+b1, a2+b2 in a single operation */
UINT50 add_2_x_24bit ( UINT24 a1, UINT24 a2,
UINT24 b1, UINT24 b2 );

/* performs a1*b, a2*b in a single operation */
UINT64 mult_2_x_16bit ( UINT16 a1, UINT16 a2,
UINT16 b );
#endif
147
148 /* ========================================================================= */
149
150 /* ------------------------------------------------------------------------- */
151
/*
 * Macro form of add_3_x_16bit: packs (a1, a2, a3) and (b1, b2, b3) into two
 * words at bit offsets 34, 17 and 0 and adds the words, producing the three
 * sums a1+b1, a2+b2, a3+b3 with one addition.
 *
 * The whole expansion is parenthesized so the result can be used as an
 * operand (e.g. ADD_3_X_16BIT(...) * 2) without the surrounding operator
 * binding to only one of the two packed words.
 *
 * Each argument is evaluated exactly once. a3 and b3 are OR-ed in without a
 * cast, so they must be non-negative and fit in their 17-bit slot.
 */
#define ADD_3_X_16BIT(a1, a2, a3, b1, b2, b3)                   \
  ((0L | (UINT64)(a1) << 34 | (UINT64)(a2) << 17 | (a3)) +     \
   (0L | (UINT64)(b1) << 34 | (UINT64)(b2) << 17 | (b3)))
155
add_3_x_16bit(UINT16 a1,UINT16 a2,UINT16 a3,UINT16 b1,UINT16 b2,UINT16 b3)156 inline UINT64 add_3_x_16bit(UINT16 a1, UINT16 a2, UINT16 a3, UINT16 b1,
157 UINT16 b2, UINT16 b3) {
158 return (0L | (UINT64)a1 << 34 | (UINT64)a2 << 17 | a3) +
159 (0L | (UINT64)b1 << 34 | (UINT64)b2 << 17 | b3);
160 }
161
162 /* ------------------------------------------------------------------------- */
163
/*
 * Macro form of add_2_x_24bit: packs (a1, a2) and (b1, b2) into two words at
 * bit offsets 25 and 0 and adds the words, producing a1+b1 and a2+b2 with one
 * addition.
 *
 * The whole expansion is parenthesized so the result can be used as an
 * operand of any surrounding operator. Each argument is evaluated exactly
 * once. a2 and b2 are OR-ed in without a cast, so they must be non-negative
 * and fit in their 25-bit slot.
 */
#define ADD_2_X_24BIT(a1, a2, b1, b2) \
  ((0L | (UINT64)(a1) << 25 | (a2)) + (0L | (UINT64)(b1) << 25 | (b2)))
166
add_2_x_24bit(UINT24 a1,UINT24 a2,UINT24 b1,UINT24 b2)167 inline UINT50 add_2_x_24bit(UINT24 a1, UINT24 a2, UINT24 b1, UINT24 b2) {
168 return (0L | (UINT64)a1 << 25 | a2) + (0L | (UINT64)b1 << 25 | b2);
169 }
170
171 /* ------------------------------------------------------------------------- */
172
/*
 * Macro form of mult_2_x_16bit: packs a1 (shifted left by 32) and a2 into one
 * word and multiplies it by b, producing a1*b in the upper 32 bits and a2*b
 * in the lower 32 bits with one multiplication.
 *
 * Every argument is parenthesized before it is cast, and the whole expansion
 * is parenthesized, so compound arguments (e.g. `c ? x : y`) and surrounding
 * operators (e.g. `n / MULT_2_X_16BIT(...)`) behave as expected. Each
 * argument is evaluated exactly once.
 */
#define MULT_2_X_16BIT(a1, a2, b) \
  ((UINT64)(b) * (((UINT64)(a1) << 32) | (UINT64)(a2)))
175
mult_2_x_16bit(UINT16 a1,UINT16 a2,UINT16 b)176 inline UINT64 mult_2_x_16bit(UINT16 a1, UINT16 a2, UINT16 b) {
177 return (0L | (UINT64)a1 << 32 | a2) * b;
178 }
179
180 #endif
181