1 #pragma once
2 
3 #ifndef OPTIMIZE_FOR_LP64_INCLUDED
4 #define OPTIMIZE_FOR_LP64_INCLUDED
5 
6 /* ========================================================================= */
7 
8 /*
9 
10   *****************************************************************************
11   *  OSSERVAZIONI                                                             *
12   *****************************************************************************
13 
14   ____________OSS 1:___________________________________________________________
15 
16 
17   se devo fare DUE MOLTIPLICAZIONI 13 bit * 8 bit posso farle in un
18   colpo solo, ad esempio:
19 
20   siano X = xxxxxxxxxxxxx
21   S = ssssssss
22   Y = yyyyyyyyyyyyy
23   T = tttttttt
24 
25   e devo calcolare
26   U = X * S
27   V = Y * T
28   posso farlo in un colpo solo impacchettando i bit cosi':
29 
30   A = X       0 00000000 Y       = xxxxxxxxxxxxx 0 00000000 yyyyyyyyyyyyy
31   B = 00000 S 0 00000000 00000 T = 00000ssssssss 0 00000000 00000tttttttt
32 
33   ora se faccio C = A * B si ha
34 
35   C = U ?????????????????????? V =
36   = uuuuuuuuuuuuuuuuuuuuu ?????????????????????? vvvvvvvvvvvvvvvvvvvvv
37 
38   dove C e' di 64 bit; cioe' i primi 21 bit sono X * S = U
39   e gli ultimi 21 sono Y * T = V
40 
41   ____________OSS 2:___________________________________________________________
42 
43 
44   se devo fare DUE MOLTIPLICAZIONI 16 bit * 16 bit del tipo
45   X * S = U
46   Y * S = V
47 
48   con
49 
50   #X = 16,
51   #Y = 16,
52   #S = 16
53 
54   (dove l'operatore '#' da' come risultato il numero di bit di cui e' composto
55   un numero intero)
56 
57   posso farle tutte e due in un solo colpo impacchettando i bit cosi':
58 
59   O = 0000000000000000,   #O = 16
60   A = X O Y           ,   #A = 48
61   B = S               ,   #B = 16
62   C = A * B           ,   #C = 64
63 
64   dove i primi 32 bit sono X * S e i secondi 32 bit sono Y * S
65 
66   ____________OSS 3:___________________________________________________________
67 
68 
69   se devo fare QUATTRO MOLTIPLICAZIONI 8 bit * 8 bit del tipo
70   X * S = I           #X = 8, #S = 8, #I = 16
71   Y * S = J           #Y = 8, #S = 8, #J = 16
72   Z * S = K           #Z = 8, #S = 8, #K = 16
73   W * S = L           #W = 8, #S = 8, #L = 16
74 
75 
  posso farle tutte e quattro in un solo colpo impacchettando i bit cosi':
77 
78   O = 00000000             #O = 8
79   C = XOYOZOW * OOOOOOS    #C = 64
80 
81   dove
82   I sono i primi 16 bit,
83   J sono i secondi 16 bit,
84   K sono i terzi 16 bit,
85   L i quarti 16 bit
86   _____________________________________________________________________________
87 
88   *****************************************************************************
89   */
90 
91 /* ========================================================================= */
92 
/*
 * Marker macro, defined with no value.
 * NOTE(review): presumably tested with #ifdef by client code to select the
 * packed 64-bit code paths provided by this header -- confirm against the
 * users of this header.
 */
#define OPTIMIZE_FOR_LP64
94 
95 /* ========================================================================= */
96 
/*
 * Three lanes packed into one 64-bit word.  Each lane is 17 bits wide
 * (bits 34-50, bits 17-33 and bits 0-16), so the sum of two 16-bit values
 * fits in a lane without spilling into its neighbour.
 */
#define MASK_FIRST_OF_3_X_16BIT 0x7FFFC00000000
#define MASK_SECOND_OF_3_X_16BIT 0x3FFFE0000
#define MASK_THIRD_OF_3_X_16BIT 0x1FFFF

/*
 * Lane extractors for a packed 3-lane word `x`.
 *
 * Every expansion is fully parenthesized and carries no trailing semicolon,
 * so the macros can be used as sub-expressions, e.g.
 * `FIRST_OF_3_X_16BIT(w) + 1` or `if (THIRD_OF_3_X_16BIT(w) == k)`.
 * FIRST_ does not mask: it returns every bit from position 34 upwards.
 */
#define FIRST_OF_3_X_16BIT(x) ((x) >> 34)
#define SECOND_OF_3_X_16BIT(x) (((x) & MASK_SECOND_OF_3_X_16BIT) >> 17)
#define THIRD_OF_3_X_16BIT(x) ((x) & MASK_THIRD_OF_3_X_16BIT)
104 
105 /* ========================================================================= */
106 
/*
 * Two lanes packed into one 64-bit word.  Each lane is 25 bits wide
 * (bits 25-49 and bits 0-24), so the sum of two 24-bit values fits in a
 * lane without spilling into its neighbour.
 */
#define MASK_FIRST_OF_2_X_24BIT 0x3FFFFFE000000
#define MASK_SECOND_OF_2_X_24BIT 0x1FFFFFF

/*
 * Lane extractors for a packed 2-lane word `x`.  The expansions are fully
 * parenthesized so they compose safely with surrounding operators.
 * FIRST_ does not mask: it returns every bit from position 25 upwards.
 */
#define FIRST_OF_2_X_24BIT(x) ((x) >> 25)
#define SECOND_OF_2_X_24BIT(x) ((x) & MASK_SECOND_OF_2_X_24BIT)
112 
113 /* ========================================================================= */
114 
/*
 * Two 32-bit lanes packed into one 64-bit word: the upper half (bits 32-63)
 * and the lower half (bits 0-31).
 */
#define MASK_FIRST_OF_2_X_32BIT 0xFFFFFFFF00000000
#define MASK_SECOND_OF_2_X_32BIT 0xFFFFFFFF

/*
 * Lane extractors for a packed 2 x 32-bit word `x`.  The expansions are
 * fully parenthesized so they compose safely with surrounding operators.
 */
#define FIRST_OF_2_X_32BIT(x) ((x) >> 32)
#define SECOND_OF_2_X_32BIT(x) ((x) & MASK_SECOND_OF_2_X_32BIT)
120 
121 /* ========================================================================= */
122 
/*
 * Unsigned integer aliases used by the packed-arithmetic helpers.
 *
 * The numeric suffix names how many bits a value of that type is meant to
 * carry; the underlying type is a built-in type at least that wide, so
 * UINT24, UINT50 and UINT51 are physically wider than their names suggest.
 * The aliases are grouped here by underlying type.
 *
 * UINT50, UINT51 and UINT64 assume `unsigned long` is 64 bits wide, as it
 * is under the LP64 data model this header is named after.
 */
typedef unsigned char UINT8;
typedef unsigned short UINT16;
typedef unsigned int UINT24, UINT32;
typedef unsigned long UINT50, UINT51, UINT64;
130 
131 /* ========================================================================= */
132 
#if 0

/*
 * Disabled prototypes.  The names and parameter lists match the inline
 * functions defined later in this header.
 */

/* computes a1+b1, a2+b2, a3+b3 in a single operation */
UINT64  add_3_x_16bit ( UINT16 a1, UINT16 a2, UINT16 a3,
			UINT16 b1, UINT16 b2, UINT16 b3 );

/* computes a1+b1, a2+b2 in a single operation */
UINT50 add_2_x_24bit ( UINT24 a1, UINT24 a2,
		       UINT24 b1, UINT24 b2 );

/* computes a1*b, a2*b in a single operation */
UINT64 mult_2_x_16bit ( UINT16 a1, UINT16 a2,
			UINT16 b );
#endif
147 
148 /* ========================================================================= */
149 
150 /* ------------------------------------------------------------------------- */
151 
/*
 * Computes a1+b1, a2+b2 and a3+b3 with a single 64-bit addition.
 *
 * Each triple is packed into one 64-bit word (first element at bit 34,
 * second at bit 17, third at bit 0) and the two words are added.  The sums
 * can be read back with FIRST_/SECOND_/THIRD_OF_3_X_16BIT.  Operands are
 * expected to fit in 16 bits; wider values overlap the neighbouring lane.
 *
 * The whole expansion is parenthesized so the result can be combined with
 * any surrounding operator (e.g. `ADD_3_X_16BIT(...) * 2`).  Each argument
 * is evaluated exactly once.
 */
#define ADD_3_X_16BIT(a1, a2, a3, b1, b2, b3)                                  \
  ((((UINT64)(a1) << 34) | ((UINT64)(a2) << 17) | (UINT64)(a3)) +              \
   (((UINT64)(b1) << 34) | ((UINT64)(b2) << 17) | (UINT64)(b3)))
155 
add_3_x_16bit(UINT16 a1,UINT16 a2,UINT16 a3,UINT16 b1,UINT16 b2,UINT16 b3)156 inline UINT64 add_3_x_16bit(UINT16 a1, UINT16 a2, UINT16 a3, UINT16 b1,
157                             UINT16 b2, UINT16 b3) {
158   return (0L | (UINT64)a1 << 34 | (UINT64)a2 << 17 | a3) +
159          (0L | (UINT64)b1 << 34 | (UINT64)b2 << 17 | b3);
160 }
161 
162 /* ------------------------------------------------------------------------- */
163 
/*
 * Computes a1+b1 and a2+b2 with a single 64-bit addition.
 *
 * Each pair is packed into one 64-bit word (first element at bit 25, second
 * at bit 0) and the two words are added.  The sums can be read back with
 * FIRST_OF_2_X_24BIT and SECOND_OF_2_X_24BIT.  Operands are expected to fit
 * in 24 bits; wider values overlap the neighbouring lane.
 *
 * The whole expansion is parenthesized so the result can be combined with
 * any surrounding operator.  Each argument is evaluated exactly once.
 */
#define ADD_2_X_24BIT(a1, a2, b1, b2)                                          \
  ((((UINT64)(a1) << 25) | (UINT64)(a2)) +                                     \
   (((UINT64)(b1) << 25) | (UINT64)(b2)))
166 
add_2_x_24bit(UINT24 a1,UINT24 a2,UINT24 b1,UINT24 b2)167 inline UINT50 add_2_x_24bit(UINT24 a1, UINT24 a2, UINT24 b1, UINT24 b2) {
168   return (0L | (UINT64)a1 << 25 | a2) + (0L | (UINT64)b1 << 25 | b2);
169 }
170 
171 /* ------------------------------------------------------------------------- */
172 
/*
 * Computes a1*b and a2*b with a single 64-bit multiplication.
 *
 * a1 is placed in the upper 32 bits and a2 in the lower 32 bits of one
 * 64-bit word, which is then multiplied by b.  For 16-bit operands each
 * product fits in 32 bits, so a1*b ends up in the upper half and a2*b in
 * the lower half (see FIRST_OF_2_X_32BIT / SECOND_OF_2_X_32BIT).
 *
 * Every argument and the whole expansion are parenthesized, so arbitrary
 * expressions may be passed and the result may be used inside a larger
 * expression.  Each argument is evaluated exactly once.
 */
#define MULT_2_X_16BIT(a1, a2, b)                                              \
  ((UINT64)(b) * (((UINT64)(a1) << 32) | (UINT64)(a2)))
175 
mult_2_x_16bit(UINT16 a1,UINT16 a2,UINT16 b)176 inline UINT64 mult_2_x_16bit(UINT16 a1, UINT16 a2, UINT16 b) {
177   return (0L | (UINT64)a1 << 32 | a2) * b;
178 }
179 
180 #endif
181