1 /* This code is part of the tng compression routines.
2 *
3 * Written by Daniel Spangberg and Magnus Lundborg
4 * Copyright (c) 2010, 2013-2014 The GROMACS development team.
5 *
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the Revised BSD License.
9 */
10
11
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15
16 #include "../../include/compression/tng_compress.h"
17
18 /* 64 bit integers are not required in this part of the program. But
19 they improve the running speed if present. If 64 bit integers are
20 available define the symbol HAVE64BIT. It should get automatically
21 defined by the defines in my64bit.h */
22 #include "../../include/compression/my64bit.h"
23
24 #include "../../include/compression/widemuldiv.h"
25
26 #ifndef TRAJNG_X86_GCC_INLINE_MULDIV
27 #if defined(__GNUC__) && defined(__i386__)
28 #define TRAJNG_X86_GCC_INLINE_MULDIV
29 #endif /* gcc & i386 */
30 #if defined(__GNUC__) && defined(__x86_64__)
31 #define TRAJNG_X86_GCC_INLINE_MULDIV
32 #endif /* gcc & x86_64 */
33 #endif /* TRAJNG X86 GCC INLINE MULDIV */
34
35 #ifdef USE_WINDOWS
36 #define TNG_INLINE __inline
37 #else
38 #define TNG_INLINE inline
39 #endif
40
41 /* Multiply two 32 bit unsigned integers returning a 64 bit unsigned value (in two integers) */
Ptngc_widemul(unsigned int i1,unsigned int i2,unsigned int * ohi,unsigned int * olo)42 static TNG_INLINE void Ptngc_widemul(unsigned int i1, unsigned int i2, unsigned int *ohi, unsigned int *olo)
43 {
44 #if defined(TRAJNG_X86_GCC_INLINE_MULDIV)
45 __asm__ __volatile__ ("mull %%edx\n\t"
46 : "=a" (i1), "=d" (i2)
47 : "a" (i1),"d" (i2)
48 : "cc");
49 *ohi=i2;
50 *olo=i1;
51 #else /* TRAJNG X86 GCC INLINE MULDIV */
52
53 #ifdef HAVE64BIT
54 my_uint64_t res= ((my_uint64_t)i1) * ((my_uint64_t)i2);
55 *olo=res & 0xFFFFFFFFU;
56 *ohi=(res>>32) & 0xFFFFFFFFU;
57 #else /* HAVE64BIT */
58
59 unsigned int bits=16;
60 unsigned int L_m=(1<<bits)-1; /* Lower bits mask. */
61
62 unsigned int a_U,a_L; /* upper and lower half of a */
63 unsigned int b_U,b_L; /* upper and lower half of b */
64
65 unsigned int x_UU,x_UL,x_LU,x_LL; /* temporary storage. */
66 unsigned int x,x_U,x_L; /* temporary storage. */
67
68 /* Extract partial ints */
69 a_L=i1 & L_m;
70 a_U=i1>>bits;
71 b_L=i2 & L_m;
72 b_U=i2>>bits;
73
74 /* Do a*a=>2a multiply where a is half number of bits in an int */
75 x=a_L*b_L;
76 x_LL=x & L_m;
77 x_LU=x>>bits;
78
79 x=a_U*b_L;
80 x_LU+=x & L_m;
81 x_UL=x>>bits;
82
83 x=a_L*b_U;
84 x_LU+=x & L_m;
85 x_UL+=x>>bits;
86
87 x=a_U*b_U;
88 x_UL+=x & L_m;
89 x_UU=x>>bits;
90
91 /* Combine results */
92 x_UL+=x_LU>>bits;
93 x_LU&=L_m;
94 x_UU+=x_UL>>bits;
95 x_UL&=L_m;
96
97 x_U=(x_UU<<bits)|x_UL;
98 x_L=(x_LU<<bits)|x_LL;
99
100 /* Write back results */
101 *ohi=x_U;
102 *olo=x_L;
103 #endif /* HAVE64BIT */
104 #endif /* TRAJNG X86 GCC INLINE MULDIV */
105 }
106
107 /* Divide a 64 bit unsigned value in hi:lo with the 32 bit value i and
108 return the result in result and the remainder in remainder */
Ptngc_widediv(unsigned int hi,unsigned int lo,const unsigned int i,unsigned int * result,unsigned int * remainder)109 static TNG_INLINE void Ptngc_widediv(unsigned int hi, unsigned int lo, const unsigned int i, unsigned int *result, unsigned int *remainder)
110 {
111 #if defined(TRAJNG_X86_GCC_INLINE_MULDIV)
112 __asm__ __volatile__ ("divl %%ecx\n\t"
113 : "=a" (lo), "=d" (hi)
114 : "a" (lo),"d" (hi), "c" (i)
115 : "cc");
116 *result=lo;
117 *remainder=hi;
118 #else /* TRAJNG X86 GCC INLINE MULDIV */
119 #ifdef HAVE64BIT
120 my_uint64_t v= (((my_uint64_t)hi)<<32) | ((my_uint64_t)lo);
121 my_uint64_t res=v/((my_uint64_t)i);
122 my_uint64_t rem=v-res*((my_uint64_t)i);
123 *result=(unsigned int)res;
124 *remainder=(unsigned int)rem;
125 #else /* HAVE64BIT */
126 unsigned int res;
127 unsigned int rmask;
128 unsigned int s_U,s_L;
129 unsigned int bits=16U;
130 unsigned int bits2=bits*2U;
131 unsigned int hibit=bits2-1U;
132 unsigned int hibit_mask=1U<<hibit;
133 unsigned int allbits=(hibit_mask-1U)|hibit_mask;
134
135 /* Do division. */
136 rmask=hibit_mask;
137 res=0;
138 s_U=i>>(bits2-hibit);
139 s_L=(i<<hibit)&0xFFFFFFFFU;
140 while (rmask)
141 {
142 if ((s_U<hi) || ((s_U==hi) && (s_L<=lo)))
143 {
144 /* Subtract */
145 hi-=s_U;
146 if (s_L>lo)
147 {
148 unsigned int t;
149 hi--; /* Borrow */
150 t=allbits-s_L;
151 lo+=t+1;
152 }
153 else
154 lo-=s_L;
155
156 /* Set bit. */
157 res|=rmask;
158 }
159 rmask>>=1;
160 s_L>>=1;
161 if (s_U & 1U)
162 s_L|=hibit_mask;
163 s_U>>=1;
164 }
165 *remainder=lo;
166 *result=res;
167 #endif /* HAVE64BIT */
168 #endif /* TRAJNG X86 GCC INLINE MULDIV */
169 }
170
171 /* Add a unsigned int to a largeint. j determines which value in the
172 largeint to add v1 to. */
largeint_add_gen(const unsigned int v1,unsigned int * largeint,const int n,int j)173 static TNG_INLINE void largeint_add_gen(const unsigned int v1, unsigned int *largeint, const int n, int j)
174 {
175 /* Add with carry. unsigned ints in C wrap modulo 2**bits when "overflowed". */
176 unsigned int v2=(v1+largeint[j])&0xFFFFFFFFU; /* Add and cap at 32 bits */
177 unsigned int carry=0;
178 if ((((unsigned int)-1)&0xFFFFFFFFU) -v1<largeint[j])
179 carry=1;
180 largeint[j]=v2;
181 j++;
182 while ((j<n) && carry)
183 {
184 v2=(largeint[j]+carry)&0xFFFFFFFFU;
185 carry=0;
186 if ((((unsigned int)-1)&0xFFFFFFFFU) -1<largeint[j])
187 carry=1;
188 largeint[j]=v2;
189 j++;
190 }
191 }
192
193 /* Add a unsigned int to a largeint. */
Ptngc_largeint_add(const unsigned int v1,unsigned int * largeint,const int n)194 void Ptngc_largeint_add(const unsigned int v1, unsigned int *largeint, const int n)
195 {
196 largeint_add_gen(v1,largeint,n,0);
197 }
198
199 /* Multiply v1 with largeint_in and return result in largeint_out */
Ptngc_largeint_mul(const unsigned int v1,unsigned int * largeint_in,unsigned int * largeint_out,const int n)200 void Ptngc_largeint_mul(const unsigned int v1, unsigned int *largeint_in, unsigned int *largeint_out, const int n)
201 {
202 int i;
203 unsigned int lo,hi;
204
205 memset(largeint_out, 0U, sizeof(unsigned int) * n);
206
207 for (i=0; i<n-1; i++)
208 {
209 if (largeint_in[i]!=0U)
210 {
211 Ptngc_widemul(v1,largeint_in[i],&hi,&lo); /* 32x32->64 mul */
212 largeint_add_gen(lo,largeint_out,n,i);
213 largeint_add_gen(hi,largeint_out,n,i+1);
214 }
215 }
216 if (largeint_in[i]!=0U)
217 {
218 Ptngc_widemul(v1,largeint_in[i],&hi,&lo); /* 32x32->64 mul */
219 largeint_add_gen(lo,largeint_out,n,i);
220 }
221 }
222
223 /* Return the remainder from dividing largeint_in with v1. Result of the division is returned in largeint_out */
Ptngc_largeint_div(const unsigned int v1,unsigned int * largeint_in,unsigned int * largeint_out,const int n)224 unsigned int Ptngc_largeint_div(const unsigned int v1, unsigned int *largeint_in, unsigned int *largeint_out, const int n)
225 {
226 unsigned int result,remainder=0;
227 int i;
228 unsigned int hi;
229 /* Boot */
230 hi=0U;
231 i=n;
232 while (i)
233 {
234 i--;
235 Ptngc_widediv(hi,largeint_in[i],v1,&result,&remainder);
236 largeint_out[i]=result;
237 hi=remainder;
238 }
239 return remainder;
240 }
241