/*
 * include/haproxy/intops.h
 * Functions for integer operations.
 *
 * Copyright (C) 2020 Willy Tarreau - w@1wt.eu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

#ifndef _HAPROXY_INTOPS_H
#define _HAPROXY_INTOPS_H

#include <haproxy/api.h>

/* exported functions, mostly integer parsing */
/* rounds <i> down to the closest value having max 2 digits */
unsigned int round_2dig(unsigned int i);
unsigned int full_hash(unsigned int a);
int varint_bytes(uint64_t v);
unsigned int read_uint(const char **s, const char *end);
long long read_int64(const char **s, const char *end);
unsigned long long read_uint64(const char **s, const char *end);
unsigned int str2ui(const char *s);
unsigned int str2uic(const char *s);
unsigned int strl2ui(const char *s, int len);
unsigned int strl2uic(const char *s, int len);
int strl2ic(const char *s, int len);
int strl2irc(const char *s, int len, int *ret);
int strl2llrc(const char *s, int len, long long *ret);
int strl2llrc_dotted(const char *text, int len, long long *ret);
unsigned int mask_find_rank_bit(unsigned int r, unsigned long m);
unsigned int mask_find_rank_bit_fast(unsigned int r, unsigned long m,
                                     unsigned long a, unsigned long b,
                                     unsigned long c, unsigned long d);
void mask_prep_rank_map(unsigned long m,
                        unsigned long *a, unsigned long *b,
                        unsigned long *c, unsigned long *d);


/* Multiply the two 32-bit operands and shift the 64-bit result right 32 bits.
 * This is used to compute fixed ratios by setting one of the operands to
 * (2^32*ratio).
 */
static inline unsigned int mul32hi(unsigned int a, unsigned int b)
{
	return ((unsigned long long)a * b + a) >> 32;
}
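
/* Usage sketch (illustrative, not from the original file): applying a fixed
 * ratio of 1/4 by passing the second operand as 2^32/4:
 *
 *	unsigned int quarter = mul32hi(x, 0x40000000U); // roughly x/4
 */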

/* gcc does not know when it can safely divide 64 bits by 32 bits. Use this
 * function when you know for sure that the result fits in 32 bits, because
 * it is optimal on x86 and on 64bit processors.
 */
static inline unsigned int div64_32(unsigned long long o1, unsigned int o2)
{
	unsigned long long result;
#ifdef __i386__
	asm("divl %2"
	    : "=A" (result)
	    : "A"(o1), "rm"(o2));
#else
	result = o1 / o2;
#endif
	return result;
}
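
/* Usage sketch (illustrative names): averaging a 64-bit byte counter over a
 * 32-bit sample count. The caller must guarantee the quotient fits in 32
 * bits, otherwise the i386 divl instruction would fault:
 *
 *	unsigned int avg = div64_32(total_bytes, nb_samples);
 */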

/* rotate left a 64-bit integer by <bits> bits; only the 6 lowest bits of
 * <bits> are used (i.e. a rotation by 0 to 63)
 */
static inline uint64_t rotl64(uint64_t v, uint8_t bits)
{
#if !defined(__ARM_ARCH_8A) && !defined(__x86_64__)
	bits &= 63;
#endif
	v = (v << bits) | (v >> (-bits & 63));
	return v;
}

/* rotate right a 64-bit integer by <bits> bits; only the 6 lowest bits of
 * <bits> are used (i.e. a rotation by 0 to 63)
 */
static inline uint64_t rotr64(uint64_t v, uint8_t bits)
{
#if !defined(__ARM_ARCH_8A) && !defined(__x86_64__)
	bits &= 63;
#endif
	v = (v >> bits) | (v << (-bits & 63));
	return v;
}
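
/* Note (illustrative property, not from the original file): for any <v> and
 * any <bits> in [0,63], the two rotations are exact inverses:
 *
 *	uint64_t v2 = rotr64(rotl64(v, 13), 13); // v2 == v
 */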

/* Simple popcountl implementation. It returns the number of ones in a word.
 * Described here: https://graphics.stanford.edu/~seander/bithacks.html
 */
static inline unsigned int my_popcountl(unsigned long a)
{
	a = a - ((a >> 1) & ~0UL/3);
	a = (a & ~0UL/15*3) + ((a >> 2) & ~0UL/15*3);
	a = (a + (a >> 4)) & ~0UL/255*15;
	return (unsigned long)(a * (~0UL/255)) >> (sizeof(unsigned long) - 1) * 8;
}
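
/* Worked example (illustrative): the three steps above compute 2-bit, 4-bit
 * then 8-bit partial sums in parallel (SWAR), and the final multiply by
 * ~0UL/255 (0x0101...01) accumulates all byte sums into the top byte:
 *
 *	unsigned int n = my_popcountl(0xF0UL); // n == 4
 */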

/* returns non-zero if <a> has at least 2 bits set */
static inline unsigned long atleast2(unsigned long a)
{
	return a & (a - 1);
}

/* Simple ffs implementation. It returns the position of the lowest bit set to
 * one, starting at 1. It is illegal to call it with a==0 (undefined result).
 */
static inline unsigned int my_ffsl(unsigned long a)
{
	unsigned long cnt;

#if defined(__x86_64__)
	__asm__("bsf %1,%0\n" : "=r" (cnt) : "rm" (a));
	cnt++;
#else

	cnt = 1;
#if LONG_MAX > 0x7FFFFFFFL /* 64bits */
	if (!(a & 0xFFFFFFFFUL)) {
		a >>= 32;
		cnt += 32;
	}
#endif
	if (!(a & 0xFFFFU)) {
		a >>= 16;
		cnt += 16;
	}
	if (!(a & 0xFF)) {
		a >>= 8;
		cnt += 8;
	}
	if (!(a & 0xf)) {
		a >>= 4;
		cnt += 4;
	}
	if (!(a & 0x3)) {
		a >>= 2;
		cnt += 2;
	}
	if (!(a & 0x1)) {
		cnt += 1;
	}
#endif /* x86_64 */

	return cnt;
}
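
/* e.g. (illustrative): the cascade of tests above halves the remaining range
 * at each step, landing on the lowest set bit:
 *
 *	unsigned int pos = my_ffsl(0x10UL); // pos == 5; my_ffsl(1) == 1
 */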

/* Simple fls implementation. It returns the position of the highest bit set to
 * one, starting at 1. It is illegal to call it with a==0 (undefined result).
 */
static inline unsigned int my_flsl(unsigned long a)
{
	unsigned long cnt;

#if defined(__x86_64__)
	__asm__("bsr %1,%0\n" : "=r" (cnt) : "rm" (a));
	cnt++;
#else

	cnt = 1;
#if LONG_MAX > 0x7FFFFFFFUL /* 64bits */
	if (a & 0xFFFFFFFF00000000UL) {
		a >>= 32;
		cnt += 32;
	}
#endif
	if (a & 0xFFFF0000U) {
		a >>= 16;
		cnt += 16;
	}
	if (a & 0xFF00) {
		a >>= 8;
		cnt += 8;
	}
	if (a & 0xf0) {
		a >>= 4;
		cnt += 4;
	}
	if (a & 0xc) {
		a >>= 2;
		cnt += 2;
	}
	if (a & 0x2) {
		cnt += 1;
	}
#endif /* x86_64 */

	return cnt;
}
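
/* e.g. (illustrative): the mirror image of my_ffsl(), returning the position
 * of the most significant set bit:
 *
 *	unsigned int msb = my_flsl(0xF0UL); // msb == 8; my_flsl(0x10) == 5
 */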

/* Build a word with the <bits> lower bits set (reverse of my_popcountl) */
static inline unsigned long nbits(int bits)
{
	if (--bits < 0)
		return 0;
	else
		return (2UL << bits) - 1;
}
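
/* e.g. (illustrative): nbits(3) == 0x7 and nbits(0) == 0. The "2UL << (bits-1)"
 * form keeps the shift count below the word size, so nbits(64) correctly
 * yields ~0UL on 64-bit platforms:
 *
 *	unsigned long mask = nbits(3); // mask == 0x7
 */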

/* Turns 64-bit value <a> from host byte order to network byte order.
 * The principle consists in letting the compiler detect we're playing
 * with a union and simplify most or all operations. The asm-optimized
 * htonl() version involving bswap (x86) / rev (arm) / other is a single
 * operation on little endian, or a NOP on big endian. In both cases,
 * this lets the compiler "see" that we're rebuilding a 64-bit word from
 * two 32-bit quantities that fit into a 32-bit register. In big endian,
 * the whole code is optimized out. In little endian, with a decent compiler,
 * a few bswap and 2 shifts are left, which is the minimum acceptable.
 */
static inline unsigned long long my_htonll(unsigned long long a)
{
#if defined(__x86_64__)
	__asm__ volatile("bswapq %0" : "=r"(a) : "0"(a));
	return a;
#else
	union {
		struct {
			unsigned int w1;
			unsigned int w2;
		} by32;
		unsigned long long by64;
	} w = { .by64 = a };
	return ((unsigned long long)htonl(w.by32.w1) << 32) | htonl(w.by32.w2);
#endif
}
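
/* e.g. (illustrative): on a little endian host the bytes are swapped end to
 * end, while on a big endian host the value is returned unchanged:
 *
 *	uint64_t net = my_htonll(0x0123456789abcdefULL); // 0xefcdab8967452301 on LE
 */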

/* Turns 64-bit value <a> from network byte order to host byte order. */
static inline unsigned long long my_ntohll(unsigned long long a)
{
	return my_htonll(a);
}

/* sets bit <bit> into map <map>, which must be long-aligned */
static inline void ha_bit_set(unsigned long bit, long *map)
{
	map[bit / (8 * sizeof(*map))] |= 1UL << (bit & (8 * sizeof(*map) - 1));
}

/* clears bit <bit> from map <map>, which must be long-aligned */
static inline void ha_bit_clr(unsigned long bit, long *map)
{
	map[bit / (8 * sizeof(*map))] &= ~(1UL << (bit & (8 * sizeof(*map) - 1)));
}

/* flips bit <bit> from map <map>, which must be long-aligned */
static inline void ha_bit_flip(unsigned long bit, long *map)
{
	map[bit / (8 * sizeof(*map))] ^= 1UL << (bit & (8 * sizeof(*map) - 1));
}

/* returns non-zero if bit <bit> from map <map> is set, otherwise 0 */
static inline int ha_bit_test(unsigned long bit, const long *map)
{
	return !!(map[bit / (8 * sizeof(*map))] & 1UL << (bit & (8 * sizeof(*map) - 1)));
}
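
/* Usage sketch (illustrative, names are hypothetical): a 256-bit map is an
 * array of longs, and each helper locates the right word and bit itself:
 *
 *	long map[256 / (8 * sizeof(long))] = { 0 };
 *
 *	ha_bit_set(37, map);
 *	if (ha_bit_test(37, map))
 *		ha_bit_clr(37, map);
 */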

/* hash a 32-bit integer to another 32-bit integer. This code may be large when
 * inlined, use full_hash() instead.
 */
static inline unsigned int __full_hash(unsigned int a)
{
	/* This function is one of Bob Jenkins' full avalanche hashing
	 * functions, which provides quite a good distribution for little
	 * input variations. The result is quite suited to fit over a 32-bit
	 * space with enough variations so that a randomly picked number falls
	 * equally before any server position.
	 * Check http://burtleburtle.net/bob/hash/integer.html for more info.
	 */
	a = (a+0x7ed55d16) + (a<<12);
	a = (a^0xc761c23c) ^ (a>>19);
	a = (a+0x165667b1) + (a<<5);
	a = (a+0xd3a2646c) ^ (a<<9);
	a = (a+0xfd7046c5) + (a<<3);
	a = (a^0xb55a4f09) ^ (a>>16);

	/* ensure values are better spread all around the tree by multiplying
	 * by a large prime close to 3/4 of the tree.
	 */
	return a * 3221225473U;
}

/*
 * Return integer equivalent of character <c> for a hex digit (0-9, a-f, A-F),
 * otherwise -1. This compact form helps gcc produce efficient code.
 */
static inline int hex2i(int c)
{
	if ((unsigned char)(c -= '0') > 9) {
		if ((unsigned char)(c -= 'A' - '0') > 5 &&
		    (unsigned char)(c -= 'a' - 'A') > 5)
			c = -11;
		c += 10;
	}
	return c;
}
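
/* e.g. (illustrative): the cascading subtractions classify the character
 * without any table lookup:
 *
 *	hex2i('7') == 7;  hex2i('b') == 11;  hex2i('G') == -1;
 */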

/* This one is 6 times faster than strtoul() on athlon, but does
 * no check at all.
 */
static inline unsigned int __str2ui(const char *s)
{
	unsigned int i = 0;
	while (*s) {
		i = i * 10 - '0';
		i += (unsigned char)*s++;
	}
	return i;
}

/* This one is 5 times faster than strtoul() on athlon with checks.
 * It returns the value of the number composed of all valid digits read.
 */
static inline unsigned int __str2uic(const char *s)
{
	unsigned int i = 0;
	unsigned int j;

	while (1) {
		j = (*s++) - '0';
		if (j > 9)
			break;
		i *= 10;
		i += j;
	}
	return i;
}
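
/* e.g. (illustrative): parsing stops at the first non-digit, so trailing
 * garbage is silently ignored:
 *
 *	unsigned int v = __str2uic("123;x"); // v == 123
 */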

/* This one is 28 times faster than strtoul() on athlon, but does
 * no check at all!
 */
static inline unsigned int __strl2ui(const char *s, int len)
{
	unsigned int i = 0;

	while (len-- > 0) {
		i = i * 10 - '0';
		i += (unsigned char)*s++;
	}
	return i;
}

/* This one is 7 times faster than strtoul() on athlon with checks.
 * It returns the value of the number composed of all valid digits read.
 */
static inline unsigned int __strl2uic(const char *s, int len)
{
	unsigned int i = 0;
	unsigned int j, k;

	while (len-- > 0) {
		j = (*s++) - '0';
		k = i * 10;
		if (j > 9)
			break;
		i = k + j;
	}
	return i;
}

/* This function reads an unsigned integer from the string pointed to by <s>
 * and returns it. The <s> pointer is adjusted to point to the first unread
 * char. The function automatically stops at <end>.
 */
static inline unsigned int __read_uint(const char **s, const char *end)
{
	const char *ptr = *s;
	unsigned int i = 0;
	unsigned int j, k;

	while (ptr < end) {
		j = *ptr - '0';
		k = i * 10;
		if (j > 9)
			break;
		i = k + j;
		ptr++;
	}
	*s = ptr;
	return i;
}
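
/* Usage sketch (illustrative): the caller's pointer advances past the digits
 * that were consumed:
 *
 *	const char *p = "1234abc";
 *	unsigned int v = __read_uint(&p, p + 7); // v == 1234, p points to 'a'
 */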

/* returns the number of bytes needed to encode <v> as a varint. Be careful,
 * use it only with constants as it generates large code (typ. 180 bytes). Use
 * the varint_bytes() version instead in case of doubt.
 */
static inline int __varint_bytes(uint64_t v)
{
	switch (v) {
	case 0x0000000000000000 ... 0x00000000000000ef: return 1;
	case 0x00000000000000f0 ... 0x00000000000008ef: return 2;
	case 0x00000000000008f0 ... 0x00000000000408ef: return 3;
	case 0x00000000000408f0 ... 0x00000000020408ef: return 4;
	case 0x00000000020408f0 ... 0x00000001020408ef: return 5;
	case 0x00000001020408f0 ... 0x00000081020408ef: return 6;
	case 0x00000081020408f0 ... 0x00004081020408ef: return 7;
	case 0x00004081020408f0 ... 0x00204081020408ef: return 8;
	case 0x00204081020408f0 ... 0x10204081020408ef: return 9;
	default: return 10;
	}
}

/* Encode the integer <i> into a varint (variable-length integer). The encoded
 * value is copied in <*buf>. Here is the encoding format:
 *
 *        0 <= X < 240        : 1 byte  (7.875 bits)  [ XXXX XXXX ]
 *      240 <= X < 2288       : 2 bytes (11 bits)     [ 1111 XXXX ] [ 0XXX XXXX ]
 *     2288 <= X < 264432     : 3 bytes (18 bits)     [ 1111 XXXX ] [ 1XXX XXXX ]   [ 0XXX XXXX ]
 *   264432 <= X < 33818864   : 4 bytes (25 bits)     [ 1111 XXXX ] [ 1XXX XXXX ]*2 [ 0XXX XXXX ]
 * 33818864 <= X < 4328786160 : 5 bytes (32 bits)     [ 1111 XXXX ] [ 1XXX XXXX ]*3 [ 0XXX XXXX ]
 * ...
 *
 * On success, it returns the number of written bytes and <*buf> is moved after
 * the encoded value. Otherwise, it returns -1.
 */
static inline int encode_varint(uint64_t i, char **buf, char *end)
{
	unsigned char *p = (unsigned char *)*buf;
	int r;

	if (p >= (unsigned char *)end)
		return -1;

	if (i < 240) {
		*p++ = i;
		*buf = (char *)p;
		return 1;
	}

	*p++ = (unsigned char)i | 240;
	i = (i - 240) >> 4;
	while (i >= 128) {
		if (p >= (unsigned char *)end)
			return -1;
		*p++ = (unsigned char)i | 128;
		i = (i - 128) >> 7;
	}

	if (p >= (unsigned char *)end)
		return -1;
	*p++ = (unsigned char)i;

	r    = ((char *)p - *buf);
	*buf = (char *)p;
	return r;
}
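
/* Worked example (derived by hand from the format above): encoding 240 emits
 * the two bytes 0xF0 0x00, and encoding 2288 emits 0xF0 0x80 0x00:
 *
 *	char out[10], *p = out;
 *	int len = encode_varint(240, &p, out + sizeof(out)); // len == 2
 */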

/* Decode a varint from <*buf> and save the decoded value in <*i>. See
 * encode_varint() for details about the varint format.
 * On success, it returns the number of read bytes and <*buf> is moved after the
 * varint. Otherwise, it returns -1.
 */
static inline int decode_varint(char **buf, char *end, uint64_t *i)
{
	unsigned char *p = (unsigned char *)*buf;
	int r;

	if (p >= (unsigned char *)end)
		return -1;

	*i = *p++;
	if (*i < 240) {
		*buf = (char *)p;
		return 1;
	}

	r = 4;
	do {
		if (p >= (unsigned char *)end)
			return -1;
		*i += (uint64_t)*p << r;
		r  += 7;
	} while (*p++ >= 128);

	r    = ((char *)p - *buf);
	*buf = (char *)p;
	return r;
}
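
/* Round-trip sketch (illustrative): encode then decode through the same
 * buffer; both pointers end up past the varint:
 *
 *	char buf[10], *w = buf, *r = buf;
 *	uint64_t v;
 *
 *	if (encode_varint(1000000, &w, buf + sizeof(buf)) > 0 &&
 *	    decode_varint(&r, w, &v) > 0) {
 *		// v == 1000000
 *	}
 */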

#endif /* _HAPROXY_INTOPS_H */

/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */