1 /* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: expandtab:ts=8:sw=4:softtabstop=4:
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       fastpos.h
6 /// \brief      Kind of two-bit version of bit scan reverse
7 ///
8 //  Authors:    Igor Pavlov
9 //              Lasse Collin
10 //
11 //  This file has been put into the public domain.
12 //  You can do whatever you want with this file.
13 //
14 ///////////////////////////////////////////////////////////////////////////////
15 
16 #ifndef LZMA_FASTPOS_H
17 #define LZMA_FASTPOS_H
18 
19 // LZMA encodes match distances (positions) by storing the highest two
20 // bits using a six-bit value [0, 63], and then the missing lower bits.
21 // Dictionary size is also stored using this encoding in the new .lzma
22 // file format header.
23 //
24 // fastpos.h provides a way to quickly find out the correct six-bit
25 // values. The following table gives some examples of this encoding:
26 //
27 //      pos   return
28 //       0       0
29 //       1       1
30 //       2       2
31 //       3       3
32 //       4       4
33 //       5       4
34 //       6       5
35 //       7       5
36 //       8       6
37 //      11       6
38 //      12       7
39 //     ...      ...
40 //      15       7
41 //      16       8
42 //      17       8
43 //     ...      ...
44 //      23       8
45 //      24       9
46 //      25       9
47 //     ...      ...
48 //
49 //
50 // Provided functions or macros
51 // ----------------------------
52 //
53 // get_pos_slot(pos) is the basic version. get_pos_slot_2(pos)
54 // assumes that pos >= FULL_DISTANCES, thus the result is at least
55 // FULL_DISTANCES_BITS * 2. Using get_pos_slot(pos) instead of
56 // get_pos_slot_2(pos) would give the same result, but get_pos_slot_2(pos)
57 // should be tiny bit faster due to the assumption being made.
58 //
59 //
60 // Size vs. speed
61 // --------------
62 //
63 // With some CPUs that have fast BSR (bit scan reverse) instruction, the
64 // size optimized version is slightly faster than the bigger table based
65 // approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III
66 // and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that
67 // would still have speed roughly comparable to the table version. Older
68 // x86 CPUs like the original Pentium have very slow BSR; on those systems
69 // the table version is a lot faster.
70 //
71 // On some CPUs, the table version is a lot faster when using position
72 // dependent code, but with position independent code the size optimized
73 // version is slightly faster. This occurs at least on 32-bit SPARC (no
74 // ASM optimizations).
75 //
76 // I'm making the table version the default, because that has good speed
77 // on all systems I have tried. The size optimized version is sometimes
78 // slightly faster, but sometimes it is a lot slower.
79 
80 #ifdef HAVE_SMALL
81 #	include "bsr.h"
82 
83 #	define get_pos_slot(pos) ((pos) <= 4 ? (pos) : get_pos_slot_2(pos))
84 
85 static inline uint32_t
get_pos_slot_2(uint32_t pos)86 get_pos_slot_2(uint32_t pos)
87 {
88 	uint32_t i;
89 	lzma_bsr(i, pos);
90 	return (i + i) + ((pos >> (i - 1)) & 1);
91 }
92 
93 
94 #else
95 
96 #define FASTPOS_BITS 13
97 
98 extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];
99 
100 
101 #define fastpos_shift(extra, n) \
102 	((extra) + (n) * (FASTPOS_BITS - 1))
103 
104 #define fastpos_limit(extra, n) \
105 	(UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))
106 
107 #define fastpos_result(pos, extra, n) \
108 	lzma_fastpos[(pos) >> fastpos_shift(extra, n)] \
109 			+ 2 * fastpos_shift(extra, n)
110 
111 
112 static inline uint32_t
get_pos_slot(uint32_t pos)113 get_pos_slot(uint32_t pos)
114 {
115 	// If it is small enough, we can pick the result directly from
116 	// the precalculated table.
117 	if (pos < fastpos_limit(0, 0))
118 		return lzma_fastpos[pos];
119 
120 	if (pos < fastpos_limit(0, 1))
121 		return fastpos_result(pos, 0, 1);
122 
123 	return fastpos_result(pos, 0, 2);
124 }
125 
126 
127 #ifdef FULL_DISTANCES_BITS
128 static inline uint32_t
get_pos_slot_2(uint32_t pos)129 get_pos_slot_2(uint32_t pos)
130 {
131 	assert(pos >= FULL_DISTANCES);
132 
133 	if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
134 		return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 0);
135 
136 	if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
137 		return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 1);
138 
139 	return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 2);
140 }
141 #endif
142 
143 #endif
144 
145 #endif
146