1 // Copyright 2008 Google Inc. All Rights Reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 //
29 // Internals shared between the Snappy implementation and its unittest.
30 
31 #ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
32 #define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
33 
34 #include "snappy-stubs-internal.h"
35 
36 namespace snappy {
37 namespace internal {
38 
39 // Working memory performs a single allocation to hold all scratch space
40 // required for compression.
41 class WorkingMemory {
42  public:
43   explicit WorkingMemory(size_t input_size);
44   ~WorkingMemory();
45 
46   // Allocates and clears a hash table using memory in "*this",
47   // stores the number of buckets in "*table_size" and returns a pointer to
48   // the base of the hash table.
49   uint16* GetHashTable(size_t fragment_size, int* table_size) const;
GetScratchInput()50   char* GetScratchInput() const { return input_; }
GetScratchOutput()51   char* GetScratchOutput() const { return output_; }
52 
53  private:
54   char* mem_;      // the allocated memory, never nullptr
55   size_t size_;    // the size of the allocated memory, never 0
56   uint16* table_;  // the pointer to the hashtable
57   char* input_;    // the pointer to the input scratch buffer
58   char* output_;   // the pointer to the output scratch buffer
59 
60   // No copying
61   WorkingMemory(const WorkingMemory&);
62   void operator=(const WorkingMemory&);
63 };
64 
65 // Flat array compression that does not emit the "uncompressed length"
66 // prefix. Compresses "input" string to the "*op" buffer.
67 //
68 // REQUIRES: "input_length <= kBlockSize"
69 // REQUIRES: "op" points to an array of memory that is at least
70 // "MaxCompressedLength(input_length)" in size.
71 // REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
72 // REQUIRES: "table_size" is a power of two
73 //
74 // Returns an "end" pointer into "op" buffer.
75 // "end - op" is the compressed size of "input".
76 char* CompressFragment(const char* input,
77                        size_t input_length,
78                        char* op,
79                        uint16* table,
80                        const int table_size);
81 
82 // Find the largest n such that
83 //
84 //   s1[0,n-1] == s2[0,n-1]
85 //   and n <= (s2_limit - s2).
86 //
87 // Return make_pair(n, n < 8).
88 // Does not read *s2_limit or beyond.
89 // Does not read *(s1 + (s2_limit - s2)) or beyond.
90 // Requires that s2_limit >= s2.
91 //
92 // Separate implementation for 64-bit, little-endian cpus.
93 #if !defined(SNAPPY_IS_BIG_ENDIAN) && \
94     (defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM))
FindMatchLength(const char * s1,const char * s2,const char * s2_limit)95 static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
96                                                       const char* s2,
97                                                       const char* s2_limit) {
98   assert(s2_limit >= s2);
99   size_t matched = 0;
100 
101   // This block isn't necessary for correctness; we could just start looping
102   // immediately.  As an optimization though, it is useful.  It creates some not
103   // uncommon code paths that determine, without extra effort, whether the match
104   // length is less than 8.  In short, we are hoping to avoid a conditional
105   // branch, and perhaps get better code layout from the C++ compiler.
106   if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) {
107     uint64 a1 = UNALIGNED_LOAD64(s1);
108     uint64 a2 = UNALIGNED_LOAD64(s2);
109     if (a1 != a2) {
110       return std::pair<size_t, bool>(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3,
111                                      true);
112     } else {
113       matched = 8;
114       s2 += 8;
115     }
116   }
117 
118   // Find out how long the match is. We loop over the data 64 bits at a
119   // time until we find a 64-bit block that doesn't match; then we find
120   // the first non-matching bit and use that to calculate the total
121   // length of the match.
122   while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) {
123     if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
124       s2 += 8;
125       matched += 8;
126     } else {
127       uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
128       int matching_bits = Bits::FindLSBSetNonZero64(x);
129       matched += matching_bits >> 3;
130       assert(matched >= 8);
131       return std::pair<size_t, bool>(matched, false);
132     }
133   }
134   while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) {
135     if (s1[matched] == *s2) {
136       ++s2;
137       ++matched;
138     } else {
139       return std::pair<size_t, bool>(matched, matched < 8);
140     }
141   }
142   return std::pair<size_t, bool>(matched, matched < 8);
143 }
144 #else
FindMatchLength(const char * s1,const char * s2,const char * s2_limit)145 static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
146                                                       const char* s2,
147                                                       const char* s2_limit) {
148   // Implementation based on the x86-64 version, above.
149   assert(s2_limit >= s2);
150   int matched = 0;
151 
152   while (s2 <= s2_limit - 4 &&
153          UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) {
154     s2 += 4;
155     matched += 4;
156   }
157   if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) {
158     uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched);
159     int matching_bits = Bits::FindLSBSetNonZero(x);
160     matched += matching_bits >> 3;
161   } else {
162     while ((s2 < s2_limit) && (s1[matched] == *s2)) {
163       ++s2;
164       ++matched;
165     }
166   }
167   return std::pair<size_t, bool>(matched, matched < 8);
168 }
169 #endif
170 
171 // Lookup tables for decompression code.  Give --snappy_dump_decompression_table
172 // to the unit test to recompute char_table.
173 
174 enum {
175   LITERAL = 0,
176   COPY_1_BYTE_OFFSET = 1,  // 3 bit length + 3 bits of offset in opcode
177   COPY_2_BYTE_OFFSET = 2,
178   COPY_4_BYTE_OFFSET = 3
179 };
180 static const int kMaximumTagLength = 5;  // COPY_4_BYTE_OFFSET plus the actual offset.
181 
182 // Data stored per entry in lookup table:
183 //      Range   Bits-used       Description
184 //      ------------------------------------
185 //      1..64   0..7            Literal/copy length encoded in opcode byte
186 //      0..7    8..10           Copy offset encoded in opcode byte / 256
187 //      0..4    11..13          Extra bytes after opcode
188 //
189 // We use eight bits for the length even though 7 would have sufficed
190 // because of efficiency reasons:
191 //      (1) Extracting a byte is faster than a bit-field
192 //      (2) It properly aligns copy offset so we do not need a <<8
193 static const uint16 char_table[256] = {
194   0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
195   0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
196   0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
197   0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
198   0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
199   0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
200   0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
201   0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
202   0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
203   0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
204   0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
205   0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
206   0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
207   0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
208   0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
209   0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
210   0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
211   0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
212   0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
213   0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
214   0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
215   0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
216   0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
217   0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
218   0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
219   0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
220   0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
221   0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
222   0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
223   0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
224   0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
225   0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
226 };
227 
228 }  // end namespace internal
229 }  // end namespace snappy
230 
231 #endif  // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
232