1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 //
5 // Endian-neutral encoding:
6 // * Fixed-length numbers are encoded with least-significant byte first
7 // * In addition we support variable length "varint" encoding
8 // * Strings are encoded prefixed by their length in varint format
9 
10 #ifndef STORAGE_LEVELDB_UTIL_CODING_H_
11 #define STORAGE_LEVELDB_UTIL_CODING_H_
12 
13 #include <cstdint>
14 #include <cstring>
15 #include <string>
16 
17 #include "leveldb/slice.h"
18 #include "port/port.h"
19 
20 namespace leveldb {
21 
22 // Standard Put... routines append to a string
23 void PutFixed32(std::string* dst, uint32_t value);
24 void PutFixed64(std::string* dst, uint64_t value);
25 void PutVarint32(std::string* dst, uint32_t value);
26 void PutVarint64(std::string* dst, uint64_t value);
27 void PutLengthPrefixedSlice(std::string* dst, const Slice& value);
28 
29 // Standard Get... routines parse a value from the beginning of a Slice
30 // and advance the slice past the parsed value.
31 bool GetVarint32(Slice* input, uint32_t* value);
32 bool GetVarint64(Slice* input, uint64_t* value);
33 bool GetLengthPrefixedSlice(Slice* input, Slice* result);
34 
35 // Pointer-based variants of GetVarint...  These either store a value
36 // in *v and return a pointer just past the parsed value, or return
37 // nullptr on error.  These routines only look at bytes in the range
38 // [p..limit-1]
39 const char* GetVarint32Ptr(const char* p, const char* limit, uint32_t* v);
40 const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* v);
41 
42 // Returns the length of the varint32 or varint64 encoding of "v"
43 int VarintLength(uint64_t v);
44 
45 // Lower-level versions of Put... that write directly into a character buffer
46 // and return a pointer just past the last byte written.
47 // REQUIRES: dst has enough space for the value being written
48 char* EncodeVarint32(char* dst, uint32_t value);
49 char* EncodeVarint64(char* dst, uint64_t value);
50 
51 // TODO(costan): Remove port::kLittleEndian and the fast paths based on
52 //               std::memcpy when clang learns to optimize the generic code, as
53 //               described in https://bugs.llvm.org/show_bug.cgi?id=41761
54 //
55 // The platform-independent code in DecodeFixed{32,64}() gets optimized to mov
56 // on x86 and ldr on ARM64, by both clang and gcc. However, only gcc optimizes
57 // the platform-independent code in EncodeFixed{32,64}() to mov / str.
58 
59 // Lower-level versions of Put... that write directly into a character buffer
60 // REQUIRES: dst has enough space for the value being written
61 
EncodeFixed32(char * dst,uint32_t value)62 inline void EncodeFixed32(char* dst, uint32_t value) {
63   uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
64 
65   if (port::kLittleEndian) {
66     // Fast path for little-endian CPUs. All major compilers optimize this to a
67     // single mov (x86_64) / str (ARM) instruction.
68     std::memcpy(buffer, &value, sizeof(uint32_t));
69     return;
70   }
71 
72   // Platform-independent code.
73   // Currently, only gcc optimizes this to a single mov / str instruction.
74   buffer[0] = static_cast<uint8_t>(value);
75   buffer[1] = static_cast<uint8_t>(value >> 8);
76   buffer[2] = static_cast<uint8_t>(value >> 16);
77   buffer[3] = static_cast<uint8_t>(value >> 24);
78 }
79 
EncodeFixed64(char * dst,uint64_t value)80 inline void EncodeFixed64(char* dst, uint64_t value) {
81   uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
82 
83   if (port::kLittleEndian) {
84     // Fast path for little-endian CPUs. All major compilers optimize this to a
85     // single mov (x86_64) / str (ARM) instruction.
86     std::memcpy(buffer, &value, sizeof(uint64_t));
87     return;
88   }
89 
90   // Platform-independent code.
91   // Currently, only gcc optimizes this to a single mov / str instruction.
92   buffer[0] = static_cast<uint8_t>(value);
93   buffer[1] = static_cast<uint8_t>(value >> 8);
94   buffer[2] = static_cast<uint8_t>(value >> 16);
95   buffer[3] = static_cast<uint8_t>(value >> 24);
96   buffer[4] = static_cast<uint8_t>(value >> 32);
97   buffer[5] = static_cast<uint8_t>(value >> 40);
98   buffer[6] = static_cast<uint8_t>(value >> 48);
99   buffer[7] = static_cast<uint8_t>(value >> 56);
100 }
101 
102 // Lower-level versions of Get... that read directly from a character buffer
103 // without any bounds checking.
104 
DecodeFixed32(const char * ptr)105 inline uint32_t DecodeFixed32(const char* ptr) {
106   const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
107 
108   if (port::kLittleEndian) {
109     // Fast path for little-endian CPUs. All major compilers optimize this to a
110     // single mov (x86_64) / ldr (ARM) instruction.
111     uint32_t result;
112     std::memcpy(&result, buffer, sizeof(uint32_t));
113     return result;
114   }
115 
116   // Platform-independent code.
117   // Clang and gcc optimize this to a single mov / ldr instruction.
118   return (static_cast<uint32_t>(buffer[0])) |
119          (static_cast<uint32_t>(buffer[1]) << 8) |
120          (static_cast<uint32_t>(buffer[2]) << 16) |
121          (static_cast<uint32_t>(buffer[3]) << 24);
122 }
123 
DecodeFixed64(const char * ptr)124 inline uint64_t DecodeFixed64(const char* ptr) {
125   const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
126 
127   if (port::kLittleEndian) {
128     // Fast path for little-endian CPUs. All major compilers optimize this to a
129     // single mov (x86_64) / ldr (ARM) instruction.
130     uint64_t result;
131     std::memcpy(&result, buffer, sizeof(uint64_t));
132     return result;
133   }
134 
135   // Platform-independent code.
136   // Clang and gcc optimize this to a single mov / ldr instruction.
137   return (static_cast<uint64_t>(buffer[0])) |
138          (static_cast<uint64_t>(buffer[1]) << 8) |
139          (static_cast<uint64_t>(buffer[2]) << 16) |
140          (static_cast<uint64_t>(buffer[3]) << 24) |
141          (static_cast<uint64_t>(buffer[4]) << 32) |
142          (static_cast<uint64_t>(buffer[5]) << 40) |
143          (static_cast<uint64_t>(buffer[6]) << 48) |
144          (static_cast<uint64_t>(buffer[7]) << 56);
145 }
146 
// Internal routine for use by fallback path of GetVarint32Ptr
const char* GetVarint32PtrFallback(const char* p, const char* limit,
                                   uint32_t* value);

// Inline fast path for parsing a varint32 from [p, limit).
//
// If at least one byte is available and its top bit (0x80) is clear, that
// byte is stored in *value and p + 1 is returned. Every other case, including
// an empty range, is handed to GetVarint32PtrFallback() and its result is
// returned unchanged.
inline const char* GetVarint32Ptr(const char* p, const char* limit,
                                  uint32_t* value) {
  // Nothing to inspect here; let the out-of-line routine decide the outcome.
  if (p >= limit) {
    return GetVarint32PtrFallback(p, limit, value);
  }

  // Read the byte as unsigned so the bit test and the stored value are not
  // affected by the signedness of plain char.
  const uint8_t first_byte = *reinterpret_cast<const uint8_t*>(p);
  if ((first_byte & 0x80) != 0) {
    return GetVarint32PtrFallback(p, limit, value);
  }

  *value = first_byte;
  return p + 1;
}
161 
162 }  // namespace leveldb
163 
164 #endif  // STORAGE_LEVELDB_UTIL_CODING_H_
165