1 #ifndef LM_INTERPOLATE_BOUNDED_SEQUENCE_ENCODING_H 2 #define LM_INTERPOLATE_BOUNDED_SEQUENCE_ENCODING_H 3 4 /* Encodes fixed-length sequences of integers with known bounds on each entry. 5 * This is used to encode how far each model has backed off. 6 * TODO: make this class efficient. Bit-level packing or multiply by bound and 7 * add. 8 */ 9 10 #include "util/exception.hh" 11 #include "util/fixed_array.hh" 12 13 #include <algorithm> 14 #include <cstring> 15 16 namespace lm { 17 namespace interpolate { 18 19 class BoundedSequenceEncoding { 20 public: 21 // Encode [0, bound_begin[0]) x [0, bound_begin[1]) x [0, bound_begin[2]) x ... x [0, *(bound_end - 1)) for entries in the sequence 22 BoundedSequenceEncoding(const unsigned char *bound_begin, const unsigned char *bound_end); 23 Entries() const24 std::size_t Entries() const { return entries_.size(); } 25 EncodedLength() const26 std::size_t EncodedLength() const { return byte_length_; } 27 Encode(const unsigned char * from,void * to_void) const28 void Encode(const unsigned char *from, void *to_void) const { 29 uint8_t *to = static_cast<uint8_t*>(to_void); 30 uint64_t cur = 0; 31 for (const Entry *i = entries_.begin(); i != entries_.end(); ++i, ++from) { 32 if (UTIL_UNLIKELY(i->next)) { 33 std::memcpy(to, &cur, sizeof(uint64_t)); 34 to += sizeof(uint64_t); 35 cur = 0; 36 } 37 cur |= static_cast<uint64_t>(*from) << i->shift; 38 } 39 #if BYTE_ORDER == BIG_ENDIAN 40 cur <<= (8 - overhang_) * 8; 41 #endif 42 memcpy(to, &cur, overhang_); 43 } 44 Decode(const void * from_void,unsigned char * to) const45 void Decode(const void *from_void, unsigned char *to) const { 46 const uint8_t *from = static_cast<const uint8_t*>(from_void); 47 uint64_t cur = 0; 48 memcpy(&cur, from, first_copy_); 49 #if BYTE_ORDER == BIG_ENDIAN 50 cur >>= (8 - first_copy_) * 8; 51 #endif 52 for (const Entry *i = entries_.begin(); i != entries_.end(); ++i, ++to) { 53 if (UTIL_UNLIKELY(i->next)) { 54 from += sizeof(uint64_t); 55 cur = 0; 56 std::memcpy(&cur, from, 57 std::min<std::size_t>(sizeof(uint64_t), static_cast<const uint8_t*>(from_void) + byte_length_ - from)); 58 #if BYTE_ORDER == BIG_ENDIAN 59 cur >>= (8 - (static_cast<const uint8_t*>(from_void) + byte_length_ - from)) * 8; 60 #endif 61 } 62 *to = (cur >> i->shift) & i->mask; 63 } 64 } 65 66 private: 67 struct Entry { 68 bool next; 69 uint8_t shift; 70 uint64_t mask; 71 }; 72 util::FixedArray<Entry> entries_; 73 std::size_t byte_length_; 74 std::size_t first_copy_; 75 std::size_t overhang_; 76 }; 77 78 79 }} // namespaces 80 81 #endif // LM_INTERPOLATE_BOUNDED_SEQUENCE_ENCODING_H 82