1 #ifndef LM_INTERPOLATE_BOUNDED_SEQUENCE_ENCODING_H
2 #define LM_INTERPOLATE_BOUNDED_SEQUENCE_ENCODING_H
3 
4 /* Encodes fixed-length sequences of integers with known bounds on each entry.
5  * This is used to encode how far each model has backed off.
6  * TODO: make this class efficient.  Bit-level packing or multiply by bound and
7  * add.
8  */
9 
10 #include "util/exception.hh"
11 #include "util/fixed_array.hh"
12 
13 #include <algorithm>
14 #include <cstring>
15 
16 namespace lm {
17 namespace interpolate {
18 
19 class BoundedSequenceEncoding {
20   public:
21     // Encode [0, bound_begin[0]) x [0, bound_begin[1]) x [0, bound_begin[2]) x ... x [0, *(bound_end - 1)) for entries in the sequence
22     BoundedSequenceEncoding(const unsigned char *bound_begin, const unsigned char *bound_end);
23 
Entries() const24     std::size_t Entries() const { return entries_.size(); }
25 
EncodedLength() const26     std::size_t EncodedLength() const { return byte_length_; }
27 
Encode(const unsigned char * from,void * to_void) const28     void Encode(const unsigned char *from, void *to_void) const {
29       uint8_t *to = static_cast<uint8_t*>(to_void);
30       uint64_t cur = 0;
31       for (const Entry *i = entries_.begin(); i != entries_.end(); ++i, ++from) {
32         if (UTIL_UNLIKELY(i->next)) {
33           std::memcpy(to, &cur, sizeof(uint64_t));
34           to += sizeof(uint64_t);
35           cur = 0;
36         }
37         cur |= static_cast<uint64_t>(*from) << i->shift;
38       }
39 #if BYTE_ORDER == BIG_ENDIAN
40       cur <<= (8 - overhang_) * 8;
41 #endif
42       memcpy(to, &cur, overhang_);
43     }
44 
Decode(const void * from_void,unsigned char * to) const45     void Decode(const void *from_void, unsigned char *to) const {
46       const uint8_t *from = static_cast<const uint8_t*>(from_void);
47       uint64_t cur = 0;
48       memcpy(&cur, from, first_copy_);
49 #if BYTE_ORDER == BIG_ENDIAN
50       cur >>= (8 - first_copy_) * 8;
51 #endif
52       for (const Entry *i = entries_.begin(); i != entries_.end(); ++i, ++to) {
53         if (UTIL_UNLIKELY(i->next)) {
54           from += sizeof(uint64_t);
55           cur = 0;
56           std::memcpy(&cur, from,
57               std::min<std::size_t>(sizeof(uint64_t), static_cast<const uint8_t*>(from_void) + byte_length_ - from));
58 #if BYTE_ORDER == BIG_ENDIAN
59           cur >>= (8 - (static_cast<const uint8_t*>(from_void) + byte_length_ - from)) * 8;
60 #endif
61         }
62         *to = (cur >> i->shift) & i->mask;
63       }
64     }
65 
66   private:
67     struct Entry {
68       bool next;
69       uint8_t shift;
70       uint64_t mask;
71     };
72     util::FixedArray<Entry> entries_;
73     std::size_t byte_length_;
74     std::size_t first_copy_;
75     std::size_t overhang_;
76 };
77 
78 
79 }} // namespaces
80 
81 #endif // LM_INTERPOLATE_BOUNDED_SEQUENCE_ENCODING_H
82