1 #ifndef SOURCETOOLS_UTF8_UTF8_H 2 #define SOURCETOOLS_UTF8_UTF8_H 3 4 #include <cstddef> 5 6 #include <sourcetools/core/core.h> 7 8 namespace sourcetools { 9 namespace utf8 { 10 11 namespace detail { 12 static const unsigned char mask[] = { 13 0, // 00000000 14 0x7F, // 01111111 15 0x1F, // 00011111 16 0x0F, // 00001111 17 0x07, // 00000111 18 0x03, // 00000011 19 0x01 // 00000001 20 }; 21 } // namespace detail 22 23 class iterator 24 { 25 public: iterator(const char * data)26 iterator(const char* data) 27 : data_(reinterpret_cast<const unsigned char*>(data)), 28 offset_(0) 29 { 30 } 31 iterator(const iterator & other)32 iterator(const iterator& other) 33 : data_(other.data_), 34 offset_(other.offset_) 35 { 36 } 37 38 wchar_t operator*() 39 { 40 std::size_t n = size(); 41 if (n == 0 || n > 6) 42 return -1; 43 44 const unsigned char* it = data_ + offset_; 45 wchar_t ch = (*it++) & detail::mask[n]; 46 for (std::size_t i = 1; i < n; ++i) 47 { 48 ch <<= 6; 49 ch |= (*it++) & 0x3F; 50 } 51 52 return ch; 53 } 54 55 iterator& operator++() 56 { 57 offset_ += size(); 58 return *this; 59 } 60 61 iterator operator++(int) 62 { 63 iterator copy(*this); 64 operator++(); 65 return copy; 66 } 67 68 bool operator==(const iterator& it) 69 { 70 return 71 data_ + offset_ == 72 it.data_ + it.offset_; 73 } 74 75 bool operator!=(const iterator& it) 76 { 77 return 78 data_ + offset_ != 79 it.data_ + it.offset_; 80 } 81 82 private: 83 size()84 int size() 85 { 86 unsigned char ch = data_[offset_]; 87 if (ch == 0) 88 return 0; 89 else if (ch < 192) 90 return 1; 91 else if (ch < 224) 92 return 2; 93 else if (ch < 240) 94 return 3; 95 else if (ch < 248) 96 return 4; 97 else if (ch < 252) 98 return 5; 99 else if (ch < 254) 100 return 6; 101 102 // TODO: on error? 103 return 1; 104 } 105 106 private: 107 108 const unsigned char* data_; 109 std::size_t offset_; 110 }; 111 112 } // namespace utf8 113 } // namespace sourcetools 114 115 #endif /* SOURCETOOLS_UTF8_UTF8_H */ 116