#pragma once

#include <string>

#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

#ifdef USE_PINYINTABLE_DATA // load pinyin table from external file
#include <fstream>
#else
#include "PinYinTable.h"
#endif

namespace scx {

/*
 * "Less-than" predicates for multibyte strings that order characters by the
 * string stored for them in a lookup table indexed by wide-character code
 * (presumably the pinyin spelling of the character -- see the table source).
 *
 * Decoding uses mbstowcs()/mbtowc() and ordering uses strcoll(), so results
 * depend on the process locale (LC_CTYPE / LC_COLLATE). The functions are
 * named "Utf8", so the caller is presumably expected to have selected a
 * UTF-8 locale with setlocale() beforehand.
 *
 * With USE_PINYINTABLE_DATA the table is loaded at run time through Init();
 * otherwise the table compiled in from PinYinTable.h is used.
 */
class PinYinCompare
{
private:
    // Picks an unsigned integer type as wide as wchar_t (2 or 4 bytes).
    template<size_t S, typename T = void>
    struct Key;

    template<typename T>
    struct Key<2, T>
    {
        using type = unsigned short;
    };

    template<typename T>
    struct Key<4, T>
    {
        using type = unsigned int;
    };

    using key_t = Key<sizeof(wchar_t)>::type;

public:
#ifdef USE_PINYINTABLE_DATA
    /*
     * Load the table from a text file.
     *
     * Each usable line has a hexadecimal code in its first four characters,
     * one separator character, and then the value, which runs from column 5
     * up to the next tab (or to the end of the line).
     *
     * Lines of five characters or fewer, lines whose code is not hexadecimal,
     * and lines whose code is outside [0, TABLE_SIZE) are skipped.
     * Entries left over from an earlier Init() call are not cleared.
     *
     * Returns true if the file could be opened and at least one entry was
     * stored.
     */
    bool Init(const std::string& path)
    {
        std::ifstream file(path.c_str());
        if (!file)
            return false;

        m_count = 0;
        std::string line;
        while (m_count < TABLE_SIZE && std::getline(file, line)) {
            if (line.size() <= 5)
                continue;

            const std::string hex = line.substr(0, 4);
            char* stop = nullptr;
            const long code = ::strtol(hex.c_str(), &stop, 16);
            // Reject unparsable codes (they would otherwise clobber entry 0)
            // and codes that would index outside m_table.
            if (stop == hex.c_str() || code < 0 ||
                static_cast<unsigned long>(code) >= TABLE_SIZE)
                continue;

            const size_t tab = line.find('\t', 5);
            const size_t len = (tab == std::string::npos) ? std::string::npos : tab - 5;
            m_table[static_cast<key_t>(code)] = line.substr(5, len);
            ++m_count; // counts stored entries only, not lines read
        }
        return m_count != 0;
    }

    // Number of entries stored by the most recent Init() call.
    size_t Count() const { return m_count; }
#endif
    /* compare only first character */
    /*
     * Returns true if `a` sorts before `b`, looking only at the first
     * character of each string.
     *
     * A character whose code is outside the table sorts before one that is
     * inside it. When neither character is in the table, or either string
     * cannot be decoded, the whole strings are compared with strcoll().
     */
    bool CmpUtf8FirstChar(const std::string& a, const std::string& b) const
    {
        wchar_t buf1[2] = {0, 0};
        wchar_t buf2[2] = {0, 0};
        const size_t ret1 = ::mbstowcs(buf1, a.c_str(), 1);
        const size_t ret2 = ::mbstowcs(buf2, b.c_str(), 1);

        if (ret1 == static_cast<size_t>(-1) || ret2 == static_cast<size_t>(-1))
            return ::strcoll(a.c_str(), b.c_str()) < 0;

        const key_t key1 = static_cast<key_t>(buf1[0]);
        const key_t key2 = static_cast<key_t>(buf2[0]);
        const bool hit1 = key1 < TABLE_SIZE;
        const bool hit2 = key2 < TABLE_SIZE;

        if (hit1 != hit2)
            return hit2; // the character outside the table sorts first
        if (!hit1)
            return ::strcoll(a.c_str(), b.c_str()) < 0;

        return ::strcoll(Sound(key1), Sound(key2)) < 0;
    }

    /*
     * Returns true if `a` sorts before `b`, comparing character by character.
     *
     * Per character pair:
     *  - two single-byte characters are ordered by byte value;
     *  - a single-byte character sorts before a multi-byte one;
     *  - a character outside the table sorts before one inside it;
     *  - two table characters are ordered by strcoll() on their table
     *    values; equal values are treated as a tie and the scan continues.
     *
     * If one string runs out first, the shorter string sorts first. If a
     * character cannot be decoded, or neither character is in the table, the
     * remaining bytes are compared with strcoll().
     */
    bool CmpUtf8(const std::string& a, const std::string& b) const
    {
        const char* bytes1 = a.data();
        const char* bytes2 = b.data();
        int nbytes1 = static_cast<int>(a.size());
        int nbytes2 = static_cast<int>(b.size());

        while (true) {
            if (nbytes1 <= 0 || nbytes2 <= 0)
                return nbytes1 < nbytes2;

            wchar_t wc1 = 0;
            wchar_t wc2 = 0;
            const int ret1 = DecodeChar(&wc1, bytes1);
            const int ret2 = DecodeChar(&wc2, bytes2);

            if (ret1 <= 0 || ret2 <= 0) {
                break;
            } else if (ret1 == 1 && ret2 == 1) {
                if (bytes1[0] != bytes2[0])
                    return bytes1[0] < bytes2[0];
            } else if (ret1 == 1 && ret2 > 1) {
                return true;
            } else if (ret1 > 1 && ret2 == 1) {
                // Mirror of the branch above. The original tested
                // `ret1 == 1` here, which made this branch unreachable.
                return false;
            }

            const key_t key1 = static_cast<key_t>(wc1);
            const key_t key2 = static_cast<key_t>(wc2);
            const bool hit1 = key1 < TABLE_SIZE;
            const bool hit2 = key2 < TABLE_SIZE;

            if (hit1 != hit2)
                return hit2; // the character outside the table sorts first
            if (!hit1)
                break;       // neither is in the table: compare the rest as-is

            const int order = ::strcoll(Sound(key1), Sound(key2));
            if (order != 0)
                return order < 0;

            bytes1 += ret1;
            nbytes1 -= ret1;
            bytes2 += ret2;
            nbytes2 -= ret2;
        }

        return ::strcoll(bytes1, bytes2) < 0;
    }

private:
    // Decode one multibyte character from `bytes` into `*out`. Returns what
    // mbtowc() returns; on failure the conversion state is reset first.
    static int DecodeChar(wchar_t* out, const char* bytes)
    {
        const int len = ::mbtowc(out, bytes, MB_CUR_MAX);
        if (len < 0)
            ::mbtowc(nullptr, nullptr, 0);
        return len;
    }

    // Table value for `key` as a C string. `key` must be < TABLE_SIZE.
    const char* Sound(key_t key) const
    {
#ifdef USE_PINYINTABLE_DATA
        return m_table[key].c_str();
#else
        return m_table[key];
#endif
    }

#ifdef USE_PINYINTABLE_DATA
    constexpr static const size_t TABLE_SIZE = 0xff00;
    // NOTE(review): this array makes the object large (TABLE_SIZE strings);
    // prefer static or heap storage over a stack instance.
    std::string m_table[TABLE_SIZE];
    size_t m_count = 0;
#else
    constexpr static const size_t TABLE_SIZE = scx::PinTableTableSize;
    const char* const* m_table = scx::PinYinTable;
#endif
};
}