1 #pragma once
2 
3 #include <string>
4 
5 #include <locale.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <wchar.h>
10 
11 #ifdef USE_PINYINTABLE_DATA // load pinyin table from external file
12 #include <fstream>
13 #else
14 #include "PinYinTable.h"
15 #endif
16 
17 namespace scx {
18 
/*
 * Comparator that orders multibyte strings by the pinyin of their characters.
 *
 * Every character is decoded with the C library (mbtowc / mbstowcs) and the
 * resulting pinyin strings are compared with strcoll, so both decoding and
 * collation follow the current C locale. To compare UTF-8 input the caller
 * must have selected a UTF-8 locale with setlocale() beforehand.
 *
 * Ordering rules shared by both comparators:
 *   - a character whose code is outside the table sorts before one inside it;
 *   - two characters inside the table are ordered by their table strings;
 *   - if neither is inside the table, the strings are compared with strcoll.
 *
 * With USE_PINYINTABLE_DATA defined the table is loaded at run time through
 * Init(); otherwise the table declared in PinYinTable.h is used.
 */
class PinYinCompare
{
  private:
    /* Selects an unsigned integer type as wide as wchar_t (2 or 4 bytes). */
    template<size_t S, typename T = void>
    struct Key;

    template<typename T>
    struct Key<2, T>
    {
        using type = unsigned short;
    };

    template<typename T>
    struct Key<4, T>
    {
        using type = unsigned int;
    };

    using key_t = Key<sizeof(wchar_t)>::type;

  public:
#ifdef USE_PINYINTABLE_DATA
    /*
     * Load the pinyin table from a text file.
     *
     * Each usable line starts with a hexadecimal character code in its first
     * four columns, followed by one separator column; the pinyin runs from
     * column 5 up to the next tab or the end of the line. Lines that are too
     * short, whose code cannot be parsed, or whose code does not fit into the
     * table are skipped.
     *
     * Returns true if at least one entry was loaded.
     */
    bool Init(const std::string& path)
    {
        m_count = 0;

        std::ifstream file(path.c_str());
        if (!file)
            return false;

        std::string line;
        while (std::getline(file, line)) {
            // Tolerate CRLF files: do not let '\r' become part of the pinyin.
            if (!line.empty() && line[line.size() - 1] == '\r')
                line.erase(line.size() - 1);
            if (line.size() <= 5)
                continue;

            const std::string hex = line.substr(0, 4);
            char* stop = nullptr;
            const unsigned long code = ::strtoul(hex.c_str(), &stop, 16);
            // Reject unparsable codes and codes that would index past the
            // end of m_table (a "-..." prefix wraps to a huge value here).
            if (stop == hex.c_str() || code >= TABLE_SIZE)
                continue;

            const size_t end = line.find('\t', 5);
            m_table[code] = line.substr(5, end == std::string::npos ? std::string::npos : end - 5);
            ++m_count;
        }
        return m_count != 0;
    }

    /* Number of table entries stored by the most recent Init() call. */
    size_t Count() const { return m_count; }
#endif

    /*
     * Compare only the first character of each string.
     *
     * Returns true if a sorts before b. If either string cannot be decoded,
     * the whole strings are compared with strcoll instead.
     */
    bool CmpUtf8FirstChar(const std::string& a, const std::string& b) const
    {
        wchar_t buf1[2] = {};
        wchar_t buf2[2] = {};
        const size_t ret1 = ::mbstowcs(buf1, a.c_str(), 1);
        const size_t ret2 = ::mbstowcs(buf2, b.c_str(), 1);

        const size_t failed = static_cast<size_t>(-1);
        if (ret1 == failed || ret2 == failed)
            return ::strcoll(a.c_str(), b.c_str()) < 0;

        const key_t key1 = static_cast<key_t>(buf1[0]);
        const key_t key2 = static_cast<key_t>(buf2[0]);
        const bool hit1 = key1 < TABLE_SIZE;
        const bool hit2 = key2 < TABLE_SIZE;

        // Exactly one side is in the table: the side outside sorts first.
        if (hit1 != hit2)
            return hit2;
        if (!hit1)
            return ::strcoll(a.c_str(), b.c_str()) < 0;
        return ::strcoll(Sound(key1), Sound(key2)) < 0;
    }

    /*
     * Compare two strings character by character.
     *
     * Returns true if a sorts before b. Single-byte characters sort before
     * multi-byte ones; two different single-byte characters are ordered by
     * byte value; otherwise the table rules described on the class apply.
     * When a character cannot be decoded (or an embedded NUL is reached) the
     * remaining tails are compared with strcoll. If one string is a prefix
     * of the other, the shorter one sorts first.
     */
    bool CmpUtf8(const std::string& a, const std::string& b) const
    {
        const char* bytes1 = a.c_str();
        const char* bytes2 = b.c_str();
        size_t left1 = a.size();
        size_t left2 = b.size();
        const size_t maxlen = MB_CUR_MAX;

        while (left1 != 0 && left2 != 0) {
            wchar_t wc1 = L'\0';
            wchar_t wc2 = L'\0';

            // Never let mbtowc look past the bytes that are still unread.
            const int ret1 = ::mbtowc(&wc1, bytes1, left1 < maxlen ? left1 : maxlen);
            if (ret1 < 0)
                ::mbtowc(nullptr, nullptr, 0); // reset the conversion state
            const int ret2 = ::mbtowc(&wc2, bytes2, left2 < maxlen ? left2 : maxlen);
            if (ret2 < 0)
                ::mbtowc(nullptr, nullptr, 0);

            if (ret1 <= 0 || ret2 <= 0)
                return ::strcoll(bytes1, bytes2) < 0;

            if (ret1 == 1 && ret2 == 1) {
                if (bytes1[0] != bytes2[0])
                    return bytes1[0] < bytes2[0];
            } else if (ret1 == 1) {
                return true; // single-byte before multi-byte
            } else if (ret2 == 1) {
                // Mirror of the case above. Must test ret2: otherwise
                // Cmp(a, b) and Cmp(b, a) could both be true, which breaks
                // the strict weak ordering std::sort relies on.
                return false;
            }

            const key_t key1 = static_cast<key_t>(wc1);
            const key_t key2 = static_cast<key_t>(wc2);
            const bool hit1 = key1 < TABLE_SIZE;
            const bool hit2 = key2 < TABLE_SIZE;

            if (hit1 != hit2)
                return hit2;
            if (!hit1)
                return ::strcoll(bytes1, bytes2) < 0;

            const int order = ::strcoll(Sound(key1), Sound(key2));
            if (order != 0)
                return order < 0;

            // Same pinyin: move on to the next character of each string.
            bytes1 += ret1;
            left1 -= static_cast<size_t>(ret1);
            bytes2 += ret2;
            left2 -= static_cast<size_t>(ret2);
        }

        return left1 < left2;
    }

  private:
    /*
     * Table string for key as a C string; key must be below TABLE_SIZE.
     * Never returns a null pointer, so the result is safe to pass to strcoll.
     */
    const char* Sound(key_t key) const
    {
#ifdef USE_PINYINTABLE_DATA
        return m_table[key].c_str();
#else
        // The built-in table is declared in PinYinTable.h (not visible
        // here); treat a null entry, should one exist, as an empty string.
        const char* snd = m_table[key];
        return snd != nullptr ? snd : "";
#endif
    }

#ifdef USE_PINYINTABLE_DATA
    constexpr static const size_t TABLE_SIZE = 0xff00;
    std::string m_table[TABLE_SIZE];
    size_t m_count = 0;
#else
    constexpr static const size_t TABLE_SIZE = scx::PinTableTableSize;
    const char* const* m_table = scx::PinYinTable;
#endif
};
166 }
167