1 #include "stdafx.h"
2 #include "Util/EncodingTable.h"
3 #include "Util/Util.h"
4 #include "Core/Common.h"
5
6 #define MAXHEXLENGTH 32
7
Trie()8 Trie::Trie()
9 {
10 Node root { 0, false };
11 nodes.push_back(root);
12 }
13
insert(const wchar_t * text,size_t value)14 void Trie::insert(const wchar_t* text, size_t value)
15 {
16 size_t node = 0; // root node
17
18 // traverse existing nodes
19 while (*text != 0)
20 {
21 LookupEntry lookupEntry { node, *text };
22 auto it = lookup.find(lookupEntry);
23 if (it == lookup.end())
24 break;
25
26 node = it->second;
27 text++;
28 }
29
30 // add new nodes as necessary
31 while (*text != 0)
32 {
33 Node newNode { nodes.size(), false };
34 nodes.push_back(newNode);
35
36 LookupEntry lookupEntry { node, *text };
37 lookup[lookupEntry] = newNode.index;
38 node = newNode.index;
39 text++;
40 }
41
42 // set value
43 nodes[node].hasValue = true;
44 nodes[node].value = value;
45 }
46
insert(wchar_t character,size_t value)47 void Trie::insert(wchar_t character, size_t value)
48 {
49 wchar_t str[2];
50 str[0] = character;
51 str[1] = 0;
52 insert(str,value);
53 }
54
findLongestPrefix(const wchar_t * text,size_t & result)55 bool Trie::findLongestPrefix(const wchar_t* text, size_t& result)
56 {
57 size_t node = 0; // root node
58 size_t valueNode = 0; // remember last node that had a value
59
60 while (*text != 0)
61 {
62 if (nodes[node].hasValue)
63 valueNode = node;
64
65 LookupEntry lookupEntry { node, *text++ };
66 auto it = lookup.find(lookupEntry);
67
68 if (it == lookup.end())
69 break;
70
71 node = it->second;
72 }
73
74 if (nodes[node].hasValue)
75 valueNode = node;
76
77 result = nodes[valueNode].value;
78 return nodes[valueNode].hasValue;
79 }
80
EncodingTable()81 EncodingTable::EncodingTable()
82 {
83
84 }
85
~EncodingTable()86 EncodingTable::~EncodingTable()
87 {
88
89 }
90
clear()91 void EncodingTable::clear()
92 {
93 hexData.clear();
94 entries.clear();
95 }
96
// Converts a string of hexadecimal digits into raw bytes.
//
// hex:  digits to convert, two per byte, most significant nibble first;
//       upper- and lowercase letters are both accepted.
// dest: output buffer; must provide room for hex.size()/2 bytes.
//
// Returns the number of bytes written to dest, or -1 if hex has an odd
// number of digits or contains a character that is not a hexadecimal digit.
// When an invalid character is found, the bytes before it may already have
// been written to dest.
int parseHexString(std::wstring& hex, unsigned char* dest)
{
	// Every byte needs exactly two digits. A dangling nibble would otherwise
	// be dropped silently and a shorter sequence than written reported.
	if (hex.size() % 2 != 0)
		return -1;

	for (size_t i = 0; i < hex.size(); i++)
	{
		const wchar_t digit = towlower(hex[i]);
		int value;

		if (digit >= L'a' && digit <= L'f')
		{
			value = digit-L'a'+10;
		} else if (digit >= L'0' && digit <= L'9')
		{
			value = digit-L'0';
		} else {
			return -1;
		}

		// even positions start a byte, odd positions complete it
		unsigned char& target = dest[i/2];
		if (i % 2)
			target = (unsigned char) ((target << 4) | value);
		else
			target = (unsigned char) value;
	}

	return (int) (hex.size()/2);
}
123
load(const std::wstring & fileName,TextFile::Encoding encoding)124 bool EncodingTable::load(const std::wstring& fileName, TextFile::Encoding encoding)
125 {
126 unsigned char hexBuffer[MAXHEXLENGTH];
127
128 TextFile input;
129 if (input.open(fileName,TextFile::Read,encoding) == false)
130 return false;
131
132 hexData.clear();
133 entries.clear();
134 setTerminationEntry((unsigned char*)"\0",1);
135
136 while (!input.atEnd())
137 {
138 std::wstring line = input.readLine();
139 if (line.empty() || line[0] == '*') continue;
140
141 if (line[0] == '/')
142 {
143 std::wstring hex = line.substr(1);
144 if (hex.empty() || hex.length() > 2*MAXHEXLENGTH)
145 {
146 // error
147 continue;
148 }
149
150 int length = parseHexString(hex,hexBuffer);
151 if (length == -1)
152 {
153 // error
154 continue;
155 }
156
157 setTerminationEntry(hexBuffer,length);
158 } else {
159 size_t pos = line.find(L'=');
160 std::wstring hex = line.substr(0,pos);
161 std::wstring value = line.substr(pos+1);
162
163 if (hex.empty() || value.empty() || hex.length() > 2*MAXHEXLENGTH)
164 {
165 // error
166 continue;
167 }
168
169 int length = parseHexString(hex,hexBuffer);
170 if (length == -1)
171 {
172 // error
173 continue;
174 }
175
176 addEntry(hexBuffer,length,value);
177 }
178 }
179
180 return true;
181 }
182
addEntry(unsigned char * hex,size_t hexLength,const std::wstring & value)183 void EncodingTable::addEntry(unsigned char* hex, size_t hexLength, const std::wstring& value)
184 {
185 if (value.size() == 0)
186 return;
187
188 // insert into trie
189 size_t index = entries.size();
190 lookup.insert(value.c_str(),index);
191
192 // add entry
193 TableEntry entry;
194 entry.hexPos = hexData.append(hex,hexLength);
195 entry.hexLen = hexLength;
196 entry.valueLen = value.size();
197
198 entries.push_back(entry);
199 }
200
addEntry(unsigned char * hex,size_t hexLength,wchar_t value)201 void EncodingTable::addEntry(unsigned char* hex, size_t hexLength, wchar_t value)
202 {
203 if (value == '\0')
204 return;
205
206 // insert into trie
207 size_t index = entries.size();
208 lookup.insert(value,index);
209
210 // add entry
211 TableEntry entry;
212 entry.hexPos = hexData.append(hex,hexLength);
213 entry.hexLen = hexLength;
214 entry.valueLen = 1;
215
216 entries.push_back(entry);
217
218 }
219
setTerminationEntry(unsigned char * hex,size_t hexLength)220 void EncodingTable::setTerminationEntry(unsigned char* hex, size_t hexLength)
221 {
222 terminationEntry.hexPos = hexData.append(hex,hexLength);
223 terminationEntry.hexLen = hexLength;
224 terminationEntry.valueLen = 0;
225 }
226
encodeString(const std::wstring & str,bool writeTermination)227 ByteArray EncodingTable::encodeString(const std::wstring& str, bool writeTermination)
228 {
229 ByteArray result;
230
231 size_t pos = 0;
232 while (pos < str.size())
233 {
234 size_t index;
235 if (lookup.findLongestPrefix(str.c_str()+pos,index) == false)
236 {
237 // error
238 return ByteArray();
239 }
240
241 TableEntry& entry = entries[index];
242 for (size_t i = 0; i < entry.hexLen; i++)
243 {
244 result.appendByte(hexData[entry.hexPos+i]);
245 }
246
247 pos += entry.valueLen;
248 }
249
250 if (writeTermination)
251 {
252 TableEntry& entry = terminationEntry;
253 for (size_t i = 0; i < entry.hexLen; i++)
254 {
255 result.appendByte(hexData[entry.hexPos+i]);
256 }
257 }
258
259 return result;
260 }
261
encodeTermination()262 ByteArray EncodingTable::encodeTermination()
263 {
264 ByteArray result;
265
266 TableEntry& entry = terminationEntry;
267 for (size_t i = 0; i < entry.hexLen; i++)
268 {
269 result.appendByte(hexData[entry.hexPos+i]);
270 }
271
272 return result;
273 }
274