1 #include "stdafx.h"
2 #include "Util/EncodingTable.h"
3 #include "Util/Util.h"
4 #include "Core/Common.h"
5 
6 #define MAXHEXLENGTH 32
7 
Trie()8 Trie::Trie()
9 {
10 	Node root { 0, false };
11 	nodes.push_back(root);
12 }
13 
insert(const wchar_t * text,size_t value)14 void Trie::insert(const wchar_t* text, size_t value)
15 {
16 	size_t node = 0;	// root node
17 
18 	// traverse existing nodes
19 	while (*text != 0)
20 	{
21 		LookupEntry lookupEntry { node, *text };
22 		auto it = lookup.find(lookupEntry);
23 		if (it == lookup.end())
24 			break;
25 
26 		node = it->second;
27 		text++;
28 	}
29 
30 	// add new nodes as necessary
31 	while (*text != 0)
32 	{
33 		Node newNode { nodes.size(), false };
34 		nodes.push_back(newNode);
35 
36 		LookupEntry lookupEntry { node, *text };
37 		lookup[lookupEntry] = newNode.index;
38 		node = newNode.index;
39 		text++;
40 	}
41 
42 	// set value
43 	nodes[node].hasValue = true;
44 	nodes[node].value = value;
45 }
46 
insert(wchar_t character,size_t value)47 void Trie::insert(wchar_t character, size_t value)
48 {
49 	wchar_t str[2];
50 	str[0] = character;
51 	str[1] = 0;
52 	insert(str,value);
53 }
54 
findLongestPrefix(const wchar_t * text,size_t & result)55 bool Trie::findLongestPrefix(const wchar_t* text, size_t& result)
56 {
57 	size_t node = 0;		// root node
58 	size_t valueNode = 0;	// remember last node that had a value
59 
60 	while (*text != 0)
61 	{
62 		if (nodes[node].hasValue)
63 			valueNode = node;
64 
65 		LookupEntry lookupEntry { node, *text++ };
66 		auto it = lookup.find(lookupEntry);
67 
68 		if (it == lookup.end())
69 			break;
70 
71 		node = it->second;
72 	}
73 
74 	if (nodes[node].hasValue)
75 		valueNode = node;
76 
77 	result = nodes[valueNode].value;
78 	return nodes[valueNode].hasValue;
79 }
80 
EncodingTable()81 EncodingTable::EncodingTable()
82 {
83 
84 }
85 
~EncodingTable()86 EncodingTable::~EncodingTable()
87 {
88 
89 }
90 
clear()91 void EncodingTable::clear()
92 {
93 	hexData.clear();
94 	entries.clear();
95 }
96 
// Converts a string of hexadecimal digits (case-insensitive) into raw bytes.
//
// Two digits form one byte, most significant nibble first. `dest` must have
// room for hex.size()/2 bytes.
//
// Returns the number of bytes written, or -1 if the string contains a
// character that is not a hex digit or if it has an odd number of digits.
// Odd lengths are rejected because the trailing nibble cannot form a whole
// byte; it used to be written to `dest` but left out of the returned length,
// silently dropping data.
int parseHexString(std::wstring& hex, unsigned char* dest)
{
	if (hex.size() % 2 != 0)
		return -1;

	for (size_t i = 0; i < hex.size(); i++)
	{
		wchar_t source = towlower(hex[i]);
		int value;

		if (source >= L'a' && source <= L'f')
		{
			value = source-L'a'+10;
		} else if (source >= L'0' && source <= L'9')
		{
			value = source-L'0';
		} else {
			return -1;
		}

		size_t index = i/2;
		if (i % 2)
		{
			// second digit of the pair: shift the high nibble up, add the low one
			dest[index] = static_cast<unsigned char>((dest[index] << 4) | value);
		} else {
			// first digit of the pair: start a new byte
			dest[index] = static_cast<unsigned char>(value);
		}
	}

	return static_cast<int>(hex.size()/2);
}
123 
load(const std::wstring & fileName,TextFile::Encoding encoding)124 bool EncodingTable::load(const std::wstring& fileName, TextFile::Encoding encoding)
125 {
126 	unsigned char hexBuffer[MAXHEXLENGTH];
127 
128 	TextFile input;
129 	if (input.open(fileName,TextFile::Read,encoding) == false)
130 		return false;
131 
132 	hexData.clear();
133 	entries.clear();
134 	setTerminationEntry((unsigned char*)"\0",1);
135 
136 	while (!input.atEnd())
137 	{
138 		std::wstring line = input.readLine();
139 		if (line.empty() || line[0] == '*') continue;
140 
141 		if (line[0] == '/')
142 		{
143 			std::wstring hex = line.substr(1);
144 			if (hex.empty() || hex.length() > 2*MAXHEXLENGTH)
145 			{
146 				// error
147 				continue;
148 			}
149 
150 			int length = parseHexString(hex,hexBuffer);
151 			if (length == -1)
152 			{
153 				// error
154 				continue;
155 			}
156 
157 			setTerminationEntry(hexBuffer,length);
158 		} else {
159 			size_t pos = line.find(L'=');
160 			std::wstring hex = line.substr(0,pos);
161 			std::wstring value = line.substr(pos+1);
162 
163 			if (hex.empty() || value.empty() || hex.length() > 2*MAXHEXLENGTH)
164 			{
165 				// error
166 				continue;
167 			}
168 
169 			int length = parseHexString(hex,hexBuffer);
170 			if (length == -1)
171 			{
172 				// error
173 				continue;
174 			}
175 
176 			addEntry(hexBuffer,length,value);
177 		}
178 	}
179 
180 	return true;
181 }
182 
addEntry(unsigned char * hex,size_t hexLength,const std::wstring & value)183 void EncodingTable::addEntry(unsigned char* hex, size_t hexLength, const std::wstring& value)
184 {
185 	if (value.size() == 0)
186 		return;
187 
188 	// insert into trie
189 	size_t index = entries.size();
190 	lookup.insert(value.c_str(),index);
191 
192 	// add entry
193 	TableEntry entry;
194 	entry.hexPos = hexData.append(hex,hexLength);
195 	entry.hexLen = hexLength;
196 	entry.valueLen = value.size();
197 
198 	entries.push_back(entry);
199 }
200 
addEntry(unsigned char * hex,size_t hexLength,wchar_t value)201 void EncodingTable::addEntry(unsigned char* hex, size_t hexLength, wchar_t value)
202 {
203 	if (value == '\0')
204 		return;
205 
206 	// insert into trie
207 	size_t index = entries.size();
208 	lookup.insert(value,index);
209 
210 	// add entry
211 	TableEntry entry;
212 	entry.hexPos = hexData.append(hex,hexLength);
213 	entry.hexLen = hexLength;
214 	entry.valueLen = 1;
215 
216 	entries.push_back(entry);
217 
218 }
219 
setTerminationEntry(unsigned char * hex,size_t hexLength)220 void EncodingTable::setTerminationEntry(unsigned char* hex, size_t hexLength)
221 {
222 	terminationEntry.hexPos = hexData.append(hex,hexLength);
223 	terminationEntry.hexLen = hexLength;
224 	terminationEntry.valueLen = 0;
225 }
226 
encodeString(const std::wstring & str,bool writeTermination)227 ByteArray EncodingTable::encodeString(const std::wstring& str, bool writeTermination)
228 {
229 	ByteArray result;
230 
231 	size_t pos = 0;
232 	while (pos < str.size())
233 	{
234 		size_t index;
235 		if (lookup.findLongestPrefix(str.c_str()+pos,index) == false)
236 		{
237 			// error
238 			return ByteArray();
239 		}
240 
241 		TableEntry& entry = entries[index];
242 		for (size_t i = 0; i < entry.hexLen; i++)
243 		{
244 			result.appendByte(hexData[entry.hexPos+i]);
245 		}
246 
247 		pos += entry.valueLen;
248 	}
249 
250 	if (writeTermination)
251 	{
252 		TableEntry& entry = terminationEntry;
253 		for (size_t i = 0; i < entry.hexLen; i++)
254 		{
255 			result.appendByte(hexData[entry.hexPos+i]);
256 		}
257 	}
258 
259 	return result;
260 }
261 
encodeTermination()262 ByteArray EncodingTable::encodeTermination()
263 {
264 	ByteArray result;
265 
266 	TableEntry& entry = terminationEntry;
267 	for (size_t i = 0; i < entry.hexLen; i++)
268 	{
269 		result.appendByte(hexData[entry.hexPos+i]);
270 	}
271 
272 	return result;
273 }
274