1 /* rewrite to class without glib by Mathias Küster (2002)
2  */
3 
4 /* DCTC - a Direct Connect text clone for Linux
5  * Copyright (C) 2001 Eric Prevoteau
6  *
7  * he3.c: Copyright (C) Eric Prevoteau <www@ac2i.tzo.com>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22  */
23 
24 #include "che3.h"
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #include "../dcos.h"
31 #include "clist.h"
32 #include "cbytearray.h"
33 #include "cstring.h"
34 
35 /******************************************************/
36 /* patch from David Toth for                          */
37 /* File sharing list Windows (DC) compatibiliy bugfix */
38 /* (byte 4 meaning and computation).                  */
39 /******************************************************/
40 
41 /*********************************************************************/
42 /* When the file list of another user is retrieved, it is compressed */
43 /* The data format is described below.
44 byte 0: 'H'
45 byte 1: 'E'
46 byte 2: '3'
47 byte 3: 0xD
48 byte 4: 8 bit parity of the decoded data (the result of all bytes "xor"-ed)
49 byte 5 - 8 : it is a number encoded in little endian. It it the number of bytes to produce.
50 byte 9 - 10: it is a number encoded in little endian. This number gives the number of couples.
51              Let's name this number as N
52 byte 11 - (10+2*N): this is a list of couple. The first byte is a character (the one before compression),
53              the second byte gives the length of the compressed pattern. Let's name this value L.
54 byte (11+2*N) - (11+2*N+ ((sum(L)+7)&~7): a bit barbare :) To simplify. Let's name K, the sum of all L values.
55              Here, you find a string of K bits (byte aligned). The L0 first bits are the encoded value of
56              caracter of couple 0. The next Lx bits are the encoded value of caractere of couple x.
57              Note: each byte are used from the lower bit to the upper bit.
58 after this string, you have the encoded string.
59 When data are finally decoded. The format is always:
60 directory name \xD\xA
61 \x9 filename | filesize \xD\xA
62 \x9 filename | filesize \xD\xA
63 \x9 filename | filesize \xD\xA
64 \x9 filename | filesize \xD\xA
65 \x9 filename | filesize \xD\xA
66 
67 */
68 
69 /* ============================================================================================= */
70 /* ============================================================================================= */
71 /* ==================================== Decoding functions ===================================== */
72 /* ============================================================================================= */
73 /* ============================================================================================= */
74 
75 /******************************************************/
76 /*get 1 bit from the current bit position inside data */
77 /******************************************************/
get_bit(unsigned char * data,unsigned long * cur_pos)78 unsigned long CHE3::get_bit(unsigned char *data, unsigned long *cur_pos)
79 {
80 	unsigned long out;
81 
82 	out=((unsigned long)(data[(*cur_pos)/8]>>((*cur_pos)&7)))&1;
83 
84 	(*cur_pos)++;
85 
86 	return out;
87 }
88 
89 /*********************************************************/
90 /* get nb_bits from the current bit position inside data */
91 /*********************************************************/
get_bits(unsigned char * data,unsigned long * cur_pos,int nb_bit)92 unsigned long CHE3::get_bits(unsigned char *data, unsigned long *cur_pos, int nb_bit)
93 {
94 	int i;
95 	unsigned long res=0;
96 
97 	for(i=0;i<nb_bit;i++)
98 	{
99 		res=(res<<1)|get_bit(data,cur_pos);
100 	}
101 	return res;
102 }
103 
104 /**************************************************/
105 /* decompress data compressed using HE3 algorithm */
106 /**********************************************************/
107 /* input: a GByteArray containing HE3 compressed data     */
108 /* output: a GString containing uncompressed data or NULL */
109 /**********************************************************/
decode_he3_data(CByteArray * data)110 CString *CHE3::decode_he3_data(CByteArray *data)
111 {
112 	CString *output;
113 
114 	//output=g_string_new("");
115 	output = new CString();
116 
117 	if((data->Data()[0]=='H')&&(data->Data()[1]=='E')&&(data->Data()[2]=='3')&&(data->Data()[3]==0xD))
118 	{
119 		CByteArray *decode_array=NULL;
120 		int pos;
121 		int nb_couple;
122 		int max_len=0;		/* max size of encoded pattern */
123 		int ttl_len=0;		/* total size of all encoded patterns */
124 		unsigned long offset_pattern;
125 		unsigned long offset_encoded;
126 		int nb_output;
127 
128 		/* compute the number of bytes to produce */
129 		nb_output=(((int)(data->Data()[8]))&255);
130 		nb_output<<=8;
131 		nb_output|=(((int)(data->Data()[7]))&255);
132 		nb_output<<=8;
133 		nb_output|=(((int)(data->Data()[6]))&255);
134 		nb_output<<=8;
135 		nb_output|=(((int)(data->Data()[5]))&255);
136 
137 		/* compute the number of couples */
138 		nb_couple=data->Data()[9];
139 		nb_couple+=((((int)(data->Data()[10]))&255)<<8);
140 
141 		for(pos=0;pos<nb_couple;pos++)
142 		{
143 			int v;
144 
145 			v=(((int)(data->Data()[12+pos*2]))&255);
146 			if(v>max_len)
147 				max_len=v;
148 			ttl_len+=v;
149 		}
150 
151 		//decode_array=g_byte_array_new();
152 		decode_array = new CByteArray();
153 		//decode_array=g_byte_array_set_size(decode_array,1<<(max_len+1));		/* theorytically, max_len could reach up to 255 */
154 		decode_array->SetSize(1<<(max_len+1));																						/* but really, a  value above 15 is not oftenly encountered */
155 																										/* thus the algorithm needs no more than ... 64KB */
156 																								/* raisonnable value is upto 23 (requires 8MB) */
157 		{
158 //			CString *tmp;
159 
160 			//tmp=g_string_new("");
161 //			tmp = new CString();
162 //			g_string_sprintf(tmp,"he3:max_len: %d",max_len);
163 
164 //			disp_msg(INFO_MSG,"decode_he3_data",tmp->str,NULL);
165 			//g_string_free(tmp,true);
166 //			delete tmp;
167 		}
168 
169 		if(decode_array!=NULL)
170 		{
171 			/* clear the decode array */
172 			/* the decode array is technically a binary tree */
173 			/* if the depth of the tree is big (let's say more than 10), */
174 			/* storing the binary tree inside an array becomes very memory consumming */
175 			/* but I am too lazy to program all binary tree creation/addition/navigation/destruction functions :) */
176 			memset(decode_array->Data(),0,1<<(max_len+1));
177 
178 			offset_pattern=8*(11+nb_couple*2);		/* position of the pattern block, it is just after the list of couples */
179 			offset_encoded=offset_pattern + ((ttl_len+7)&~7);	/* the encoded data are just after */
180 																				/* the pattern block (rounded to upper full byte) */
181 
182 			/* decode_array is a binary tree. byte 0 is the level 0 of the tree. byte 2-3, the level 1, byte 4-7, the level 2, */
183 			/* in decode array, a N bit length pattern having the value K is its data at the position: */
184 			/* 2^N + (K&((2^N)-1)) */
185 			/* due to the fact K has always N bit length, the formula can be simplified into: */
186 			/* 2^N + K */
187 			for(pos=0;pos<nb_couple;pos++)
188 			{
189 				unsigned int v_len;
190 				unsigned long value;
191 
192 				v_len=(((int)(data->Data()[12+pos*2]))&255);	/* the number of bit required */
193 
194 				value=get_bits(data->Data(),&offset_pattern,v_len);
195 				decode_array->Data()[(1<<v_len)+ value]=data->Data()[11+pos*2];		/* the character */
196 			}
197 
198 			/* now, its time to decode */
199 			//while(output->len!=nb_output)
200 			int l=0;
201 			while(output->Length()!=nb_output)
202 			//while(l!=nb_output)
203 			{
204 				unsigned long cur_val;
205 				unsigned int nb_bit_val;
206 
207 				cur_val=get_bit(data->Data(),&offset_encoded);		/* get one bit */
208 				nb_bit_val=1;
209 
210 				while(decode_array->Data()[(1<<nb_bit_val) + cur_val ]==0)
211 				{
212 					cur_val=(cur_val<<1)|get_bit(data->Data(),&offset_encoded);
213 					nb_bit_val++;
214 				}
215 
216 				//output=g_string_append_c(output,decode_array->data[(1<<nb_bit_val) + cur_val ]);
217 				(*output) += decode_array->Data()[(1<<nb_bit_val) + cur_val ];
218 				//printf("%d %d\n",l,output->Length());
219 				l++;
220 
221 			}
222 			//g_byte_array_free(decode_array,true);
223 			delete decode_array;
224 		}
225 	}
226 
227 	{
228 		int i;
229 		unsigned char parity=0;
230 
231 		for(i=0;i<output->Length();i++)
232 			parity^=output->Data()[i];
233 
234 //		if ( data->Size()>5 )
235 //			printf("PARITY : %d %d\n",data->Data()[4],parity);
236 	}
237 
238 	return output;
239 }
240 
241 #if 0
242 int main(int argc,char **argv)
243 {
244 	CByteArray *in;
245 	CString *out;
246 	unsigned char tst[54]={0x48,0x45,0x33,0x0D,0x12,0x24,0x00,0x00,0x00,0x0B,0x00,0x09,0x04,0x0A,0x03,0x0D,0x04,0x2E,0x04,0x32,0x04,0x61,0x02,0x62,0x03,0x65,0x05,0x73,0x05,0x74,0x03,0x7C,0x04,0xA6,0xF4,0xE2,0x21,0xAE,0x01,0x61,0x71,0x29,0xFB,0xFF,0xFF,0xBE,0x75,0xA5,0x0C,0x00,0xF0,0xAD,0x2B,0x05};
247 
248 	//in=g_byte_array_new();
249 	in = new CByteArray();
250 	//in=g_byte_array_append(in,tst,54);
251 	in->Append(tst,54);
252 	out=decode_he3_data(in);
253 
254 	//g_byte_array_free(in,true);
255 	delete in;
256 
257 	printf("%s\n",out->str);
258 }
259 #endif
260 
261 /* ============================================================================================= */
262 /* ============================================================================================= */
263 /* ==================================== Encoding functions ===================================== */
264 /* ============================================================================================= */
265 /* ============================================================================================= */
266 
267 
268 //static gint huf_insert_glist(gconstpointer a, gconstpointer b)
huf_insert_glist(void * a,void * b)269 int CHE3::huf_insert_glist(void * a, void * b)
270 {
271 	if(((const HUFNODE*)a)->occur<((const HUFNODE*)b)->occur)
272 		return -1;		/* a is must be before b */
273 	else if(((const HUFNODE*)a)->occur>((const HUFNODE*)b)->occur)
274 			return 1;		/* a is must be after b */
275 
276 	/* if both have the same occurences, the one without son comes before */
277 	if( (((const HUFNODE*)a)->left==NULL) && (((const HUFNODE*)b)->left==NULL) )
278 		return -1;
279 
280 	if(((const HUFNODE*)a)->left==NULL)
281 		return -1;
282 
283 	return 1;
284 }
285 
286 /**************************************************************************/
287 /* recursively scan all nodes of the huffman tree and fill encoding table */
288 /**************************************************************************/
use_hufnode(HUFENCODE tbl_enc[256],HUFNODE * node,unsigned int bits_len,unsigned long bits)289 void CHE3::use_hufnode(HUFENCODE tbl_enc[256], HUFNODE *node,unsigned int bits_len, unsigned long bits)
290 {
291 	if(node->left!=NULL)
292 	{	/* it is a node, right is always != NULL */
293 		use_hufnode(tbl_enc,node->left,bits_len+1,(bits<<1)|0);
294 		use_hufnode(tbl_enc,node->right,bits_len+1,(bits<<1)|1);
295 	}
296 	else
297 	{	/* it is a leaf, right is always NULL */
298 		int idx=((int)node->val)&255;
299 		tbl_enc[idx].bits_len=bits_len;
300 		tbl_enc[idx].bits=bits;
301 #if 0
302 		printf("huf: bits_len: %u  pattern: %lu ",bits_len,bits);
303 		printf("leaf: %d (%c)\n",idx,node->val);
304 #endif
305 
306 //		if(bits_len>8*sizeof(unsigned long))		/* compared with the size of unsigned long in bits */
307 //			disp_msg(ERR_MSG,"user_hufnode","encoded value cannot fit into unsigned long",NULL);
308 	}
309 
310 	return;
311 }
312 
313 /*****************************************************************/
314 /* recursively free memory used by all nodes of the huffman tree */
315 /*****************************************************************/
free_hufnode(HUFNODE * node)316 void CHE3::free_hufnode(HUFNODE *node)
317 {
318 	if(node==NULL)
319 		return;
320 	if(node->left!=NULL)
321 		free_hufnode(node->left);
322 	if(node->right!=NULL)
323 		free_hufnode(node->right);
324 	free(node);
325 }
326 
327 
add_bit(CByteArray * data,unsigned long * bit_pos,unsigned char bit_value)328 CByteArray *CHE3::add_bit(CByteArray *data, unsigned long *bit_pos, unsigned char bit_value)
329 {
330 	if(((*bit_pos)&7)==0)		/* starting a new byte ? */
331 	{
332 		unsigned char v=0;
333 		//data=g_byte_array_append(data,&v,1);
334 		data->Append(&v,1);
335 	}
336 
337 	/* due to the fact we always add 0 as new byte, we don't have to set bit having 0 as value */
338 	/* but just 1 */
339 	if(bit_value!=0)
340 	{
341 		data->Data()[(*bit_pos)/8]|= (1<<((*bit_pos)&7));
342 	}
343 
344 	(*bit_pos)++;
345 
346 	return data;
347 }
348 
349 /**************************************/
350 /* append the pattern to data@bit_pos */
351 /**************************************/
add_bits(CByteArray * data,unsigned long * bit_pos,unsigned long pattern,unsigned int pattern_length)352 CByteArray *CHE3::add_bits(CByteArray *data, unsigned long *bit_pos, unsigned long pattern, unsigned int pattern_length)
353 {
354 	unsigned long i;
355 
356 	for(i=0;i<pattern_length;i++)
357 	{
358 		data=add_bit(data,bit_pos,(unsigned char)((pattern>>(pattern_length-1-i))&1) );		/* use pattern from upper to lower bit */
359 	}
360 	return data;
361 }
362 
363 #if 0
364 static void	disp_huf(GList *pre_tree)
365 {
366 	int i;
367 	HUFNODE *w;
368 	printf("---\n");
369 	//for(i=0;i<g_list_length(pre_tree);i++)
370 	for(i=0;i<pre_tree->size;i++)
371 	{
372 		w=g_list_nth_data(pre_tree,i);		/* get the first HUFNODE of the list */
373 
374 		printf("occur: %ld, left=%p, right=%p, val=%02X\n",w->occur,w->left,w->right,w->val);
375 	}
376 }
377 #endif
378 
379 /*****************************************************************************************/
380 /* compress data compressed using an Huffman algorithm and store it in HE3 usable format */
381 /*****************************************************************************************/
382 /* input: a GString containing a string to compress        */
383 /* output: a GByteArray containing compressed data or NULL */
384 /***********************************************************/
encode_he3_data(const CString * str)385 CByteArray *CHE3::encode_he3_data(const CString *str)
386 {
387 	unsigned long occur[256];
388 	HUFENCODE tbl_enc[256];
389 
390 	long i;
391 	CByteArray *data;
392 	//GList *pre_tree=NULL;
393 	CList<CString> *pre_tree=NULL;
394 	HUFNODE *root_huf=NULL;
395 	int nb_val=0;
396 	unsigned long bit_pos;
397 
398 	if((str==NULL)||(str->Length()==0))
399 		return NULL;
400 
401 	/* count the number of times each character appears */
402 	memset(occur,0,sizeof(occur));
403 
404 	//for(i=0;i<str->len;i++)
405 	for(i=0;i<str->Length();i++)
406 	{
407 		occur[((int)(str->Data()[i]))&255]++;
408 	}
409 
410 	/* now, we will build the huffman tree */
411 	pre_tree = new CList<CString>();
412 
413 	/* stage 1: create all the leafs */
414 	for(i=0;i<256;i++)
415 	{
416 		if(occur[i]!=0)
417 		{
418 			HUFNODE *nw;
419 
420 			nw=(HUFNODE*)malloc(sizeof(HUFNODE));
421 			nw->occur=occur[i];
422 			nw->left=NULL;
423 			nw->right=NULL;
424 			nw->val=(unsigned char)i;
425 
426 			//pre_tree=g_list_insert_sorted(pre_tree,nw, huf_insert_glist);
427 			pre_tree->InsertSorted((CString*)nw, huf_insert_glist);
428 
429 			nb_val++;
430 		}
431 	}
432 
433 	//while(g_list_length(pre_tree)>1)
434 	while(pre_tree->Count()>1)
435 	{
436 		HUFNODE *nw;
437 
438 		nw=(HUFNODE*)malloc(sizeof(HUFNODE));
439 
440 		nw->left=(HUFNODE*)pre_tree->Next(0);	/* get the first HUFNODE of the list */
441 		pre_tree->Remove((CString*)nw->left);	/* and remove it */
442 		nw->right=(HUFNODE*)pre_tree->Next(0);	/* get the second HUFNODE of the list */
443 		pre_tree->Remove((CString*)nw->right);	/* and remove it */
444 
445 		nw->occur=nw->left->occur+nw->right->occur;
446 		nw->val=0;
447 
448 		/* add a new node having the 2 removed nodes as son */
449 		//pre_tree=g_list_insert_sorted(pre_tree,nw, huf_insert_glist);
450 		pre_tree->InsertSorted((CString*)nw, huf_insert_glist);
451 	}
452 #if 0
453 	disp_huf(pre_tree);
454 #endif
455 	/* now, the huffman tree is done */
456 	//root_huf=g_list_nth_data(pre_tree,0);
457 	root_huf=(HUFNODE*)pre_tree->Next(0);
458 
459 	//pre_tree=g_list_remove(pre_tree,root_huf);
460 	pre_tree->Remove((CString*)root_huf);
461 
462 	memset(tbl_enc,0,sizeof(tbl_enc));
463 
464 	/* now, we will compute encoding pattern from huffman tree */
465 	use_hufnode(tbl_enc,root_huf,0,0);
466 
467 	/* well, encoding table is now done, we can encode the data */
468 	//data=g_byte_array_new();
469 	data = new CByteArray();
470 
471 	/* set the initial HE3 header */
472 	{
473 		unsigned char he3_header[]={'H','E','3',0xD,
474 											 0,					/* parity: all uncompressed bytes xor'ed */
475 											 0,0,0,0,			/* number of bytes to produce (little-endian) */
476 											 0,0};				/* number of couples */
477 
478 		/* patch from David Toth (byte 4: parity computation) */
479 		unsigned char parity=0;
480 
481 		//for(i=0;i<str->len;i++)
482 		for(i=0;i<str->Length();i++)
483 			parity^=str->Data()[i];
484 		he3_header[4]=(unsigned char)(parity&255);
485 
486 		//he3_header[5]=str->len&255;
487 		he3_header[5]=(unsigned char)(str->Length()&255);
488 		he3_header[6]=(unsigned char)((str->Length()>>8)&255);
489 		he3_header[7]=(unsigned char)((str->Length()>>16)&255);
490 		he3_header[8]=(unsigned char)((str->Length()>>24)&255);
491 		he3_header[9]=(unsigned char)(nb_val&255);
492 		he3_header[10]=(unsigned char)((nb_val>>8)&255);
493 
494 		//data=g_byte_array_append(data,he3_header,11);
495 		data->Append(he3_header,11);
496 	}
497 
498 	/* add the couple list (character, pattern length) */
499 	for(i=0;i<256;i++)
500 	{
501 		if(occur[i]!=0)
502 		{
503 			unsigned char ent[2];
504 
505 			ent[0]=(unsigned char)i;
506 			ent[1]=(unsigned char)tbl_enc[i].bits_len;
507 			//data=g_byte_array_append(data,ent,2);
508 			data->Append(ent,2);
509 		}
510 	}
511 
512 	/* and now, add all the patterns */
513 	bit_pos=data->Size()*8;
514 	for(i=0;i<256;i++)
515 	{
516 		if(occur[i]!=0)
517 		{
518 			data=add_bits(data,&bit_pos,tbl_enc[i].bits, tbl_enc[i].bits_len);
519 		}
520 	}
521 
522 	bit_pos=(bit_pos+7)&~7;		/* move to the beginning of the next byte */
523 
524 	/* now, we encode the string */
525 	for(i=0;i<str->Length();i++)
526 	{
527 		int idx=((int)str->Data()[i])&255;
528 
529 		data=add_bits(data,&bit_pos,tbl_enc[idx].bits, tbl_enc[idx].bits_len);
530 	}
531 
532 	/* free huffman tree */
533 	free_hufnode(root_huf);
534 
535 	delete pre_tree;
536 
537 	return data;
538 }
539