1 /* rewrite to class without glib by Mathias Küster (2002)
2 */
3
4 /* DCTC - a Direct Connect text clone for Linux
5 * Copyright (C) 2001 Eric Prevoteau
6 *
7 * he3.c: Copyright (C) Eric Prevoteau <www@ac2i.tzo.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 */
23
24 #include "che3.h"
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "../dcos.h"
31 #include "clist.h"
32 #include "cbytearray.h"
33 #include "cstring.h"
34
35 /******************************************************/
36 /* patch from David Toth for */
37 /* File sharing list Windows (DC) compatibiliy bugfix */
38 /* (byte 4 meaning and computation). */
39 /******************************************************/
40
41 /*********************************************************************/
42 /* When the file list of another user is retrieved, it is compressed */
43 /* The data format is described below.
44 byte 0: 'H'
45 byte 1: 'E'
46 byte 2: '3'
47 byte 3: 0xD
48 byte 4: 8 bit parity of the decoded data (the result of all bytes "xor"-ed)
49 byte 5 - 8 : it is a number encoded in little endian. It it the number of bytes to produce.
50 byte 9 - 10: it is a number encoded in little endian. This number gives the number of couples.
51 Let's name this number as N
52 byte 11 - (10+2*N): this is a list of couple. The first byte is a character (the one before compression),
53 the second byte gives the length of the compressed pattern. Let's name this value L.
54 byte (11+2*N) - (11+2*N+ ((sum(L)+7)&~7): a bit barbare :) To simplify. Let's name K, the sum of all L values.
55 Here, you find a string of K bits (byte aligned). The L0 first bits are the encoded value of
56 caracter of couple 0. The next Lx bits are the encoded value of caractere of couple x.
57 Note: each byte are used from the lower bit to the upper bit.
58 after this string, you have the encoded string.
59 When data are finally decoded. The format is always:
60 directory name \xD\xA
61 \x9 filename | filesize \xD\xA
62 \x9 filename | filesize \xD\xA
63 \x9 filename | filesize \xD\xA
64 \x9 filename | filesize \xD\xA
65 \x9 filename | filesize \xD\xA
66
67 */
68
69 /* ============================================================================================= */
70 /* ============================================================================================= */
71 /* ==================================== Decoding functions ===================================== */
72 /* ============================================================================================= */
73 /* ============================================================================================= */
74
75 /******************************************************/
76 /*get 1 bit from the current bit position inside data */
77 /******************************************************/
get_bit(unsigned char * data,unsigned long * cur_pos)78 unsigned long CHE3::get_bit(unsigned char *data, unsigned long *cur_pos)
79 {
80 unsigned long out;
81
82 out=((unsigned long)(data[(*cur_pos)/8]>>((*cur_pos)&7)))&1;
83
84 (*cur_pos)++;
85
86 return out;
87 }
88
89 /*********************************************************/
90 /* get nb_bits from the current bit position inside data */
91 /*********************************************************/
get_bits(unsigned char * data,unsigned long * cur_pos,int nb_bit)92 unsigned long CHE3::get_bits(unsigned char *data, unsigned long *cur_pos, int nb_bit)
93 {
94 int i;
95 unsigned long res=0;
96
97 for(i=0;i<nb_bit;i++)
98 {
99 res=(res<<1)|get_bit(data,cur_pos);
100 }
101 return res;
102 }
103
104 /**************************************************/
105 /* decompress data compressed using HE3 algorithm */
106 /**********************************************************/
107 /* input: a GByteArray containing HE3 compressed data */
108 /* output: a GString containing uncompressed data or NULL */
109 /**********************************************************/
decode_he3_data(CByteArray * data)110 CString *CHE3::decode_he3_data(CByteArray *data)
111 {
112 CString *output;
113
114 //output=g_string_new("");
115 output = new CString();
116
117 if((data->Data()[0]=='H')&&(data->Data()[1]=='E')&&(data->Data()[2]=='3')&&(data->Data()[3]==0xD))
118 {
119 CByteArray *decode_array=NULL;
120 int pos;
121 int nb_couple;
122 int max_len=0; /* max size of encoded pattern */
123 int ttl_len=0; /* total size of all encoded patterns */
124 unsigned long offset_pattern;
125 unsigned long offset_encoded;
126 int nb_output;
127
128 /* compute the number of bytes to produce */
129 nb_output=(((int)(data->Data()[8]))&255);
130 nb_output<<=8;
131 nb_output|=(((int)(data->Data()[7]))&255);
132 nb_output<<=8;
133 nb_output|=(((int)(data->Data()[6]))&255);
134 nb_output<<=8;
135 nb_output|=(((int)(data->Data()[5]))&255);
136
137 /* compute the number of couples */
138 nb_couple=data->Data()[9];
139 nb_couple+=((((int)(data->Data()[10]))&255)<<8);
140
141 for(pos=0;pos<nb_couple;pos++)
142 {
143 int v;
144
145 v=(((int)(data->Data()[12+pos*2]))&255);
146 if(v>max_len)
147 max_len=v;
148 ttl_len+=v;
149 }
150
151 //decode_array=g_byte_array_new();
152 decode_array = new CByteArray();
153 //decode_array=g_byte_array_set_size(decode_array,1<<(max_len+1)); /* theorytically, max_len could reach up to 255 */
154 decode_array->SetSize(1<<(max_len+1)); /* but really, a value above 15 is not oftenly encountered */
155 /* thus the algorithm needs no more than ... 64KB */
156 /* raisonnable value is upto 23 (requires 8MB) */
157 {
158 // CString *tmp;
159
160 //tmp=g_string_new("");
161 // tmp = new CString();
162 // g_string_sprintf(tmp,"he3:max_len: %d",max_len);
163
164 // disp_msg(INFO_MSG,"decode_he3_data",tmp->str,NULL);
165 //g_string_free(tmp,true);
166 // delete tmp;
167 }
168
169 if(decode_array!=NULL)
170 {
171 /* clear the decode array */
172 /* the decode array is technically a binary tree */
173 /* if the depth of the tree is big (let's say more than 10), */
174 /* storing the binary tree inside an array becomes very memory consumming */
175 /* but I am too lazy to program all binary tree creation/addition/navigation/destruction functions :) */
176 memset(decode_array->Data(),0,1<<(max_len+1));
177
178 offset_pattern=8*(11+nb_couple*2); /* position of the pattern block, it is just after the list of couples */
179 offset_encoded=offset_pattern + ((ttl_len+7)&~7); /* the encoded data are just after */
180 /* the pattern block (rounded to upper full byte) */
181
182 /* decode_array is a binary tree. byte 0 is the level 0 of the tree. byte 2-3, the level 1, byte 4-7, the level 2, */
183 /* in decode array, a N bit length pattern having the value K is its data at the position: */
184 /* 2^N + (K&((2^N)-1)) */
185 /* due to the fact K has always N bit length, the formula can be simplified into: */
186 /* 2^N + K */
187 for(pos=0;pos<nb_couple;pos++)
188 {
189 unsigned int v_len;
190 unsigned long value;
191
192 v_len=(((int)(data->Data()[12+pos*2]))&255); /* the number of bit required */
193
194 value=get_bits(data->Data(),&offset_pattern,v_len);
195 decode_array->Data()[(1<<v_len)+ value]=data->Data()[11+pos*2]; /* the character */
196 }
197
198 /* now, its time to decode */
199 //while(output->len!=nb_output)
200 int l=0;
201 while(output->Length()!=nb_output)
202 //while(l!=nb_output)
203 {
204 unsigned long cur_val;
205 unsigned int nb_bit_val;
206
207 cur_val=get_bit(data->Data(),&offset_encoded); /* get one bit */
208 nb_bit_val=1;
209
210 while(decode_array->Data()[(1<<nb_bit_val) + cur_val ]==0)
211 {
212 cur_val=(cur_val<<1)|get_bit(data->Data(),&offset_encoded);
213 nb_bit_val++;
214 }
215
216 //output=g_string_append_c(output,decode_array->data[(1<<nb_bit_val) + cur_val ]);
217 (*output) += decode_array->Data()[(1<<nb_bit_val) + cur_val ];
218 //printf("%d %d\n",l,output->Length());
219 l++;
220
221 }
222 //g_byte_array_free(decode_array,true);
223 delete decode_array;
224 }
225 }
226
227 {
228 int i;
229 unsigned char parity=0;
230
231 for(i=0;i<output->Length();i++)
232 parity^=output->Data()[i];
233
234 // if ( data->Size()>5 )
235 // printf("PARITY : %d %d\n",data->Data()[4],parity);
236 }
237
238 return output;
239 }
240
241 #if 0
242 int main(int argc,char **argv)
243 {
244 CByteArray *in;
245 CString *out;
246 unsigned char tst[54]={0x48,0x45,0x33,0x0D,0x12,0x24,0x00,0x00,0x00,0x0B,0x00,0x09,0x04,0x0A,0x03,0x0D,0x04,0x2E,0x04,0x32,0x04,0x61,0x02,0x62,0x03,0x65,0x05,0x73,0x05,0x74,0x03,0x7C,0x04,0xA6,0xF4,0xE2,0x21,0xAE,0x01,0x61,0x71,0x29,0xFB,0xFF,0xFF,0xBE,0x75,0xA5,0x0C,0x00,0xF0,0xAD,0x2B,0x05};
247
248 //in=g_byte_array_new();
249 in = new CByteArray();
250 //in=g_byte_array_append(in,tst,54);
251 in->Append(tst,54);
252 out=decode_he3_data(in);
253
254 //g_byte_array_free(in,true);
255 delete in;
256
257 printf("%s\n",out->str);
258 }
259 #endif
260
261 /* ============================================================================================= */
262 /* ============================================================================================= */
263 /* ==================================== Encoding functions ===================================== */
264 /* ============================================================================================= */
265 /* ============================================================================================= */
266
267
268 //static gint huf_insert_glist(gconstpointer a, gconstpointer b)
huf_insert_glist(void * a,void * b)269 int CHE3::huf_insert_glist(void * a, void * b)
270 {
271 if(((const HUFNODE*)a)->occur<((const HUFNODE*)b)->occur)
272 return -1; /* a is must be before b */
273 else if(((const HUFNODE*)a)->occur>((const HUFNODE*)b)->occur)
274 return 1; /* a is must be after b */
275
276 /* if both have the same occurences, the one without son comes before */
277 if( (((const HUFNODE*)a)->left==NULL) && (((const HUFNODE*)b)->left==NULL) )
278 return -1;
279
280 if(((const HUFNODE*)a)->left==NULL)
281 return -1;
282
283 return 1;
284 }
285
286 /**************************************************************************/
287 /* recursively scan all nodes of the huffman tree and fill encoding table */
288 /**************************************************************************/
use_hufnode(HUFENCODE tbl_enc[256],HUFNODE * node,unsigned int bits_len,unsigned long bits)289 void CHE3::use_hufnode(HUFENCODE tbl_enc[256], HUFNODE *node,unsigned int bits_len, unsigned long bits)
290 {
291 if(node->left!=NULL)
292 { /* it is a node, right is always != NULL */
293 use_hufnode(tbl_enc,node->left,bits_len+1,(bits<<1)|0);
294 use_hufnode(tbl_enc,node->right,bits_len+1,(bits<<1)|1);
295 }
296 else
297 { /* it is a leaf, right is always NULL */
298 int idx=((int)node->val)&255;
299 tbl_enc[idx].bits_len=bits_len;
300 tbl_enc[idx].bits=bits;
301 #if 0
302 printf("huf: bits_len: %u pattern: %lu ",bits_len,bits);
303 printf("leaf: %d (%c)\n",idx,node->val);
304 #endif
305
306 // if(bits_len>8*sizeof(unsigned long)) /* compared with the size of unsigned long in bits */
307 // disp_msg(ERR_MSG,"user_hufnode","encoded value cannot fit into unsigned long",NULL);
308 }
309
310 return;
311 }
312
313 /*****************************************************************/
314 /* recursively free memory used by all nodes of the huffman tree */
315 /*****************************************************************/
free_hufnode(HUFNODE * node)316 void CHE3::free_hufnode(HUFNODE *node)
317 {
318 if(node==NULL)
319 return;
320 if(node->left!=NULL)
321 free_hufnode(node->left);
322 if(node->right!=NULL)
323 free_hufnode(node->right);
324 free(node);
325 }
326
327
add_bit(CByteArray * data,unsigned long * bit_pos,unsigned char bit_value)328 CByteArray *CHE3::add_bit(CByteArray *data, unsigned long *bit_pos, unsigned char bit_value)
329 {
330 if(((*bit_pos)&7)==0) /* starting a new byte ? */
331 {
332 unsigned char v=0;
333 //data=g_byte_array_append(data,&v,1);
334 data->Append(&v,1);
335 }
336
337 /* due to the fact we always add 0 as new byte, we don't have to set bit having 0 as value */
338 /* but just 1 */
339 if(bit_value!=0)
340 {
341 data->Data()[(*bit_pos)/8]|= (1<<((*bit_pos)&7));
342 }
343
344 (*bit_pos)++;
345
346 return data;
347 }
348
349 /**************************************/
350 /* append the pattern to data@bit_pos */
351 /**************************************/
add_bits(CByteArray * data,unsigned long * bit_pos,unsigned long pattern,unsigned int pattern_length)352 CByteArray *CHE3::add_bits(CByteArray *data, unsigned long *bit_pos, unsigned long pattern, unsigned int pattern_length)
353 {
354 unsigned long i;
355
356 for(i=0;i<pattern_length;i++)
357 {
358 data=add_bit(data,bit_pos,(unsigned char)((pattern>>(pattern_length-1-i))&1) ); /* use pattern from upper to lower bit */
359 }
360 return data;
361 }
362
363 #if 0
364 static void disp_huf(GList *pre_tree)
365 {
366 int i;
367 HUFNODE *w;
368 printf("---\n");
369 //for(i=0;i<g_list_length(pre_tree);i++)
370 for(i=0;i<pre_tree->size;i++)
371 {
372 w=g_list_nth_data(pre_tree,i); /* get the first HUFNODE of the list */
373
374 printf("occur: %ld, left=%p, right=%p, val=%02X\n",w->occur,w->left,w->right,w->val);
375 }
376 }
377 #endif
378
379 /*****************************************************************************************/
380 /* compress data compressed using an Huffman algorithm and store it in HE3 usable format */
381 /*****************************************************************************************/
382 /* input: a GString containing a string to compress */
383 /* output: a GByteArray containing compressed data or NULL */
384 /***********************************************************/
encode_he3_data(const CString * str)385 CByteArray *CHE3::encode_he3_data(const CString *str)
386 {
387 unsigned long occur[256];
388 HUFENCODE tbl_enc[256];
389
390 long i;
391 CByteArray *data;
392 //GList *pre_tree=NULL;
393 CList<CString> *pre_tree=NULL;
394 HUFNODE *root_huf=NULL;
395 int nb_val=0;
396 unsigned long bit_pos;
397
398 if((str==NULL)||(str->Length()==0))
399 return NULL;
400
401 /* count the number of times each character appears */
402 memset(occur,0,sizeof(occur));
403
404 //for(i=0;i<str->len;i++)
405 for(i=0;i<str->Length();i++)
406 {
407 occur[((int)(str->Data()[i]))&255]++;
408 }
409
410 /* now, we will build the huffman tree */
411 pre_tree = new CList<CString>();
412
413 /* stage 1: create all the leafs */
414 for(i=0;i<256;i++)
415 {
416 if(occur[i]!=0)
417 {
418 HUFNODE *nw;
419
420 nw=(HUFNODE*)malloc(sizeof(HUFNODE));
421 nw->occur=occur[i];
422 nw->left=NULL;
423 nw->right=NULL;
424 nw->val=(unsigned char)i;
425
426 //pre_tree=g_list_insert_sorted(pre_tree,nw, huf_insert_glist);
427 pre_tree->InsertSorted((CString*)nw, huf_insert_glist);
428
429 nb_val++;
430 }
431 }
432
433 //while(g_list_length(pre_tree)>1)
434 while(pre_tree->Count()>1)
435 {
436 HUFNODE *nw;
437
438 nw=(HUFNODE*)malloc(sizeof(HUFNODE));
439
440 nw->left=(HUFNODE*)pre_tree->Next(0); /* get the first HUFNODE of the list */
441 pre_tree->Remove((CString*)nw->left); /* and remove it */
442 nw->right=(HUFNODE*)pre_tree->Next(0); /* get the second HUFNODE of the list */
443 pre_tree->Remove((CString*)nw->right); /* and remove it */
444
445 nw->occur=nw->left->occur+nw->right->occur;
446 nw->val=0;
447
448 /* add a new node having the 2 removed nodes as son */
449 //pre_tree=g_list_insert_sorted(pre_tree,nw, huf_insert_glist);
450 pre_tree->InsertSorted((CString*)nw, huf_insert_glist);
451 }
452 #if 0
453 disp_huf(pre_tree);
454 #endif
455 /* now, the huffman tree is done */
456 //root_huf=g_list_nth_data(pre_tree,0);
457 root_huf=(HUFNODE*)pre_tree->Next(0);
458
459 //pre_tree=g_list_remove(pre_tree,root_huf);
460 pre_tree->Remove((CString*)root_huf);
461
462 memset(tbl_enc,0,sizeof(tbl_enc));
463
464 /* now, we will compute encoding pattern from huffman tree */
465 use_hufnode(tbl_enc,root_huf,0,0);
466
467 /* well, encoding table is now done, we can encode the data */
468 //data=g_byte_array_new();
469 data = new CByteArray();
470
471 /* set the initial HE3 header */
472 {
473 unsigned char he3_header[]={'H','E','3',0xD,
474 0, /* parity: all uncompressed bytes xor'ed */
475 0,0,0,0, /* number of bytes to produce (little-endian) */
476 0,0}; /* number of couples */
477
478 /* patch from David Toth (byte 4: parity computation) */
479 unsigned char parity=0;
480
481 //for(i=0;i<str->len;i++)
482 for(i=0;i<str->Length();i++)
483 parity^=str->Data()[i];
484 he3_header[4]=(unsigned char)(parity&255);
485
486 //he3_header[5]=str->len&255;
487 he3_header[5]=(unsigned char)(str->Length()&255);
488 he3_header[6]=(unsigned char)((str->Length()>>8)&255);
489 he3_header[7]=(unsigned char)((str->Length()>>16)&255);
490 he3_header[8]=(unsigned char)((str->Length()>>24)&255);
491 he3_header[9]=(unsigned char)(nb_val&255);
492 he3_header[10]=(unsigned char)((nb_val>>8)&255);
493
494 //data=g_byte_array_append(data,he3_header,11);
495 data->Append(he3_header,11);
496 }
497
498 /* add the couple list (character, pattern length) */
499 for(i=0;i<256;i++)
500 {
501 if(occur[i]!=0)
502 {
503 unsigned char ent[2];
504
505 ent[0]=(unsigned char)i;
506 ent[1]=(unsigned char)tbl_enc[i].bits_len;
507 //data=g_byte_array_append(data,ent,2);
508 data->Append(ent,2);
509 }
510 }
511
512 /* and now, add all the patterns */
513 bit_pos=data->Size()*8;
514 for(i=0;i<256;i++)
515 {
516 if(occur[i]!=0)
517 {
518 data=add_bits(data,&bit_pos,tbl_enc[i].bits, tbl_enc[i].bits_len);
519 }
520 }
521
522 bit_pos=(bit_pos+7)&~7; /* move to the beginning of the next byte */
523
524 /* now, we encode the string */
525 for(i=0;i<str->Length();i++)
526 {
527 int idx=((int)str->Data()[i])&255;
528
529 data=add_bits(data,&bit_pos,tbl_enc[idx].bits, tbl_enc[idx].bits_len);
530 }
531
532 /* free huffman tree */
533 free_hufnode(root_huf);
534
535 delete pre_tree;
536
537 return data;
538 }
539