1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
3  *
4  *  The Contents of this file are made available subject to the terms of
5  *  either of the following licenses
6  *
7  *         - GNU Lesser General Public License Version 2.1
8  *         - Sun Industry Standards Source License Version 1.1
9  *
10  *  Sun Microsystems Inc., October, 2000
11  *
12  *  GNU Lesser General Public License Version 2.1
13  *  =============================================
14  *  Copyright 2000 by Sun Microsystems, Inc.
15  *  901 San Antonio Road, Palo Alto, CA 94303, USA
16  *
17  *  This library is free software; you can redistribute it and/or
18  *  modify it under the terms of the GNU Lesser General Public
19  *  License version 2.1, as published by the Free Software Foundation.
20  *
21  *  This library is distributed in the hope that it will be useful,
22  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24  *  Lesser General Public License for more details.
25  *
26  *  You should have received a copy of the GNU Lesser General Public
27  *  License along with this library; if not, write to the Free Software
28  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
29  *  MA  02111-1307  USA
30  *
31  *
32  *  Sun Industry Standards Source License Version 1.1
33  *  =================================================
34  *  The contents of this file are subject to the Sun Industry Standards
35  *  Source License Version 1.1 (the "License"); You may not use this file
36  *  except in compliance with the License. You may obtain a copy of the
37  *  License at http://www.openoffice.org/license.html.
38  *
39  *  Software provided under this License is provided on an "AS IS" basis,
40  *  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
41  *  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
42  *  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
43  *  See the License for the specific provisions governing your rights and
44  *  obligations concerning the Software.
45  *
46  *  The Initial Developer of the Original Code is: IBM Corporation
47  *
48  *  Copyright: 2008 by IBM Corporation
49  *
50  *  All Rights Reserved.
51  *
52  *  Contributor(s): _______________________________________
53  *
54  *
55  ************************************************************************/
56 
57 #include "explode.hxx"
58 #include <tools/stream.hxx>
59 
60 #include <algorithm>
61 #include <assert.h>
62 #include <math.h>
63 
64     const char Tree1String[][32] = {
65         "101",
66         "11",
67            "100",
68         "011",
69         "0101",
70         "0100",
71         "0011",
72         "00101",
73         "00100",
74         "00011",
75         "00010",
76         "000011",
77         "000010",
78         "000001",
79         "0000001",
80         "0000000",
81     };
82 
83     const char Tree2String[][32] = {
84         "11"    ,
85         "1011"  ,
86            "1010"  ,
87         "10011"  ,
88         "10010"  ,
89         "10001"  ,
90         "10000"  ,
91         "011111"  ,
92         "011110"  ,
93         "011101"  ,
94         "011100"  ,
95         "011011"  ,
96         "011010"  ,
97         "011001"  ,
98         "011000"  ,
99         "010111"  ,
100         "010110" ,
101         "010101" ,
102         "010100" ,
103         "010011" ,
104         "010010" ,
105         "010001" ,
106         "0100001" ,
107         "0100000" ,
108         "0011111" ,
109         "0011110" ,
110         "0011101" ,
111         "0011100" ,
112         "0011011" ,
113         "0011010" ,
114         "0011001" ,
115         "0011000" ,
116         "0010111" ,
117         "0010110" ,
118         "0010101" ,
119         "0010100" ,
120         "0010011" ,
121         "0010010" ,
122         "0010001" ,
123         "0010000" ,
124         "0001111" ,
125         "0001110" ,
126         "0001101" ,
127         "0001100" ,
128         "0001011" ,
129         "0001010" ,
130         "0001001" ,
131         "0001000" ,
132         "00001111",
133         "00001110",
134         "00001101",
135         "00001100",
136         "00001011",
137         "00001010",
138         "00001001",
139         "00001000",
140         "00000111",
141         "00000110",
142         "00000101",
143         "00000100",
144         "00000011",
145         "00000010",
146         "00000001",
147         "00000000",
148     };
149 
Decompression(SvStream * pInStream,SvStream * pOutStream)150 Decompression::Decompression(SvStream * pInStream, SvStream * pOutStream)
151     : m_pInStream(pInStream)
152     , m_pOutStream(pOutStream)
153     , m_nCurrent4Byte(0)
154     , m_nBitsLeft(0)
155     , m_pBuffer(m_Buffer)
156     , m_nBytesLeft(0)
157     , m_nOutputBufferPos(0)
158 {
159     if (!m_pInStream || !m_pOutStream )
160     {
161         assert(false);
162     }
163     ConstructTree1();
164     ConstructTree2();
165     fillArray();
166 }
167 /**
168  * @descr   read specified bits from input stream
169  * @argument iCount - number of bits to be read, less than 31
170  * @argument nBits - bits read
171  * @return  0 - read OK, otherwise error
172  */
ReadBits(sal_uInt16 iCount,sal_uInt32 & nBits)173 sal_uInt32 Decompression::ReadBits(sal_uInt16 iCount, sal_uInt32 & nBits)
174 {
175     if ( (iCount == 0) || (iCount > 31 ) )
176     {
177         return 1;
178     }
179 
180     /* load at least need bits into val */
181     sal_uInt32 val = m_nCurrent4Byte; /* bit accumulator */
182     while (m_nBitsLeft < iCount)
183     {
184         if (m_nBytesLeft == 0)
185         {
186             m_nBytesLeft = m_pInStream->ReadBytes(m_Buffer, CHUNK);
187             m_pBuffer = m_Buffer;
188             if (m_nBytesLeft == 0)  return 1;
189         }
190         val |= static_cast<sal_uInt32>(*m_pBuffer++) << m_nBitsLeft;       /* load eight bits */
191         m_nBytesLeft --;
192         m_nBitsLeft += 8;
193     }
194 
195     /* drop need bits and update buffer, always zero to seven bits left */
196     m_nCurrent4Byte = val >> iCount;
197     m_nBitsLeft -= iCount;
198 
199     /* return need bits, zeroing the bits above that */
200     nBits = val & ((1U << iCount) - 1);
201 
202     return 0;
203 }
204 /**
205  * @descr   decompress input and write output
206  * @return  0 - read OK, otherwise error
207  */
explode()208 sal_Int32 Decompression::explode()
209 {
210     /* The first 2 bytes are parameters */
211     sal_uInt32 P1;
212     if (0 != ReadBits(8, P1))/* 0 or 1 */
213         return -1;
214 
215     /* I think this means 0=binary and 1=ascii file, but in RESOURCEs I saw always 0 */
216     if (P1 >= 1) // changed per 's review comments
217         return -1;
218 
219     sal_uInt32 P2;
220     if (0 != ReadBits(8, P2))
221         return -1;
222 
223     /* must be 4,5 or 6 and it is a parameter for the decompression algorithm */
224     if (P2 < 4 || P2 > 6)
225         return -2;
226 
227     m_nOutputBufferPos = 0;
228     /* Now, a bit stream follows, which is decoded as described below: */
229     /*  The algorithm terminates as soon as it runs out of bits. */
230     while(true)
231     {
232         // read 1 bit (take bits from the lowest value (LSB) to the MSB i.e. bit 0, bit 1 etc ...)
233         sal_uInt32 iBit;
234         if (0 != ReadBits(1, iBit))
235             break;
236         if ( 0 == (iBit & 0x01) )
237         {
238             //if the bit is 0 read 8 bits and write it to the output as it is.
239             sal_uInt32 symbol;
240             if (0 != ReadBits(8, symbol))
241                 break;
242             m_Output[m_nOutputBufferPos++] = static_cast<sal_uInt8>(symbol);
243             if (m_nOutputBufferPos == MAXWIN)
244             {
245                 m_pOutStream->WriteBytes(m_Output, m_nOutputBufferPos);
246                 m_nOutputBufferPos = 0;
247             }
248             continue;
249         }
250         // if the bit is 1 we have here a length/distance pair:
251         // -decode a number with Hufmman Tree #1; variable bit length, result is 0x00 .. 0x0F -> L1
252         sal_uInt32 L1 = Decode(m_Tree1.get());
253         sal_uInt32 Length;
254         if (L1 <= 7)
255         {
256             //if L1 <= 7:
257             //           LENGTH = L1 + 2
258             Length = L1 + 2;
259         }
260         else
261         {
262             // if L1 > 7
263             //            read more (L1-7) bits -> L2
264             //             LENGTH = L2 + M[L1-7] + 2
265             sal_uInt32 L2;
266             if (0 != ReadBits(static_cast<sal_uInt16>(L1 - 7), L2))
267                 break;
268             Length = L2 + 2 + m_iArrayOfM[L1 -7];
269         }
270         if (Length == 519)
271         {
272             // end of compressed data
273             break;
274         }
275 
276         // - decode another number with Hufmann Tree #2 giving result 0x00..0x3F -> D1
277         sal_uInt32 D1 = Decode(m_Tree2.get());
278         sal_uInt32 D2;
279         if (Length == 2)
280         {
281             //       if LENGTH == 2
282             //              D1 = D1 << 2
283             //              read 2 bits -> D2
284             D1 = D1 << 2;
285             if (0 != ReadBits(2, D2))
286                 break;
287         }
288         else
289         {
290             //        else
291             //               D1 = D1 << P2  // the parameter 2
292             //               read P2 bits -> D2
293             D1 = D1 << P2;
294             if (0 != ReadBits(static_cast<sal_uInt16>(P2), D2))
295                 break;
296         }
297         // DISTANCE = (D1 | D2) + 1
298         sal_uInt32 distance = (D1 | D2) + 1;
299 
300             // - now copy LENGTH bytes from (output_ptr-DISTANCE) to output_ptr
301             // write current buffer to output
302         m_pOutStream->WriteBytes(m_Output, m_nOutputBufferPos);
303         m_nOutputBufferPos = 0;
304 
305         // remember current position
306         sal_uInt32 nOutputPos = m_pOutStream->Tell();
307         if (distance > nOutputPos)
308             return -3; // format error
309 
310         m_pOutStream->Flush();
311         // point back to copy position and read bytes
312         m_pOutStream->SeekRel(-static_cast<tools::Long>(distance));
313         sal_uInt8 sTemp[MAXWIN];
314         sal_uInt32 nRead = std::min(distance, Length);
315         m_pOutStream->ReadBytes(sTemp, nRead);
316         if (nRead != Length)
317         {
318             // fill the buffer with read content repeatedly until full
319             for (sal_uInt32 i=nRead; i<Length; i++)
320             {
321                 sTemp[i] = sTemp[i-nRead];
322             }
323         }
324 
325         // restore output stream position
326         m_pOutStream->Seek(nOutputPos);
327 
328            // write current buffer to output
329         m_pOutStream->WriteBytes(sTemp, Length);
330     }
331     return 0;
332 }
333 /**
334  * @descr   bits to string
335  * @return
336  */
ToString(sal_uInt32 nBits,char * pChar,sal_uInt32 nLen)337 void Decompression::ToString(sal_uInt32 nBits, char *pChar, sal_uInt32 nLen)
338 {
339     sal_uInt32 nBit;
340     for (sal_uInt32 i=nLen; i > 0; i--)
341     {
342         nBit = (nBits >> (i -1) ) & 0x01;
343         pChar[nLen - i]  = nBit ? '1':'0';
344     }
345     pChar[nLen] = '\0';
346 }
347 
348 /**
349  * @descr   decode tree 1 for length
350  * @return  the decoded value
351  */
Decode(HuffmanTreeNode * pRoot)352 sal_uInt32 Decompression::Decode(HuffmanTreeNode * pRoot)
353 {
354     sal_uInt32 nRet(0);
355     sal_uInt32 nRead, nReadAlready;
356 
357     if( 0 != ReadBits(1, nReadAlready))
358         return 0; // something wrong
359 
360     for (sal_uInt16 i=2; i <= 8; i++)
361     {
362         if ( 0 != ReadBits(1, nRead))
363             return 0; // something wrong
364 
365         nReadAlready  = (nReadAlready << 1) | (nRead & 0x01);
366 
367         char sCode[16];
368         ToString(nReadAlready, sCode, i);
369         nRet = pRoot->QueryValue(sCode);
370         if (nRet != 0xffffffff)
371         {
372             break;
373         }
374     }
375     return nRet;
376 }
377 /**
378  * @descr   construct tree 1 for length
379  * @return
380  */
ConstructTree1()381 void Decompression::ConstructTree1()
382 {   // Huffman Tree #1
383     // The first huffman tree (the Section called Decompression algorithm HUFFMAN) contains the length values. It is described by the following table:
384     // value (hex)  code (binary)
385     // 0    101
386     // 1    11
387     // 2    100
388     // 3    011
389     // 4    0101
390     // 5    0100
391     // 6    0011
392     // 7    0010 1
393     // 8    0010 0
394     // 9    0001 1
395     // a    0001 0
396     // b    0000 11
397     // c    0000 10
398     // d    0000 01
399     // e    0000 001
400     // f    0000 000
401     m_Tree1.reset( new HuffmanTreeNode());
402     for (sal_uInt32 i=0; i< 16; i++)
403     {
404         m_Tree1->InsertNode(i, Tree1String[i]);
405     }
406     /*
407     m_Tree1->InsertNode(0,   "101");
408     m_Tree1->InsertNode(1,   "11");
409     m_Tree1->InsertNode(2,   "100");
410     m_Tree1->InsertNode(3,   "011");
411     m_Tree1->InsertNode(4,   "0101");
412     m_Tree1->InsertNode(5,   "0100");
413     m_Tree1->InsertNode(6,   "0011");
414     m_Tree1->InsertNode(7,   "00101");
415     m_Tree1->InsertNode(8,   "00100");
416     m_Tree1->InsertNode(9,   "00011");
417     m_Tree1->InsertNode(10, "00010");
418     m_Tree1->InsertNode(11, "000011");
419     m_Tree1->InsertNode(12, "000010");
420     m_Tree1->InsertNode(13, "000001");
421     m_Tree1->InsertNode(14, "0000001");
422     m_Tree1->InsertNode(15, "0000000");
423     */
424 }
425 /**
426  * @descr   construct tree 2 for distance
427  * @return
428  */
ConstructTree2()429 void Decompression::ConstructTree2()
430 {
431 
432     m_Tree2.reset(new HuffmanTreeNode());
433     for (sal_uInt32 i=0; i< 64; i++)
434     {
435         m_Tree2->InsertNode(i, Tree2String[i]);
436     }
437     //where bits should be read from the left to the right.
438 }
439 /**
440  * @descr
441  * @return
442  */
fillArray()443 void Decompression::fillArray()
444 {
445     m_iArrayOfM[0] = 7;
446     for (int i=1; i < 16; i++)
447     {
448         m_iArrayOfM[i]  = m_iArrayOfM[i - 1]+ static_cast<sal_uInt32>(pow(2.0, i-1));//2
449     }
450 }
451 
HuffmanTreeNode(sal_uInt32 nValue)452 HuffmanTreeNode::HuffmanTreeNode(sal_uInt32 nValue):value(nValue)
453 {
454 }
~HuffmanTreeNode()455 HuffmanTreeNode::~HuffmanTreeNode()
456 {
457 }
458 
InsertNode(sal_uInt32 nValue,const char * pInCode)459 HuffmanTreeNode * HuffmanTreeNode::InsertNode(sal_uInt32 nValue, const char * pInCode)
460 {
461     HuffmanTreeNode *pNew = new HuffmanTreeNode(nValue);
462     std::string aCode(pInCode);
463 
464     // query its parents
465     const char cLast = aCode.back();
466     aCode.pop_back();
467     HuffmanTreeNode * pParent = QueryNode(aCode.c_str());
468     if (!pParent)
469     {
470         pParent = InsertNode(0xffffffff, aCode.c_str());
471     }
472     if (cLast == '0')
473         pParent->left.reset(pNew);
474     else // (cChar == '1')
475         pParent->right.reset(pNew);
476 
477     return pNew;
478 }
479 
QueryNode(const char * pCode)480 HuffmanTreeNode * HuffmanTreeNode::QueryNode(const char * pCode)
481 {
482     sal_uInt32 nLen = strlen(pCode);
483 
484     HuffmanTreeNode * pNode = this; // this is the root
485     for(sal_uInt32 i=0; i<nLen && pNode; i++)
486     {
487         char cChar= pCode[i];
488         if (cChar == '0')
489         {
490             pNode = pNode->left.get();
491         }
492         else // (cChar == '1')
493         {
494             pNode = pNode->right.get();
495         }
496     }
497     return pNode;
498 }
499 
QueryValue(const char * pCode)500 sal_uInt32 HuffmanTreeNode::QueryValue(const char * pCode)
501 {
502     HuffmanTreeNode * pNode =QueryNode(pCode);
503     if (pNode)
504         return pNode->value;
505 
506     return 0xffffffff;
507 }
508 
509 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
510