1 /* Lziprecover - Data recovery tool for the lzip format
2    Copyright (C) 2009-2021 Antonio Diaz Diaz.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 2 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17 
18 #define _FILE_OFFSET_BITS 64
19 
20 #include <algorithm>
21 #include <cerrno>
22 #include <climits>
23 #include <cstdio>
24 #include <cstdlib>
25 #include <cstring>
26 #include <string>
27 #include <vector>
28 #include <stdint.h>
29 #include <unistd.h>
30 
31 #include "lzip.h"
32 #include "md5.h"
33 #include "mtester.h"
34 
35 
36 namespace {
37 
/* Return a short printable description of 'byte': the character in quotes
   followed by its hex code when it is in 0x20-0x7E or >= 0xA0, else only
   the (padded) hex code.
   The result lives in one of 8 static buffers used in rotation, so up to
   8 results may be in use at once (e.g. several calls in one printf).
   The 9th call overwrites the buffer returned by the 1st. */
const char * format_byte( const uint8_t byte )
  {
  enum { ring_slots = 8, slot_size = 16 };
  static char ring[ring_slots][slot_size];
  static int next_slot = 0;

  char * const out = ring[next_slot];
  next_slot = ( next_slot + 1 ) % ring_slots;	// advance the ring

  const bool printable = ( byte >= 0x20 && byte <= 0x7E ) || byte >= 0xA0;
  if( !printable )
    snprintf( out, slot_size, "    (0x%02X)", byte );
  else
    snprintf( out, slot_size, "'%c' (0x%02X)", byte, byte );
  return out;
  }
50 
51 } // end namespace
52 
53 
print_block(const int len)54 void LZ_mtester::print_block( const int len )
55   {
56   std::fputs( " \"", stdout );
57   for( int i = len - 1; i >= 0; --i )
58     {
59     uint8_t byte = peek( i );
60     if( byte < 0x20 || ( byte > 0x7E && byte < 0xA0 ) ) byte = '.';
61     std::fputc( byte, stdout );
62     }
63   std::fputs( "\"\n", stdout );
64   }
65 
66 
duplicate_buffer()67 void LZ_mtester::duplicate_buffer()
68   {
69   uint8_t * const tmp = new uint8_t[dictionary_size];
70   if( data_position() > 0 )
71     std::memcpy( tmp, buffer, std::min( data_position(),
72                                         (unsigned long long)dictionary_size ) );
73   else tmp[dictionary_size-1] = 0;		// prev_byte of first byte
74   buffer = tmp;
75   }
76 
77 
flush_data()78 void LZ_mtester::flush_data()
79   {
80   if( pos > stream_pos )
81     {
82     const int size = pos - stream_pos;
83     crc32.update_buf( crc_, buffer + stream_pos, size );
84     if( md5sum ) md5sum->md5_update( buffer + stream_pos, size );
85     if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size )
86       throw Error( "Write error" );
87     if( pos >= dictionary_size )
88       { partial_data_pos += pos; pos = 0; pos_wrapped = true; }
89     stream_pos = pos;
90     }
91   }
92 
93 
verify_trailer(FILE * const f,unsigned long long byte_pos)94 bool LZ_mtester::verify_trailer( FILE * const f, unsigned long long byte_pos )
95   {
96   const Lzip_trailer * const trailer = rdec.get_trailer();
97   if( !trailer )
98     {
99     if( verbosity >= 0 && f )
100       { if( byte_pos )
101           { std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
102         std::fputs( "Can't get trailer.\n", f ); }
103     return false;
104     }
105   const unsigned long long data_size = data_position();
106   const unsigned long long member_size = member_position();
107   bool error = false;
108 
109   const unsigned td_crc = trailer->data_crc();
110   if( td_crc != crc() )
111     {
112     error = true;
113     if( verbosity >= 0 && f )
114       { if( byte_pos )
115           { std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
116         std::fprintf( f, "CRC mismatch; stored %08X, computed %08X\n",
117                       td_crc, crc() ); }
118     }
119   const unsigned long long td_size = trailer->data_size();
120   if( td_size != data_size )
121     {
122     error = true;
123     if( verbosity >= 0 && f )
124       { if( byte_pos )
125           { std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
126         std::fprintf( f, "Data size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
127                       td_size, td_size, data_size, data_size ); }
128     }
129   const unsigned long long tm_size = trailer->member_size();
130   if( tm_size != member_size )
131     {
132     error = true;
133     if( verbosity >= 0 && f )
134       { if( byte_pos )
135           { std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
136         std::fprintf( f, "Member size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
137                       tm_size, tm_size, member_size, member_size ); }
138     }
139   return !error;
140   }
141 
142 
/* Decode packets from the member until the End Of Stream marker is found,
   an error is detected, or one of the position limits is reached.
   'mpos_limit' and 'dpos_limit' are compared with member_position() and
   data_position() before each packet is decoded.
   If 'f' is non-null and verbosity >= 0, diagnostics are written to 'f',
   preceded by a "byte N" line when 'byte_pos' is nonzero.
   flush_data() is called before every return taken after decoding has
   started (i.e., all returns except the first one).
   Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
                 3 = trailer error, 4 = unknown marker found,
                 -1 = pos_limit reached. */
int LZ_mtester::test_member( const unsigned long long mpos_limit,
                             const unsigned long long dpos_limit,
                             FILE * const f, const unsigned long long byte_pos )
  {
  // presumably header plus the 5 bytes consumed by rdec.load() -- TODO confirm
  if( mpos_limit < Lzip_header::size + 5 ) return -1;
  // load the range decoder only if nothing past the header has been read;
  // presumably lets a tester that returned -1 be resumed -- TODO confirm
  if( member_position() == Lzip_header::size ) rdec.load();
  while( !rdec.finished() )
    {
    // stop (after flushing) as soon as either limit is reached
    if( member_position() >= mpos_limit || data_position() >= dpos_limit )
      { flush_data(); return -1; }
    const int pos_state = data_position() & pos_state_mask;
    if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 )	// 1st bit
      {
      // literal byte
      Bit_model * const bm = bm_literal[get_lit_state(peek_prev())];
      if( state.is_char_set_char() )
        put_byte( rdec.decode_tree8( bm ) );
      else		// decode using the byte at distance rep0 as context
        put_byte( rdec.decode_matched( bm, peek( rep0 ) ) );
      continue;
      }
    // match or repeated match
    int len;
    if( rdec.decode_bit( bm_rep[state()] ) != 0 )		// 2nd bit
      {
      if( rdec.decode_bit( bm_rep0[state()] ) == 0 )		// 3rd bit
        {
        // rep0 is reused; a 0 here means a one-byte copy (short rep)
        if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
          { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
        }
      else
        {
        // select rep1, rep2 or rep3 and move it to the front (rep0),
        // shifting the distances before it one place back
        unsigned distance;
        if( rdec.decode_bit( bm_rep1[state()] ) == 0 )		// 4th bit
          distance = rep1;
        else
          {
          if( rdec.decode_bit( bm_rep2[state()] ) == 0 )	// 5th bit
            distance = rep2;
          else
            { distance = rep3; rep3 = rep2; }
          rep2 = rep1;
          }
        rep1 = rep0;
        rep0 = distance;
        }
      state.set_rep();
      len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
      }
    else					// match
      {
      len = min_match_len + rdec.decode_len( match_len_model, pos_state );
      // 'distance' first holds the distance slot; slots below
      // start_dis_model are used directly as the distance
      unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
      if( distance >= start_dis_model )
        {
        const unsigned dis_slot = distance;
        const int direct_bits = ( dis_slot >> 1 ) - 1;
        distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
        if( dis_slot < end_dis_model )
          distance += rdec.decode_tree_reversed(
                      bm_dis + ( distance - dis_slot ), direct_bits );
        else
          {
          // high bits read with rdec.decode(), low dis_align_bits bits
          // with the bm_align models
          distance +=
            rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
          distance += rdec.decode_tree_reversed4( bm_align );
          if( distance == 0xFFFFFFFFU )		// marker found
            {
            rdec.normalize();
            flush_data();
            if( len == min_match_len )		// End Of Stream marker
              {
              if( verify_trailer( f, byte_pos ) ) return 0; else return 3;
              }
            // any other marker code is reported and rejected here
            if( verbosity >= 0 && f )
              {
              if( byte_pos ) std::fprintf( f, "byte %llu\n", byte_pos );
              std::fprintf( f, "Unsupported marker code '%d'\n", len );
              }
            return 4;
            }
          }
        }
      // new distance goes to rep0; older distances shift one place back
      rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
      if( rep0 > max_rep0 ) max_rep0 = rep0;	// largest match distance seen
      state.set_match();
      // reject a distance not smaller than the dictionary size, or one
      // reaching at or beyond 'pos' while the buffer has not yet wrapped
      if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
        { flush_data(); return 1; }
      }
    copy_block( rep0, len );
    }
  // rdec.finished() became true without an End Of Stream marker
  flush_data();
  return 2;
  }
240 
241 
/* Decode the whole member, optionally printing one line per packet to
   stdout, and record packet statistics (total_packets_, set_max_packet,
   set_max_marker, max_rep0, max_rep0_pos).
   'dpos' and 'mpos' are offsets added to data_position() and
   member_position() to obtain the positions that are printed and recorded.
   If 'show_packets' is true, each packet (and the trailer check) is
   printed; otherwise nothing is printed.
   Unlike test_member, a marker with len == min_match_len + 1 (Sync Flush)
   is accepted: the range decoder is reloaded and decoding continues.
   Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
                 3 = trailer error, 4 = unknown marker found. */
int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
                                     const bool show_packets )
  {
  rdec.load();
  unsigned old_tmpos = member_position();	// truncated member_position
  while( !rdec.finished() )
    {
    // positions of the packet about to be decoded, as printed below.
    // The '- 4' presumably compensates for bytes preloaded by the range
    // decoder -- TODO confirm
    const unsigned long long dp = data_position() + dpos;
    const unsigned long long mp = member_position() + mpos - 4;
    const unsigned tmpos = member_position();
    // tmpos - old_tmpos is the number of member bytes consumed since the
    // previous iteration (or since rdec.load()/the last marker).
    // NOTE(review): that size belongs to the previous packet while 'mp' is
    // the position of the packet about to be decoded -- confirm that
    // set_max_packet expects this pairing.
    set_max_packet( tmpos - old_tmpos, mp );
    old_tmpos = tmpos;
    ++total_packets_;
    const int pos_state = data_position() & pos_state_mask;
    if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 )	// 1st bit
      {
      // literal byte
      Bit_model * const bm = bm_literal[get_lit_state(peek_prev())];
      if( state.is_char_set_char() )
        {
        const uint8_t cur_byte = rdec.decode_tree8( bm );
        put_byte( cur_byte );
        if( show_packets )
          std::printf( "%6llu %6llu  literal %s\n",
                       mp, dp, format_byte( cur_byte ) );
        }
      else
        {
        // match_byte must be read before put_byte changes the position
        const uint8_t match_byte = peek( rep0 );
        const uint8_t cur_byte = rdec.decode_matched( bm, match_byte );
        put_byte( cur_byte );
        if( show_packets )
          std::printf( "%6llu %6llu  literal %s, match byte %6llu %s\n",
                       mp, dp, format_byte( cur_byte ), dp - rep0 - 1,
                       format_byte( match_byte ) );
        }
      continue;
      }
    // match or repeated match
    int len;
    if( rdec.decode_bit( bm_rep[state()] ) != 0 )		// 2nd bit
      {
      int rep = 0;		// index (0-3) of the repeated distance used
      if( rdec.decode_bit( bm_rep0[state()] ) == 0 )		// 3rd bit
        {
        if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
          {
          // printed before put_byte so that peek( rep0 ) is the byte copied
          if( show_packets )
            std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n",
                         mp, dp, format_byte( peek( rep0 ) ),
                         rep0 + 1, dp - rep0 - 1 );
          state.set_short_rep(); put_byte( peek( rep0 ) ); continue;
          }
        }
      else
        {
        // select rep1, rep2 or rep3 and move it to the front (rep0),
        // shifting the distances before it one place back
        unsigned distance;
        if( rdec.decode_bit( bm_rep1[state()] ) == 0 )		// 4th bit
          { distance = rep1; rep = 1; }
        else
          {
          if( rdec.decode_bit( bm_rep2[state()] ) == 0 )	// 5th bit
            { distance = rep2; rep = 2; }
          else
            { distance = rep3; rep3 = rep2; rep = 3; }
          rep2 = rep1;
          }
        rep1 = rep0;
        rep0 = distance;
        }
      state.set_rep();
      len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
      // no newline here; the line is completed by print_block below
      if( show_packets )
        std::printf( "%6llu %6llu  rep%c  %6u,%3d (%6llu)",
                     mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 );
      }
    else					// match
      {
      len = min_match_len + rdec.decode_len( match_len_model, pos_state );
      // 'distance' first holds the distance slot; slots below
      // start_dis_model are used directly as the distance
      unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
      if( distance >= start_dis_model )
        {
        const unsigned dis_slot = distance;
        const int direct_bits = ( dis_slot >> 1 ) - 1;
        distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
        if( dis_slot < end_dis_model )
          distance += rdec.decode_tree_reversed(
                      bm_dis + ( distance - dis_slot ), direct_bits );
        else
          {
          distance +=
            rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
          distance += rdec.decode_tree_reversed4( bm_align );
          if( distance == 0xFFFFFFFFU )		// marker found
            {
            rdec.normalize();
            flush_data();
            // record the bytes consumed by the marker separately from
            // the packet sizes (this 'tmpos' shadows the outer one)
            const unsigned tmpos = member_position();
            set_max_marker( tmpos - old_tmpos );
            old_tmpos = tmpos;
            if( show_packets )
              std::printf( "%6llu %6llu  marker code '%d'\n", mp, dp, len );
            if( len == min_match_len )		// End Of Stream marker
              {
              if( show_packets )
                std::printf( "%6llu %6llu  member trailer\n",
                             mpos + member_position(), dpos + data_position() );
              // trailer diagnostics go to stdout only when showing packets
              if( verify_trailer( show_packets ? stdout : 0 ) ) return 0;
              return 3;
              }
            if( len == min_match_len + 1 )	// Sync Flush marker
              {
              rdec.load(); continue;
              }
            return 4;
            }
          }
        }
      // new distance goes to rep0; older distances shift one place back
      rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
      // remember the largest match distance and where it was found
      if( rep0 > max_rep0 ) { max_rep0 = rep0; max_rep0_pos = mp; }
      state.set_match();
      // NOTE(review): '%6lld' is given an unsigned long long argument;
      // presumably intentional so that a distance reaching before the
      // start of the data prints as a negative position -- confirm.
      if( show_packets )
        std::printf( "%6llu %6llu  match %6u,%3d (%6lld)",
                     mp, dp, rep0 + 1, len, dp - rep0 - 1 );
      // reject a distance not smaller than the dictionary size, or one
      // reaching at or beyond 'pos' while the buffer has not yet wrapped
      if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
        { flush_data(); if( show_packets ) std::fputc( '\n', stdout );
          return 1; }
      }
    copy_block( rep0, len );
    if( show_packets ) print_block( len );	// completes the packet line
    }
  // rdec.finished() became true without an End Of Stream marker
  flush_data();
  return 2;
  }
378