1 /* Lziprecover - Data recovery tool for the lzip format
2 Copyright (C) 2009-2021 Antonio Diaz Diaz.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #define _FILE_OFFSET_BITS 64
19
20 #include <algorithm>
21 #include <cerrno>
22 #include <climits>
23 #include <cstdio>
24 #include <cstdlib>
25 #include <cstring>
26 #include <string>
27 #include <vector>
28 #include <stdint.h>
29 #include <unistd.h>
30
31 #include "lzip.h"
32 #include "md5.h"
33 #include "mtester.h"
34
35
namespace {

/* Return a printable description of 'byte'.
   Bytes in the ranges 0x20-0x7E and 0xA0-0xFF are shown as the quoted
   character followed by the hexadecimal value; any other byte is shown as
   the hexadecimal value only.
   The text is stored in one slot of a ring of static buffers, so several
   results can be passed to the same printf call. A slot is reused after
   'slots' further calls. */
const char * format_byte( const uint8_t byte )
  {
  enum { slots = 8, slot_size = 16 };
  static char ring[slots][slot_size];
  static int next = 0;

  char * const out = ring[next];
  next = ( next + 1 ) % slots;          // advance to the following slot

  const bool printable = ( byte >= 0x20 && byte <= 0x7E ) || byte >= 0xA0;
  if( printable )
    snprintf( out, slot_size, "'%c' (0x%02X)", byte, byte );
  else
    snprintf( out, slot_size, " (0x%02X)", byte );
  return out;
  }

} // end namespace
52
53
print_block(const int len)54 void LZ_mtester::print_block( const int len )
55 {
56 std::fputs( " \"", stdout );
57 for( int i = len - 1; i >= 0; --i )
58 {
59 uint8_t byte = peek( i );
60 if( byte < 0x20 || ( byte > 0x7E && byte < 0xA0 ) ) byte = '.';
61 std::fputc( byte, stdout );
62 }
63 std::fputs( "\"\n", stdout );
64 }
65
66
duplicate_buffer()67 void LZ_mtester::duplicate_buffer()
68 {
69 uint8_t * const tmp = new uint8_t[dictionary_size];
70 if( data_position() > 0 )
71 std::memcpy( tmp, buffer, std::min( data_position(),
72 (unsigned long long)dictionary_size ) );
73 else tmp[dictionary_size-1] = 0; // prev_byte of first byte
74 buffer = tmp;
75 }
76
77
flush_data()78 void LZ_mtester::flush_data()
79 {
80 if( pos > stream_pos )
81 {
82 const int size = pos - stream_pos;
83 crc32.update_buf( crc_, buffer + stream_pos, size );
84 if( md5sum ) md5sum->md5_update( buffer + stream_pos, size );
85 if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size )
86 throw Error( "Write error" );
87 if( pos >= dictionary_size )
88 { partial_data_pos += pos; pos = 0; pos_wrapped = true; }
89 stream_pos = pos;
90 }
91 }
92
93
verify_trailer(FILE * const f,unsigned long long byte_pos)94 bool LZ_mtester::verify_trailer( FILE * const f, unsigned long long byte_pos )
95 {
96 const Lzip_trailer * const trailer = rdec.get_trailer();
97 if( !trailer )
98 {
99 if( verbosity >= 0 && f )
100 { if( byte_pos )
101 { std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
102 std::fputs( "Can't get trailer.\n", f ); }
103 return false;
104 }
105 const unsigned long long data_size = data_position();
106 const unsigned long long member_size = member_position();
107 bool error = false;
108
109 const unsigned td_crc = trailer->data_crc();
110 if( td_crc != crc() )
111 {
112 error = true;
113 if( verbosity >= 0 && f )
114 { if( byte_pos )
115 { std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
116 std::fprintf( f, "CRC mismatch; stored %08X, computed %08X\n",
117 td_crc, crc() ); }
118 }
119 const unsigned long long td_size = trailer->data_size();
120 if( td_size != data_size )
121 {
122 error = true;
123 if( verbosity >= 0 && f )
124 { if( byte_pos )
125 { std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
126 std::fprintf( f, "Data size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
127 td_size, td_size, data_size, data_size ); }
128 }
129 const unsigned long long tm_size = trailer->member_size();
130 if( tm_size != member_size )
131 {
132 error = true;
133 if( verbosity >= 0 && f )
134 { if( byte_pos )
135 { std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
136 std::fprintf( f, "Member size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
137 tm_size, tm_size, member_size, member_size ); }
138 }
139 return !error;
140 }
141
142
143 /* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
144 3 = trailer error, 4 = unknown marker found,
145 -1 = pos_limit reached. */
test_member(const unsigned long long mpos_limit,const unsigned long long dpos_limit,FILE * const f,const unsigned long long byte_pos)146 int LZ_mtester::test_member( const unsigned long long mpos_limit,
147 const unsigned long long dpos_limit,
148 FILE * const f, const unsigned long long byte_pos )
149 {
150 if( mpos_limit < Lzip_header::size + 5 ) return -1;
151 if( member_position() == Lzip_header::size ) rdec.load();
152 while( !rdec.finished() )
153 {
154 if( member_position() >= mpos_limit || data_position() >= dpos_limit )
155 { flush_data(); return -1; }
156 const int pos_state = data_position() & pos_state_mask;
157 if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
158 {
159 // literal byte
160 Bit_model * const bm = bm_literal[get_lit_state(peek_prev())];
161 if( state.is_char_set_char() )
162 put_byte( rdec.decode_tree8( bm ) );
163 else
164 put_byte( rdec.decode_matched( bm, peek( rep0 ) ) );
165 continue;
166 }
167 // match or repeated match
168 int len;
169 if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
170 {
171 if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
172 {
173 if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
174 { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
175 }
176 else
177 {
178 unsigned distance;
179 if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
180 distance = rep1;
181 else
182 {
183 if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
184 distance = rep2;
185 else
186 { distance = rep3; rep3 = rep2; }
187 rep2 = rep1;
188 }
189 rep1 = rep0;
190 rep0 = distance;
191 }
192 state.set_rep();
193 len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
194 }
195 else // match
196 {
197 len = min_match_len + rdec.decode_len( match_len_model, pos_state );
198 unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
199 if( distance >= start_dis_model )
200 {
201 const unsigned dis_slot = distance;
202 const int direct_bits = ( dis_slot >> 1 ) - 1;
203 distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
204 if( dis_slot < end_dis_model )
205 distance += rdec.decode_tree_reversed(
206 bm_dis + ( distance - dis_slot ), direct_bits );
207 else
208 {
209 distance +=
210 rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
211 distance += rdec.decode_tree_reversed4( bm_align );
212 if( distance == 0xFFFFFFFFU ) // marker found
213 {
214 rdec.normalize();
215 flush_data();
216 if( len == min_match_len ) // End Of Stream marker
217 {
218 if( verify_trailer( f, byte_pos ) ) return 0; else return 3;
219 }
220 if( verbosity >= 0 && f )
221 {
222 if( byte_pos ) std::fprintf( f, "byte %llu\n", byte_pos );
223 std::fprintf( f, "Unsupported marker code '%d'\n", len );
224 }
225 return 4;
226 }
227 }
228 }
229 rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
230 if( rep0 > max_rep0 ) max_rep0 = rep0;
231 state.set_match();
232 if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
233 { flush_data(); return 1; }
234 }
235 copy_block( rep0, len );
236 }
237 flush_data();
238 return 2;
239 }
240
241
242 /* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
243 3 = trailer error, 4 = unknown marker found. */
debug_decode_member(const long long dpos,const long long mpos,const bool show_packets)244 int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
245 const bool show_packets )
246 {
247 rdec.load();
248 unsigned old_tmpos = member_position(); // truncated member_position
249 while( !rdec.finished() )
250 {
251 const unsigned long long dp = data_position() + dpos;
252 const unsigned long long mp = member_position() + mpos - 4;
253 const unsigned tmpos = member_position();
254 set_max_packet( tmpos - old_tmpos, mp );
255 old_tmpos = tmpos;
256 ++total_packets_;
257 const int pos_state = data_position() & pos_state_mask;
258 if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
259 {
260 // literal byte
261 Bit_model * const bm = bm_literal[get_lit_state(peek_prev())];
262 if( state.is_char_set_char() )
263 {
264 const uint8_t cur_byte = rdec.decode_tree8( bm );
265 put_byte( cur_byte );
266 if( show_packets )
267 std::printf( "%6llu %6llu literal %s\n",
268 mp, dp, format_byte( cur_byte ) );
269 }
270 else
271 {
272 const uint8_t match_byte = peek( rep0 );
273 const uint8_t cur_byte = rdec.decode_matched( bm, match_byte );
274 put_byte( cur_byte );
275 if( show_packets )
276 std::printf( "%6llu %6llu literal %s, match byte %6llu %s\n",
277 mp, dp, format_byte( cur_byte ), dp - rep0 - 1,
278 format_byte( match_byte ) );
279 }
280 continue;
281 }
282 // match or repeated match
283 int len;
284 if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
285 {
286 int rep = 0;
287 if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
288 {
289 if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
290 {
291 if( show_packets )
292 std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n",
293 mp, dp, format_byte( peek( rep0 ) ),
294 rep0 + 1, dp - rep0 - 1 );
295 state.set_short_rep(); put_byte( peek( rep0 ) ); continue;
296 }
297 }
298 else
299 {
300 unsigned distance;
301 if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
302 { distance = rep1; rep = 1; }
303 else
304 {
305 if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
306 { distance = rep2; rep = 2; }
307 else
308 { distance = rep3; rep3 = rep2; rep = 3; }
309 rep2 = rep1;
310 }
311 rep1 = rep0;
312 rep0 = distance;
313 }
314 state.set_rep();
315 len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
316 if( show_packets )
317 std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)",
318 mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 );
319 }
320 else // match
321 {
322 len = min_match_len + rdec.decode_len( match_len_model, pos_state );
323 unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
324 if( distance >= start_dis_model )
325 {
326 const unsigned dis_slot = distance;
327 const int direct_bits = ( dis_slot >> 1 ) - 1;
328 distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
329 if( dis_slot < end_dis_model )
330 distance += rdec.decode_tree_reversed(
331 bm_dis + ( distance - dis_slot ), direct_bits );
332 else
333 {
334 distance +=
335 rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
336 distance += rdec.decode_tree_reversed4( bm_align );
337 if( distance == 0xFFFFFFFFU ) // marker found
338 {
339 rdec.normalize();
340 flush_data();
341 const unsigned tmpos = member_position();
342 set_max_marker( tmpos - old_tmpos );
343 old_tmpos = tmpos;
344 if( show_packets )
345 std::printf( "%6llu %6llu marker code '%d'\n", mp, dp, len );
346 if( len == min_match_len ) // End Of Stream marker
347 {
348 if( show_packets )
349 std::printf( "%6llu %6llu member trailer\n",
350 mpos + member_position(), dpos + data_position() );
351 if( verify_trailer( show_packets ? stdout : 0 ) ) return 0;
352 return 3;
353 }
354 if( len == min_match_len + 1 ) // Sync Flush marker
355 {
356 rdec.load(); continue;
357 }
358 return 4;
359 }
360 }
361 }
362 rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
363 if( rep0 > max_rep0 ) { max_rep0 = rep0; max_rep0_pos = mp; }
364 state.set_match();
365 if( show_packets )
366 std::printf( "%6llu %6llu match %6u,%3d (%6lld)",
367 mp, dp, rep0 + 1, len, dp - rep0 - 1 );
368 if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
369 { flush_data(); if( show_packets ) std::fputc( '\n', stdout );
370 return 1; }
371 }
372 copy_block( rep0, len );
373 if( show_packets ) print_block( len );
374 }
375 flush_data();
376 return 2;
377 }
378