1 /* This program implements Block ECC using a Forward Error Correction (FEC)
2 code based on Vandermonde (VDM) matrices in GF(2^8) due to Luigi Rizzo.
3 
4 Given a blocksize, and the VDM parameters K and N, N blocks are written for
5 every chunk of K input packets, so that N - K blocks can be lost per chunk,
6 without error.
7 
8 This means that large consecutive sections of files can be lost, and the
9 data recovered.  For example, diskettes typically lose whole sectors at
10 once, or related groups of sectors, or even entire tracks.  Data written to
11 a diskette with this program may be recovered even with many read errors.
12 
13 This program is distributed under the GNU GENERAL PUBLIC LICENSE, version 2. */
14 
15 #include "defs.h"
16 #include "util.h"
17 #include "md5.h"
18 #include "fec.h"
19 #include "version.h"
20 
21 char Usage[] =
22 	"usage: %s [-v] [-d] [-b<blocksize>] [-n<n>] [-k<k>] [file]\n";
23 #define USAGE() msg(Usage, Progname)
24 
25 /* Default parameters, suitable for standard diskettes. */
26 
27 int Blocksize = 1024;
28 int N = 18;
29 int K = 14;
30 
31 int Decode = FALSE;             /* -d means decode, else encode */
32 int Verbose = 0;                /* -v or -vv print informative stuff */
33 
34 int Pipe = FALSE;               /* set automatically if the input is a pipe */
35 
36 /* Various parameters used by both the encoder and the decoder. */
37 
38 void *Fec;                      /* opaque FEC code tables & stuff */
39 u_char *Chunk;                  /* space for K buffers of size Blocksize */
40 u_char **Packet;                /* K pointers into Chunk */
41 u_char *Block;                  /* another buffer of size Blocksize */
42 int Packlen;                    /* size of data packets within blocks */
43 int Chunksize;                  /* length of encoding unit == K packets */
44 int *Idx;                       /* index array of size K, for FEC decoder */
45 
46 /* Block footer. */
47 
48 #define BLOCK_FOOT_SIZE (MD5_DIGEST_SIZE + 2)
49 
50 /* Chunk header. */
51 
52 #define CHUNK_HEAD_SIZE 8       /* clen and chkid */
53 #define CHUNK_FINAL 0xFFFFFFFF  /* flag last chunk */
54 
55 /* Stats. */
56 
57 long Read_count;
58 long Write_count;
59 
60 /* Forward decls. */
61 
62 void encode(FILE *);
63 int read_chunk(u_int32_t *chkid, FILE *infile);
64 void write_block(int id, u_char *packet);
65 
66 void decode(FILE *);
67 u_int32_t decode_chunk(long offset, FILE *infile, u_int32_t *clenp);
68 int read_block(long pos, u_char *block, FILE *infile);
69 int check_block(u_char *block);
70 
main(int argc,char ** argv)71 int main(int argc, char **argv)
72 {
73 	int i, c;
74 	FILE *infile;
75 
76 	Progname = *argv;
77 
78 	/* Parse option arguments. */
79 
80 	while ((c = getopt(argc, argv, "vdb:n:k:")) != EOF) {
81 		switch (c) {
82 
83 		case 'v':
84 			Verbose++;
85 			break;
86 
87 		case 'd':
88 			Decode = TRUE;
89 			break;
90 
91 		case 'b':
92 			Blocksize = atoik(optarg);
93 			break;
94 
95 		case 'n':
96 			N = atoi(optarg);
97 			break;
98 
99 		case 'k':
100 			K = atoi(optarg);
101 			break;
102 
103 		default:
104 			USAGE();
105 			exit(1);
106 		}
107 	}
108 	argc -= optind;
109 	argv += optind;
110 
111 	/* Error check arguments. */
112 
113 	if (N < 2 || N > GF_SIZE + 1 || K < 1 || K > N) {
114 		fatalerr("invalid (k, n)");
115 	}
116 	if (Blocksize < BLOCK_FOOT_SIZE + CHUNK_HEAD_SIZE) {
117 		fatalerr("invalid blocksize");
118 	}
119 
120 	if (argc > 1) {
121 		USAGE();
122 		exit(1);
123 	}
124 
125 	/* Read from stdin or the given file. */
126 
127 	infile = stdin;
128 	if (argc == 1) {
129 		infile = fileopen(argv[0], "r");
130 	}
131 
132 	/* Initialize.  Allocate enough memory for these arrays to be used
133 	by both the encoder and the decoder. */
134 
135 	Fec = fec_new(K, N);
136 	Chunk = new_array(u_char, K * Blocksize);
137 	Packet = new_array(u_char *, K);
138 	for (i = 0; i < K; i++) {
139 		Packet[i] = Chunk + i * Blocksize;
140 	}
141 	Block = new_array(u_char, Blocksize);
142 	Packlen = Blocksize - BLOCK_FOOT_SIZE;
143 	Chunksize = Packlen * K;
144 	Idx = new_array(int, K);
145 
146 	/* If this program is called *decode, or if -d was passed,
147 	we're a decoder, otherwise we're an encoder. */
148 
149 	i = strlen(Progname);
150 	if (i >= 6 && strcmp(&Progname[i - 6], "decode") == 0) {
151 		Decode = TRUE;
152 	}
153 
154 	/* In decode mode, we expect read errors, which leave the stream
155 	position undefined.  So we use seek.  However, it is sometimes
156 	useful to do the reading with a different program, e.g. dd, and
157 	pipe the output to the decoder.  So we also want to allow reading
158 	from non-seekable streams.  We detect whether or not the stream is
159 	seekable by trying to seek to the start and checking for errors. */
160 
161 	if (fseek(infile, 0L, SEEK_SET) < 0) {
162 		if (errno == ESPIPE) {
163 			Pipe = TRUE;
164 		}
165 	}
166 
167 	if (Verbose) {
168 		msg("Vandermonde Forward Error Correction v%s\n", VERSION);
169 		msg("(K, N) = (%d, %d)\n", K, N);
170 		msg("Blocksize = %d\n", Blocksize);
171 		if (Decode) {
172 			msg("Decode mode.\n");
173 			if (Pipe) {
174 				msg("Input is a pipe.\n");
175 			}
176 			if (Verbose > 1) {
177 				msg("Reading data in chunks of %d bytes.\n",
178 					N * Blocksize);
179 				msg("Writing data in chunks of %d bytes.\n",
180 					K * Packlen - CHUNK_HEAD_SIZE);
181 			}
182 		} else {
183 			msg("Encode mode.\n");
184 			if (Verbose > 1) {
185 				msg("Reading data in chunks of %d bytes.\n",
186 					K * Packlen - CHUNK_HEAD_SIZE);
187 				msg("Writing data in chunks of %d bytes.\n",
188 					N * Blocksize);
189 			}
190 		}
191 	}
192 
193 	Read_count = 0;
194 	Write_count = 0;
195 
196 	if (Decode) {
197 		decode(infile);
198 	} else {
199 		encode(infile);
200 	}
201 
202 	if (!Decode && Verbose) {
203 		msg("Read %ld bytes, wrote %ld bytes.\n",
204 			Read_count, Write_count);
205 		msg("Expansion factor = %g.\n",
206 			(double)Write_count / Read_count);
207 	}
208 
209 	return 0;
210 }
211 
212 /*** The encoder. ***/
213 
214 /* Read chunks of data, encode them, and write them out.  Each input
215 chunk yields N output blocks. */
216 
encode(FILE * infile)217 void encode(FILE *infile)
218 {
219 	int i;
220 	u_int32_t chkid;
221 
222 	chkid = 0;
223 	while (read_chunk(&chkid, infile)) {
224 		for (i = 0; i < N; i++) {
225 
226 			/* Get the i'th code block for this set of packets,
227 			and write it out. */
228 
229 			fec_encode(Fec, Packet, Block, i, Packlen);
230 			write_block(i, Block);
231 		}
232 		if (chkid == CHUNK_FINAL) {
233 			break;
234 		}
235 		chkid++;
236 	}
237 }
238 
239 /* Read a chunk from the input stream and return the amount read.  On EOF,
240 the chunk is partially filled but padded with zeros, and the chkid is set
241 to CHUNK_FINAL.  In any case the chunk header (clen and chkid) is set. */
242 
read_chunk(u_int32_t * chkid,FILE * infile)243 int read_chunk(u_int32_t *chkid, FILE *infile)
244 {
245 	int i, len, clen, rlen;
246 	u_char *data;
247 	u_int32_t *p;
248 
249 	/* Initialize and zero pad. */
250 
251 	clen = 0;
252 	memset(Chunk, 0, K * Blocksize);
253 
254 	/* Try to read a whole chunk. */
255 
256 	for (i = 0; i < K; i++) {
257 		len = Packlen;
258 		data = Packet[i];
259 
260 		/* The first packet in a chunk has a header. */
261 
262 		if (i == 0) {
263 			len -= CHUNK_HEAD_SIZE;
264 			data += CHUNK_HEAD_SIZE;
265 		}
266 
267 		/* Read the packet.  A short read ends the chunk. */
268 
269 		rlen = fread(data, 1, len, infile);
270 		clen += rlen;
271 		Read_count += rlen;
272 		if (rlen < len) {
273 			break;
274 		}
275 	}
276 
277 	/* Get EOF status by trying to read a byte and then pushing it back. */
278 
279 	i = getc(infile);
280 	if (i == EOF) {
281 		*chkid = CHUNK_FINAL;
282 	} else {
283 		ungetc(i, infile);
284 	}
285 
286 	/* Set the header. */
287 
288 	p = (u_int32_t *)Packet[0];
289 	*p++ = htonl(clen);
290 	*p = htonl(*chkid);
291 
292 	if (Verbose > 1) {
293 		msg("read a chunk of size %d\n", clen);
294 	}
295 
296 	return clen;
297 }
298 
299 /* Write a block consisting of an output packet and a hash. */
300 
write_block(int id,u_char * packet)301 void write_block(int id, u_char *packet)
302 {
303 	u_char footer[BLOCK_FOOT_SIZE];
304 	struct MD5Context ctx;
305 
306 	/* First, write the output packet. */
307 
308 	if (fwrite(packet, 1, Packlen, stdout) != Packlen) {
309 		fatalerr("write error");
310 	}
311 
312 	/* Put the block id and K into the first two bytes of the footer. */
313 
314 	footer[0] = id;
315 	footer[1] = K;
316 
317 	/* Then, make an MD5 hash and write the block footer. */
318 
319 	MD5Init(&ctx);
320 	MD5Update(&ctx, packet, Packlen);
321 	MD5Update(&ctx, footer, 2);
322 	MD5Final(footer + 2, &ctx);
323 
324 	if (fwrite(footer, 1, BLOCK_FOOT_SIZE, stdout) != BLOCK_FOOT_SIZE) {
325 		fatalerr("write error");
326 	}
327 
328 	Write_count += Packlen + BLOCK_FOOT_SIZE;
329 }
330 
331 /*** The decoder. ***/
332 
333 /* Read blocks of data N at a time, decode them, and write them out.  Each N
334 input blocks yield one output chunk of K packets. */
335 
decode(FILE * infile)336 void decode(FILE *infile)
337 {
338 	int i, j;
339 	u_char *data;
340 	long offset;
341 	u_int32_t clen, chkid, id;
342 
343 	offset = 0;
344 	id = 0;
345 	do {
346 		chkid = decode_chunk(offset, infile, &clen);
347 		if (chkid != CHUNK_FINAL && chkid != id) {
348 			fatalerr("chunk out of order at %ld id %ld expecting %ld",
349 				offset, (long)chkid, (long)id);
350 		}
351 		id++;
352 
353 		/* We've got a good chunk.  Write out K packets,
354 		or clen bytes, whichever is first. */
355 
356 		i = 0;
357 		while (clen > 0 && i < K) {
358 			data = Packet[i];
359 			j = Packlen;
360 
361 			/* The first packet in a chunk has a header. */
362 
363 			if (i == 0) {
364 				j -= CHUNK_HEAD_SIZE;
365 				data += CHUNK_HEAD_SIZE;
366 			}
367 
368 			/* The last packet may be short (clen < j), so
369 			only write what's there. */
370 
371 			if (clen < j) {
372 				j = clen;
373 			}
374 			fwrite(data, 1, j, stdout);
375 			i++;
376 			clen -= j;
377 		}
378 		offset += N * Blocksize;
379 	} while (chkid != CHUNK_FINAL);
380 }
381 
382 /* From the next N input blocks starting at offset, we need K good ones.
383 If this is a pipe, we'll read all N, but otherwise we'll stop after K.
384 Return the chkid and clen from a good chunk. */
385 
decode_chunk(long offset,FILE * infile,u_int32_t * clenp)386 u_int32_t decode_chunk(long offset, FILE *infile, u_int32_t *clenp)
387 {
388 	int i, id, blkid;
389 	long pos;
390 	u_int32_t *p, chkid;
391 
392 	/* Read K good blocks.  id is the FEC block number relative to
393 	this chunk, i counts how many good ones we've read. */
394 
395 	i = id = 0;
396 	while (i < K && id < N) {
397 
398 		/* pos is where we think the block is.  It's ignored for
399 		pipes, so in that case we'd better be in the right place
400 		(if we're not, the hash will probably fail). */
401 
402 		pos = offset + id * Blocksize;
403 		blkid = read_block(pos, Packet[i], infile);
404 		if (blkid == id) {
405 
406 			/* Got a good one, save it. */
407 
408 			Idx[i++] = id;
409 		} else if (Verbose > 1) {
410 			msg("bad block at %ld id %d expecting %d\n",
411 				pos, blkid, id);
412 		}
413 		id++;
414 	}
415 	if (i < K) {
416 		fatalerr("unrecoverable error -- too many bad blocks");
417 	}
418 
419 	/* If this is a pipe, we want to read the next N - id blocks.
420 	If there are read errors, we'll be out of sync and likely fail. */
421 
422 	if (Pipe) {
423 		while (id < N) {
424 			fread(Block, 1, Blocksize, infile);
425 			id++;
426 		}
427 	}
428 
429 	/* We have K good packets.  Decode them in place. */
430 
431 	fec_decode(Fec, Packet, Idx, Packlen);
432 
433 	/* Get clen and chkid. */
434 
435 	p = (u_int32_t *)Packet[0];
436 	*clenp = ntohl(*p);
437 	p++;
438 	chkid = ntohl(*p);
439 
440 	if (Verbose > 1) {
441 		msg("read %d blocks, clen = %d\n", K, *clenp);
442 	}
443 	return chkid;
444 }
445 
446 /* Read a block and check the hash.  If a seek or read error occurs the
447 block is presumed bad.  Return the block id for a good block, a negative
448 code for a bad one.  Since a failed read() may or may not change the file
449 position, we explicitly seek to the correct position before every read,
450 unless this is a pipe. */
451 
read_block(long pos,u_char * block,FILE * infile)452 int read_block(long pos, u_char *block, FILE *infile)
453 {
454 	u_char *p;
455 
456 	if (!Pipe && fseek(infile, pos, SEEK_SET) < 0) {
457 		return -1;
458 	}
459 	if (fread(block, 1, Blocksize, infile) != Blocksize) {
460 		return -2;
461 	}
462 	if (check_block(block)) {
463 		p = block + Packlen;
464 		if (p[1] != K) {
465 			fatalerr("incorrect K value");
466 		}
467 		return p[0];
468 	}
469 	return -3;
470 }
471 
472 /* Check the hash on a block.  Return TRUE if it's OK, FALSE otherwise. */
473 
check_block(u_char * block)474 int check_block(u_char *block)
475 {
476 	u_char digest[MD5_DIGEST_SIZE];
477 	struct MD5Context ctx;
478 
479 	/* Make an MD5 hash of the packet data and the first two bytes of
480 	the footer. */
481 
482 	MD5Init(&ctx);
483 	MD5Update(&ctx, block, Packlen + 2);
484 	MD5Final(digest, &ctx);
485 	if (memcmp(digest, block + Packlen + 2, MD5_DIGEST_SIZE) == 0) {
486 		return TRUE;
487 	}
488 	return FALSE;
489 }
490