xref: /minix/minix/drivers/storage/filter/sum.c (revision 433d6423)
1 /* Filter driver - middle layer - checksumming */
2 
3 #include "inc.h"
4 #include "crc.h"
5 #include "md5.h"
6 
/* Number of data bytes in one complete checksum group. */
#define GROUP_SIZE	(SECTOR_SIZE * NR_SUM_SEC)
/* Physical sector number of the checksum sector that belongs to the group
 * containing logical sector 'nr'. Each group of NR_SUM_SEC data sectors
 * occupies NR_SUM_SEC+1 physical sectors; the checksum sector comes last.
 */
#define SEC2SUM_NR(nr)	((nr)/NR_SUM_SEC*(NR_SUM_SEC+1) + NR_SUM_SEC)
/* Physical sector number at which logical data sector 'nr' is stored. */
#define LOG2PHYS(nr)	((nr)/NR_SUM_SEC*(NR_SUM_SEC+1) + (nr)%NR_SUM_SEC)

/* Byte position -> sector number. The whole expansion is parenthesized so
 * that the cast cannot combine with operators surrounding the macro use.
 */
#define POS2SEC(nr)	((unsigned long)((nr) / SECTOR_SIZE))
/* Sector number -> 64-bit byte position. */
#define SEC2POS(nr)	((u64_t)(nr) * SECTOR_SIZE)
13 
/* Working buffers. The *_array pointers are filled in once by sum_init() and
 * are handed to flt_malloc()/flt_free() as the static backing store.
 */
static char *ext_array;		/* backing store for the interspersed buffer */
static char *ext_buffer;	/* interspersed buffer used by the current transfer */
static char *rb0_array;		/* write readback buffer for disk 0 */
static char *rb1_array;		/* write readback buffer for disk 1 */
18 
19 /*===========================================================================*
20  *				sum_init				     *
21  *===========================================================================*/
sum_init(void)22 void sum_init(void)
23 {
24 	/* Initialize buffers. */
25 
26 	ext_array = flt_malloc(SBUF_SIZE, NULL, 0);
27 	rb0_array = flt_malloc(SBUF_SIZE, NULL, 0);
28 	rb1_array = flt_malloc(SBUF_SIZE, NULL, 0);
29 
30 	if (ext_array == NULL || rb0_array == NULL || rb1_array == NULL)
31 		panic("no memory available");
32 }
33 
34 /*===========================================================================*
35  *				calc_sum				     *
36  *===========================================================================*/
calc_sum(unsigned sector,char * data,char * sum)37 static void calc_sum(unsigned sector, char *data, char *sum)
38 {
39 	/* Compute the checksum for a sector. The sector number must be part
40 	 * of the checksum in some way.
41 	 */
42 	unsigned long crc, *p, *q;
43 	int i, j;
44 	struct MD5Context ctx;
45 
46 	switch(SUM_TYPE) {
47 	case ST_NIL:
48 		/* No checksum at all */
49 
50 		q = (unsigned long *) sum;
51 		*q = sector;
52 
53 		break;
54 
55 	case ST_XOR:
56 		/* Basic XOR checksum */
57 		p = (unsigned long *) data;
58 
59 		memset(sum, 0, SUM_SIZE);
60 		for(i = 0; i < SECTOR_SIZE / SUM_SIZE; i++) {
61 			q = (unsigned long *) sum;
62 			for(j = 0; (size_t) j < SUM_SIZE / sizeof(*p); j++) {
63 				*q ^= *p;
64 				q++;
65 				p++;
66 			}
67 		}
68 		q = (unsigned long *) sum;
69 		*q ^= sector;
70 
71 		break;
72 
73 	case ST_CRC:
74 		/* CRC32 checksum */
75 
76 		crc = compute_crc((unsigned char *) data, SECTOR_SIZE);
77 
78 		q = (unsigned long *) sum;
79 
80 		*q = crc ^ sector;
81 
82 		break;
83 
84 	case ST_MD5:
85 		/* MD5 checksum */
86 
87 		MD5Init(&ctx);
88 		MD5Update(&ctx, (unsigned char *) data, SECTOR_SIZE);
89 		MD5Update(&ctx, (unsigned char *) &sector, sizeof(sector));
90 		MD5Final((unsigned char *) sum, &ctx);
91 
92 		break;
93 
94 	default:
95 		panic("invalid checksum type: %d", SUM_TYPE);
96 	}
97 }
98 
99 /*===========================================================================*
100  *				read_sectors				     *
101  *===========================================================================*/
static int read_sectors(char *buf, sector_t phys_sector, int count)
{
	/* Fetch 'count' sectors, starting at physical sector 'phys_sector',
	 * into 'buf'. When fewer bytes than requested come back (EOF), the
	 * unread tail of 'buf' is cleared to zero. Returns OK, or the error
	 * code from read_write().
	 */
	size_t wanted, got;
	int result;

	wanted = count * SECTOR_SIZE;
	got = wanted;

	result = read_write(SEC2POS(phys_sector), buf, buf, &got, FLT_READ);
	if (result != OK)
		return result;

	if (got == wanted)
		return OK;

#if DEBUG
	printf("Filter: EOF reading sector %lu\n", phys_sector);
#endif

	/* Short read: zero-fill everything past what was actually read. */
	memset(buf + got, 0, wanted - got);

	return OK;
}
127 
128 /*===========================================================================*
129  *				make_group_sum				     *
130  *===========================================================================*/
static void make_group_sum(char *bufp, char *sump, sector_t sector, int index,
	int count)
{
	/* Fill in checksums for 'count' consecutive sectors of one group.
	 * 'bufp' points at the first data sector to process, 'sector' is its
	 * logical sector number, and 'index' is its position within the
	 * group. 'sump' points at the start of the group's checksum sector;
	 * each checksum occupies SUM_SIZE bytes there.
	 */
	char *slot = sump + index * SUM_SIZE;

	for (; count != 0; count--) {
		calc_sum(sector, bufp, slot);

		sector++;
		bufp += SECTOR_SIZE;
		slot += SUM_SIZE;
	}
}
151 
152 /*===========================================================================*
153  *				check_group_sum				     *
154  *===========================================================================*/
static int check_group_sum(char *bufp, const char *sump, sector_t sector,
	int index, int count)
{
	/* Verify the stored checksums of 'count' sectors within one group.
	 * The parameters have the same meaning as for make_group_sum(). Every
	 * mismatch is logged. If BAD_SUM_ERROR is set, the first mismatch
	 * ends the check and the result of bad_driver() is returned;
	 * otherwise mismatches are only reported. Returns OK when the loop
	 * runs to completion.
	 */
	char computed[SECTOR_SIZE];
	const char *stored = sump + index * SUM_SIZE;

	for (; count != 0; count--) {
		calc_sum(sector, bufp, computed);

		if (memcmp(computed, stored, SUM_SIZE) != 0) {
			printf("Filter: BAD CHECKSUM at sector %lu\n", sector);

			if (BAD_SUM_ERROR)
				return bad_driver(DRIVER_MAIN, BD_DATA, EIO);
		}

		sector++;
		bufp += SECTOR_SIZE;
		stored += SUM_SIZE;
	}

	return OK;
}
183 
184 /*===========================================================================*
185  *				make_sum				     *
186  *===========================================================================*/
static int make_sum(sector_t current_sector, sector_t sectors_left)
{
	/* Generate checksums for all user data currently in the expanded
	 * buffer (ext_buffer). 'current_sector' is the logical number of the
	 * first data sector and 'sectors_left' the number of data sectors.
	 * Returns OK, or the error from read_sectors().
	 *
	 * To be able to write the buffer in a single operation, some on-disk
	 * contents may have to be read in first (see the buffer layout
	 * described in transfer()):
	 *
	 * 1) If the head of the data starts in the middle of a group, the
	 *    existing checksum sector of that group is read so that only the
	 *    affected entries are modified.
	 * 2) Groups that are covered entirely need no read: their checksum
	 *    sector is regenerated from scratch.
	 * 3) If the tail of the data ends before the end of a group, both the
	 *    data sectors between the tail and the checksum sector (the
	 *    "gap") and the checksum sector itself are read. The checksums of
	 *    the gap data are not verified here.
	 *
	 * When 1 and 3 concern the same group (a small unaligned write), only
	 * one read is issued; which one depends on whether there is a gap
	 * before the checksum sector.
	 */
	sector_t sector_in_group, group_left;
	size_t data_bytes, gap;
	char *cursor, *sum_sec;
	int r;

	cursor = ext_buffer;

	sector_in_group = current_sector % NR_SUM_SEC;
	group_left = NR_SUM_SEC - sector_in_group;

	/* Cases 1 and 2: groups whose last data sector is part of the data. */
	while (sectors_left >= group_left) {
		data_bytes = group_left * SECTOR_SIZE;
		sum_sec = cursor + data_bytes;

		if (sector_in_group > 0) {
			/* Partial head: fetch the existing checksum sector. */
			r = read_sectors(sum_sec,
				LOG2PHYS(current_sector) + group_left, 1);
			if (r != OK)
				return r;
		} else {
			/* Whole group: start from an empty checksum sector. */
			memset(sum_sec, 0, SECTOR_SIZE);
		}

		make_group_sum(cursor, sum_sec, current_sector,
			sector_in_group, group_left);

		/* Step over the data and its checksum sector. */
		cursor = sum_sec + SECTOR_SIZE;

		current_sector += group_left;
		sectors_left -= group_left;

		/* Every following group starts at its first sector. */
		sector_in_group = 0;
		group_left = NR_SUM_SEC;
	}

	if (!(sectors_left > 0))
		return OK;

	/* Case 3: a tail that stops short of the group's checksum sector. */
	data_bytes = sectors_left * SECTOR_SIZE;

	if (group_left != NR_SUM_SEC - sector_in_group)
		panic("group_left assertion: %d", 0);

	gap = group_left - sectors_left;

	if (gap <= 0)
		panic("gap assertion: %d", 0);

	/* Read the gap sectors plus the checksum sector in one go. */
	r = read_sectors(cursor + data_bytes,
		LOG2PHYS(current_sector) + sectors_left, gap + 1);
	if (r != OK)
		return r;

	sum_sec = cursor + data_bytes + gap * SECTOR_SIZE;

	make_group_sum(cursor, sum_sec, current_sector, sector_in_group,
		sectors_left);

	return OK;
}
273 
274 /*===========================================================================*
275  *				check_sum				     *
276  *===========================================================================*/
static int check_sum(sector_t current_sector, size_t bytes_left)
{
	/* Verify the checksums of the data in the expanded buffer.
	 * 'current_sector' is the logical number of the first data sector,
	 * 'bytes_left' the number of bytes of the expanded buffer to cover.
	 * Returns OK, or RET_REDO as soon as check_group_sum() reports a
	 * nonzero result.
	 */
	sector_t sector_in_group;
	size_t chunk, consumed, groupbytes_left;
	char *cursor;
	int nsecs;

	sector_in_group = current_sector % NR_SUM_SEC;
	groupbytes_left = (NR_SUM_SEC - sector_in_group) * SECTOR_SIZE;

	for (cursor = ext_buffer; bytes_left > 0; bytes_left -= consumed) {
		/* Data bytes of this group that are present in the buffer. */
		chunk = (bytes_left < groupbytes_left) ?
			bytes_left : groupbytes_left;
		nsecs = chunk / SECTOR_SIZE;

		/* The checksum sector sits right after the group's data. */
		if (check_group_sum(cursor, cursor + groupbytes_left,
				current_sector, sector_in_group, nsecs) != 0)
			return RET_REDO;

		cursor += chunk + SECTOR_SIZE;
		current_sector += nsecs;

		/* Account for the data and the checksum sector, but never
		 * for more than what is left.
		 */
		consumed = chunk + SECTOR_SIZE;
		if (!(consumed < bytes_left))
			consumed = bytes_left;

		/* All subsequent groups are processed from their start. */
		sector_in_group = 0;
		groupbytes_left = GROUP_SIZE;
	}

	return OK;
}
311 
312 /*===========================================================================*
313  *				check_write				     *
314  *===========================================================================*/
static int check_write(u64_t pos, size_t size)
{
	/* Read back 'size' bytes from physical position 'pos' (from both
	 * disks if mirroring is enabled) and compare them against the data in
	 * ext_buffer that was just written.
	 *
	 * Returns OK if the data matches or 'size' is zero, the error from
	 * read_write() if the readback itself fails, and the result of
	 * bad_driver() for the offending driver on a mismatch.
	 *
	 * The readback buffers are released on every path, including the
	 * mismatch paths.
	 */
	char *rb0_buffer, *rb1_buffer;
	size_t orig_size;
	int r;

	if (size == 0)
		return OK;

	/* Without mirroring, both pointers refer to the same buffer. */
	rb0_buffer = rb1_buffer =
		flt_malloc(size, rb0_array, SBUF_SIZE);
	if (USE_MIRROR)
		rb1_buffer = flt_malloc(size, rb1_array, SBUF_SIZE);

	/* read_write() may shrink 'size'; the buffers must be released with
	 * the size they were obtained with.
	 */
	orig_size = size;

	r = read_write(pos, rb0_buffer, rb1_buffer, &size, FLT_READ2);

	if (r != OK)
		goto out;

	/* If we get a size smaller than what we requested, then we somehow
	 * succeeded in writing past the disk end, and now fail to read it all
	 * back. This is not an error, and we just compare the part that we
	 * did manage to read back in.
	 */

	if (memcmp(ext_buffer, rb0_buffer, size)) {
#if DEBUG
		printf("Filter: readback from disk 0 failed (size %lu)\n",
			(unsigned long) size);
#endif

		r = bad_driver(DRIVER_MAIN, BD_DATA, EFAULT);
		goto out;
	}

	if (USE_MIRROR && memcmp(ext_buffer, rb1_buffer, size)) {
#if DEBUG
		printf("Filter: readback from disk 1 failed (size %lu)\n",
			(unsigned long) size);
#endif

		r = bad_driver(DRIVER_BACKUP, BD_DATA, EFAULT);
		goto out;
	}

out:
	if (USE_MIRROR) flt_free(rb1_buffer, orig_size, rb1_array);
	flt_free(rb0_buffer, orig_size, rb0_array);

	return r;
}
374 
375 /*===========================================================================*
376  *				expand					     *
377  *===========================================================================*/
static void expand(sector_t first_sector, char *buffer, sector_t sectors_left)
{
	/* Spread the contiguous data in 'buffer' out over 'ext_buffer',
	 * leaving one sector of room after each group (or partial group) for
	 * its checksum sector. The checksum areas themselves are not written.
	 * 'first_sector' is the logical number of the first data sector.
	 */
	char *from = buffer;
	char *to = ext_buffer;
	sector_t room;
	size_t nbytes;
	int count;

	/* The first group may be entered partway through. */
	room = NR_SUM_SEC - first_sector % NR_SUM_SEC;

	for (; sectors_left > 0; sectors_left -= count) {
		count = MIN(sectors_left, room);
		nbytes = count * SECTOR_SIZE;

		memcpy(to, from, nbytes);

		from += nbytes;
		to += nbytes + SECTOR_SIZE;	/* skip the checksum slot */

		room = NR_SUM_SEC;
	}
}
406 
407 /*===========================================================================*
408  *				collapse				     *
409  *===========================================================================*/
static void collapse(sector_t first_sector, char *buffer, size_t *sizep)
{
	/* Pack the data sectors of 'ext_buffer' into 'buffer', dropping the
	 * checksum sectors in between. On entry '*sizep' is the number of
	 * bytes of the interspersed buffer to process; on return it is the
	 * number of contiguous bytes stored in 'buffer'.
	 */
	char *from = ext_buffer;
	char *to = buffer;
	size_t remaining, room, chunk, consumed;

	remaining = *sizep;

	/* The first group may be entered partway through. */
	room = (NR_SUM_SEC - first_sector % NR_SUM_SEC) * SECTOR_SIZE;

	for (; remaining > 0; remaining -= consumed) {
		chunk = MIN(remaining, room);

		memcpy(to, from, chunk);

		to += chunk;
		from += chunk + SECTOR_SIZE;	/* skip the checksum sector */

		/* Account for data plus checksum sector, clamped to what is
		 * actually left.
		 */
		consumed = MIN(chunk + SECTOR_SIZE, remaining);

		room = GROUP_SIZE;
	}

	*sizep = to - buffer;
}
440 
441 /*===========================================================================*
442  *				expand_sizes				     *
443  *===========================================================================*/
static size_t expand_sizes(sector_t first_sector, sector_t nr_sectors,
	size_t *req_size)
{
	/* Work out the two sizes needed for a transfer of 'nr_sectors'
	 * logical sectors starting at 'first_sector':
	 * - '*req_size': bytes from the first data sector up to and including
	 *   the last data sector, with the checksum sectors in between;
	 * - return value: the same, extended up to and including the checksum
	 *   sector of the group that holds the last data sector.
	 */
	sector_t final_logical;
	sector_t phys_first, phys_last, phys_sum;

	final_logical = first_sector + nr_sectors - 1;

	phys_first = LOG2PHYS(first_sector);
	phys_last = LOG2PHYS(final_logical);
	phys_sum = SEC2SUM_NR(final_logical);

	*req_size = (phys_last - phys_first + 1) * SECTOR_SIZE;

	return (phys_sum - phys_first + 1) * SECTOR_SIZE;
}
463 
464 /*===========================================================================*
465  *				collapse_size				     *
466  *===========================================================================*/
static void collapse_size(sector_t first_sector, size_t *sizep)
{
	/* Convert '*sizep' from a byte count in interspersed layout (data
	 * plus checksum sectors, starting at logical sector 'first_sector')
	 * into the byte count of the user data alone.
	 */
	size_t offset, phys_secs, sum_secs;

	/* Position of the first sector within its group. */
	offset = first_sector % NR_SUM_SEC;

	phys_secs = *sizep / SECTOR_SIZE;

	/* Counting from the start of the group, every (NR_SUM_SEC+1)th
	 * sector is a checksum sector.
	 */
	sum_secs = (phys_secs + offset) / (NR_SUM_SEC+1);

	*sizep = (phys_secs - sum_secs) * SECTOR_SIZE;
}
485 
486 /*===========================================================================*
487  *				transfer				     *
488  *===========================================================================*/
int transfer(u64_t pos, char *buffer, size_t *sizep, int flag_rw)
{
	/* Transfer data in interspersed-checksum format. When writing, first
	 * compute checksums, and read back the written data afterwards. When
	 * reading, check the stored checksums afterwards.
	 *
	 * 'pos' is the logical byte position, 'buffer' the contiguous user
	 * data, '*sizep' the requested size on entry and the resulting
	 * contiguous size on return, and 'flag_rw' either FLT_READ or
	 * FLT_WRITE. Returns OK, the error from read_write(), or RET_REDO if
	 * checksum generation or verification fails. The extended buffer is
	 * released on every return path.
	 */
	sector_t first_sector, nr_sectors;
	size_t ext_size, req_size, res_size;
	u64_t phys_pos;
	int r;

	/* If we don't use checksums or even checksum layout, simply pass on
	 * the request to the drivers as is.
	 */
	if (!USE_SUM_LAYOUT)
		return read_write(pos, buffer, buffer, sizep, flag_rw);

	/* The extended buffer (for checksumming) essentially looks like this:
	 *
	 *  ------------------------------
	 *  |xx|C|yyyyy|C|yyyyy|C|zzz  |C|
	 *  ------------------------------
	 *
	 * In this example, "xxyyyyyyyyyyzzz" is our actual data. The data is
	 * split up into groups, so that each group is followed by a checksum
	 * sector C containing the checksums for all data sectors in that
	 * group. The head and tail of the actual data may cover parts of
	 * groups; neither the remaining data in those groups nor their
	 * checksums are to be modified.
	 *
	 * The entire buffer is written or read in one operation: the
	 * read_write() call below. In order to write, we may first have to
	 * read some data; see the description in make_sum().
	 *
	 * Some points of interest here:
	 * - We need a buffer large enough to hold all the user and non-user
	 *   data, from the first "xx" to the last checksum sector. This size
	 *   is ext_size.
	 * - For writing, we need to expand the user-provided data from
	 *   contiguous layout to interspersed format. The size of the user
	 *   data after expansion is req_size.
	 * - For reading, we need to collapse the user-requested data from
	 *   interspersed to contiguous format. For writing, we still need to
	 *   compute the contiguous result size to return to the user.
	 * - In both cases, the result size may be different from the
	 *   requested size, because an EOF (as in, disk end) may occur
	 *   and the resulting size is less than the requested size.
	 * - If we only follow the checksum layout, and do not do any
	 *   checksumming, ext_size is reduced to req_size.
	 */

	first_sector = POS2SEC(pos);
	nr_sectors = *sizep / SECTOR_SIZE;

	/* A request that does not cover a single whole sector transfers
	 * nothing. Bail out here: expand_sizes() computes the last sector as
	 * first_sector + nr_sectors - 1, which is meaningless for zero
	 * sectors.
	 */
	if (nr_sectors == 0) {
		*sizep = 0;

		return OK;
	}

	phys_pos = SEC2POS(LOG2PHYS(first_sector));

#if DEBUG2
	printf("Filter: transfer: pos 0x%lx:0x%lx -> phys_pos 0x%lx:0x%lx\n",
		ex64hi(pos), ex64lo(pos), ex64hi(phys_pos), ex64lo(phys_pos));
#endif

	/* Compute the size for the buffer and for the user data after
	 * expansion.
	 */
	ext_size = expand_sizes(first_sector, nr_sectors, &req_size);

	if (!USE_CHECKSUM)
		ext_size = req_size;

	ext_buffer = flt_malloc(ext_size, ext_array, SBUF_SIZE);

	if (flag_rw == FLT_WRITE) {
		expand(first_sector, buffer, nr_sectors);

		if (USE_CHECKSUM && make_sum(first_sector, nr_sectors)) {
			r = RET_REDO;
			goto out;
		}
	}

	/* Perform the actual I/O. */
	res_size = ext_size;
	r = read_write(phys_pos, ext_buffer, ext_buffer, &res_size, flag_rw);

#if DEBUG2
	printf("Filter: transfer: read_write(%"PRIx64", %u, %d) = %d, %u\n",
		phys_pos, ext_size, flag_rw, r, res_size);
#endif

	if (r != OK)
		goto out;

	/* Limit the resulting size to the user data part of the buffer.
	 * The resulting size may already be less, due to an EOF.
	 */
	*sizep = MIN(req_size, res_size);

	if (flag_rw == FLT_WRITE) {
		if (USE_CHECKSUM && check_write(phys_pos, res_size)) {
			r = RET_REDO;
			goto out;
		}

		collapse_size(first_sector, sizep);
	}
	else { /* FLT_READ */
		if (USE_CHECKSUM && check_sum(first_sector, *sizep)) {
			r = RET_REDO;
			goto out;
		}

		collapse(first_sector, buffer, sizep);
	}

	/* r is still OK here. */

out:
	/* Single exit point, so the extended buffer is never leaked. */
	flt_free(ext_buffer, ext_size, ext_array);

	return r;
}
603 
604 /*===========================================================================*
605  *				convert					     *
606  *===========================================================================*/
u64_t convert(u64_t size)
{
	/* Translate a raw disk size in bytes into the size visible to the
	 * user, i.e. without the space taken by checksum sectors. Only whole
	 * groups of NR_SUM_SEC data sectors plus one checksum sector are
	 * counted. Without checksum layout the size is returned unchanged.
	 */
	sector_t raw_sectors;

	if (USE_SUM_LAYOUT) {
		raw_sectors = POS2SEC(size);

		return SEC2POS(raw_sectors / (NR_SUM_SEC + 1) * NR_SUM_SEC);
	}

	return size;
}
621