1 #ifdef PLAN9PORT	/* SORRY! */
2 #	include <u.h>
3 #	include <sys/types.h>
4 #	ifdef __linux__	/* REALLY SORRY! */
5 #		define CANBLOCKSIZE 1
6 #		include <sys/vfs.h>
7 #	elif defined(__FreeBSD__)
8 #		define CANBLOCKSIZE 1
9 #		include <sys/param.h>
10 #		include <sys/stat.h>
11 #		include <sys/mount.h>
12 #	endif
13 #endif
14 #include "stdinc.h"
15 #include <ctype.h>
16 #include "dat.h"
17 #include "fns.h"
18 
19 u32int	maxblocksize;
20 int	readonly;
21 
22 int findsubpart(Part *part, char *name);
23 
/*
 * Parse an unsigned number with an optional binary size suffix.
 *
 * p must begin with a decimal digit.  The digits are converted by
 * strtoull using radix rad (parsepart passes 0).  A single trailing
 * k, m, g or t (either case) scales the value by 2^10, 2^20, 2^30
 * or 2^40 respectively.
 *
 * On success the value is stored in *u, a pointer to the first
 * unconsumed character is stored in *pp, and 0 is returned.
 * Returns -1, leaving *pp and *u untouched, if p does not start
 * with a digit or if applying the suffix would overflow 64 bits.
 */
static int
strtoullsuf(char *p, char **pp, int rad, u64int *u)
{
	u64int v, scale;

	if(!isdigit((uchar)*p))
		return -1;
	v = strtoull(p, &p, rad);
	scale = 1;
	switch(*p){
	case 'k':
	case 'K':
		scale = (u64int)1<<10;
		p++;
		break;
	case 'm':
	case 'M':
		scale = (u64int)1<<20;
		p++;
		break;
	case 'g':
	case 'G':
		scale = (u64int)1<<30;
		p++;
		break;
	case 't':
	case 'T':
		scale = (u64int)1<<40;
		p++;
		break;
	default:
		break;
	}
	/* refuse to wrap silently: a wrapped size would name the wrong range */
	if(v > ~(u64int)0/scale)
		return -1;
	*pp = p;
	*u = v*scale;
	return 0;
}
59 
60 static int
parsepart(char * name,char ** file,char ** subpart,u64int * lo,u64int * hi)61 parsepart(char *name, char **file, char **subpart, u64int *lo, u64int *hi)
62 {
63 	char *p;
64 
65 	*file = estrdup(name);
66 	*lo = 0;
67 	*hi = 0;
68 	*subpart = nil;
69 	if((p = strrchr(*file, ':')) == nil)
70 		return 0;
71 	*p++ = 0;
72 	if(isalpha(*p)){
73 		*subpart = p;
74 		return 0;
75 	}
76 	if(*p == '-')
77 		*lo = 0;
78 	else{
79 		if(strtoullsuf(p, &p, 0, lo) < 0){
80 			free(*file);
81 			return -1;
82 		}
83 	}
84 	if(*p == '-')
85 		p++;
86 	if(*p == 0){
87 		*hi = 0;
88 		return 0;
89 	}
90 	if(strtoullsuf(p, &p, 0, hi) < 0 || *p != 0){
91 		free(*file);
92 		return -1;
93 	}
94 	return 0;
95 }
96 
97 #undef min
98 #define min(a, b) ((a) < (b) ? (a) : (b))
99 Part*
initpart(char * name,int mode)100 initpart(char *name, int mode)
101 {
102 	Part *part;
103 	Dir *dir;
104 	char *file, *subname;
105 	u64int lo, hi;
106 
107 	if(parsepart(name, &file, &subname, &lo, &hi) < 0){
108 		werrstr("cannot parse name %s", name);
109 		return nil;
110 	}
111 	trace(TraceDisk, "initpart %s file %s lo 0x%llx hi 0x%llx", name, file, lo, hi);
112 	part = MKZ(Part);
113 	part->name = estrdup(name);
114 	part->filename = estrdup(file);
115 	if(readonly){
116 		mode &= ~(OREAD|OWRITE|ORDWR);
117 		mode |= OREAD;
118 	}
119 #ifdef __linux__	/* sorry, but linus made O_DIRECT unusable! */
120 	mode &= ~ODIRECT;
121 #endif
122 	part->fd = open(file, mode);
123 	if(part->fd < 0){
124 		if((mode&(OREAD|OWRITE|ORDWR)) == ORDWR)
125 			part->fd = open(file, (mode&~ORDWR)|OREAD);
126 		if(part->fd < 0){
127 			freepart(part);
128 			fprint(2, "can't open partition='%s': %r\n", file);
129 			seterr(EOk, "can't open partition='%s': %r", file);
130 			fprint(2, "%r\n");
131 			free(file);
132 			return nil;
133 		}
134 		fprint(2, "warning: %s opened for reading only\n", name);
135 	}
136 	part->offset = lo;
137 	dir = dirfstat(part->fd);
138 	if(dir == nil){
139 		freepart(part);
140 		seterr(EOk, "can't stat partition='%s': %r", file);
141 		free(file);
142 		return nil;
143 	}
144 	if(dir->length == 0){
145 		free(dir);
146 		dir = dirstat(file);
147 		if(dir == nil || dir->length == 0) {
148 			freepart(part);
149 			seterr(EOk, "can't determine size of partition %s", file);
150 			free(file);
151 			return nil;
152 		}
153 	}
154 	if(dir->length < hi || dir->length < lo){
155 		freepart(part);
156 		seterr(EOk, "partition '%s': bounds out of range (max %lld)", name, dir->length);
157 		free(dir);
158 		free(file);
159 		return nil;
160 	}
161 	if(hi == 0)
162 		hi = dir->length;
163 	part->size = hi - part->offset;
164 #ifdef CANBLOCKSIZE
165 	{
166 		struct statfs sfs;
167 		if(fstatfs(part->fd, &sfs) >= 0 && sfs.f_bsize > 512)
168 			part->fsblocksize = sfs.f_bsize;
169 	}
170 #endif
171 
172 	part->fsblocksize = min(part->fsblocksize, MaxIo);
173 
174 	if(subname && findsubpart(part, subname) < 0){
175 		werrstr("cannot find subpartition %s", subname);
176 		freepart(part);
177 		return nil;
178 	}
179 	free(dir);
180 	return part;
181 }
182 
183 int
flushpart(Part * part)184 flushpart(Part *part)
185 {
186 	USED(part);
187 #ifdef __linux__	/* grrr! */
188 	if(fsync(part->fd) < 0){
189 		logerr(EAdmin, "flushpart %s: %r", part->name);
190 		return -1;
191 	}
192 	posix_fadvise(part->fd, 0, 0, POSIX_FADV_DONTNEED);
193 #endif
194 	return 0;
195 }
196 
197 void
freepart(Part * part)198 freepart(Part *part)
199 {
200 	if(part == nil)
201 		return;
202 	if(part->fd >= 0)
203 		close(part->fd);
204 	free(part->name);
205 	free(part);
206 }
207 
208 void
partblocksize(Part * part,u32int blocksize)209 partblocksize(Part *part, u32int blocksize)
210 {
211 	if(part->blocksize)
212 		sysfatal("resetting partition=%s's block size", part->name);
213 	part->blocksize = blocksize;
214 	if(blocksize > maxblocksize)
215 		maxblocksize = blocksize;
216 }
217 
218 /*
219  * Read/write some amount of data between a block device or file and a memory buffer.
220  *
221  * Most Unix systems require that when accessing a block device directly,
222  * the buffer, offset, and count are all multiples of the device block size,
223  * making this a lot more complicated than it otherwise would be.
224  *
225  * Most of our callers will make things easy on us, but for some callers it's best
226  * if we just do the work here, with only one place to get it right (hopefully).
227  *
228  * If everything is aligned properly, prwb will try to do big transfers in the main
229  * body of the loop: up to MaxIo bytes at a time.  If everything isn't aligned properly,
230  * we work one block at a time.
231  */
232 int
prwb(char * name,int fd,int isread,u64int offset,void * vbuf,u32int count,u32int blocksize)233 prwb(char *name, int fd, int isread, u64int offset, void *vbuf, u32int count, u32int blocksize)
234 {
235 	char *op;
236 	u8int *buf, *freetmp, *dst;
237 	u32int icount, opsize;
238 	int r, count1;
239 
240 
241 #ifndef PLAN9PORT
242 	USED(blocksize);
243 	icount = count;
244 	buf = vbuf;
245 	op = isread ? "read" : "write";
246 	dst = buf;
247 	freetmp = nil;
248 	while(count > 0){
249 		opsize = min(count, 131072 /* blocksize */);
250 		if(isread)
251 			r = pread(fd, dst, opsize, offset);
252 		else
253 			r = pwrite(fd, dst, opsize, offset);
254 		if(r <= 0)
255 			goto Error;
256 		offset += r;
257 		count -= r;
258 		dst += r;
259 		if(r != opsize)
260 			goto Error;
261 	}
262 	return icount;
263 #else
264 	u32int c, delta;
265 	u8int *tmp;
266 
267 	icount = count;
268 	buf = vbuf;
269 	tmp = nil;
270 	freetmp = nil;
271 	opsize = blocksize;
272 
273 	if(count == 0){
274 		logerr(EStrange, "pwrb %s called to %s 0 bytes", name, isread ? "read" : "write");
275 		return 0;
276 	}
277 
278 	assert(blocksize > 0);
279 
280 	/* allocate blocksize-aligned temp buffer if needed */
281 	if((ulong)offset%blocksize || (ulong)buf%blocksize || count%blocksize){
282 		if((freetmp = malloc(blocksize*2)) == nil)
283 			return -1;
284 		tmp = freetmp;
285 		tmp += blocksize - (ulong)tmp%blocksize;
286 	}
287 
288 	/* handle beginning fringe */
289 	if((delta = (ulong)offset%blocksize) != 0){
290 		assert(tmp != nil);
291 		if((r=pread(fd, tmp, blocksize, offset-delta)) != blocksize){
292 			dst = tmp;
293 			offset = offset-delta;
294 			op = "read";
295 			count1 = blocksize;
296 			goto Error;
297 		}
298 		c = min(count, blocksize-delta);
299 		assert(c > 0 && c < blocksize);
300 		if(isread)
301 			memmove(buf, tmp+delta, c);
302 		else{
303 			memmove(tmp+delta, buf, c);
304 			if((r=pwrite(fd, tmp, blocksize, offset-delta)) != blocksize){
305 				dst = tmp;
306 				offset = offset-delta;
307 				op = "read";
308 				count1 = blocksize;
309 				goto Error;
310 			}
311 		}
312 		assert(c > 0);
313 		offset += c;
314 		buf += c;
315 		count -= c;
316 	}
317 
318 	/* handle full blocks */
319 	while(count >= blocksize){
320 		assert((ulong)offset%blocksize == 0);
321 		if((ulong)buf%blocksize){
322 			assert(tmp != nil);
323 			dst = tmp;
324 			opsize = blocksize;
325 		}else{
326 			dst = buf;
327 			opsize = count - count%blocksize;
328 			if(opsize > MaxIo)
329 				opsize = MaxIo;
330 		}
331 		if(isread){
332 			if((r=pread(fd, dst, opsize, offset))<=0 || r%blocksize){
333 				op = "read";
334 				count1 = opsize;
335 				goto Error;
336 			}
337 			if(dst == tmp){
338 				assert(r == blocksize);
339 				memmove(buf, tmp, blocksize);
340 			}
341 		}else{
342 			if(dst == tmp){
343 				assert(opsize == blocksize);
344 				memmove(dst, buf, blocksize);
345 			}
346 			if((r=pwrite(fd, dst, opsize, offset))<=0 || r%blocksize){
347 				count1 = opsize;
348 				op = "write";
349 				goto Error;
350 			}
351 			if(dst == tmp)
352 				assert(r == blocksize);
353 		}
354 		assert(r > 0);
355 		offset += r;
356 		buf += r;
357 		count -= r;
358 	}
359 
360 	/* handle ending fringe */
361 	if(count > 0){
362 		assert((ulong)offset%blocksize == 0);
363 		assert(tmp != nil);
364 		/*
365 		 * Complicated condition: if we're reading it's okay to get less than
366 		 * a block as long as it's enough to satisfy the read - maybe this is
367 		 * a normal file.  (We never write to normal files, or else things would
368 		 * be even more complicated.)
369 		 */
370 		r = pread(fd, tmp, blocksize, offset);
371 		if((isread && r < count) || (!isread && r != blocksize)){
372 print("FAILED isread=%d r=%d count=%d blocksize=%d\n", isread, r, count, blocksize);
373 			dst = tmp;
374 			op = "read";
375 			count1 = blocksize;
376 			goto Error;
377 		}
378 		if(isread)
379 			memmove(buf, tmp, count);
380 		else{
381 			memmove(tmp, buf, count);
382 			if(pwrite(fd, tmp, blocksize, offset) != blocksize){
383 				dst = tmp;
384 				count1 = blocksize;
385 				op = "write";
386 				goto Error;
387 			}
388 		}
389 	}
390 	if(freetmp)
391 		free(freetmp);
392 	return icount;
393 #endif
394 
395 Error:
396 	seterr(EAdmin, "%s %s offset 0x%llux count %ud buf %p returned %d: %r",
397 		op, name, offset, count1, dst, r);
398 	if(freetmp)
399 		free(freetmp);
400 	return -1;
401 }
402 
403 #ifndef PLAN9PORT
404 static int sdreset(Part*);
405 static int reopen(Part*);
406 static int threadspawnl(int[3], char*, char*, ...);
407 #endif
408 
409 int
rwpart(Part * part,int isread,u64int offset,u8int * buf,u32int count)410 rwpart(Part *part, int isread, u64int offset, u8int *buf, u32int count)
411 {
412 	int n, try;
413 	u32int blocksize;
414 
415 	trace(TraceDisk, "%s %s %ud at 0x%llx",
416 		isread ? "read" : "write", part->name, count, offset);
417 	if(offset >= part->size || offset+count > part->size){
418 		seterr(EStrange, "out of bounds %s offset 0x%llux count %ud to partition %s size 0x%llux",
419 			isread ? "read" : "write", offset, count, part->name, part->size);
420 		return -1;
421 	}
422 
423 	blocksize = part->fsblocksize;
424 	if(blocksize == 0)
425 		blocksize = part->blocksize;
426 	if(blocksize == 0)
427 		blocksize = 4096;
428 
429 	for(try=0;; try++){
430 		n = prwb(part->filename, part->fd, isread, part->offset+offset, buf, count, blocksize);
431 		if(n >= 0 || try > 10)
432 			break;
433 
434 #ifndef PLAN9PORT
435 	    {
436 		char err[ERRMAX];
437 		/*
438 		 * This happens with the sdmv disks frustratingly often.
439 		 * Try to fix things up and continue.
440 		 */
441 		rerrstr(err, sizeof err);
442 		if(strstr(err, "i/o timeout") || strstr(err, "i/o error") || strstr(err, "partition has changed")){
443 			reopen(part);
444 			continue;
445 		}
446 	    }
447 #endif
448 		break;
449 	}
450 #ifdef __linux__	/* sigh */
451 	posix_fadvise(part->fd, part->offset+offset, n, POSIX_FADV_DONTNEED);
452 #endif
453 	return n;
454 }
455 int
readpart(Part * part,u64int offset,u8int * buf,u32int count)456 readpart(Part *part, u64int offset, u8int *buf, u32int count)
457 {
458 	return rwpart(part, 1, offset, buf, count);
459 }
460 
461 int
writepart(Part * part,u64int offset,u8int * buf,u32int count)462 writepart(Part *part, u64int offset, u8int *buf, u32int count)
463 {
464 	return rwpart(part, 0, offset, buf, count);
465 }
466 
467 ZBlock*
readfile(char * name)468 readfile(char *name)
469 {
470 	Part *p;
471 	ZBlock *b;
472 
473 	p = initpart(name, OREAD);
474 	if(p == nil)
475 		return nil;
476 	b = alloczblock(p->size, 0, p->blocksize);
477 	if(b == nil){
478 		seterr(EOk, "can't alloc %s: %r", name);
479 		freepart(p);
480 		return nil;
481 	}
482 	if(readpart(p, 0, b->data, p->size) < 0){
483 		seterr(EOk, "can't read %s: %r", name);
484 		freepart(p);
485 		freezblock(b);
486 		return nil;
487 	}
488 	freepart(p);
489 	return b;
490 }
491 
492 /*
493  * Search for the Plan 9 partition with the given name.
494  * This lets you write things like /dev/ad4:arenas
495  * if you move a disk from a Plan 9 system to a FreeBSD system.
496  *
497  * God I hope I never write this code again.
498  */
499 #define MAGIC "plan9 partitions"
500 static int
tryplan9part(Part * part,char * name)501 tryplan9part(Part *part, char *name)
502 {
503 	uchar buf[512];
504 	char *line[40], *f[4];
505 	int i, n;
506 	vlong start, end;
507 
508 	/*
509 	 * Partition table in second sector.
510 	 * Could also look on 2nd last sector and last sector,
511 	 * but those disks died out long before venti came along.
512 	 */
513 	if(readpart(part, 512, buf, 512) != 512)
514 		return -1;
515 
516 	/* Plan 9 partition table is just text strings */
517 	if(strncmp((char*)buf, "part ", 5) != 0)
518 		return -1;
519 
520 	buf[511] = 0;
521 	n = getfields((char*)buf, line, 40, 1, "\n");
522 	for(i=0; i<n; i++){
523 		if(getfields(line[i], f, 4, 1, " ") != 4)
524 			break;
525 		if(strcmp(f[0], "part") != 0)
526 			break;
527 		if(strcmp(f[1], name) == 0){
528 			start = 512*strtoll(f[2], 0, 0);
529 			end = 512*strtoll(f[3], 0, 0);
530 			if(start  < end && end <= part->size){
531 				part->offset += start;
532 				part->size = end - start;
533 				return 0;
534 			}
535 			return -1;
536 		}
537 	}
538 	return -1;
539 }
540 
/*
 * Little-endian fetches from a byte pointer.
 * GLONG widens each half to unsigned long before shifting: shifting the
 * int result of GSHORT left by 16 would reach the sign bit for values
 * >= 0x80000000 and turn large sector numbers negative.
 */
#define	GSHORT(p)	(((p)[1]<<8)|(p)[0])
#define	GLONG(p)	(((unsigned long)GSHORT((p)+2)<<16)|(unsigned long)GSHORT(p))
543 
/*
 * One 16-byte entry of the MBR partition table, as found at
 * offset 0x1BE of the first sector.  Every field is a byte array,
 * so the structure has no padding and can overlay the raw sector.
 */
typedef struct Dospart
{
	uchar flag;		/* active flag */
	uchar shead;		/* starting head */
	uchar scs[2];		/* starting cylinder/sector */
	uchar type;		/* partition type */
	uchar ehead;		/* ending head */
	uchar ecs[2];		/* ending cylinder/sector */
	uchar offset[4];	/* starting sector, little-endian */
	uchar size[4];		/* length in sectors, little-endian */
} Dospart;
556 
557 
558 int
findsubpart(Part * part,char * name)559 findsubpart(Part *part, char *name)
560 {
561 	int i;
562 	uchar buf[512];
563 	u64int size;
564 	Dospart *dp;
565 
566 	/* See if this is a Plan 9 partition. */
567 	if(tryplan9part(part, name) >= 0)
568 		return 0;
569 
570 	/* Otherwise try for an MBR and then narrow to Plan 9 partition. */
571 	if(readpart(part, 0, buf, 512) != 512)
572 		return -1;
573 	if(buf[0x1FE] != 0x55 || buf[0x1FF] != 0xAA)
574 		return -1;
575 	dp = (Dospart*)(buf+0x1BE);
576 	size = part->size;
577 	for(i=0; i<4; i++){
578 		if(dp[i].type == '9'){
579 			part->offset = 512LL*GLONG(dp[i].offset);
580 			part->size = 512LL*GLONG(dp[i].size);
581 			if(tryplan9part(part, name) >= 0)
582 				return 0;
583 			part->offset = 0;
584 			part->size = size;
585 		}
586 		/* Not implementing extended partitions - enough is enough. */
587 	}
588 	return -1;
589 }
590