1 /*
2  * Copyright (C) 2016 Jakub Kruszona-Zawadzki, Core Technology Sp. z o.o.
3  *
4  * This file is part of MooseFS.
5  *
6  * MooseFS is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, version 2 (only).
9  *
10  * MooseFS is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with MooseFS; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1301, USA
18  * or visit http://www.gnu.org/licenses/gpl-2.0.html
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #define MMAP_ALLOC 1
26 
27 // #include <execinfo.h> // for backtrace - debugs only
28 #include <inttypes.h>
29 #include <syslog.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <fcntl.h>
34 #include <string.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/statvfs.h>
38 #include <sys/time.h>
39 #include <sys/resource.h>
40 #include <time.h>
41 #include <dirent.h>
42 #include <errno.h>
43 #include <limits.h>
44 #include <math.h>
45 #include <pthread.h>
46 #ifdef MMAP_ALLOC
47 #include <sys/mman.h>
48 #endif
49 
50 #include "MFSCommunication.h"
51 #include "cfg.h"
52 #include "datapack.h"
53 #include "crc.h"
54 #include "main.h"
55 #include "masterconn.h"
56 #include "slogger.h"
57 #include "massert.h"
58 #include "random.h"
59 #include "clocks.h"
60 #include "portable.h"
61 #include "sockets.h"
62 
/* keep the most recently used data block of a chunk cached in memory */
#define PRESERVE_BLOCK 1

/* positional I/O is used only when both pread and pwrite are available */
#if defined(HAVE_PREAD) && defined(HAVE_PWRITE)
#define USE_PIO 1
#endif

/* microseconds to wait after the last rebalance before choosing a disk for a new chunk */
#define REBALANCE_GRACE_PERIOD 10000000

#define REBALANCE_TOTAL_MIN 1000000000
#define REBALANCE_DST_MAX_USAGE 0.99
#define REBALANCE_DIFF_MAX 0.01

/* every DELAYEDSTEP seconds the list of opened/crc-loaded chunks is searched for chunks to close / crc tables to free */
#define DELAYEDSTEP 1

#define OPEN_DELAY 5
#define CRC_DELAY 100

#ifdef PRESERVE_BLOCK
#define BLOCK_DELAY 10
#endif

/* number of entries stored in a single lostchunk / newchunk list node */
#define LOSTCHUNKSBLOCKSIZE 1024
#define NEWCHUNKSBLOCKSIZE 4096

#define CHUNKHDRSIZE (1024+4*1024)
#define CHUNKHDRCRC 1024

/* number of per-folder statistics slots kept in the history ring */
#define STATSHISTORY (24*60)

/* number of last I/O errors remembered per folder */
#define LASTERRSIZE 30

#define RANDOM_CHUNK_RETRIES 50

/* main chunk hash: the bucket is selected by the low bits of the chunk id (table size is a power of two) */
#define HASHSIZE (0x1000000)
#define HASHPOS(chunkid) ((chunkid)&(HASHSIZE-1))

/* "dop" chunk hash: the same scheme with a much smaller table */
#define DHASHSIZE 64
#define DHASHPOS(chunkid) ((chunkid)&(DHASHSIZE-1))

/* values of the 'cflag' argument of hdd_chunk_get */
#define CH_NEW_NONE 0
#define CH_NEW_AUTO 1
#define CH_NEW_EXCLUSIVE 2

/* sentinel pointer returned by hdd_chunk_tryfind when the chunk exists but is currently locked */
#define CHUNKLOCKED ((void*)1)
109 
/* Node of the singly linked list of damaged chunk ids waiting to be reported. */
typedef struct damagedchunk damagedchunk;

struct damagedchunk {
	uint64_t chunkid;	// id of the damaged chunk
	damagedchunk *next;	// next list node (NULL terminates the list)
};
114 
115 typedef struct lostchunk {
116 	uint64_t chunkidblock[LOSTCHUNKSBLOCKSIZE];
117 	uint32_t chunksinblock;
118 	struct lostchunk *next;
119 } lostchunk;
120 
121 typedef struct newchunk {
122 	uint64_t chunkidblock[NEWCHUNKSBLOCKSIZE];
123 	uint32_t versionblock[NEWCHUNKSBLOCKSIZE];
124 	uint32_t chunksinblock;
125 	struct newchunk *next;
126 } newchunk;
127 
/* Singly linked list node carrying one chunk id (used by dophashtab / newdopchunks). */
typedef struct dopchunk dopchunk;

struct dopchunk {
	uint64_t chunkid;	// chunk id
	dopchunk *next;		// next list node
};
132 
133 struct folder;
134 
/* Record of a single I/O error; folders keep the last LASTERRSIZE of them. */
typedef struct ioerror ioerror;

struct ioerror {
	uint64_t chunkid;	// chunk involved in the error
	uint32_t timestamp;	// time stamp of the error (sent in disk info reports)
	double monotonic_time;	// monotonic clock reading associated with the error
	int errornumber;	// error number of the failed operation
};
141 
/* Condition variable with a waiter counter; instances are chained on 'cclist'. */
typedef struct _cntcond cntcond;

struct _cntcond {
	pthread_cond_t cond;	// condition signalled when the chunk state changes
	uint32_t wcnt;		// waiter counter (0 == entry can be reused)
	cntcond *next;		// next entry on the list
};
147 
148 typedef struct chunk {
149 	char *filename;
150 	uint64_t chunkid;
151 	struct folder *owner;
152 	uint32_t ownerindx;
153 	uint32_t version;
154 	uint16_t blocks;
155 	uint16_t crcrefcount;
156 	double opento;
157 	double crcto;
158 	unsigned crcchanged:1;
159 	unsigned fsyncneeded:1;
160 #define CH_AVAIL 0
161 #define CH_LOCKED 1
162 #define CH_DELETED 2
163 #define CH_TOBEDELETED 3
164 	unsigned state:2;	// CH_AVAIL,CH_LOCKED,CH_DELETED
165 	cntcond *ccond;
166 	uint8_t *crc;
167 	int fd;
168 
169 #ifdef PRESERVE_BLOCK
170 	double blockto;
171 	uint8_t *block;
172 	uint16_t blockno;	// 0xFFFF == invalid
173 #endif
174 	uint8_t validattr;
175 	uint8_t todel;
176 //	uint32_t testtime;	// at start use max(atime,mtime) then every operation set it to current time
177 	struct chunk *testnext,**testprev;
178 	struct chunk *next;
179 } chunk;
180 
/* I/O statistics counters: byte totals, time sums (nanoseconds), operation counts and maxima. */
typedef struct hddstats hddstats;

struct hddstats {
	/* 64-bit totals */
	uint64_t rbytes;	// bytes read
	uint64_t wbytes;	// bytes written
	uint64_t nsecreadsum;	// total time of reads
	uint64_t nsecwritesum;	// total time of writes
	uint64_t nsecfsyncsum;	// total time of fsyncs
	/* operation counters */
	uint32_t rops;
	uint32_t wops;
	uint32_t fsyncops;
	/* longest single operation of each kind */
	uint32_t nsecreadmax;
	uint32_t nsecwritemax;
	uint32_t nsecfsyncmax;
};
194 
195 typedef struct folder {
196 	char *path;
197 #define SCST_SCANNEEDED 0
198 #define SCST_SCANINPROGRESS 1
199 #define SCST_SCANTERMINATE 2
200 #define SCST_SCANFINISHED 3
201 #define SCST_SENDNEEDED 4
202 #define SCST_WORKING 5
203 	unsigned int scanstate:3;
204 	unsigned int needrefresh:1;
205 	unsigned int todel:2;
206 #define REBALANCE_STD 0
207 #define REBALANCE_FORCE_SRC 1
208 #define REBALANCE_FORCE_DST 2
209 	unsigned int balancemode:2;
210 	unsigned int damaged:1;
211 	unsigned int toremove:2;
212 #define REBALANCE_NONE 0
213 #define REBALANCE_SRC 1
214 #define REBALANCE_DST 2
215 	unsigned int tmpbalancemode:2;
216 	uint8_t scanprogress;
217 	uint64_t sizelimit;
218 	uint64_t leavefree;
219 	uint64_t avail;
220 	uint64_t total;
221 	hddstats cstat;
222 	hddstats stats[STATSHISTORY];
223 	uint32_t statspos;
224 	ioerror lasterrtab[LASTERRSIZE];
225 	struct chunk **chunktab;
226 	uint32_t chunkcount;
227 	uint32_t chunktabsize;
228 	uint32_t lasterrindx;
229 	double lastrefresh;
230 	dev_t devid;
231 	ino_t lockinode;
232 	int lfd;
233 	double read_corr;
234 	double write_corr;
235 	uint32_t read_dist;
236 	uint32_t write_dist;
237 	uint8_t read_first;
238 	uint8_t write_first;
239 	uint8_t rebalance_in_progress;
240 	uint64_t rebalance_last_usec;
241 //	double carry;
242 	pthread_t scanthread;
243 	struct chunk *testhead,**testtail;
244 	struct folder *next;
245 } folder;
246 
247 /*
248 typedef struct damaged {
249 	char *path;
250 	uint64_t avail;
251 	uint64_t total;
252 	ioerror lasterror;
253 	uint32_t chunkcount;
254 	struct damaged_disk *next;
255 } damaged;
256 */
257 
258 static uint32_t HDDTestFreq = 10;
259 static uint32_t HDDRebalancePerc = 20;
260 static uint32_t HDDErrorCount = 2;
261 static uint32_t HDDErrorTime = 600;
262 static uint64_t LeaveFree;
263 static uint8_t DoFsyncBeforeClose = 0;
264 
265 /* folders data */
266 static folder *folderhead = NULL;
267 
268 /* chunk hash */
269 static chunk* hashtab[HASHSIZE];
270 
271 /* extra chunk info */
272 static dopchunk *dophashtab[DHASHSIZE];
273 //static dopchunk *dopchunks = NULL;
274 static dopchunk *newdopchunks = NULL;
275 
276 // master reports
277 static damagedchunk *damagedchunks = NULL;
278 static lostchunk *lostchunks = NULL;
279 static newchunk *newchunks = NULL;
280 static uint32_t errorcounter = 0;
281 static int hddspacechanged = 0;
282 
283 static pthread_t rebalancethread,foldersthread,delayedthread,testerthread;
284 static uint8_t term = 0;
285 static uint8_t folderactions = 0;
286 static uint8_t testerreset = 0;
287 static pthread_mutex_t termlock = PTHREAD_MUTEX_INITIALIZER;
288 
289 // stats_X
290 static pthread_mutex_t statslock = PTHREAD_MUTEX_INITIALIZER;
291 
292 // newdopchunks + dophashtab
293 static pthread_mutex_t doplock = PTHREAD_MUTEX_INITIALIZER;
294 static pthread_mutex_t ndoplock = PTHREAD_MUTEX_INITIALIZER;
295 
296 // master reports = damaged chunks, lost chunks, errorcounter, hddspacechanged
297 static pthread_mutex_t dclock = PTHREAD_MUTEX_INITIALIZER;
298 
299 // hashtab - only hash tab, chunks have their own separate locks
300 static pthread_mutex_t hashlock = PTHREAD_MUTEX_INITIALIZER;
301 static cntcond *cclist = NULL;
302 
303 // folderhead + all data in structures
304 static pthread_mutex_t folderlock = PTHREAD_MUTEX_INITIALIZER;
305 
306 // chunk tester
307 static pthread_mutex_t testlock = PTHREAD_MUTEX_INITIALIZER;
308 
309 #ifndef PRESERVE_BLOCK
310 static pthread_key_t hdrbufferkey;
311 static pthread_key_t blockbufferkey;
312 #endif
313 
314 /*
315 static uint8_t wait_for_scan = 0;
316 static uint32_t scanprogress;
317 static uint8_t scanprogresswaiting;
318 static pthread_cond_t scanprogresscond = PTHREAD_COND_INITIALIZER;
319 */
320 
static uint32_t emptyblockcrc;

/* I/O counters - read and reset by hdd_stats (under statslock) */
static uint64_t stats_bytesr = 0;
static uint64_t stats_bytesw = 0;
static uint32_t stats_opr = 0;
static uint32_t stats_opw = 0;
static uint32_t stats_databytesr = 0;
static uint32_t stats_databytesw = 0;
static uint32_t stats_dataopr = 0;
static uint32_t stats_dataopw = 0;
static uint64_t stats_rtime = 0;
static uint64_t stats_wtime = 0;

/* chunk operation counters - read and reset by hdd_op_stats (under statslock) */
static uint32_t stats_create = 0;
static uint32_t stats_delete = 0;
static uint32_t stats_test = 0;
static uint32_t stats_version = 0;
static uint32_t stats_duplicate = 0;
static uint32_t stats_truncate = 0;
static uint32_t stats_duptrunc = 0;
341 
/* Zero every counter stored in *r. */
static inline void hdd_stats_clear(hddstats *r) {
	memset(r,0,sizeof(*r));
}
345 
/*
 * Accumulate *src into *dst: byte totals, time sums and operation counters
 * are added, the "max" fields are raised to the larger of the two values.
 */
static inline void hdd_stats_add(hddstats *dst,hddstats *src) {
	/* totals and counters */
	dst->rbytes += src->rbytes;
	dst->wbytes += src->wbytes;
	dst->nsecreadsum += src->nsecreadsum;
	dst->nsecwritesum += src->nsecwritesum;
	dst->nsecfsyncsum += src->nsecfsyncsum;
	dst->rops += src->rops;
	dst->wops += src->wops;
	dst->fsyncops += src->fsyncops;

	/* maxima */
	if (dst->nsecreadmax<src->nsecreadmax) {
		dst->nsecreadmax = src->nsecreadmax;
	}
	if (dst->nsecwritemax<src->nsecwritemax) {
		dst->nsecwritemax = src->nsecwritemax;
	}
	if (dst->nsecfsyncmax<src->nsecfsyncmax) {
		dst->nsecfsyncmax = src->nsecfsyncmax;
	}
}
365 
366 /* size: 64 */
hdd_stats_binary_pack(uint8_t ** buff,hddstats * r)367 static inline void hdd_stats_binary_pack(uint8_t **buff,hddstats *r) {
368 	put64bit(buff,r->rbytes);
369 	put64bit(buff,r->wbytes);
370 	put64bit(buff,r->nsecreadsum/1000);
371 	put64bit(buff,r->nsecwritesum/1000);
372 	put64bit(buff,r->nsecfsyncsum/1000);
373 	put32bit(buff,r->rops);
374 	put32bit(buff,r->wops);
375 	put32bit(buff,r->fsyncops);
376 	put32bit(buff,r->nsecreadmax/1000);
377 	put32bit(buff,r->nsecwritemax/1000);
378 	put32bit(buff,r->nsecfsyncmax/1000);
379 }
380 
381 /*
382 void printbacktrace(void) {
383 	void* callstack[128];
384 	int i, frames = backtrace(callstack, 128);
385 	char** strs = backtrace_symbols(callstack, frames);
386 	for (i=0 ; i<frames ; ++i) {
387 		printf("%s\n", strs[i]);
388 	}
389 	free(strs);
390 }
391 */
hdd_report_damaged_chunk(uint64_t chunkid)392 void hdd_report_damaged_chunk(uint64_t chunkid) {
393 	damagedchunk *dc;
394 	zassert(pthread_mutex_lock(&dclock));
395 	dc = malloc(sizeof(damagedchunk));
396 	passert(dc);
397 	dc->chunkid = chunkid;
398 	dc->next = damagedchunks;
399 	damagedchunks = dc;
400 	zassert(pthread_mutex_unlock(&dclock));
401 }
402 
hdd_get_damaged_chunk_count(void)403 uint32_t hdd_get_damaged_chunk_count(void) {
404 	damagedchunk *dc;
405 	uint32_t result;
406 	zassert(pthread_mutex_lock(&dclock));
407 	result = 0;
408 	for (dc=damagedchunks ; dc ; dc=dc->next) {
409 		result++;
410 	}
411 	return result;
412 }
413 
hdd_get_damaged_chunk_data(uint8_t * buff)414 void hdd_get_damaged_chunk_data(uint8_t *buff) {
415 	damagedchunk *dc,*ndc;
416 	uint64_t chunkid;
417 	if (buff) {
418 		dc = damagedchunks;
419 		while (dc) {
420 			ndc = dc;
421 			dc = dc->next;
422 			chunkid = ndc->chunkid;
423 			put64bit(&buff,chunkid);
424 			free(ndc);
425 		}
426 		damagedchunks = NULL;
427 	}
428 	zassert(pthread_mutex_unlock(&dclock));
429 }
430 
hdd_report_lost_chunk(uint64_t chunkid)431 void hdd_report_lost_chunk(uint64_t chunkid) {
432 	lostchunk *lc;
433 	zassert(pthread_mutex_lock(&dclock));
434 	if (lostchunks && lostchunks->chunksinblock<LOSTCHUNKSBLOCKSIZE) {
435 		lostchunks->chunkidblock[lostchunks->chunksinblock++] = chunkid;
436 	} else {
437 		lc = malloc(sizeof(lostchunk));
438 		passert(lc);
439 		lc->chunkidblock[0] = chunkid;
440 		lc->chunksinblock = 1;
441 		lc->next = lostchunks;
442 		lostchunks = lc;
443 	}
444 	zassert(pthread_mutex_unlock(&dclock));
445 }
446 
hdd_get_lost_chunk_count(uint32_t limit)447 uint32_t hdd_get_lost_chunk_count(uint32_t limit) {
448 	lostchunk *lc;
449 	uint32_t result;
450 	zassert(pthread_mutex_lock(&dclock));
451 	result = 0;
452 	for (lc=lostchunks ; lc ; lc=lc->next) {
453 		if (limit>lc->chunksinblock) {
454 			limit -= lc->chunksinblock;
455 			result += lc->chunksinblock;
456 		}
457 	}
458 	return result;
459 }
460 
hdd_get_lost_chunk_data(uint8_t * buff,uint32_t limit)461 void hdd_get_lost_chunk_data(uint8_t *buff,uint32_t limit) {
462 	lostchunk *lc,**lcptr;
463 	uint64_t chunkid;
464 	uint32_t i;
465 	if (buff) {
466 		lcptr = &lostchunks;
467 		while ((lc=*lcptr)) {
468 			if (limit>lc->chunksinblock) {
469 				for (i=0 ; i<lc->chunksinblock ; i++) {
470 					chunkid = lc->chunkidblock[i];
471 					put64bit(&buff,chunkid);
472 				}
473 				limit -= lc->chunksinblock;
474 				*lcptr = lc->next;
475 				free(lc);
476 			} else {
477 				lcptr = &(lc->next);
478 			}
479 		}
480 	}
481 	zassert(pthread_mutex_unlock(&dclock));
482 }
483 
hdd_report_new_chunk(uint64_t chunkid,uint32_t version)484 void hdd_report_new_chunk(uint64_t chunkid,uint32_t version) {
485 	newchunk *nc;
486 	zassert(pthread_mutex_lock(&dclock));
487 	if (newchunks && newchunks->chunksinblock<NEWCHUNKSBLOCKSIZE) {
488 		newchunks->chunkidblock[newchunks->chunksinblock] = chunkid;
489 		newchunks->versionblock[newchunks->chunksinblock] = version;
490 		newchunks->chunksinblock++;
491 	} else {
492 		nc = malloc(sizeof(newchunk));
493 		passert(nc);
494 		nc->chunkidblock[0] = chunkid;
495 		nc->versionblock[0] = version;
496 		nc->chunksinblock = 1;
497 		nc->next = newchunks;
498 		newchunks = nc;
499 	}
500 	zassert(pthread_mutex_unlock(&dclock));
501 }
502 
hdd_get_new_chunk_count(uint32_t limit)503 uint32_t hdd_get_new_chunk_count(uint32_t limit) {
504 	newchunk *nc;
505 	uint32_t result;
506 	zassert(pthread_mutex_lock(&dclock));
507 	result = 0;
508 	for (nc=newchunks ; nc ; nc=nc->next) {
509 		if (limit>nc->chunksinblock) {
510 			limit -= nc->chunksinblock;
511 			result += nc->chunksinblock;
512 		}
513 	}
514 	return result;
515 }
516 
hdd_get_new_chunk_data(uint8_t * buff,uint32_t limit)517 void hdd_get_new_chunk_data(uint8_t *buff,uint32_t limit) {
518 	newchunk *nc,**ncptr;
519 	uint64_t chunkid;
520 	uint32_t version;
521 	uint32_t i;
522 	if (buff) {
523 		ncptr = &newchunks;
524 		while ((nc=*ncptr)) {
525 			if (limit>nc->chunksinblock) {
526 				for (i=0 ; i<nc->chunksinblock ; i++) {
527 					chunkid = nc->chunkidblock[i];
528 					version = nc->versionblock[i];
529 					put64bit(&buff,chunkid);
530 					put32bit(&buff,version);
531 				}
532 				limit -= nc->chunksinblock;
533 				*ncptr = nc->next;
534 				free(nc);
535 			} else {
536 				ncptr = &(nc->next);
537 			}
538 		}
539 	}
540 	zassert(pthread_mutex_unlock(&dclock));
541 }
542 
hdd_errorcounter(void)543 uint32_t hdd_errorcounter(void) {
544 	uint32_t result;
545 	zassert(pthread_mutex_lock(&dclock));
546 	result = errorcounter;
547 	errorcounter = 0;
548 	zassert(pthread_mutex_unlock(&dclock));
549 	return result;
550 }
551 
hdd_spacechanged(void)552 int hdd_spacechanged(void) {
553 	uint32_t result;
554 	zassert(pthread_mutex_lock(&dclock));
555 	result = hddspacechanged;
556 	hddspacechanged = 0;
557 	zassert(pthread_mutex_unlock(&dclock));
558 	return result;
559 }
560 
hdd_stats(uint64_t * br,uint64_t * bw,uint32_t * opr,uint32_t * opw,uint32_t * dbr,uint32_t * dbw,uint32_t * dopr,uint32_t * dopw,uint64_t * rtime,uint64_t * wtime)561 void hdd_stats(uint64_t *br,uint64_t *bw,uint32_t *opr,uint32_t *opw,uint32_t *dbr,uint32_t *dbw,uint32_t *dopr,uint32_t *dopw,uint64_t *rtime,uint64_t *wtime) {
562 	zassert(pthread_mutex_lock(&statslock));
563 	*br = stats_bytesr;
564 	*bw = stats_bytesw;
565 	*opr = stats_opr;
566 	*opw = stats_opw;
567 	*dbr = stats_databytesr;
568 	*dbw = stats_databytesw;
569 	*dopr = stats_dataopr;
570 	*dopw = stats_dataopw;
571 	*rtime = stats_rtime;
572 	*wtime = stats_wtime;
573 	stats_bytesr = 0;
574 	stats_bytesw = 0;
575 	stats_opr = 0;
576 	stats_opw = 0;
577 	stats_databytesr = 0;
578 	stats_databytesw = 0;
579 	stats_dataopr = 0;
580 	stats_dataopw = 0;
581 	stats_rtime = 0;
582 	stats_wtime = 0;
583 	zassert(pthread_mutex_unlock(&statslock));
584 }
585 
hdd_op_stats(uint32_t * op_create,uint32_t * op_delete,uint32_t * op_version,uint32_t * op_duplicate,uint32_t * op_truncate,uint32_t * op_duptrunc,uint32_t * op_test)586 void hdd_op_stats(uint32_t *op_create,uint32_t *op_delete,uint32_t *op_version,uint32_t *op_duplicate,uint32_t *op_truncate,uint32_t *op_duptrunc,uint32_t *op_test) {
587 	zassert(pthread_mutex_lock(&statslock));
588 	*op_create = stats_create;
589 	*op_delete = stats_delete;
590 	*op_version = stats_version;
591 	*op_duplicate = stats_duplicate;
592 	*op_truncate = stats_truncate;
593 	*op_duptrunc = stats_duptrunc;
594 	*op_test = stats_test;
595 	stats_create = 0;
596 	stats_delete = 0;
597 	stats_version = 0;
598 	stats_duplicate = 0;
599 	stats_truncate = 0;
600 	stats_duptrunc = 0;
601 	stats_test = 0;
602 	zassert(pthread_mutex_unlock(&statslock));
603 }
604 
hdd_stats_read(uint32_t size)605 static inline void hdd_stats_read(uint32_t size) {
606 	zassert(pthread_mutex_lock(&statslock));
607 	stats_opr++;
608 	stats_bytesr += size;
609 	zassert(pthread_mutex_unlock(&statslock));
610 }
611 
hdd_stats_write(uint32_t size)612 static inline void hdd_stats_write(uint32_t size) {
613 	zassert(pthread_mutex_lock(&statslock));
614 	stats_opw++;
615 	stats_bytesw += size;
616 	zassert(pthread_mutex_unlock(&statslock));
617 }
618 
/*
 * Account one data read of 'size' bytes that took 'rtime' on folder 'f'
 * (global counters and the folder's current statistics, under statslock).
 * Non-positive durations are ignored.
 *
 * The per-folder maximum is a 32-bit field, so a duration that does not fit
 * in it is clamped to UINT32_MAX instead of being silently truncated
 * (truncation could record a very long operation as a very short one).
 */
static inline void hdd_stats_dataread(folder *f,uint32_t size,int64_t rtime) {
	uint32_t rtime32;
	if (rtime<=0) {
		return;
	}
	rtime32 = (rtime>(int64_t)UINT32_MAX)?UINT32_MAX:(uint32_t)rtime;
	zassert(pthread_mutex_lock(&statslock));
	stats_dataopr++;
	stats_databytesr += size;
	stats_rtime += rtime;
	f->cstat.rops++;
	f->cstat.rbytes += size;
	f->cstat.nsecreadsum += rtime;
	if (rtime32>f->cstat.nsecreadmax) {
		f->cstat.nsecreadmax = rtime32;
	}
	zassert(pthread_mutex_unlock(&statslock));
}
635 
/*
 * Account one data write of 'size' bytes that took 'wtime' on folder 'f'
 * (global counters and the folder's current statistics, under statslock).
 * Non-positive durations are ignored.
 *
 * The per-folder maximum is a 32-bit field, so a duration that does not fit
 * in it is clamped to UINT32_MAX instead of being silently truncated
 * (truncation could record a very long operation as a very short one).
 */
static inline void hdd_stats_datawrite(folder *f,uint32_t size,int64_t wtime) {
	uint32_t wtime32;
	if (wtime<=0) {
		return;
	}
	wtime32 = (wtime>(int64_t)UINT32_MAX)?UINT32_MAX:(uint32_t)wtime;
	zassert(pthread_mutex_lock(&statslock));
	stats_dataopw++;
	stats_databytesw += size;
	stats_wtime += wtime;
	f->cstat.wops++;
	f->cstat.wbytes += size;
	f->cstat.nsecwritesum += wtime;
	if (wtime32>f->cstat.nsecwritemax) {
		f->cstat.nsecwritemax = wtime32;
	}
	zassert(pthread_mutex_unlock(&statslock));
}
652 
/*
 * Account one fsync that took 'fsynctime' on folder 'f' (under statslock).
 * The time is added to the global write time and to the folder's fsync
 * statistics. Non-positive durations are ignored.
 *
 * The per-folder maximum is a 32-bit field, so a duration that does not fit
 * in it is clamped to UINT32_MAX instead of being silently truncated
 * (truncation could record a very long fsync as a very short one).
 */
static inline void hdd_stats_datafsync(folder *f,int64_t fsynctime) {
	uint32_t fsynctime32;
	if (fsynctime<=0) {
		return;
	}
	fsynctime32 = (fsynctime>(int64_t)UINT32_MAX)?UINT32_MAX:(uint32_t)fsynctime;
	zassert(pthread_mutex_lock(&statslock));
	stats_wtime += fsynctime;
	f->cstat.fsyncops++;
	f->cstat.nsecfsyncsum += fsynctime;
	if (fsynctime32>f->cstat.nsecfsyncmax) {
		f->cstat.nsecfsyncmax = fsynctime32;
	}
	zassert(pthread_mutex_unlock(&statslock));
}
666 
hdd_diskinfo_v1_size()667 uint32_t hdd_diskinfo_v1_size() {
668 	folder *f;
669 	uint32_t s,sl;
670 
671 	s = 0;
672 	zassert(pthread_mutex_lock(&folderlock));
673 	for (f=folderhead ; f ; f=f->next ) {
674 		sl = strlen(f->path);
675 		if (sl>255) {
676 			sl = 255;
677 		}
678 		s += 34+sl;
679 	}
680 	return s;
681 }
682 
hdd_diskinfo_v1_data(uint8_t * buff)683 void hdd_diskinfo_v1_data(uint8_t *buff) {
684 	folder *f;
685 	uint32_t sl;
686 	uint32_t ei;
687 	if (buff) {
688 		for (f=folderhead ; f ; f=f->next ) {
689 			sl = strlen(f->path);
690 			if (sl>255) {
691 				put8bit(&buff,255);
692 				memcpy(buff,"(...)",5);
693 				memcpy(buff+5,f->path+(sl-250),250);
694 				buff += 255;
695 			} else {
696 				put8bit(&buff,sl);
697 				if (sl>0) {
698 					memcpy(buff,f->path,sl);
699 					buff += sl;
700 				}
701 			}
702 			put8bit(&buff,((f->todel)?1:0)+((f->damaged)?2:0)+((f->scanstate==SCST_SCANINPROGRESS)?4:0));
703 			ei = (f->lasterrindx+(LASTERRSIZE-1))%LASTERRSIZE;
704 			put64bit(&buff,f->lasterrtab[ei].chunkid);
705 			put32bit(&buff,f->lasterrtab[ei].timestamp);
706 			put64bit(&buff,f->total-f->avail);
707 			put64bit(&buff,f->total);
708 			put32bit(&buff,f->chunkcount);
709 		}
710 	}
711 	zassert(pthread_mutex_unlock(&folderlock));
712 }
713 
hdd_diskinfo_v2_size()714 uint32_t hdd_diskinfo_v2_size() {
715 	folder *f;
716 	uint32_t s,sl;
717 
718 	s = 0;
719 	zassert(pthread_mutex_lock(&folderlock));
720 	for (f=folderhead ; f ; f=f->next ) {
721 		sl = strlen(f->path);
722 		if (sl>255) {
723 			sl = 255;
724 		}
725 		s += 2+226+sl;
726 	}
727 	return s;
728 }
729 
hdd_diskinfo_v2_data(uint8_t * buff)730 void hdd_diskinfo_v2_data(uint8_t *buff) {
731 	folder *f;
732 	hddstats s;
733 	uint32_t sl;
734 	uint32_t ei;
735 	uint32_t pos;
736 	if (buff) {
737 		zassert(pthread_mutex_lock(&statslock));
738 		for (f=folderhead ; f ; f=f->next ) {
739 			sl = strlen(f->path);
740 			if (sl>255) {
741 				put16bit(&buff,226+255);	// size of this entry
742 				put8bit(&buff,255);
743 				memcpy(buff,"(...)",5);
744 				memcpy(buff+5,f->path+(sl-250),250);
745 				buff += 255;
746 			} else {
747 				put16bit(&buff,226+sl);	// size of this entry
748 				put8bit(&buff,sl);
749 				if (sl>0) {
750 					memcpy(buff,f->path,sl);
751 					buff += sl;
752 				}
753 			}
754 			put8bit(&buff,((f->todel)?1:0)+((f->damaged)?2:0)+((f->scanstate==SCST_SCANINPROGRESS)?4:0));
755 			ei = (f->lasterrindx+(LASTERRSIZE-1))%LASTERRSIZE;
756 			put64bit(&buff,f->lasterrtab[ei].chunkid);
757 			put32bit(&buff,f->lasterrtab[ei].timestamp);
758 			if (f->scanstate==SCST_SCANINPROGRESS) {
759 				put64bit(&buff,f->scanprogress);
760 				put64bit(&buff,0);
761 			} else {
762 				put64bit(&buff,f->total-f->avail);
763 				put64bit(&buff,f->total);
764 			}
765 			put32bit(&buff,f->chunkcount);
766 			s = f->stats[f->statspos];
767 			hdd_stats_binary_pack(&buff,&s);	// 64B
768 			for (pos=1 ; pos<60 ; pos++) {
769 				hdd_stats_add(&s,&(f->stats[(f->statspos+pos)%STATSHISTORY]));
770 			}
771 			hdd_stats_binary_pack(&buff,&s);	// 64B
772 			for (pos=60 ; pos<24*60 ; pos++) {
773 				hdd_stats_add(&s,&(f->stats[(f->statspos+pos)%STATSHISTORY]));
774 			}
775 			hdd_stats_binary_pack(&buff,&s);	// 64B
776 		}
777 		zassert(pthread_mutex_unlock(&statslock));
778 	}
779 	zassert(pthread_mutex_unlock(&folderlock));
780 }
781 
/* modes of hdd_open_files_handle */
#define OF_BEFORE_OPEN 0
#define OF_AFTER_CLOSE 1
#define OF_INIT 2
#define OF_INFO 3

/*
 * Limits the number of simultaneously opened chunk files.
 *
 * mode:
 *   OF_BEFORE_OPEN - reserve one slot; blocks while 'count' has reached 'limit'
 *   OF_AFTER_CLOSE - give one slot back and wake up one waiting thread (if any)
 *   OF_INIT        - set the limit to 2/3 of the soft RLIMIT_NOFILE and log it
 *   OF_INFO        - log the current number of used slots and the limit
 *
 * OF_INIT details:
 *   - when getrlimit fails the built-in default limit is kept ('rl' would
 *     otherwise be read uninitialised),
 *   - RLIM_INFINITY and values above UINT32_MAX are treated as UINT32_MAX and
 *     the arithmetic is done in 64 bits, so the result can neither overflow
 *     nor be truncated,
 *   - the limit is never set to 0, because then every OF_BEFORE_OPEN call
 *     would block forever.
 */
static inline void hdd_open_files_handle(uint8_t mode) {
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
	static uint32_t count = 0;
	static uint32_t limit = 500;
	static uint32_t waiting = 0;
	if (mode==OF_BEFORE_OPEN) { // before open
		zassert(pthread_mutex_lock(&lock));
		while (count >= limit) {
			waiting++;
			zassert(pthread_cond_wait(&cond,&lock));
		}
		count++;
		zassert(pthread_mutex_unlock(&lock));
	} else if (mode==OF_AFTER_CLOSE) { // after close
		zassert(pthread_mutex_lock(&lock));
		count--;
		if (waiting>0) {
			zassert(pthread_cond_signal(&cond));
			waiting--;
		}
		zassert(pthread_mutex_unlock(&lock));
	} else if (mode==OF_INIT) {
		struct rlimit rl;
		uint64_t maxfiles;
		if (getrlimit(RLIMIT_NOFILE,&rl)==0) {
			if (rl.rlim_cur==RLIM_INFINITY || (uint64_t)(rl.rlim_cur)>(uint64_t)UINT32_MAX) {
				maxfiles = UINT32_MAX;
			} else {
				maxfiles = rl.rlim_cur;
			}
			limit = (uint32_t)((maxfiles * 2) / 3);
			if (limit==0) {
				limit = 1;
			}
		}
		syslog(LOG_NOTICE,"hdd space manager: setting open chunks limit to: %"PRIu32,limit);
	} else if (mode==OF_INFO) {
		uint32_t c;
		zassert(pthread_mutex_lock(&lock));
		c = count;
		zassert(pthread_mutex_unlock(&lock));
		syslog(LOG_NOTICE,"hdd space manager: open files: %"PRIu32"/%"PRIu32,c,limit);
	}
}
822 
hdd_diskinfo_movestats(void)823 void hdd_diskinfo_movestats(void) {
824 	folder *f;
825 	zassert(pthread_mutex_lock(&folderlock));
826 	zassert(pthread_mutex_lock(&statslock));
827 	for (f=folderhead ; f ; f=f->next ) {
828 		if (f->statspos==0) {
829 			f->statspos = STATSHISTORY-1;
830 		} else {
831 			f->statspos--;
832 		}
833 		f->stats[f->statspos] = f->cstat;
834 		hdd_stats_clear(&(f->cstat));
835 	}
836 	zassert(pthread_mutex_unlock(&statslock));
837 	zassert(pthread_mutex_unlock(&folderlock));
838 }
839 
840 // testlock:locked
hdd_remove_chunk_from_test_chain(chunk * c,folder * f)841 static inline void hdd_remove_chunk_from_test_chain(chunk *c,folder *f) {
842 	*(c->testprev) = c->testnext;
843 	if (c->testnext) {
844 		c->testnext->testprev = c->testprev;
845 	} else {
846 		f->testtail = c->testprev;
847 	}
848 	c->testnext = NULL;
849 	c->testprev = NULL;
850 }
851 
852 // testlock:locked
hdd_add_chunk_to_test_chain(chunk * c,folder * f)853 static inline void hdd_add_chunk_to_test_chain(chunk *c,folder *f) {
854 	c->testnext = NULL;
855 	c->testprev = f->testtail;
856 	*(c->testprev) = c;
857 	f->testtail = &(c->testnext);
858 }
859 
860 // folderlock:locked
hdd_remove_chunk_from_folder(chunk * c,folder * f)861 static inline void hdd_remove_chunk_from_folder(chunk *c,folder *f) {
862 	f->chunkcount--;
863 	f->chunktab[c->ownerindx] = f->chunktab[f->chunkcount];
864 	f->chunktab[c->ownerindx]->ownerindx = c->ownerindx;
865 	c->owner = NULL;
866 	c->ownerindx = 0;
867 }
868 
869 // folderlock:locked
hdd_add_chunk_to_folder(chunk * c,folder * f)870 static inline void hdd_add_chunk_to_folder(chunk *c,folder *f) {
871 	if (f->chunkcount==f->chunktabsize) {
872 		if (f->chunktabsize==0) {
873 			f->chunktabsize=10000;
874 			f->chunktab = malloc(sizeof(chunk*)*f->chunktabsize);
875 		} else {
876 			f->chunktabsize*=3;
877 			f->chunktabsize/=2;
878 			f->chunktab = realloc(f->chunktab,sizeof(chunk*)*f->chunktabsize);
879 		}
880 		passert(f->chunktab);
881 	}
882 	f->chunktab[f->chunkcount] = c;
883 	c->owner = f;
884 	c->ownerindx = f->chunkcount;
885 	f->chunkcount++;
886 }
887 
/*
 * Unlink chunk 'c' from its hash bucket and destroy it: the file descriptor
 * is closed (and the open-files slot returned), the crc table, the preserved
 * block and the file name are released, then the structure itself is freed.
 * Nothing happens when 'c' is not found in its bucket.
 */
static inline void hdd_chunk_remove(chunk *c) {
	chunk **link;

	/* find the pointer that refers to 'c' in the bucket chain */
	link = &(hashtab[HASHPOS(c->chunkid)]);
	while (*link!=NULL && *link!=c) {
		link = &((*link)->next);
	}
	if (*link==NULL) {
		return;
	}
	*link = c->next;

	if (c->fd>=0) {
		close(c->fd);
		hdd_open_files_handle(OF_AFTER_CLOSE);
	}
	if (c->crc!=NULL) {
#ifdef MMAP_ALLOC
		munmap((void*)(c->crc),4096);
#else
		free(c->crc);
#endif
	}
#ifdef PRESERVE_BLOCK
	if (c->block!=NULL) {
# ifdef MMAP_ALLOC
		munmap((void*)(c->block),MFSBLOCKSIZE);
# else
		free(c->block);
# endif
	}
#endif /* PRESERVE_BLOCK */
	free(c->filename);	// free(NULL) is a no-op
	free(c);
}
924 
/*
 * Release chunk 'c' (under hashlock):
 *   CH_LOCKED      - the chunk becomes CH_AVAIL and one waiting thread (if any) is woken up
 *   CH_TOBEDELETED - with waiting threads the chunk becomes CH_DELETED and one
 *                    of them is woken up; without waiters the chunk is removed
 * Other states are left untouched.
 */
static void hdd_chunk_release(chunk *c) {
	zassert(pthread_mutex_lock(&hashlock));
	switch (c->state) {
	case CH_LOCKED:
		c->state = CH_AVAIL;
		if (c->ccond!=NULL) {
			zassert(pthread_cond_signal(&(c->ccond->cond)));
		}
		break;
	case CH_TOBEDELETED:
		if (c->ccond==NULL) {
			hdd_chunk_remove(c);
		} else {
			c->state = CH_DELETED;
			zassert(pthread_cond_signal(&(c->ccond->cond)));
		}
		break;
	default:
		break;
	}
	zassert(pthread_mutex_unlock(&hashlock));
}
947 
/*
 * Read the attributes of the chunk file (via the open descriptor when there
 * is one, otherwise via the file name) and derive the number of data blocks.
 * The file has to be a regular file whose size is the chunk header plus a
 * whole number of blocks, not exceeding the maximal chunk size.
 * Returns 0 on success (c->blocks set, c->validattr = 1), -1 otherwise.
 */
static int hdd_chunk_getattr(chunk *c) {
	struct stat sb;
	int status;

	if (c->fd<0) {
		status = stat(c->filename,&sb);
	} else {
		status = fstat(c->fd,&sb);
	}
	if (status<0) {
		return -1;
	}
	if ((sb.st_mode & S_IFMT) != S_IFREG) {
		return -1;
	}
	if (sb.st_size<CHUNKHDRSIZE) {	// shorter than the header
		return -1;
	}
	if (sb.st_size>(CHUNKHDRSIZE+MFSCHUNKSIZE)) {	// longer than a full chunk
		return -1;
	}
	if (((sb.st_size-CHUNKHDRSIZE)&MFSBLOCKMASK)!=0) {	// data part is not a multiple of the block size
		return -1;
	}
	c->blocks = (sb.st_size - CHUNKHDRSIZE) / MFSBLOCKSIZE;
//	c->testtime = (sb.st_atime>sb.st_mtime)?sb.st_atime:sb.st_mtime;
	c->validattr = 1;
	return 0;
}
970 
hdd_chunk_tryfind(uint64_t chunkid)971 static chunk* hdd_chunk_tryfind(uint64_t chunkid) {
972 	uint32_t hashpos = HASHPOS(chunkid);
973 	chunk *c;
974 	zassert(pthread_mutex_lock(&hashlock));
975 	for (c=hashtab[hashpos] ; c && c->chunkid!=chunkid ; c=c->next) {}
976 	if (c!=NULL) {
977 		if (c->state==CH_LOCKED) {
978 			c = CHUNKLOCKED;
979 		} else if (c->state!=CH_AVAIL) {
980 			c = NULL;
981 		} else {
982 			c->state = CH_LOCKED;
983 		}
984 	}
985 //	if (c!=NULL && c!=CHUNKLOCKED) {
986 //		syslog(LOG_WARNING,"hdd_chunk_tryfind returns chunk: %016"PRIX64" (c->state:%u)",c->chunkid,c->state);
987 //	}
988 	zassert(pthread_mutex_unlock(&hashlock));
989 	return c;
990 }
991 
992 static void hdd_chunk_delete(chunk *c);
993 
hdd_chunk_get(uint64_t chunkid,uint8_t cflag)994 static chunk* hdd_chunk_get(uint64_t chunkid,uint8_t cflag) {
995 	uint32_t hashpos = HASHPOS(chunkid);
996 	chunk *c;
997 	cntcond *cc;
998 	zassert(pthread_mutex_lock(&hashlock));
999 	for (c=hashtab[hashpos] ; c && c->chunkid!=chunkid ; c=c->next) {}
1000 	if (c==NULL) {
1001 		if (cflag!=CH_NEW_NONE) {
1002 			c = malloc(sizeof(chunk));
1003 			passert(c);
1004 			c->chunkid = chunkid;
1005 			c->version = 0;
1006 			c->owner = NULL;
1007 			c->filename = NULL;
1008 			c->blocks = 0;
1009 			c->crcrefcount = 0;
1010 			c->opento = 0.0;
1011 			c->crcto = 0.0;
1012 			c->crcchanged = 0;
1013 			c->fsyncneeded = 0;
1014 			c->fd = -1;
1015 			c->crc = NULL;
1016 			c->state = CH_LOCKED;
1017 			c->ccond = NULL;
1018 #ifdef PRESERVE_BLOCK
1019 			c->blockto = 0.0;
1020 			c->block = NULL;
1021 			c->blockno = 0xFFFF;
1022 #endif
1023 			c->validattr = 0;
1024 			c->todel = 0;
1025 			c->testnext = NULL;
1026 			c->testprev = NULL;
1027 			c->next = hashtab[hashpos];
1028 			hashtab[hashpos] = c;
1029 		}
1030 //		syslog(LOG_WARNING,"hdd_chunk_get returns chunk: %016"PRIX64" (c->state:%u)",c->chunkid,c->state);
1031 		zassert(pthread_mutex_unlock(&hashlock));
1032 		return c;
1033 	}
1034 	if (cflag==CH_NEW_EXCLUSIVE) {
1035 		if (c->state==CH_AVAIL || c->state==CH_LOCKED) {
1036 			zassert(pthread_mutex_unlock(&hashlock));
1037 			return NULL;
1038 		}
1039 	}
1040 	for (;;) {
1041 		switch (c->state) {
1042 		case CH_AVAIL:
1043 			c->state = CH_LOCKED;
1044 //			syslog(LOG_WARNING,"hdd_chunk_get returns chunk: %016"PRIX64" (c->state:%u)",c->chunkid,c->state);
1045 			zassert(pthread_mutex_unlock(&hashlock));
1046 			if (c->validattr==0) {
1047 				if (hdd_chunk_getattr(c)) {
1048 					hdd_report_damaged_chunk(c->chunkid);
1049 					unlink(c->filename);
1050 					hdd_chunk_delete(c);
1051 					return NULL;
1052 				}
1053 			}
1054 			return c;
1055 		case CH_DELETED:
1056 			if (cflag!=CH_NEW_NONE) {
1057 				if (c->fd>=0) {
1058 					close(c->fd);
1059 					hdd_open_files_handle(OF_AFTER_CLOSE);
1060 				}
1061 				if (c->crc!=NULL) {
1062 #ifdef MMAP_ALLOC
1063 					munmap((void*)(c->crc),4096);
1064 #else
1065 					free(c->crc);
1066 #endif
1067 				}
1068 #ifdef PRESERVE_BLOCK
1069 				if (c->block!=NULL) {
1070 # ifdef MMAP_ALLOC
1071 					munmap((void*)(c->crc),MFSBLOCKSIZE);
1072 # else
1073 					free(c->block);
1074 # endif
1075 				}
1076 #endif /* PRESERVE_BLOCK */
1077 				if (c->filename!=NULL) {
1078 					free(c->filename);
1079 				}
1080 				c->version = 0;
1081 				c->owner = NULL;
1082 				c->filename = NULL;
1083 				c->blocks = 0;
1084 				c->crcrefcount = 0;
1085 				c->opento = 0.0;
1086 				c->crcto = 0.0;
1087 				c->crcchanged = 0;
1088 				c->fsyncneeded = 0;
1089 				c->fd = -1;
1090 				c->crc = NULL;
1091 #ifdef PRESERVE_BLOCK
1092 				c->blockto = 0.0;
1093 				c->block = NULL;
1094 				c->blockno = 0xFFFF;
1095 #endif /* PRESERVE_BLOCK */
1096 				c->validattr = 0;
1097 				c->todel = 0;
1098 				c->state = CH_LOCKED;
1099 //				syslog(LOG_WARNING,"hdd_chunk_get returns chunk: %016"PRIX64" (c->state:%u)",c->chunkid,c->state);
1100 				zassert(pthread_mutex_unlock(&hashlock));
1101 				return c;
1102 			}
1103 			if (c->ccond==NULL) {	// no more waiting threads - remove
1104 				hdd_chunk_remove(c);
1105 			} else {	// there are waiting threads - wake them up
1106 //				printf("wake up one thread waiting for DELETED chunk: %"PRIu64" on ccond:%p\n",c->chunkid,c->ccond);
1107 //				printbacktrace();
1108 				zassert(pthread_cond_signal(&(c->ccond->cond)));
1109 			}
1110 			zassert(pthread_mutex_unlock(&hashlock));
1111 			return NULL;
1112 		case CH_TOBEDELETED:
1113 		case CH_LOCKED:
1114 			if (c->ccond==NULL) {
1115 				for (cc=cclist ; cc && cc->wcnt ; cc=cc->next) {}
1116 				if (cc==NULL) {
1117 					cc = malloc(sizeof(cntcond));
1118 					passert(cc);
1119 					zassert(pthread_cond_init(&(cc->cond),NULL));
1120 					cc->wcnt = 0;
1121 					cc->next = cclist;
1122 					cclist = cc;
1123 				}
1124 				c->ccond = cc;
1125 			}
1126 			c->ccond->wcnt++;
1127 //			printf("wait for %s chunk: %"PRIu64" on ccond:%p\n",(c->state==CH_LOCKED)?"LOCKED":"TOBEDELETED",c->chunkid,c->ccond);
1128 //			printbacktrace();
1129 			zassert(pthread_cond_wait(&(c->ccond->cond),&hashlock));
1130 //			printf("%s chunk: %"PRIu64" woke up on ccond:%p\n",(c->state==CH_LOCKED)?"LOCKED":(c->state==CH_DELETED)?"DELETED":(c->state==CH_AVAIL)?"AVAIL":"TOBEDELETED",c->chunkid,c->ccond);
1131 			c->ccond->wcnt--;
1132 			if (c->ccond->wcnt==0) {
1133 				c->ccond = NULL;
1134 			}
1135 		}
1136 	}
1137 }
1138 
/*
 * Detaches chunk 'c' from its owner folder and from the folder's test chain,
 * then either removes the record (nobody waits for it) or marks it as
 * CH_DELETED and wakes up one thread waiting on its condition.
 * Takes folderlock, testlock and hashlock one after another (never nested).
 */
static void hdd_chunk_delete(chunk *c) {
	folder *owner;

	zassert(pthread_mutex_lock(&folderlock));
	owner = c->owner;
	hdd_remove_chunk_from_folder(c,owner);
	zassert(pthread_mutex_unlock(&folderlock));

	zassert(pthread_mutex_lock(&testlock));
	hdd_remove_chunk_from_test_chain(c,owner);
	zassert(pthread_mutex_unlock(&testlock));

	zassert(pthread_mutex_lock(&hashlock));
	if (c->ccond==NULL) {
		// no waiters - the record can go away immediately
		hdd_chunk_remove(c);
	} else {
		// a waiter will find the CH_DELETED state after waking up
		c->state = CH_DELETED;
		zassert(pthread_cond_signal(&(c->ccond->cond)));
	}
	zassert(pthread_mutex_unlock(&hashlock));
}
1159 
hdd_chunk_create(folder * f,uint64_t chunkid,uint32_t version)1160 static chunk* hdd_chunk_create(folder *f,uint64_t chunkid,uint32_t version) {
1161 	uint32_t leng;
1162 	chunk *c;
1163 
1164 	c = hdd_chunk_get(chunkid,CH_NEW_EXCLUSIVE);
1165 	if (c==NULL) {
1166 		return NULL;
1167 	}
1168 	c->version = version;
1169 	leng = strlen(f->path);
1170 	c->filename = malloc(leng+39);
1171 	passert(c->filename);
1172 	memcpy(c->filename,f->path,leng);
1173 //	memcpy(c->filename+leng,"__/chunk_XXXXXXXXXXXXXXXX_XXXXXXXX.mfs");
1174 //	c->filename[leng]="0123456789ABCDEF"[(chunkid>>4)&15];
1175 //	c->filename[leng+1]="0123456789ABCDEF"[chunkid&15];
1176 //	sprintf(c->filename+leng,"%c%c/chunk_%016"PRIX64"_%08"PRIX32".mfs","0123456789ABCDEF"[(chunkid>>4)&15],"0123456789ABCDEF"[chunkid&15],chunkid,version);
1177 	sprintf(c->filename+leng,"%02X/chunk_%016"PRIX64"_%08"PRIX32".mfs",(unsigned int)(chunkid&255),chunkid,version);
1178 	c->blocks = 0;
1179 	c->validattr = 1;
1180 	f->needrefresh = 1;
1181 	hdd_add_chunk_to_folder(c,f);
1182 	zassert(pthread_mutex_lock(&testlock));
1183 	hdd_add_chunk_to_test_chain(c,f);
1184 	zassert(pthread_mutex_unlock(&testlock));
1185 	return c;
1186 }
1187 
/* lookup-only form of hdd_chunk_get: with CH_NEW_NONE no record is created when 'chunkid' is not found */
#define hdd_chunk_find(chunkid) hdd_chunk_get(chunkid,CH_NEW_NONE)
1189 
/*
 * Moves chunk 'c' to the end of its owner's test chain (under testlock).
 * Nothing is done when the chunk has no successor in the chain.
 */
static void hdd_chunk_testmove(chunk *c) {
	chunk *succ;
	chunk **link;

	zassert(pthread_mutex_lock(&testlock));
	succ = c->testnext;
	if (succ!=NULL) {
		// unlink from the current position
		link = c->testprev;
		*link = succ;
		succ->testprev = link;
		// append after the current tail
		c->testnext = NULL;
		link = c->owner->testtail;
		c->testprev = link;
		*link = c;
		c->owner->testtail = &(c->testnext);
	}
	zassert(pthread_mutex_unlock(&testlock));
}
1203 
1204 // no locks - locked by caller
hdd_refresh_usage(folder * f)1205 static inline void hdd_refresh_usage(folder *f) {
1206 	if (f->sizelimit) {
1207 		uint32_t knownblocks;
1208 		uint32_t knowncount;
1209 		uint64_t calcsize;
1210 		chunk *c;
1211 		knownblocks = 0;
1212 		knowncount = 0;
1213 		zassert(pthread_mutex_lock(&hashlock));
1214 		zassert(pthread_mutex_lock(&testlock));
1215 		for (c=f->testhead ; c ; c=c->testnext) {
1216 			if (c->state==CH_AVAIL && c->validattr==1) {
1217 				knowncount++;
1218 				knownblocks+=c->blocks;
1219 			}
1220 		}
1221 		zassert(pthread_mutex_unlock(&testlock));
1222 		zassert(pthread_mutex_unlock(&hashlock));
1223 		if (knowncount>0) {
1224 			calcsize = knownblocks;
1225 			calcsize *= f->chunkcount;
1226 			calcsize /= knowncount;
1227 			calcsize *= 64;
1228 			calcsize += f->chunkcount*5;
1229 			calcsize *= 1024;
1230 		} else { // unknown result;
1231 			calcsize = 0;
1232 		}
1233 		f->total = f->sizelimit;
1234 		f->avail = (calcsize>f->sizelimit)?0:f->sizelimit-calcsize;
1235 	} else {
1236 		struct statvfs fsinfo;
1237 
1238 		if (statvfs(f->path,&fsinfo)<0) {
1239 			f->avail = 0ULL;
1240 			f->total = 0ULL;
1241 		}
1242 		f->avail = (uint64_t)(fsinfo.f_frsize)*(uint64_t)(fsinfo.f_bavail);
1243 		f->total = (uint64_t)(fsinfo.f_frsize)*(uint64_t)(fsinfo.f_blocks-(fsinfo.f_bfree-fsinfo.f_bavail));
1244 	//	f->total = (uint64_t)(fsinfo.f_frsize)*(uint64_t)(fsinfo.f_blocks);
1245 		if (f->avail < f->leavefree) {
1246 			f->avail = 0ULL;
1247 		} else {
1248 			f->avail -= f->leavefree;
1249 		}
1250 	}
1251 }
1252 
hdd_getfolder()1253 static inline folder* hdd_getfolder() {
1254 	folder *f,*bf;
1255 	double minerr,err,expdist;
1256 //	double usage;
1257 	uint64_t totalsum,good_totalsum;
1258 	uint32_t folder_cnt,good_cnt;
1259 	uint8_t onlygood;
1260 	uint64_t usectime;
1261 
1262 	usectime = monotonic_useconds();
1263 
1264 	totalsum = 0;
1265 	good_totalsum = 0;
1266 	folder_cnt = 0;
1267 	good_cnt = 0;
1268 	onlygood = 0;
1269 	for (f=folderhead ; f ; f=f->next) {
1270 		if (f->damaged==0 && f->toremove==0 && f->todel==0 && f->scanstate==SCST_WORKING && f->total>0 && f->avail>0 && f->balancemode!=REBALANCE_FORCE_SRC) {
1271 			if (usectime < f->rebalance_last_usec) { // wall clock move forward protection
1272 				f->rebalance_last_usec = usectime;
1273 			}
1274 //			usage = f->total-f->avail;
1275 //			usage /= f->total;
1276 			if (f->rebalance_last_usec + REBALANCE_GRACE_PERIOD < usectime) {
1277 				good_cnt++;
1278 				good_totalsum += f->total;
1279 			}
1280 			folder_cnt++;
1281 			totalsum += f->total;
1282 		}
1283 	}
1284 //	syslog(LOG_NOTICE,"good_cnt: %"PRIu32" ; folder_cnt: %"PRIu32" ; good_totalsum:%"PRIu64" ; totalsum:%"PRIu64,good_cnt,folder_cnt,good_totalsum,totalsum);
1285 	if (good_cnt * 3 >= folder_cnt * 2) {
1286 		onlygood = 1;
1287 		totalsum = good_totalsum;
1288 	}
1289 	bf = NULL;
1290 	minerr = 0.0; // make some old compilers happy
1291 	for (f=folderhead ; f ; f=f->next) {
1292 		if (f->damaged==0 && f->toremove==0 && f->todel==0 && f->scanstate==SCST_WORKING && f->total>0 && f->avail>0 && f->balancemode!=REBALANCE_FORCE_SRC) {
1293 //			usage = f->total-f->avail;
1294 //			usage /= f->total;
1295 			if (onlygood==0 || (f->rebalance_last_usec + REBALANCE_GRACE_PERIOD < usectime)) {
1296 				f->write_dist++;
1297 				if (f->write_first) {
1298 					err = 1.0;
1299 				} else {
1300 					expdist = totalsum;
1301 					expdist /= f->total;
1302 					err = (expdist + f->write_corr) / f->write_dist;
1303 				}
1304 				if (bf==NULL || err<minerr) {
1305 					minerr = err;
1306 					bf = f;
1307 				}
1308 			}
1309 		}
1310 	}
1311 	if (bf) {
1312 //		syslog(LOG_NOTICE,"chosen: %s",bf->path);
1313 		if (bf->write_first) {
1314 			bf->write_first = 0;
1315 		} else {
1316 			expdist = totalsum;
1317 			expdist /= bf->total;
1318 			bf->write_corr += expdist - bf->write_dist;
1319 		}
1320 		bf->write_dist = 0;
1321 	}
1322 	return bf;
1323 }
1324 /*
1325 static inline folder* hdd_getfolder() {
1326 	folder *f,*bf;
1327 	double maxcarry;
1328 	double minavail,maxavail;
1329 	double s,d;
1330 	double pavail;
1331 	int ok;
1332 //	uint64_t minavail;
1333 
1334 	minavail = 0.0;
1335 	maxavail = 0.0;
1336 	maxcarry = 1.0;
1337 	bf = NULL;
1338 	ok = 0;
1339 	for (f=folderhead ; f ; f=f->next) {
1340 		if (f->damaged || f->todel || f->total==0 || f->avail==0 || f->scanstate!=SCST_WORKING) {
1341 			continue;
1342 		}
1343 		if (f->carry >= maxcarry) {
1344 			maxcarry = f->carry;
1345 			bf = f;
1346 		}
1347 		pavail = (double)(f->avail)/(double)(f->total);
1348 		if (ok==0 || minavail>pavail) {
1349 			minavail = pavail;
1350 			ok = 1;
1351 		}
1352 		if (pavail>maxavail) {
1353 			maxavail = pavail;
1354 		}
1355 	}
1356 	if (bf) {
1357 		bf->carry -= 1.0;
1358 		return bf;
1359 	}
1360 	if (maxavail==0.0) {	// no space
1361 		return NULL;
1362 	}
1363 	if (maxavail<0.01) {
1364 		s = 0.0;
1365 	} else {
1366 		s = minavail*0.8;
1367 		if (s<0.01) {
1368 			s = 0.01;
1369 		}
1370 	}
1371 	d = maxavail-s;
1372 	maxcarry = 1.0;
1373 	for (f=folderhead ; f ; f=f->next) {
1374 		if (f->damaged || f->todel || f->total==0 || f->avail==0 || f->scanstate!=SCST_WORKING) {
1375 			continue;
1376 		}
1377 		pavail = (double)(f->avail)/(double)(f->total);
1378 		if (pavail>s) {
1379 			f->carry += ((pavail-s)/d);
1380 		}
1381 		if (f->carry >= maxcarry) {
1382 			maxcarry = f->carry;
1383 			bf = f;
1384 		}
1385 	}
1386 	if (bf) {	// should be always true
1387 		bf->carry -= 1.0;
1388 	}
1389 	return bf;
1390 }
1391 */
/*
 * Walks the whole chunk hash table (under hashlock and testlock) and handles
 * every chunk owned by folder 'f'. Each such chunk gets c->todel = f->todel.
 *
 * rmflag==0: the chunk is reported as new (bit 0x80000000 of the version is
 *            set when the chunk is marked 'todel').
 * rmflag!=0: the chunk is reported as lost; chunks in CH_AVAIL state are
 *            unlinked from the hash table and freed together with their
 *            descriptor, crc table, preserved block and file name.
 *
 * Returns 1 when no chunk of the folder had to be left in place, 0 when at
 * least one chunk was not in CH_AVAIL state during removal.
 */
uint8_t hdd_senddata(folder *f,int rmflag) {
	uint32_t hpos;
	uint8_t fld_todel;
	uint8_t allremoved;
	chunk **link,*c;

	fld_todel = f->todel;
	allremoved = 1;
	zassert(pthread_mutex_lock(&hashlock));
	zassert(pthread_mutex_lock(&testlock));
	for (hpos=0 ; hpos<HASHSIZE ; hpos++) {
		link = &(hashtab[hpos]);
		while ((c=*link)!=NULL) {
			if (c->owner!=f) {
				link = &(c->next);
				continue;
			}
			c->todel = fld_todel;
			if (rmflag==0) {
				hdd_report_new_chunk(c->chunkid,c->version|((c->todel)?0x80000000:0));
				link = &(c->next);
				continue;
			}
			hdd_report_lost_chunk(c->chunkid);
			if (c->state!=CH_AVAIL) {
				// chunk is in use - it has to stay, so the folder can't be removed yet
				allremoved = 0;
				link = &(c->next);
				continue;
			}
			// unlink from the hash chain ('link' already points to the next record)
			*link = c->next;
			if (c->fd>=0) {
				close(c->fd);
				hdd_open_files_handle(OF_AFTER_CLOSE);
			}
			if (c->crc!=NULL) {
#ifdef MMAP_ALLOC
				munmap((void*)(c->crc),4096);
#else
				free(c->crc);
#endif
			}
#ifdef PRESERVE_BLOCK
			if (c->block!=NULL) {
# ifdef MMAP_ALLOC
				munmap((void*)(c->block),MFSBLOCKSIZE);
# else
				free(c->block);
# endif
			}
#endif /* PRESERVE_BLOCK */
			if (c->filename) {
				free(c->filename);
			}
			hdd_remove_chunk_from_test_chain(c,c->owner);
			free(c);
		}
	}
	zassert(pthread_mutex_unlock(&testlock));
	zassert(pthread_mutex_unlock(&hashlock));
	return allremoved;
}
1453 
1454 void* hdd_folder_scan(void *arg);
1455 
/*
 * Periodic maintenance of the folder list (whole body runs under folderlock).
 *
 * Does nothing when 'folderactions' is 0. Otherwise:
 *  1) folders with 'toremove' set (and no rebalance in progress) get their
 *     chunks reported as lost via hdd_senddata(f,1); when that succeeds the
 *     folder is either kept as a chunk-less damaged entry or unlinked and freed,
 *  2) remaining healthy folders are driven through their scan state machine:
 *     scan threads are started/joined, chunks are sent to the master, recent
 *     I/O errors are counted and space usage is refreshed.
 * When anything changed, 'hddspacechanged' is set (under dclock).
 */
void hdd_check_folders(void) {
	folder *f,**fptr;
	uint32_t i;
	double monotonic_time;
	uint32_t err;
	uint8_t enoent;
	int changed;

	monotonic_time = monotonic_seconds();

	changed = 0;
//	syslog(LOG_NOTICE,"check folders ...");

	zassert(pthread_mutex_lock(&folderlock));
	if (folderactions==0) {
		zassert(pthread_mutex_unlock(&folderlock));
//		syslog(LOG_NOTICE,"check folders: disabled");
		return;
	}
//	for (f=folderhead ; f ; f=f->next) {
//		syslog(LOG_NOTICE,"folder: %s, toremove:%u, damaged:%u, todel:%u, scanstate:%u",f->path,f->toremove,f->damaged,f->todel,f->scanstate);
//	}
	// pass 1: folders waiting for removal; fptr always points to the link that leads to 'f'
	fptr = &folderhead;
	while ((f=*fptr)) {
		if (f->toremove && f->rebalance_in_progress==0) {
			switch (f->scanstate) {
			case SCST_SCANINPROGRESS:
				// scan thread is still running - only request termination in this round
				f->scanstate = SCST_SCANTERMINATE;
				break;
			case SCST_SCANFINISHED:
				zassert(pthread_join(f->scanthread,NULL));
				// no break - it's ok !!!
			case SCST_SENDNEEDED:
			case SCST_SCANNEEDED:
				f->scanstate = SCST_WORKING;
				// no break - it's ok !!!
			case SCST_WORKING:
				if (f->toremove==2) {
					f->toremove = 1;
				}
				// hdd_senddata returns 1 only when no chunk of this folder had to be left in place
				if (hdd_senddata(f,1)) {
					f->toremove = 0;
				}
				changed = 1;
				break;
			}
			if (f->toremove==0) { // 0 here means 'removed', so delete it from data structures
				if (f->damaged) {
					// damaged folder stays on the list, only its chunk table is dropped;
					// fptr is not advanced here - the next iteration sees toremove==0 and moves on
					f->chunkcount = 0;
					f->chunktabsize = 0;
					if (f->chunktab) {
						free(f->chunktab);
					}
					f->chunktab = NULL;
				} else {
					// unlink and free; fptr already points to the following folder
					*fptr = f->next;
					syslog(LOG_NOTICE,"folder %s successfully removed",f->path);
					if (f->lfd>=0) {
						close(f->lfd);
					}
					if (f->chunktab) {
						free(f->chunktab);
					}
					free(f->path);
					free(f);
					testerreset = 1;
				}
			} else {
				fptr = &(f->next);
			}
		} else {
			fptr = &(f->next);
		}
	}
	// pass 2: scan state machine, error counting and usage refresh
	for (f=folderhead ; f ; f=f->next) {
		if (f->damaged || f->toremove || (f->rebalance_in_progress==1 && f->scanstate!=SCST_WORKING)) {
			// damaged folders that stay on the list still get their usage refreshed every 60 seconds
			if (f->damaged && f->toremove==0 && f->scanstate==SCST_WORKING && f->lastrefresh+60.0<monotonic_time) {
				hdd_refresh_usage(f);
				f->lastrefresh = monotonic_time;
				changed = 1;
			}
			continue;
		}
		switch (f->scanstate) {
		case SCST_SCANNEEDED:
//			wait_for_scan = 0;
			f->scanstate = SCST_SCANINPROGRESS;
			zassert(main_minthread_create(&(f->scanthread),0,hdd_folder_scan,f));
			break;
		case SCST_SCANFINISHED:
			zassert(pthread_join(f->scanthread,NULL));
			f->scanstate = SCST_WORKING;
			hdd_refresh_usage(f);
			f->needrefresh = 0;
			f->lastrefresh = monotonic_time;
			changed = 1;
			break;
		case SCST_SENDNEEDED:
			hdd_senddata(f,0);
			f->scanstate = SCST_WORKING;
			hdd_refresh_usage(f);
			f->needrefresh = 0;
			f->lastrefresh = monotonic_time;
			changed = 1;
			break;
		case SCST_WORKING:
			// count EIO/EROFS/ENOENT entries of the error table not older than HDDErrorTime
			err = 0;
			enoent = 0;
			for (i=0 ; i<LASTERRSIZE; i++) {
				if (f->lasterrtab[i].monotonic_time+HDDErrorTime>=monotonic_time && (f->lasterrtab[i].errornumber==EIO || f->lasterrtab[i].errornumber==EROFS || f->lasterrtab[i].errornumber==ENOENT)) {
					err++;
					if (f->lasterrtab[i].errornumber==ENOENT) {
						enoent = 1;
					}
				}
			}
			if (err>HDDErrorCount && f->todel<2) {
				// too many errors - mark as damaged and schedule removal (handled by pass 1 of a later call)
				syslog(LOG_WARNING,"%"PRIu32" errors occurred in %"PRIu32" seconds on folder: %s",err,HDDErrorTime,f->path);
				f->toremove = 2;
				f->damaged = 1;
				changed = 1;
			} else if (enoent && err>HDDErrorCount && f->todel>=2) {
				// for todel>=2 the folder is only marked as damaged (no removal), and only when ENOENT was among the errors
				syslog(LOG_WARNING,"%"PRIu32" errors occurred in %"PRIu32" seconds on folder: %s",err,HDDErrorTime,f->path);
				f->damaged = 1;
			} else if (f->needrefresh || f->lastrefresh+60.0<monotonic_time) {
				hdd_refresh_usage(f);
				f->needrefresh = 0;
				f->lastrefresh = monotonic_time;
				changed = 1;
			}
		}
	}
	zassert(pthread_mutex_unlock(&folderlock));
	if (changed) {
		zassert(pthread_mutex_lock(&dclock));
		hddspacechanged = 1;
		zassert(pthread_mutex_unlock(&dclock));
	}
}
1595 
/*
 * Records an I/O error of chunk 'c' in the cyclic error table of its owner
 * folder (chunk id, errno value, wall-clock and monotonic time) and increments
 * the global error counter.
 * The value of errno seen on entry is used for the record and is restored
 * before returning.
 */
static inline void hdd_error_occured(chunk *c) {
	int saved_errno = errno;
	struct timeval now;
	folder *owner;
	uint32_t slot;

	zassert(pthread_mutex_lock(&folderlock));
	gettimeofday(&now,NULL);
	owner = c->owner;
	slot = owner->lasterrindx;
	owner->lasterrtab[slot].chunkid = c->chunkid;
	owner->lasterrtab[slot].errornumber = saved_errno;
	owner->lasterrtab[slot].timestamp = now.tv_sec;
	owner->lasterrtab[slot].monotonic_time = monotonic_seconds();
	// advance the write position cyclically
	slot = (slot+1)%LASTERRSIZE;
	owner->lasterrindx = slot;
	zassert(pthread_mutex_unlock(&folderlock));

	zassert(pthread_mutex_lock(&dclock));
	errorcounter++;
	zassert(pthread_mutex_unlock(&dclock));

	errno = saved_errno;
}
1620 
1621 
1622 /* interface */
1623 
// soft limit of chunk records per batch: the next_list_count/next_list_data loops stop taking further hash buckets once this many records were visited
#define CHUNKS_CUT_COUNT 10000
// index of the next hashtab bucket to be written out by hdd_get_chunks_next_list_data
static uint32_t hdd_get_chunks_pos = 0;
// signalled by hdd_get_chunks_end; hdd_get_chunks_begin waits on it while a partial-mode enumeration is in progress
static pthread_cond_t hdd_get_chunks_cond = PTHREAD_COND_INITIALIZER;
// incremented before every wait in hdd_get_chunks_begin, decremented with every signal sent by hdd_get_chunks_end
static uint8_t hdd_get_chunks_waiting = 0;
// 'partialmode' value of the enumeration in progress (set by hdd_get_chunks_begin, cleared by hdd_get_chunks_end)
static uint8_t hdd_get_chunks_partialmode = 0;
1629 
hdd_get_chunks_begin(uint8_t partialmode)1630 void hdd_get_chunks_begin(uint8_t partialmode) {
1631 	zassert(pthread_mutex_lock(&hashlock));
1632 	hdd_get_chunks_pos = 0;
1633 	while (hdd_get_chunks_partialmode) {
1634 		hdd_get_chunks_waiting++;
1635 		zassert(pthread_cond_wait(&hdd_get_chunks_cond,&hashlock));
1636 	}
1637 	hdd_get_chunks_partialmode = partialmode;
1638 	if (partialmode) {
1639 		zassert(pthread_mutex_unlock(&hashlock));
1640 	}
1641 }
1642 
hdd_get_chunks_end()1643 void hdd_get_chunks_end() {
1644 	if (hdd_get_chunks_partialmode) {
1645 		zassert(pthread_mutex_lock(&hashlock));
1646 		hdd_get_chunks_partialmode = 0;
1647 		if (hdd_get_chunks_waiting) {
1648 			zassert(pthread_cond_signal(&hdd_get_chunks_cond));
1649 			hdd_get_chunks_waiting--;
1650 		}
1651 	}
1652 	zassert(pthread_mutex_unlock(&hashlock));
1653 }
1654 
hdd_get_chunks_next_list_count()1655 uint32_t hdd_get_chunks_next_list_count() {
1656 	uint32_t res = 0;
1657 	uint32_t i = 0;
1658 	chunk *c;
1659 	if (hdd_get_chunks_partialmode) {
1660 		zassert(pthread_mutex_lock(&hashlock));
1661 	}
1662 	while (res<CHUNKS_CUT_COUNT && hdd_get_chunks_pos+i<HASHSIZE) {
1663 		for (c=hashtab[hdd_get_chunks_pos+i] ; c ; c=c->next) {
1664 			res++;
1665 		}
1666 		i++;
1667 	}
1668 	if (res==0 && hdd_get_chunks_partialmode) {
1669 		zassert(pthread_mutex_unlock(&hashlock));
1670 	}
1671 	return res;
1672 }
1673 
hdd_get_chunks_next_list_data(uint8_t * buff)1674 void hdd_get_chunks_next_list_data(uint8_t *buff) {
1675 	uint32_t res = 0;
1676 	uint32_t v;
1677 	chunk *c;
1678 	while (res<CHUNKS_CUT_COUNT && hdd_get_chunks_pos<HASHSIZE) {
1679 		for (c=hashtab[hdd_get_chunks_pos] ; c ; c=c->next) {
1680 			put64bit(&buff,c->chunkid);
1681 			v = c->version;
1682 			if (c->todel) {
1683 				v |= 0x80000000;
1684 			}
1685 			put32bit(&buff,v);
1686 			res++;
1687 		}
1688 		hdd_get_chunks_pos++;
1689 	}
1690 	if (hdd_get_chunks_partialmode) {
1691 		zassert(pthread_mutex_unlock(&hashlock));
1692 	}
1693 }
1694 
1695 /*
1696 // for old register packets - deprecated
1697 uint32_t hdd_get_chunks_count() {
1698 	uint32_t res = 0;
1699 	uint32_t i;
1700 	chunk *c;
1701 	zassert(pthread_mutex_lock(&hashlock));
1702 	for (i=0 ; i<HASHSIZE ; i++) {
1703 		for (c=hashtab[i] ; c ; c=c->next) {
1704 			res++;
1705 		}
1706 	}
1707 	return res;
1708 }
1709 
1710 void hdd_get_chunks_data(uint8_t *buff) {
1711 	uint32_t i,v;
1712 	chunk *c;
1713 	if (buff) {
1714 		for (i=0 ; i<HASHSIZE ; i++) {
1715 			for (c=hashtab[i] ; c ; c=c->next) {
1716 				put64bit(&buff,c->chunkid);
1717 				v = c->version;
1718 				if (c->owner->todel) {
1719 					v |= 0x80000000;
1720 				}
1721 				put32bit(&buff,v);
1722 			}
1723 		}
1724 	}
1725 }
1726 */
1727 
1728 /*
1729 uint32_t get_changedchunkscount() {
1730 	uint32_t res = 0;
1731 	folder *f;
1732 	chunk *c;
1733 	if (somethingchanged==0) {
1734 		return 0;
1735 	}
1736 	for (f=folderhead ; f ; f=f->next) {
1737 		for (c=f->chunkhead ; c ; c=c->next) {
1738 			if (c->lengthchanged) {
1739 				res++;
1740 			}
1741 		}
1742 	}
1743 	return res;
1744 }
1745 
1746 void fill_changedchunksinfo(uint8_t *buff) {
1747 	folder *f;
1748 	chunk *c;
1749 	for (f=folderhead ; f ; f=f->next) {
1750 		for (c=f->chunkhead ; c ; c=c->next) {
1751 			if (c->lengthchanged) {
1752 				put64bit(&buff,c->chunkid);
1753 				put32bit(&buff,c->version);
1754 				c->lengthchanged = 0;
1755 			}
1756 		}
1757 	}
1758 	somethingchanged = 0;
1759 }
1760 */
1761 
hdd_get_space(uint64_t * usedspace,uint64_t * totalspace,uint32_t * chunkcount,uint64_t * tdusedspace,uint64_t * tdtotalspace,uint32_t * tdchunkcount)1762 void hdd_get_space(uint64_t *usedspace,uint64_t *totalspace,uint32_t *chunkcount,uint64_t *tdusedspace,uint64_t *tdtotalspace,uint32_t *tdchunkcount) {
1763 	folder *f;
1764 	uint64_t avail,total;
1765 	uint64_t tdavail,tdtotal;
1766 	uint32_t chunks,tdchunks;
1767 	zassert(pthread_mutex_lock(&folderlock));
1768 	avail = total = tdavail = tdtotal = 0ULL;
1769 	chunks = tdchunks = 0;
1770 	for (f=folderhead ; f ; f=f->next) {
1771 		if (f->damaged || f->toremove) {
1772 			continue;
1773 		}
1774 		if (f->todel==0) {
1775 			if (f->scanstate==SCST_WORKING) {
1776 				avail += f->avail;
1777 				total += f->total;
1778 			}
1779 			chunks += f->chunkcount;
1780 		} else {
1781 			if (f->scanstate==SCST_WORKING) {
1782 				tdavail += f->avail;
1783 				tdtotal += f->total;
1784 			}
1785 			tdchunks += f->chunkcount;
1786 		}
1787 	}
1788 	zassert(pthread_mutex_unlock(&folderlock));
1789 	*usedspace = total-avail;
1790 	*totalspace = total;
1791 	*chunkcount = chunks;
1792 	*tdusedspace = tdtotal-tdavail;
1793 	*tdtotalspace = tdtotal;
1794 	*tdchunkcount = tdchunks;
1795 }
1796 
/*
 * Allocates a zero-filled 4096-byte crc table for chunk 'c' and stores it in
 * c->crc (anonymous mmap with MMAP_ALLOC, malloc otherwise).
 */
static inline void chunk_emptycrc(chunk *c) {
	uint8_t *crctab;

#ifdef MMAP_ALLOC
	crctab = (uint8_t*)mmap(NULL,4096,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
#else
	crctab = (uint8_t*)malloc(4096);
#endif
	c->crc = crctab;
	passert(c->crc);
	memset(c->crc,0,4096);	// make valgrind happy
}
1806 
/*
 * Verifies the header of the chunk file open on c->fd and loads its crc table.
 *
 * The first 20 bytes must contain the signature MFSSIGNATURE "C 1.0" followed
 * by chunk id (64 bits) and version (32 bits) equal to the values stored in 'c'.
 * Then a 4096-byte crc table is allocated in c->crc and read from file offset
 * CHUNKHDRCRC.
 *
 * Returns STATUS_OK (errno set to 0) or ERROR_IO. On read errors errno of the
 * failed call is preserved; on header mismatch errno is set to 0. After a failed
 * crc read the table is released and c->crc is NULL.
 */
static inline int chunk_readcrc(chunk *c) {
	uint8_t hdr[20];
	const uint8_t *rptr;
	uint64_t hdrchunkid;
	uint32_t hdrversion;
	int errmem;
	int ret;

#ifdef USE_PIO
	if (pread(c->fd,hdr,20,0)!=20) {
#else /* USE_PIO */
	lseek(c->fd,0,SEEK_SET);
	if (read(c->fd,hdr,20)!=20) {
#endif /* USE_PIO */
		errmem = errno;
		mfs_arg_errlog_silent(LOG_WARNING,"chunk_readcrc: file:%s - read error",c->filename);
		errno = errmem;
		return ERROR_IO;
	}

	if (memcmp(hdr,MFSSIGNATURE "C 1.0",8)!=0) {
		syslog(LOG_WARNING,"chunk_readcrc: file:%s - wrong header",c->filename);
		errno = 0;
		return ERROR_IO;
	}

	// id and version follow the 8-byte signature
	rptr = hdr+8;
	hdrchunkid = get64bit(&rptr);
	hdrversion = get32bit(&rptr);
	if (c->chunkid!=hdrchunkid || c->version!=hdrversion) {
		syslog(LOG_WARNING,"chunk_readcrc: file:%s - wrong id/version in header (%016"PRIX64"_%08"PRIX32")",c->filename,hdrchunkid,hdrversion);
		errno = 0;
		return ERROR_IO;
	}

#ifdef MMAP_ALLOC
	c->crc = (uint8_t*)mmap(NULL,4096,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
#else
	c->crc = (uint8_t*)malloc(4096);
#endif
	passert(c->crc);

#ifdef USE_PIO
	ret = pread(c->fd,c->crc,4096,CHUNKHDRCRC);
#else /* USE_PIO */
	lseek(c->fd,CHUNKHDRCRC,SEEK_SET);
	ret = read(c->fd,c->crc,4096);
#endif /* USE_PIO */
	if (ret==4096) {
		hdd_stats_read(4096);
		errno = 0;
		return STATUS_OK;
	}

	// failed or short read - give the table back
	errmem = errno;
	mfs_arg_errlog_silent(LOG_WARNING,"chunk_readcrc: file:%s - read error",c->filename);
#ifdef MMAP_ALLOC
	munmap((void*)(c->crc),4096);
#else
	free(c->crc);
#endif
	c->crc = NULL;
	errno = errmem;
	return ERROR_IO;
}
1870 
/*
 * Releases the 4096-byte crc table of chunk 'c' (munmap with MMAP_ALLOC, free
 * otherwise) and sets c->crc to NULL.
 */
static inline void chunk_freecrc(chunk *c) {
	void *crctab;

	crctab = (void*)(c->crc);
	c->crc = NULL;
#ifdef MMAP_ALLOC
	munmap(crctab,4096);
#else
	free(crctab);
#endif
}
1879 
/*
 * Writes the 4096-byte crc table of chunk 'c' to its file at offset CHUNKHDRCRC.
 * The owner folder is first flagged with needrefresh=1 (under folderlock).
 * Returns STATUS_OK, or ERROR_IO with errno of the failed write preserved.
 */
static inline int chunk_writecrc(chunk *c) {
	int ret;
	int errmem;

	zassert(pthread_mutex_lock(&folderlock));
	c->owner->needrefresh = 1;
	zassert(pthread_mutex_unlock(&folderlock));

#ifdef USE_PIO
	ret = pwrite(c->fd,c->crc,4096,CHUNKHDRCRC);
#else /* USE_PIO */
	lseek(c->fd,CHUNKHDRCRC,SEEK_SET);
	ret = write(c->fd,c->crc,4096);
#endif /* USE_PIO */
	if (ret==4096) {
		hdd_stats_write(4096);
		return STATUS_OK;
	}

	errmem = errno;
	mfs_arg_errlog_silent(LOG_WARNING,"chunk_writecrc: file:%s - write error",c->filename);
	errno = errmem;
	return ERROR_IO;
}
1900 
hdd_test_show_chunks(void)1901 void hdd_test_show_chunks(void) {
1902 	uint32_t hashpos;
1903 	chunk *c;
1904 	zassert(pthread_mutex_lock(&hashlock));
1905 	for (hashpos=0 ; hashpos<HASHSIZE ; hashpos++) {
1906 		for (c=hashtab[hashpos] ; c ; c=c->next) {
1907 			printf("chunk id:%"PRIu64" version:%"PRIu32" state:%u\n",c->chunkid,c->version,c->state);
1908 		}
1909 	}
1910 	zassert(pthread_mutex_unlock(&hashlock));
1911 }
1912 
1913 #if 0
1914 void hdd_test_show_openedchunks(void) {
1915 	dopchunk *cc,*tcc;
1916 	uint32_t dhashpos;
1917 	chunk *c;
1918 	double now;
1919 
1920 	printf("lock doplock\n");
1921 	if (pthread_mutex_lock(&doplock)<0) {
1922 		printf("lock error: %u\n",errno);
1923 	}
1924 	printf("lock ndoplock\n");
1925 	if (pthread_mutex_lock(&ndoplock)<0) {
1926 		printf("lock error: %u\n",errno);
1927 	}
1928 /* append new chunks */
1929 	cc = newdopchunks;
1930 	while (cc) {
1931 		dhashpos = DHASHPOS(cc->chunkid);
1932 		for (tcc=dophashtab[dhashpos] ; tcc && tcc->chunkid!=cc->chunkid ; tcc=tcc->next) {}
1933 		if (tcc) {	// found - ignore
1934 			tcc = cc;
1935 			cc = cc->next;
1936 			free(tcc);
1937 		} else {	// not found - add
1938 			tcc = cc;
1939 			cc = cc->next;
1940 			tcc->next = dophashtab[dhashpos];
1941 			dophashtab[dhashpos] = tcc;
1942 		}
1943 	}
1944 	newdopchunks = NULL;
1945 	printf("unlock ndoplock\n");
1946 	if (pthread_mutex_unlock(&ndoplock)<0) {
1947 		printf("unlock error: %u\n",errno);
1948 	}
1949 /* show all */
1950 	now = monotonic_seconds();
1951 	for (dhashpos=0 ; dhashpos<DHASHSIZE ; dhashpos++) {
1952 		for (cc=dophashtab[dhashpos]; cc ; cc=cc->next) {
1953 			c = hdd_chunk_find(cc->chunkid);
1954 			if (c==NULL) {	// no chunk - delete entry
1955 				printf("id: %"PRIu64" - chunk doesn't exist\n",cc->chunkid);
1956 			} else if (c->crcrefcount>0) {	// io in progress - skip entry
1957 				printf("id: %"PRIu64" - chunk in use (refcount:%u)\n",cc->chunkid,c->crcrefcount);
1958 				hdd_chunk_release(c);
1959 			} else {
1960 #ifdef PRESERVE_BLOCK
1961 				double fdsec,crcsec,blocksec;
1962 				fdsec = c->opento;
1963 				crcsec = c->crcto;
1964 				blocksec = c->blockto;
1965 				if (fdsec>0.0) {
1966 					fdsec -= now;
1967 				}
1968 				if (crcsec>0.0) {
1969 					crcsec -= now;
1970 				}
1971 				if (blocksec>0.0) {
1972 					blocksec -= now;
1973 				}
1974 				printf("id: %"PRIu64" - fd:%d (delay:%.3lfs) crc:%p (delay:%.3lfs) block:%p,blockno:%u (delay:%.3lfs)\n",cc->chunkid,c->fd,fdsec,(void*)(c->crc),crcsec,c->block,c->blockno,blocksec);
1975 #else /* PRESERVE_BLOCK */
1976 				double fdsec,crcsec;
1977 				fdsec = c->opento;
1978 				crcsec = c->crcto;
1979 				if (fdsec>0.0) {
1980 					fdsec -= now;
1981 				}
1982 				if (crcsec>0.0) {
1983 					crcsec -= now;
1984 				}
1985 				printf("id: %"PRIu64" - fd:%d (delay:%.3lfs) crc:%p (delay:%.3lfs)\n",cc->chunkid,c->fd,fdsec,(void*)(c->crc),crcsec);
1986 #endif /* PRESERVE_BLOCK */
1987 				hdd_chunk_release(c);
1988 			}
1989 		}
1990 	}
1991 	printf("unlock doplock\n");
1992 	if (pthread_mutex_unlock(&doplock)<0) {
1993 		printf("unlock error: %u\n",errno);
1994 	}
1995 }
1996 #endif
1997 
/*
 * Delayed release of per-chunk resources (runs under doplock).
 *
 * First the entries queued in 'newdopchunks' are merged into 'dophashtab'
 * (duplicates are freed). Then, for every entry whose chunk can be obtained
 * with hdd_chunk_tryfind and has no I/O in progress (crcrefcount==0):
 *  - data is fsynced when the chunk needs it and DoFsyncBeforeClose is set,
 *  - the preserved block, the file descriptor and the crc table are released
 *    when their timeouts (blockto / opento / crcto) have passed,
 *  - the entry is dropped once the chunk holds none of these resources.
 * Entries of chunks that no longer exist are dropped; locked or busy chunks are
 * left for the next call.
 */
void hdd_delayed_ops() {
	dopchunk **ccp,*cc,*tcc;
	uint32_t dhashpos;
	chunk *c;
	uint64_t ts,te;
//	int status;

//	printf("delayed ops: before lock\n");
	zassert(pthread_mutex_lock(&doplock));
	zassert(pthread_mutex_lock(&ndoplock));
//	printf("delayed ops: after lock\n");
/* append new chunks */
	cc = newdopchunks;
	while (cc) {
		dhashpos = DHASHPOS(cc->chunkid);
		for (tcc=dophashtab[dhashpos] ; tcc && tcc->chunkid!=cc->chunkid ; tcc=tcc->next) {}
		if (tcc) {	// found - ignore
			tcc = cc;
			cc = cc->next;
			free(tcc);
		} else {	// not found - add
			tcc = cc;
			cc = cc->next;
			tcc->next = dophashtab[dhashpos];
			dophashtab[dhashpos] = tcc;
		}
	}
	newdopchunks = NULL;
	zassert(pthread_mutex_unlock(&ndoplock));
/* check all */
//	printf("delayed ops: before loop\n");
	for (dhashpos=0 ; dhashpos<DHASHSIZE ; dhashpos++) {
		// ccp always points to the link that leads to 'cc', so an entry can be unlinked in place
		ccp = dophashtab+dhashpos;
		while ((cc=*ccp)) {
//			printf("find chunk: %llu\n",cc->chunkid);
			c = hdd_chunk_tryfind(cc->chunkid);
//			if (c!=NULL && c!=CHUNKLOCKED) {
//				printf("found chunk: %llu (c->state:%u c->crcrefcount:%u)\n",cc->chunkid,c->state,c->crcrefcount);
//			}
//			c = hdd_chunk_find(cc->chunkid);
			if (c==NULL) {	// no chunk - delete entry
				*ccp = cc->next;
				free(cc);
			} else if (c==CHUNKLOCKED) {	// locked chunk - just ignore
				ccp = &(cc->next);
			} else if (c->crcrefcount>0) {	// io in progress - skip entry
				hdd_chunk_release(c);
				ccp = &(cc->next);
			} else {
				double now;
				if (c->fsyncneeded && DoFsyncBeforeClose) {
					// time of the sync is measured and passed to hdd_stats_datafsync
					ts = monotonic_nseconds();
#ifdef F_FULLFSYNC
					if (fcntl(c->fd,F_FULLFSYNC)<0) {
						hdd_error_occured(c);
						mfs_arg_errlog_silent(LOG_WARNING,"hdd_delayed_ops: file:%s - fsync (via fcntl) error",c->filename);
						hdd_report_damaged_chunk(c->chunkid);
					}
#else
					if (fsync(c->fd)<0) {
						hdd_error_occured(c);
						mfs_arg_errlog_silent(LOG_WARNING,"hdd_delayed_ops: file:%s - fsync (direct call) error",c->filename);
						hdd_report_damaged_chunk(c->chunkid);
					}
#endif
					te = monotonic_nseconds();
					hdd_stats_datafsync(c->owner,te-ts);
					c->fsyncneeded = 0;
				}
				now = monotonic_seconds();
#ifdef PRESERVE_BLOCK
//				printf("block\n");
				// preserved data block - release after its timeout
				if (c->block!=NULL && c->blockto<now) {
# ifdef MMAP_ALLOC
					munmap((void*)(c->block),MFSBLOCKSIZE);
# else
					free(c->block);
# endif
					c->block = NULL;
					c->blockno = 0xFFFF;
					c->blockto = 0.0;
				}
#endif /* PRESERVE_BLOCK */
//				printf("descriptor\n");
				// open descriptor - close after its timeout; the descriptor is forgotten even when close fails
				if (c->fd>=0 && c->opento<now) {
					if (close(c->fd)<0) {
						hdd_error_occured(c);	// uses and preserves errno !!!
						mfs_arg_errlog_silent(LOG_WARNING,"hdd_delayed_ops: file:%s - close error",c->filename);
						hdd_report_damaged_chunk(c->chunkid);
					}
					c->fd = -1;
					c->opento = 0.0;
					hdd_open_files_handle(OF_AFTER_CLOSE);
				}
//				printf("crc\n");
				// crc table - release after its timeout; changes still pending at this point are only logged, not written
				if (c->crc!=NULL && c->crcto<now) {
					if (c->crcchanged) {
						syslog(LOG_ERR,"serious error: crc changes lost (chunk:%016"PRIX64"_%08"PRIX32")",c->chunkid,c->version);
					}
//					printf("chunk %llu - free crc record\n",c->chunkid);
					chunk_freecrc(c);
					c->crcto = 0.0;
				}
				// nothing left to release later - the entry is not needed any more
#ifdef PRESERVE_BLOCK
				if (c->fd<0 && c->crc==NULL && c->block==NULL) {
#else /* PRESERVE_BLOCK */
				if (c->fd<0 && c->crc==NULL) {
#endif /* PRESERVE_BLOCK */
					*ccp = cc->next;
					free(cc);
				} else {
					ccp = &(cc->next);
				}
				hdd_chunk_release(c);
			}
		}
	}
//	printf("delayed ops: after loop , before unlock\n");
	zassert(pthread_mutex_unlock(&doplock));
//	printf("delayed ops: after unlock\n");
}
2119 
2120 static int hdd_io_begin(chunk *c,int newflag) {
2121 	dopchunk *cc;
2122 	int status;
2123 	int add;
2124 
2125 //	sassert(c->state==CH_LOCKED||c->state==CH_TOBEDELETED);
2126 
2127 //	syslog(LOG_NOTICE,"chunk: %"PRIu64" - before io",c->chunkid);
2128 	hdd_chunk_testmove(c);
2129 	if (c->crcrefcount==0) {
2130 #ifdef PRESERVE_BLOCK
2131 		add = (c->fd<0 && c->crc==NULL && c->block==NULL);
2132 #else /* PRESERVE_BLOCK */
2133 		add = (c->fd<0 && c->crc==NULL);
2134 #endif /* PRESERVE_BLOCK */
2135 		if (c->fd<0) {
2136 			hdd_open_files_handle(OF_BEFORE_OPEN);
2137 			if (newflag) {
2138 				c->fd = open(c->filename,O_RDWR | O_TRUNC | O_CREAT,0666);
2139 			} else {
2140 				if (c->todel<2) {
2141 					c->fd = open(c->filename,O_RDWR);
2142 				} else {
2143 					c->fd = open(c->filename,O_RDONLY);
2144 				}
2145 			}
2146 			if (c->fd<0) {
2147 				int errmem = errno;
2148 				mfs_arg_errlog_silent(LOG_WARNING,"hdd_io_begin: file:%s - open error",c->filename);
2149 				hdd_open_files_handle(OF_AFTER_CLOSE);
2150 				errno = errmem;
2151 				return ERROR_IO;
2152 			}
2153 			c->fsyncneeded = 0;
2154 		}
2155 		if (c->crc==NULL) {
2156 			if (newflag) {
2157 				chunk_emptycrc(c);
2158 			} else {
2159 				status = chunk_readcrc(c);
2160 				if (status!=STATUS_OK) {
2161 					int errmem = errno;
2162 					if (add) {
2163 						close(c->fd);
2164 						c->fd=-1;
2165 						hdd_open_files_handle(OF_AFTER_CLOSE);
2166 					}
2167 					mfs_arg_errlog_silent(LOG_WARNING,"hdd_io_begin: file:%s - read error",c->filename);
2168 					errno = errmem;
2169 					return status;
2170 				}
2171 			}
2172 			c->crcchanged = 0;
2173 		}
2174 #ifdef PRESERVE_BLOCK
2175 		if (c->block==NULL) {
2176 # ifdef MMAP_ALLOC
2177 			c->block = (uint8_t*)mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
2178 # else
2179 			c->block = (uint8_t*)malloc(MFSBLOCKSIZE);
2180 # endif
2181 //			syslog(LOG_WARNING,"chunk: %016"PRIX64", block:%p",c->chunkid,c->block);
2182 			passert(c->block);
2183 			c->blockno = 0xFFFF;
2184 		}
2185 #endif /* PRESERVE_BLOCK */
2186 		if (add) {
2187 			cc = malloc(sizeof(dopchunk));
2188 			passert(cc);
2189 			cc->chunkid = c->chunkid;
2190 			zassert(pthread_mutex_lock(&ndoplock));
2191 			cc->next = newdopchunks;
2192 			newdopchunks = cc;
2193 			zassert(pthread_mutex_unlock(&ndoplock));
2194 		}
2195 	}
2196 	c->crcrefcount++;
2197 	errno = 0;
2198 	return STATUS_OK;
2199 }
2200 
2201 static int hdd_io_end(chunk *c) {
2202 	int status;
2203 //	uint64_t ts,te;
2204 
2205 //	sassert(c->state==CH_LOCKED||c->state==CH_TOBEDELETED);
2206 
2207 //	syslog(LOG_NOTICE,"chunk: %"PRIu64" - after io",c->chunkid);
2208 	if (c->crcchanged) {
2209 		status = chunk_writecrc(c);
2210 		c->crcchanged = 0;
2211 		if (status!=STATUS_OK) {
2212 			int errmem = errno;
2213 			mfs_arg_errlog_silent(LOG_WARNING,"hdd_io_end: file:%s - write error",c->filename);
2214 			errno = errmem;
2215 			return status;
2216 		}
2217 		c->fsyncneeded = 1;
2218 	}
2219 	c->crcrefcount--;
2220 	if (c->crcrefcount==0) {
2221 		double now = monotonic_seconds();
2222 /*
2223 		if (OPEN_DELAY==0) {
2224 			if (c->fsyncneeded) {
2225 				ts = monotonic_nseconds();
2226 #ifdef F_FULLFSYNC
2227 				if (fcntl(c->fd,F_FULLFSYNC)<0) {
2228 					int errmem = errno;
2229 					mfs_arg_errlog_silent(LOG_WARNING,"hdd_io_end: file:%s - fsync (via fcntl) error",c->filename);
2230 					errno = errmem;
2231 					return ERROR_IO;
2232 				}
2233 #else
2234 				if (fsync(c->fd)<0) {
2235 					int errmem = errno;
2236 					mfs_arg_errlog_silent(LOG_WARNING,"hdd_io_end: file:%s - fsync (direct call) error",c->filename);
2237 					errno = errmem;
2238 					return ERROR_IO;
2239 				}
2240 #endif
2241 				te = monotonic_nseconds();
2242 				hdd_stats_datafsync(c->owner,te-ts);
2243 				c->fsyncneeded = 0;
2244 			}
2245 			if (close(c->fd)<0) {
2246 				int errmem = errno;
2247 				c->fd = -1;
2248 				mfs_arg_errlog_silent(LOG_WARNING,"hdd_io_end: file:%s - close error",c->filename);
2249 				errno = errmem;
2250 				return ERROR_IO;
2251 			}
2252 			c->fd = -1;
2253 		} else {
2254 */
2255 		c->opento = now + OPEN_DELAY;
2256 //		}
2257 		c->crcto = now + CRC_DELAY;
2258 #ifdef PRESERVE_BLOCK
2259 		c->blockto = now + BLOCK_DELAY;
2260 #endif
2261 	}
2262 	errno = 0;
2263 	return STATUS_OK;
2264 }
2265 
2266 
2267 
2268 
2269 /* I/O operations */
2270 
2271 int hdd_open(uint64_t chunkid,uint32_t version) {
2272 	int status;
2273 	chunk *c;
2274 	c = hdd_chunk_find(chunkid);
2275 	if (c==NULL) {
2276 		return ERROR_NOCHUNK;
2277 	}
2278 	if (c->version!=version && version>0) {
2279 		hdd_chunk_release(c);
2280 		return ERROR_WRONGVERSION;
2281 	}
2282 	status = hdd_io_begin(c,0);
2283 	if (status!=STATUS_OK) {
2284 		hdd_error_occured(c);	// uses and preserves errno !!!
2285 		hdd_report_damaged_chunk(chunkid);
2286 	}
2287 	hdd_chunk_release(c);
2288 //	if (status==STATUS_OK) {
2289 //		syslog(LOG_NOTICE,"chunk %08"PRIX64" opened",chunkid);
2290 //	}
2291 	return status;
2292 }
2293 
2294 int hdd_close(uint64_t chunkid) {
2295 	int status;
2296 	chunk *c;
2297 	c = hdd_chunk_find(chunkid);
2298 	if (c==NULL) {
2299 		return ERROR_NOCHUNK;
2300 	}
2301 	status = hdd_io_end(c);
2302 	if (status!=STATUS_OK) {
2303 		hdd_error_occured(c);	// uses and preserves errno !!!
2304 		hdd_report_damaged_chunk(chunkid);
2305 	}
2306 	hdd_chunk_release(c);
2307 //	if (status==STATUS_OK) {
2308 //		syslog(LOG_NOTICE,"chunk %08"PRIX64" closed",chunkid);
2309 //	}
2310 	return status;
2311 }
2312 
2313 int hdd_read(uint64_t chunkid,uint32_t version,uint16_t blocknum,uint8_t *buffer,uint32_t offset,uint32_t size,uint8_t *crcbuff) {
2314 	chunk *c;
2315 	int ret;
2316 	int error;
2317 	const uint8_t *rcrcptr;
2318 	uint32_t crc,bcrc,precrc,postcrc,combinedcrc;
2319 	uint64_t ts,te;
2320 #ifndef PRESERVE_BLOCK
2321 	uint8_t *blockbuffer;
2322 	blockbuffer = pthread_getspecific(blockbufferkey);
2323 	if (blockbuffer==NULL) {
2324 # ifdef MMAP_ALLOC
2325 		blockbuffer = mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
2326 # else
2327 		blockbuffer = malloc(MFSBLOCKSIZE);
2328 # endif
2329 		passert(blockbuffer);
2330 		zassert(pthread_setspecific(blockbufferkey,blockbuffer));
2331 	}
2332 #endif /* PRESERVE_BLOCK */
2333 	c = hdd_chunk_find(chunkid);
2334 	if (c==NULL) {
2335 		return ERROR_NOCHUNK;
2336 	}
2337 	if (c->version!=version && version>0) {
2338 		hdd_chunk_release(c);
2339 		return ERROR_WRONGVERSION;
2340 	}
2341 	if (blocknum>=MFSBLOCKSINCHUNK) {
2342 		hdd_chunk_release(c);
2343 		return ERROR_BNUMTOOBIG;
2344 	}
2345 	if (size>MFSBLOCKSIZE) {
2346 		hdd_chunk_release(c);
2347 		return ERROR_WRONGSIZE;
2348 	}
2349 	if ((offset>=MFSBLOCKSIZE) || (offset+size>MFSBLOCKSIZE)) {
2350 		hdd_chunk_release(c);
2351 		return ERROR_WRONGOFFSET;
2352 	}
2353 	if (blocknum>=c->blocks) {
2354 		memset(buffer,0,size);
2355 		if (size==MFSBLOCKSIZE) {
2356 			crc = emptyblockcrc;
2357 		} else {
2358 			crc = mycrc32_zeroblock(0,size);
2359 		}
2360 		put32bit(&crcbuff,crc);
2361 		hdd_chunk_release(c);
2362 		return STATUS_OK;
2363 	}
2364 	if (offset==0 && size==MFSBLOCKSIZE) {
2365 #ifdef PRESERVE_BLOCK
2366 		if (c->blockno==blocknum) {
2367 			memcpy(buffer,c->block,MFSBLOCKSIZE);
2368 			ret = MFSBLOCKSIZE;
2369 			error = 0;
2370 		} else {
2371 #endif /* PRESERVE_BLOCK */
2372 		ts = monotonic_nseconds();
2373 #ifdef USE_PIO
2374 		ret = pread(c->fd,buffer,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS));
2375 #else /* USE_PIO */
2376 		lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS),SEEK_SET);
2377 		ret = read(c->fd,buffer,MFSBLOCKSIZE);
2378 #endif /* USE_PIO */
2379 		error = errno;
2380 		te = monotonic_nseconds();
2381 		hdd_stats_dataread(c->owner,MFSBLOCKSIZE,te-ts);
2382 #ifdef PRESERVE_BLOCK
2383 			c->blockno = blocknum;
2384 			memcpy(c->block,buffer,MFSBLOCKSIZE);
2385 		}
2386 #endif /* PRESERVE_BLOCK */
2387 		crc = mycrc32(0,buffer,MFSBLOCKSIZE);
2388 		rcrcptr = (c->crc)+(4*blocknum);
2389 		bcrc = get32bit(&rcrcptr);
2390 		if (bcrc!=crc) {
2391 			errno = error;
2392 			hdd_error_occured(c);	// uses and preserves errno !!!
2393 			syslog(LOG_WARNING,"read_block_from_chunk: file:%s - crc error",c->filename);
2394 			hdd_report_damaged_chunk(chunkid);
2395 			hdd_chunk_release(c);
2396 			return ERROR_CRC;
2397 		}
2398 		if (ret!=MFSBLOCKSIZE) {
2399 			errno = error;
2400 			hdd_error_occured(c);	// uses and preserves errno !!!
2401 			mfs_arg_errlog_silent(LOG_WARNING,"read_block_from_chunk: file:%s - read error",c->filename);
2402 			hdd_report_damaged_chunk(chunkid);
2403 			hdd_chunk_release(c);
2404 			return ERROR_IO;
2405 		}
2406 	} else {
2407 #ifdef PRESERVE_BLOCK
2408 		if (c->blockno != blocknum) {
2409 			ts = monotonic_nseconds();
2410 #ifdef USE_PIO
2411 			ret = pread(c->fd,c->block,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS));
2412 #else /* USE_PIO */
2413 			lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS),SEEK_SET);
2414 			ret = read(c->fd,c->block,MFSBLOCKSIZE);
2415 #endif /* USE_PIO */
2416 			error = errno;
2417 			te = monotonic_nseconds();
2418 			hdd_stats_dataread(c->owner,MFSBLOCKSIZE,te-ts);
2419 			c->blockno = blocknum;
2420 		} else {
2421 			ret = MFSBLOCKSIZE;
2422 			error = 0;
2423 		}
2424 		precrc = mycrc32(0,c->block,offset);
2425 		crc = mycrc32(0,c->block+offset,size);
2426 		postcrc = mycrc32(0,c->block+offset+size,MFSBLOCKSIZE-(offset+size));
2427 #else /* PRESERVE_BLOCK */
2428 		ts = monotonic_nseconds();
2429 #ifdef USE_PIO
2430 		ret = pread(c->fd,blockbuffer,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS));
2431 #else /* USE_PIO */
2432 		lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS),SEEK_SET);
2433 		ret = read(c->fd,blockbuffer,MFSBLOCKSIZE);
2434 		error = errno;
2435 #endif /* USE_PIO */
2436 		te = monotonic_nseconds();
2437 		hdd_stats_dataread(c->owner,MFSBLOCKSIZE,te-ts);
2438 //		crc = mycrc32(0,blockbuffer+offset,size);	// first calc crc for piece
2439 		precrc = mycrc32(0,blockbuffer,offset);
2440 		crc = mycrc32(0,blockbuffer+offset,size);
2441 		postcrc = mycrc32(0,blockbuffer+offset+size,MFSBLOCKSIZE-(offset+size));
2442 #endif /* PRESERVE_BLOCK */
2443 		if (offset==0) {
2444 			combinedcrc = mycrc32_combine(crc,postcrc,MFSBLOCKSIZE-(offset+size));
2445 		} else {
2446 			combinedcrc = mycrc32_combine(precrc,crc,size);
2447 			if ((offset+size)<MFSBLOCKSIZE) {
2448 				combinedcrc = mycrc32_combine(combinedcrc,postcrc,MFSBLOCKSIZE-(offset+size));
2449 			}
2450 		}
2451 		rcrcptr = (c->crc)+(4*blocknum);
2452 		bcrc = get32bit(&rcrcptr);
2453 //		if (bcrc!=mycrc32(0,blockbuffer,MFSBLOCKSIZE)) {
2454 		if (bcrc!=combinedcrc) {
2455 			errno = error;
2456 			hdd_error_occured(c);	// uses and preserves errno !!!
2457 			syslog(LOG_WARNING,"read_block_from_chunk: file:%s - crc error",c->filename);
2458 			hdd_report_damaged_chunk(chunkid);
2459 			hdd_chunk_release(c);
2460 			return ERROR_CRC;
2461 		}
2462 		if (ret!=MFSBLOCKSIZE) {
2463 			errno = error;
2464 			hdd_error_occured(c);	// uses and preserves errno !!!
2465 			mfs_arg_errlog_silent(LOG_WARNING,"read_block_from_chunk: file:%s - read error",c->filename);
2466 			hdd_report_damaged_chunk(chunkid);
2467 			hdd_chunk_release(c);
2468 			return ERROR_IO;
2469 		}
2470 #ifdef PRESERVE_BLOCK
2471 		memcpy(buffer,c->block+offset,size);
2472 #else /* PRESERVE_BLOCK */
2473 		memcpy(buffer,blockbuffer+offset,size);
2474 #endif /* PRESERVE_BLOCK */
2475 	}
2476 	put32bit(&crcbuff,crc);
2477 	hdd_chunk_release(c);
2478 	return STATUS_OK;
2479 }
2480 
2481 int hdd_write(uint64_t chunkid,uint32_t version,uint16_t blocknum,const uint8_t *buffer,uint32_t offset,uint32_t size,const uint8_t *crcbuff) {
2482 	chunk *c;
2483 	int ret;
2484 	int error;
2485 	uint8_t *wcrcptr;
2486 	const uint8_t *rcrcptr;
2487 	uint32_t crc,bcrc,precrc,postcrc,combinedcrc,chcrc;
2488 	uint32_t i;
2489 	uint64_t ts,te;
2490 #ifndef PRESERVE_BLOCK
2491 	uint8_t *blockbuffer;
2492 	blockbuffer = pthread_getspecific(blockbufferkey);
2493 	if (blockbuffer==NULL) {
2494 # ifdef MMAP_ALLOC
2495 		blockbuffer = mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
2496 # else
2497 		blockbuffer = malloc(MFSBLOCKSIZE);
2498 # endif
2499 		passert(blockbuffer);
2500 		zassert(pthread_setspecific(blockbufferkey,blockbuffer));
2501 	}
2502 #endif /* PRESERVE_BLOCK */
2503 	c = hdd_chunk_find(chunkid);
2504 	if (c==NULL) {
2505 		return ERROR_NOCHUNK;
2506 	}
2507 	if (c->version!=version && version>0) {
2508 		hdd_chunk_release(c);
2509 		return ERROR_WRONGVERSION;
2510 	}
2511 	if (blocknum>=MFSBLOCKSINCHUNK) {
2512 		hdd_chunk_release(c);
2513 		return ERROR_BNUMTOOBIG;
2514 	}
2515 	if (size>MFSBLOCKSIZE) {
2516 		hdd_chunk_release(c);
2517 		return ERROR_WRONGSIZE;
2518 	}
2519 	if ((offset>=MFSBLOCKSIZE) || (offset+size>MFSBLOCKSIZE)) {
2520 		hdd_chunk_release(c);
2521 		return ERROR_WRONGOFFSET;
2522 	}
2523 	crc = get32bit(&crcbuff);
2524 	if (crc!=mycrc32(0,buffer,size)) {
2525 		hdd_chunk_release(c);
2526 		return ERROR_CRC;
2527 	}
2528 	if (offset==0 && size==MFSBLOCKSIZE) {
2529 		if (blocknum>=c->blocks) {
2530 			wcrcptr = (c->crc)+(4*(c->blocks));
2531 			for (i=c->blocks ; i<blocknum ; i++) {
2532 				put32bit(&wcrcptr,emptyblockcrc);
2533 			}
2534 			c->blocks = blocknum+1;
2535 		}
2536 		ts = monotonic_nseconds();
2537 #ifdef USE_PIO
2538 		ret = pwrite(c->fd,buffer,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS));
2539 #else /* USE_PIO */
2540 		lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS),SEEK_SET);
2541 		ret = write(c->fd,buffer,MFSBLOCKSIZE);
2542 #endif /* USE_PIO */
2543 		error = errno;
2544 		te = monotonic_nseconds();
2545 		hdd_stats_datawrite(c->owner,MFSBLOCKSIZE,te-ts);
2546 		if (crc!=mycrc32(0,buffer,MFSBLOCKSIZE)) {
2547 			errno = error;
2548 			hdd_error_occured(c);
2549 			syslog(LOG_WARNING,"write_block_to_chunk: file:%s - crc error",c->filename);
2550 			hdd_report_damaged_chunk(chunkid);
2551 			hdd_chunk_release(c);
2552 			return ERROR_CRC;
2553 		}
2554 		wcrcptr = (c->crc)+(4*blocknum);
2555 		put32bit(&wcrcptr,crc);
2556 		c->crcchanged = 1;
2557 		if (ret!=MFSBLOCKSIZE) {
2558 			if (error==0 || error==EAGAIN) {
2559 				error=ENOSPC;
2560 			}
2561 			errno = error;
2562 			hdd_error_occured(c);	// uses and preserves errno !!!
2563 			mfs_arg_errlog_silent(LOG_WARNING,"write_block_to_chunk: file:%s - write error",c->filename);
2564 			hdd_report_damaged_chunk(chunkid);
2565 			hdd_chunk_release(c);
2566 			return ERROR_IO;
2567 		}
2568 #ifdef PRESERVE_BLOCK
2569 		memcpy(c->block,buffer,MFSBLOCKSIZE);
2570 		c->blockno = blocknum;
2571 #endif /* PRESERVE_BLOCK */
2572 	} else {
2573 		if (blocknum<c->blocks) {
2574 #ifdef PRESERVE_BLOCK
2575 			if (c->blockno != blocknum) {
2576 				ts = monotonic_nseconds();
2577 #ifdef USE_PIO
2578 				ret = pread(c->fd,c->block,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS));
2579 #else /* USE_PIO */
2580 				lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS),SEEK_SET);
2581 				ret = read(c->fd,c->block,MFSBLOCKSIZE);
2582 #endif /* USE_PIO */
2583 				error = errno;
2584 				te = monotonic_nseconds();
2585 				hdd_stats_dataread(c->owner,MFSBLOCKSIZE,te-ts);
2586 				c->blockno = blocknum;
2587 			} else {
2588 				ret = MFSBLOCKSIZE;
2589 				error = 0;
2590 			}
2591 #else /* PRESERVE_BLOCK */
2592 			ts = monotonic_nseconds();
2593 #ifdef USE_PIO
2594 			ret = pread(c->fd,blockbuffer,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS));
2595 #else /* USE_PIO */
2596 			lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS),SEEK_SET);
2597 			ret = read(c->fd,blockbuffer,MFSBLOCKSIZE);
2598 #endif /* USE_PIO */
2599 			error = errno;
2600 			te = monotonic_nseconds();
2601 			hdd_stats_dataread(c->owner,MFSBLOCKSIZE,te-ts);
2602 #endif /* PRESERVE_BLOCK */
2603 			if (ret!=MFSBLOCKSIZE) {
2604 				errno = error;
2605 				hdd_error_occured(c);	// uses and preserves errno !!!
2606 				mfs_arg_errlog_silent(LOG_WARNING,"write_block_to_chunk: file:%s - read error",c->filename);
2607 				hdd_report_damaged_chunk(chunkid);
2608 				hdd_chunk_release(c);
2609 				return ERROR_IO;
2610 			}
2611 #ifdef PRESERVE_BLOCK
2612 			precrc = mycrc32(0,c->block,offset);
2613 			chcrc = mycrc32(0,c->block+offset,size);
2614 			postcrc = mycrc32(0,c->block+offset+size,MFSBLOCKSIZE-(offset+size));
2615 #else /* PRESERVE_BLOCK */
2616 			precrc = mycrc32(0,blockbuffer,offset);
2617 			chcrc = mycrc32(0,blockbuffer+offset,size);
2618 			postcrc = mycrc32(0,blockbuffer+offset+size,MFSBLOCKSIZE-(offset+size));
2619 #endif /* PRESERVE_BLOCK */
2620 			if (offset==0) {
2621 				combinedcrc = mycrc32_combine(chcrc,postcrc,MFSBLOCKSIZE-(offset+size));
2622 			} else {
2623 				combinedcrc = mycrc32_combine(precrc,chcrc,size);
2624 				if ((offset+size)<MFSBLOCKSIZE) {
2625 					combinedcrc = mycrc32_combine(combinedcrc,postcrc,MFSBLOCKSIZE-(offset+size));
2626 				}
2627 			}
2628 			rcrcptr = (c->crc)+(4*blocknum);
2629 			bcrc = get32bit(&rcrcptr);
2630 //			if (bcrc!=mycrc32(0,blockbuffer,MFSBLOCKSIZE)) {
2631 			if (bcrc!=combinedcrc) {
2632 				errno = error;
2633 				hdd_error_occured(c);	// uses and preserves errno !!!
2634 				syslog(LOG_WARNING,"write_block_to_chunk: file:%s - crc error",c->filename);
2635 				hdd_report_damaged_chunk(chunkid);
2636 				hdd_chunk_release(c);
2637 				return ERROR_CRC;
2638 			}
2639 		} else {
2640 			if (ftruncate(c->fd,CHUNKHDRSIZE+(((uint32_t)(blocknum+1))<<MFSBLOCKBITS))<0) {
2641 				hdd_error_occured(c);	// uses and preserves errno !!!
2642 				mfs_arg_errlog_silent(LOG_WARNING,"write_block_to_chunk: file:%s - ftruncate error",c->filename);
2643 				hdd_report_damaged_chunk(chunkid);
2644 				hdd_chunk_release(c);
2645 				return ERROR_IO;
2646 			}
2647 			wcrcptr = (c->crc)+(4*(c->blocks));
2648 			for (i=c->blocks ; i<blocknum ; i++) {
2649 				put32bit(&wcrcptr,emptyblockcrc);
2650 			}
2651 			c->blocks = blocknum+1;
2652 #ifdef PRESERVE_BLOCK
2653 			memset(c->block,0,MFSBLOCKSIZE);
2654 			c->blockno = blocknum;
2655 #else /* PRESERVE_BLOCK */
2656 			memset(blockbuffer,0,MFSBLOCKSIZE);
2657 #endif /* PRESERVE_BLOCK */
2658 			precrc = mycrc32_zeroblock(0,offset);
2659 			postcrc = mycrc32_zeroblock(0,MFSBLOCKSIZE-(offset+size));
2660 		}
2661 #ifdef PRESERVE_BLOCK
2662 		memcpy(c->block+offset,buffer,size);
2663 		ts = monotonic_nseconds();
2664 #ifdef USE_PIO
2665 		ret = pwrite(c->fd,c->block+offset,size,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS)+offset);
2666 #else /* USE_PIO */
2667 		lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS)+offset,SEEK_SET);
2668 		ret = write(c->fd,c->block+offset,size);
2669 #endif /* USE_PIO */
2670 		error = errno;
2671 		te = monotonic_nseconds();
2672 		hdd_stats_datawrite(c->owner,size,te-ts);
2673 		chcrc = mycrc32(0,c->block+offset,size);
2674 #else /* PRESERVE_BLOCK */
2675 		memcpy(blockbuffer+offset,buffer,size);
2676 		ts = monotonic_nseconds();
2677 #ifdef USE_PIO
2678 		ret = pwrite(c->fd,blockbuffer+offset,size,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS)+offset);
2679 #else /* USE_PIO */
2680 		lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS)+offset,SEEK_SET);
2681 		ret = write(c->fd,blockbuffer+offset,size);
2682 #endif /* USE_PIO */
2683 		error = errno;
2684 		te = monotonic_nseconds();
2685 		hdd_stats_datawrite(c->owner,size,te-ts);
2686 		chcrc = mycrc32(0,blockbuffer+offset,size);
2687 #endif /* PRESERVE_BLOCK */
2688 		if (offset==0) {
2689 			combinedcrc = mycrc32_combine(chcrc,postcrc,MFSBLOCKSIZE-(offset+size));
2690 		} else {
2691 			combinedcrc = mycrc32_combine(precrc,chcrc,size);
2692 			if ((offset+size)<MFSBLOCKSIZE) {
2693 				combinedcrc = mycrc32_combine(combinedcrc,postcrc,MFSBLOCKSIZE-(offset+size));
2694 			}
2695 		}
2696 		wcrcptr = (c->crc)+(4*blocknum);
2697 //		bcrc = mycrc32(0,blockbuffer,MFSBLOCKSIZE);
2698 //		put32bit(&wcrcptr,bcrc);
2699 		put32bit(&wcrcptr,combinedcrc);
2700 		c->crcchanged = 1;
2701 //		if (crc!=mycrc32(0,blockbuffer+offset,size)) {
2702 		if (crc!=chcrc) {
2703 			errno = error;
2704 			hdd_error_occured(c);	// uses and preserves errno !!!
2705 			syslog(LOG_WARNING,"write_block_to_chunk: file:%s - crc error",c->filename);
2706 			hdd_report_damaged_chunk(chunkid);
2707 			hdd_chunk_release(c);
2708 			return ERROR_CRC;
2709 		}
2710 		if (ret!=(int)size) {
2711 			if (error==0 || error==EAGAIN) {
2712 				error=ENOSPC;
2713 			}
2714 			errno = error;
2715 			hdd_error_occured(c);	// uses and preserves errno !!!
2716 			mfs_arg_errlog_silent(LOG_WARNING,"write_block_to_chunk: file:%s - write error",c->filename);
2717 			hdd_report_damaged_chunk(chunkid);
2718 			hdd_chunk_release(c);
2719 			return ERROR_IO;
2720 		}
2721 	}
2722 //#warning TEST
2723 //	if ((random()&0x1F)==0) {
2724 //		syslog(LOG_NOTICE,"BAM BAM BAM");
2725 //		portable_usleep(500000);
2726 //	}
2727 	hdd_chunk_release(c);
2728 	return STATUS_OK;
2729 }
2730 
2731 
2732 
2733 /* chunk info */
2734 /*
2735 int hdd_check_version(uint64_t chunkid,uint32_t version) {
2736 	chunk *c;
2737 	c = hdd_chunk_find(chunkid);
2738 	if (c==NULL) {
2739 		return ERROR_NOCHUNK;
2740 	}
2741 	if (c->version!=version && version>0) {
2742 		hdd_chunk_release(c);
2743 		return ERROR_WRONGVERSION;
2744 	}
2745 	hdd_chunk_release(c);
2746 	return STATUS_OK;
2747 }
2748 */
2749 int hdd_get_blocks(uint64_t chunkid,uint32_t version,uint8_t *blocks_buff) {
2750 	chunk *c;
2751 	c = hdd_chunk_find(chunkid);
2752 	if (c==NULL) {
2753 		return ERROR_NOCHUNK;
2754 	}
2755 	if (c->version!=version && version>0) {
2756 		hdd_chunk_release(c);
2757 		return ERROR_WRONGVERSION;
2758 	}
2759 	put16bit(&blocks_buff,c->blocks);
2760 	hdd_chunk_release(c);
2761 	return STATUS_OK;
2762 }
2763 
2764 int hdd_get_checksum(uint64_t chunkid,uint32_t version,uint8_t *checksum_buff) {
2765 	int status;
2766 	uint32_t i;
2767 	uint32_t chksum;
2768 	chunk *c;
2769 	c = hdd_chunk_find(chunkid);
2770 	if (c==NULL) {
2771 		return ERROR_NOCHUNK;
2772 	}
2773 	if (c->version!=version && version>0) {
2774 		hdd_chunk_release(c);
2775 		return ERROR_WRONGVERSION;
2776 	}
2777 	status = hdd_io_begin(c,0);
2778 	if (status!=STATUS_OK) {
2779 		hdd_error_occured(c);	// uses and preserves errno !!!
2780 		hdd_report_damaged_chunk(chunkid);
2781 		hdd_chunk_release(c);
2782 		return status;
2783 	}
2784 	chksum = 1;
2785 	for (i=0 ; i<1024 ; i++) {
2786 		chksum *= 426265243;
2787 		chksum ^= c->crc[i];
2788 	}
2789 	put32bit(&checksum_buff,chksum);
2790 	status = hdd_io_end(c);
2791 	if (status!=STATUS_OK) {
2792 		hdd_error_occured(c);	// uses and preserves errno !!!
2793 		hdd_report_damaged_chunk(chunkid);
2794 		hdd_chunk_release(c);
2795 		return status;
2796 	}
2797 	hdd_chunk_release(c);
2798 	return STATUS_OK;
2799 }
2800 
2801 int hdd_get_checksum_tab(uint64_t chunkid,uint32_t version,uint8_t *checksum_tab) {
2802 	int status;
2803 	uint32_t i;
2804 	chunk *c;
2805 	c = hdd_chunk_find(chunkid);
2806 	if (c==NULL) {
2807 		return ERROR_NOCHUNK;
2808 	}
2809 	if (c->version!=version && version>0) {
2810 		hdd_chunk_release(c);
2811 		return ERROR_WRONGVERSION;
2812 	}
2813 	status = hdd_io_begin(c,0);
2814 	if (status!=STATUS_OK) {
2815 		hdd_error_occured(c);	// uses and preserves errno !!!
2816 		hdd_report_damaged_chunk(chunkid);
2817 		hdd_chunk_release(c);
2818 		return status;
2819 	}
2820 	for (i=0 ; i<1024 ; i++) {
2821 		put32bit(&checksum_tab,c->crc[i]);
2822 	}
2823 	status = hdd_io_end(c);
2824 	if (status!=STATUS_OK) {
2825 		hdd_error_occured(c);	// uses and preserves errno !!!
2826 		hdd_report_damaged_chunk(chunkid);
2827 		hdd_chunk_release(c);
2828 		return status;
2829 	}
2830 	hdd_chunk_release(c);
2831 	return STATUS_OK;
2832 }
2833 
2834 
2835 
2836 
2837 
2838 /* chunk operations */
2839 
2840 static int hdd_int_create(uint64_t chunkid,uint32_t version) {
2841 	folder *f;
2842 	chunk *c;
2843 	int status;
2844 	uint8_t *ptr;
2845 #ifdef PRESERVE_BLOCK
2846 	uint8_t hdrbuffer[CHUNKHDRSIZE];
2847 #else /* PRESERVE_BLOCK */
2848 	uint8_t *hdrbuffer;
2849 #endif /* PRESERVE_BLOCK */
2850 
2851 	zassert(pthread_mutex_lock(&folderlock));
2852 	f = hdd_getfolder();
2853 	if (f==NULL) {
2854 		zassert(pthread_mutex_unlock(&folderlock));
2855 		return ERROR_NOSPACE;
2856 	}
2857 	c = hdd_chunk_create(f,chunkid,version);
2858 	zassert(pthread_mutex_unlock(&folderlock));
2859 	if (c==NULL) {
2860 		return ERROR_CHUNKEXIST;
2861 	}
2862 
2863 #ifndef PRESERVE_BLOCK
2864 	hdrbuffer = pthread_getspecific(hdrbufferkey);
2865 	if (hdrbuffer==NULL) {
2866 		hdrbuffer = malloc(CHUNKHDRSIZE);
2867 		passert(hdrbuffer);
2868 		zassert(pthread_setspecific(hdrbufferkey,hdrbuffer));
2869 	}
2870 #endif /* PRESERVE_BLOCK */
2871 
2872 	status = hdd_io_begin(c,1);
2873 	if (status!=STATUS_OK) {
2874 		hdd_error_occured(c);	// uses and preserves errno !!!
2875 		hdd_chunk_delete(c);
2876 		return ERROR_IO;
2877 	}
2878 	memset(hdrbuffer,0,CHUNKHDRSIZE);
2879 	memcpy(hdrbuffer,MFSSIGNATURE "C 1.0",8);
2880 	ptr = hdrbuffer+8;
2881 	put64bit(&ptr,chunkid);
2882 	put32bit(&ptr,version);
2883 	if (write(c->fd,hdrbuffer,CHUNKHDRSIZE)!=CHUNKHDRSIZE) {
2884 		hdd_error_occured(c);	// uses and preserves errno !!!
2885 		mfs_arg_errlog_silent(LOG_WARNING,"create_newchunk: file:%s - write error",c->filename);
2886 		hdd_io_end(c);
2887 		unlink(c->filename);
2888 		hdd_chunk_delete(c);
2889 		return ERROR_IO;
2890 	}
2891 	hdd_stats_write(CHUNKHDRSIZE);
2892 	status = hdd_io_end(c);
2893 	if (status!=STATUS_OK) {
2894 		hdd_error_occured(c);	// uses and preserves errno !!!
2895 		unlink(c->filename);
2896 		hdd_chunk_delete(c);
2897 		return status;
2898 	}
2899 	hdd_chunk_release(c);
2900 	return STATUS_OK;
2901 }
2902 
2903 static int hdd_int_test(uint64_t chunkid,uint32_t version) {
2904 	const uint8_t *ptr;
2905 	uint16_t block;
2906 	uint32_t bcrc;
2907 	int32_t retsize;
2908 	int status;
2909 	chunk *c;
2910 #ifndef PRESERVE_BLOCK
2911 	uint8_t *blockbuffer;
2912 	blockbuffer = pthread_getspecific(blockbufferkey);
2913 	if (blockbuffer==NULL) {
2914 # ifdef MMAP_ALLOC
2915 		blockbuffer = mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
2916 # else
2917 		blockbuffer = malloc(MFSBLOCKSIZE);
2918 # endif
2919 		passert(blockbuffer);
2920 		zassert(pthread_setspecific(blockbufferkey,blockbuffer));
2921 	}
2922 #endif /* PRESERVE_BLOCK */
2923 	c = hdd_chunk_find(chunkid);
2924 	if (c==NULL) {
2925 		return ERROR_NOCHUNK;
2926 	}
2927 	if (c->version!=version && version>0) {
2928 		hdd_chunk_release(c);
2929 		return ERROR_WRONGVERSION;
2930 	}
2931 	status = hdd_io_begin(c,0);
2932 	if (status!=STATUS_OK) {
2933 		hdd_error_occured(c);	// uses and preserves errno !!!
2934 		hdd_chunk_release(c);
2935 		return status;
2936 	}
2937 	lseek(c->fd,CHUNKHDRSIZE,SEEK_SET);
2938 	ptr = c->crc;
2939 	for (block=0 ; block<c->blocks ; block++) {
2940 #ifdef PRESERVE_BLOCK
2941 		retsize = read(c->fd,c->block,MFSBLOCKSIZE);
2942 #else /* PRESERVE_BLOCK */
2943 		retsize = read(c->fd,blockbuffer,MFSBLOCKSIZE);
2944 #endif /* PRESERVE_BLOCK */
2945 		if (retsize!=MFSBLOCKSIZE) {
2946 			hdd_error_occured(c);	// uses and preserves errno !!!
2947 			mfs_arg_errlog_silent(LOG_WARNING,"test_chunk: file:%s - data read error",c->filename);
2948 			hdd_io_end(c);
2949 			hdd_report_damaged_chunk(chunkid);
2950 			hdd_chunk_release(c);
2951 			return ERROR_IO;
2952 		}
2953 		hdd_stats_read(MFSBLOCKSIZE);
2954 #ifdef PRESERVE_BLOCK
2955 		c->blockno = block;
2956 #endif
2957 		bcrc = get32bit(&ptr);
2958 #ifdef PRESERVE_BLOCK
2959 		if (bcrc!=mycrc32(0,c->block,MFSBLOCKSIZE)) {
2960 #else /* PRESERVE_BLOCK */
2961 		if (bcrc!=mycrc32(0,blockbuffer,MFSBLOCKSIZE)) {
2962 #endif /* PRESERVE_BLOCK */
2963 			errno = 0;	// set anything to errno
2964 			hdd_error_occured(c);	// uses and preserves errno !!!
2965 			syslog(LOG_WARNING,"test_chunk: file:%s - crc error",c->filename);
2966 			hdd_io_end(c);
2967 			hdd_report_damaged_chunk(chunkid);
2968 			hdd_chunk_release(c);
2969 			return ERROR_CRC;
2970 		}
2971 	}
2972 	status = hdd_io_end(c);
2973 	if (status!=STATUS_OK) {
2974 		hdd_error_occured(c);	// uses and preserves errno !!!
2975 		hdd_report_damaged_chunk(chunkid);
2976 		hdd_chunk_release(c);
2977 		return status;
2978 	}
2979 	hdd_chunk_release(c);
2980 	return STATUS_OK;
2981 }
2982 
2983 static int hdd_int_duplicate(uint64_t chunkid,uint32_t version,uint32_t newversion,uint64_t copychunkid,uint32_t copyversion) {
2984 	folder *f;
2985 	uint32_t filenameleng;
2986 	char *newfilename;
2987 	uint8_t *ptr,vbuff[4];
2988 	uint16_t block;
2989 	int32_t retsize;
2990 	int status;
2991 	chunk *c,*oc;
2992 #ifdef PRESERVE_BLOCK
2993 	uint8_t hdrbuffer[CHUNKHDRSIZE];
2994 #else /* PRESERVE_BLOCK */
2995 	uint8_t *blockbuffer,*hdrbuffer;
2996 	blockbuffer = pthread_getspecific(blockbufferkey);
2997 	if (blockbuffer==NULL) {
2998 # ifdef MMAP_ALLOC
2999 		blockbuffer = mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
3000 # else
3001 		blockbuffer = malloc(MFSBLOCKSIZE);
3002 # endif
3003 		passert(blockbuffer);
3004 		zassert(pthread_setspecific(blockbufferkey,blockbuffer));
3005 	}
3006 	hdrbuffer = pthread_getspecific(hdrbufferkey);
3007 	if (hdrbuffer==NULL) {
3008 		hdrbuffer = malloc(CHUNKHDRSIZE);
3009 		passert(hdrbuffer);
3010 		zassert(pthread_setspecific(hdrbufferkey,hdrbuffer));
3011 	}
3012 #endif /* PRESERVE_BLOCK */
3013 
3014 	oc = hdd_chunk_find(chunkid);
3015 	if (oc==NULL) {
3016 		return ERROR_NOCHUNK;
3017 	}
3018 	if (oc->version!=version && version>0) {
3019 		hdd_chunk_release(oc);
3020 		return ERROR_WRONGVERSION;
3021 	}
3022 	if (copyversion==0) {
3023 		copyversion = newversion;
3024 	}
3025 	zassert(pthread_mutex_lock(&folderlock));
3026 	f = hdd_getfolder();
3027 	if (f==NULL) {
3028 		zassert(pthread_mutex_unlock(&folderlock));
3029 		hdd_chunk_release(oc);
3030 		return ERROR_NOSPACE;
3031 	}
3032 	c = hdd_chunk_create(f,copychunkid,copyversion);
3033 	zassert(pthread_mutex_unlock(&folderlock));
3034 	if (c==NULL) {
3035 		hdd_chunk_release(oc);
3036 		return ERROR_CHUNKEXIST;
3037 	}
3038 
3039 	if (newversion!=version) {
3040 		filenameleng = strlen(oc->filename);
3041 		if (oc->filename[filenameleng-13]=='_') {	// new file name format
3042 			newfilename = malloc(filenameleng+1);
3043 			passert(newfilename);
3044 			memcpy(newfilename,c->filename,filenameleng+1);
3045 			sprintf(newfilename+filenameleng-12,"%08"PRIX32".mfs",newversion);
3046 			if (rename(oc->filename,newfilename)<0) {
3047 				hdd_error_occured(oc);	// uses and preserves errno !!!
3048 				mfs_arg_errlog_silent(LOG_WARNING,"duplicate_chunk: file:%s - rename error",oc->filename);
3049 				free(newfilename);
3050 				hdd_chunk_delete(c);
3051 				hdd_chunk_release(oc);
3052 				return ERROR_IO;
3053 			}
3054 			free(oc->filename);
3055 			oc->filename = newfilename;
3056 		}
3057 		status = hdd_io_begin(oc,0);
3058 		if (status!=STATUS_OK) {
3059 			hdd_error_occured(oc);	// uses and preserves errno !!!
3060 			hdd_chunk_delete(c);
3061 			hdd_chunk_release(oc);
3062 			return status;	//can't change file version
3063 		}
3064 		ptr = vbuff;
3065 		put32bit(&ptr,newversion);
3066 #ifdef USE_PIO
3067 		if (pwrite(oc->fd,vbuff,4,16)!=4) {
3068 #else /* USE_PIO */
3069 		lseek(oc->fd,16,SEEK_SET);
3070 		if (write(oc->fd,vbuff,4)!=4) {
3071 #endif /* USE_PIO */
3072 			hdd_error_occured(oc);	// uses and preserves errno !!!
3073 			mfs_arg_errlog_silent(LOG_WARNING,"duplicate_chunk: file:%s - write error",c->filename);
3074 			hdd_chunk_delete(c);
3075 			hdd_io_end(oc);
3076 			hdd_chunk_release(oc);
3077 			return ERROR_IO;
3078 		}
3079 		hdd_stats_write(4);
3080 		oc->version = newversion;
3081 	} else {
3082 		status = hdd_io_begin(oc,0);
3083 		if (status!=STATUS_OK) {
3084 			hdd_error_occured(oc);	// uses and preserves errno !!!
3085 			hdd_chunk_delete(c);
3086 			hdd_report_damaged_chunk(chunkid);
3087 			hdd_chunk_release(oc);
3088 			return status;
3089 		}
3090 	}
3091 	status = hdd_io_begin(c,1);
3092 	if (status!=STATUS_OK) {
3093 		hdd_error_occured(c);	// uses and preserves errno !!!
3094 		hdd_chunk_delete(c);
3095 		hdd_io_end(oc);
3096 		hdd_chunk_release(oc);
3097 		return status;
3098 	}
3099 	memset(hdrbuffer,0,CHUNKHDRSIZE);
3100 	memcpy(hdrbuffer,MFSSIGNATURE "C 1.0",8);
3101 	ptr = hdrbuffer+8;
3102 	put64bit(&ptr,copychunkid);
3103 	put32bit(&ptr,copyversion);
3104 	memcpy(c->crc,oc->crc,4096);
3105 	memcpy(hdrbuffer+1024,oc->crc,4096);
3106 	if (write(c->fd,hdrbuffer,CHUNKHDRSIZE)!=CHUNKHDRSIZE) {
3107 		hdd_error_occured(c);	// uses and preserves errno !!!
3108 		mfs_arg_errlog_silent(LOG_WARNING,"duplicate_chunk: file:%s - hdr write error",c->filename);
3109 		hdd_io_end(c);
3110 		unlink(c->filename);
3111 		hdd_chunk_delete(c);
3112 		hdd_io_end(oc);
3113 		hdd_chunk_release(oc);
3114 		return ERROR_IO;
3115 	}
3116 	hdd_stats_write(CHUNKHDRSIZE);
3117 #ifndef PRESERVE_BLOCK
3118 	lseek(oc->fd,CHUNKHDRSIZE,SEEK_SET);
3119 #endif /* PRESERVE_BLOCK */
3120 	for (block=0 ; block<oc->blocks ; block++) {
3121 #ifdef PRESERVE_BLOCK
3122 		if (oc->blockno==block) {
3123 			memcpy(c->block,oc->block,MFSBLOCKSIZE);
3124 			retsize = MFSBLOCKSIZE;
3125 		} else {
3126 #ifdef USE_PIO
3127 			retsize = pread(oc->fd,c->block,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS));
3128 #else /* USE_PIO */
3129 			lseek(oc->fd,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS),SEEK_SET);
3130 			retsize = read(oc->fd,c->block,MFSBLOCKSIZE);
3131 #endif /* USE_PIO */
3132 		}
3133 #else /* PRESERVE_BLOCK */
3134 		retsize = read(oc->fd,blockbuffer,MFSBLOCKSIZE);
3135 #endif /* PRESERVE_BLOCK */
3136 		if (retsize!=MFSBLOCKSIZE) {
3137 			hdd_error_occured(oc);	// uses and preserves errno !!!
3138 			mfs_arg_errlog_silent(LOG_WARNING,"duplicate_chunk: file:%s - data read error",oc->filename);
3139 			hdd_io_end(c);
3140 			unlink(c->filename);
3141 			hdd_chunk_delete(c);
3142 			hdd_io_end(oc);
3143 			hdd_report_damaged_chunk(chunkid);
3144 			hdd_chunk_release(oc);
3145 			return ERROR_IO;
3146 		}
3147 #ifdef PRESERVE_BLOCK
3148 		if (oc->blockno!=block) {
3149 			hdd_stats_read(MFSBLOCKSIZE);
3150 		}
3151 		retsize = write(c->fd,c->block,MFSBLOCKSIZE);
3152 #else /* PRESERVE_BLOCK */
3153 		hdd_stats_read(MFSBLOCKSIZE);
3154 		retsize = write(c->fd,blockbuffer,MFSBLOCKSIZE);
3155 #endif /* PRESERVE_BLOCK */
3156 		if (retsize!=MFSBLOCKSIZE) {
3157 			hdd_error_occured(c);	// uses and preserves errno !!!
3158 			mfs_arg_errlog_silent(LOG_WARNING,"duplicate_chunk: file:%s - data write error",c->filename);
3159 			hdd_io_end(c);
3160 			unlink(c->filename);
3161 			hdd_chunk_delete(c);
3162 			hdd_io_end(oc);
3163 			hdd_chunk_release(oc);
3164 			return ERROR_IO;	//write error
3165 		}
3166 		hdd_stats_write(MFSBLOCKSIZE);
3167 #ifdef PRESERVE_BLOCK
3168 		c->blockno = block;
3169 #endif /* PRESERVE_BLOCK */
3170 	}
3171 	status = hdd_io_end(oc);
3172 	if (status!=STATUS_OK) {
3173 		hdd_error_occured(oc);	// uses and preserves errno !!!
3174 		hdd_io_end(c);
3175 		unlink(c->filename);
3176 		hdd_chunk_delete(c);
3177 		hdd_report_damaged_chunk(chunkid);
3178 		hdd_chunk_release(oc);
3179 		return status;
3180 	}
3181 	status = hdd_io_end(c);
3182 	if (status!=STATUS_OK) {
3183 		hdd_error_occured(c);	// uses and preserves errno !!!
3184 		unlink(c->filename);
3185 		hdd_chunk_delete(c);
3186 		hdd_chunk_release(oc);
3187 		return status;
3188 	}
3189 	c->blocks = oc->blocks;
3190 	zassert(pthread_mutex_lock(&folderlock));
3191 	c->owner->needrefresh = 1;
3192 	zassert(pthread_mutex_unlock(&folderlock));
3193 	hdd_chunk_release(c);
3194 	hdd_chunk_release(oc);
3195 	return STATUS_OK;
3196 }
3197 
3198 static int hdd_int_version(uint64_t chunkid,uint32_t version,uint32_t newversion) {
3199 	int status;
3200 	uint32_t filenameleng;
3201 	char *newfilename;
3202 	uint8_t *ptr,vbuff[4];
3203 	chunk *c;
3204 	c = hdd_chunk_find(chunkid);
3205 	if (c==NULL) {
3206 		return ERROR_NOCHUNK;
3207 	}
3208 	if (c->version!=version && version>0) {
3209 		hdd_chunk_release(c);
3210 		return ERROR_WRONGVERSION;
3211 	}
3212 	filenameleng = strlen(c->filename);
3213 	if (c->filename[filenameleng-13]=='_') {	// new file name format
3214 		newfilename = malloc(filenameleng+1);
3215 		passert(newfilename);
3216 		memcpy(newfilename,c->filename,filenameleng+1);
3217 		sprintf(newfilename+filenameleng-12,"%08"PRIX32".mfs",newversion);
3218 		if (rename(c->filename,newfilename)<0) {
3219 			hdd_error_occured(c);	// uses and preserves errno !!!
3220 			mfs_arg_errlog_silent(LOG_WARNING,"set_chunk_version: file:%s - rename error",c->filename);
3221 			free(newfilename);
3222 			hdd_chunk_release(c);
3223 			return ERROR_IO;
3224 		}
3225 		free(c->filename);
3226 		c->filename = newfilename;
3227 	}
3228 	status = hdd_io_begin(c,0);
3229 	if (status!=STATUS_OK) {
3230 		hdd_error_occured(c);	// uses and preserves errno !!!
3231 		mfs_arg_errlog_silent(LOG_WARNING,"set_chunk_version: file:%s - open error",c->filename);
3232 		hdd_chunk_release(c);
3233 		return status;
3234 	}
3235 	ptr = vbuff;
3236 	put32bit(&ptr,newversion);
3237 #ifdef USE_PIO
3238 	if (pwrite(c->fd,vbuff,4,16)!=4) {
3239 #else /* USE_PIO */
3240 	lseek(c->fd,16,SEEK_SET);
3241 	if (write(c->fd,vbuff,4)!=4) {
3242 #endif /* USE_PIO */
3243 		hdd_error_occured(c);	// uses and preserves errno !!!
3244 		mfs_arg_errlog_silent(LOG_WARNING,"set_chunk_version: file:%s - write error",c->filename);
3245 		hdd_io_end(c);
3246 		hdd_chunk_release(c);
3247 		return ERROR_IO;
3248 	}
3249 	hdd_stats_write(4);
3250 	c->version = newversion;
3251 	status = hdd_io_end(c);
3252 	if (status!=STATUS_OK) {
3253 		hdd_error_occured(c);	// uses and preserves errno !!!
3254 	}
3255 	hdd_chunk_release(c);
3256 	return status;
3257 }
3258 
/*
 * Change the version of chunk "chunkid" to "newversion" and resize its data
 * area to "length" bytes.
 *
 *   version - expected current version; 0 disables the check
 *   length  - new data length in bytes; values above MFSCHUNKSIZE are
 *             rejected with ERROR_WRONGSIZE
 *
 * The chunk file is always sized to a whole number of blocks; when "length"
 * is not block aligned, the CRC of the last block is recomputed from the
 * kept bytes.
 *
 * Returns STATUS_OK, ERROR_WRONGSIZE, ERROR_NOCHUNK, ERROR_WRONGVERSION,
 * ERROR_IO or a status propagated from hdd_io_begin()/hdd_io_end().
 */
static int hdd_int_truncate(uint64_t chunkid,uint32_t version,uint32_t newversion,uint32_t length) {
	int status;
	uint32_t filenameleng;
	char *newfilename;
	uint8_t *ptr,vbuff[4];
	chunk *c;
	uint32_t blocks;
	uint32_t i;
#ifndef PRESERVE_BLOCK
	// per-thread scratch block buffer, allocated on first use
	uint8_t *blockbuffer;
	blockbuffer = pthread_getspecific(blockbufferkey);
	if (blockbuffer==NULL) {
# ifdef MMAP_ALLOC
		blockbuffer = mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
# else
		blockbuffer = malloc(MFSBLOCKSIZE);
# endif
		passert(blockbuffer);
		zassert(pthread_setspecific(blockbufferkey,blockbuffer));
	}
#endif /* !PRESERVE_BLOCK */
	if (length>MFSCHUNKSIZE) {
		return ERROR_WRONGSIZE;
	}
	c = hdd_chunk_find(chunkid);
	// step 1 - change version
	if (c==NULL) {
		return ERROR_NOCHUNK;
	}
	if (c->version!=version && version>0) {
		hdd_chunk_release(c);
		return ERROR_WRONGVERSION;
	}
	filenameleng = strlen(c->filename);
	if (c->filename[filenameleng-13]=='_') {	// new file name format
		// build the same path with the 8 hex digits of the version replaced
		newfilename = malloc(filenameleng+1);
		passert(newfilename);
		memcpy(newfilename,c->filename,filenameleng+1);
		sprintf(newfilename+filenameleng-12,"%08"PRIX32".mfs",newversion);
		if (rename(c->filename,newfilename)<0) {
			hdd_error_occured(c);	// uses and preserves errno !!!
			mfs_arg_errlog_silent(LOG_WARNING,"truncate_chunk: file:%s - rename error",c->filename);
			free(newfilename);
			hdd_chunk_release(c);
			return ERROR_IO;
		}
		free(c->filename);
		c->filename = newfilename;
	}
	status = hdd_io_begin(c,0);
	if (status!=STATUS_OK) {
		hdd_error_occured(c);	// uses and preserves errno !!!
		hdd_chunk_release(c);
		return status;	//can't change file version
	}
	ptr = vbuff;
	put32bit(&ptr,newversion);
	// overwrite the 4-byte version field at offset 16 of the chunk file
#ifdef USE_PIO
	if (pwrite(c->fd,vbuff,4,16)!=4) {
#else /* USE_PIO */
	lseek(c->fd,16,SEEK_SET);
	if (write(c->fd,vbuff,4)!=4) {
#endif /* USE_PIO */
		hdd_error_occured(c);	// uses and preserves errno !!!
		mfs_arg_errlog_silent(LOG_WARNING,"truncate_chunk: file:%s - write error",c->filename);
		hdd_io_end(c);
		hdd_chunk_release(c);
		return ERROR_IO;
	}
	hdd_stats_write(4);
	c->version = newversion;
	// step 2. truncate
	blocks = ((length+MFSBLOCKMASK)>>MFSBLOCKBITS);	// number of blocks needed to hold "length" bytes (rounded up)
	if (blocks>c->blocks) {
		// growing: extend the file to the new block count and mark added blocks with the empty-block CRC
		if (ftruncate(c->fd,CHUNKHDRSIZE+(blocks<<MFSBLOCKBITS))<0) {
			hdd_error_occured(c);	// uses and preserves errno !!!
			mfs_arg_errlog_silent(LOG_WARNING,"truncate_chunk: file:%s - ftruncate error",c->filename);
			hdd_io_end(c);
			hdd_chunk_release(c);
			return ERROR_IO;
		}
		ptr = (c->crc)+(4*(c->blocks));
		for (i=c->blocks ; i<blocks ; i++) {
			put32bit(&ptr,emptyblockcrc);
		}
		c->crcchanged = 1;
	} else {
		// shrinking (or same number of blocks)
		uint32_t blocknum = length>>MFSBLOCKBITS;	// index of the block containing offset "length"
		uint32_t blockpos = length&MFSCHUNKBLOCKMASK;	// presumably "length" rounded down to a block boundary - confirm against MFSCHUNKBLOCKMASK definition
		uint32_t blocksize = length&MFSBLOCKMASK;	// bytes kept in the last (partial) block; 0 when aligned
		// first cut the file to exactly "length" bytes of data ...
		if (ftruncate(c->fd,CHUNKHDRSIZE+length)<0) {
			hdd_error_occured(c);	// uses and preserves errno !!!
			mfs_arg_errlog_silent(LOG_WARNING,"truncate_chunk: file:%s - ftruncate error",c->filename);
			hdd_io_end(c);
			hdd_chunk_release(c);
			return ERROR_IO;
		}
		if (blocksize>0) {
			// ... then extend it back to a whole number of blocks
			if (ftruncate(c->fd,CHUNKHDRSIZE+(blocks<<MFSBLOCKBITS))<0) {
				hdd_error_occured(c);	// uses and preserves errno !!!
				mfs_arg_errlog_silent(LOG_WARNING,"truncate_chunk: file:%s - ftruncate error",c->filename);
				hdd_io_end(c);
				hdd_chunk_release(c);
				return ERROR_IO;
			}
#ifdef PRESERVE_BLOCK
			// NOTE(review): this invalidation runs only for misaligned lengths; for an aligned
			// shrink c->blockno may still point at a block that no longer exists - confirm
			// whether hdd_io_end() or another path resets it
			if (c->blockno>=blocks) {
				c->blockno = 0xFFFF;	// invalidate truncated block
			}
			// read the kept part of the last block unless that block is already in c->block
			if (c->blockno!=(blockpos>>MFSBLOCKBITS)) {

#ifdef USE_PIO
				if (pread(c->fd,c->block,blocksize,CHUNKHDRSIZE+blockpos)!=(signed)blocksize) {
#else /* USE_PIO */
				lseek(c->fd,CHUNKHDRSIZE+blockpos,SEEK_SET);
				if (read(c->fd,c->block,blocksize)!=(signed)blocksize) {
#endif /* USE_PIO */
#else /* PRESERVE_BLOCK */
#ifdef USE_PIO
			if (pread(c->fd,blockbuffer,blocksize,CHUNKHDRSIZE+blockpos)!=(signed)blocksize) {
#else /* USE_PIO */
			lseek(c->fd,CHUNKHDRSIZE+blockpos,SEEK_SET);
			if (read(c->fd,blockbuffer,blocksize)!=(signed)blocksize) {
#endif /* USE_PIO */
#endif /* PRESERVE_BLOCK */
				// common error body for whichever read variant was compiled in above
				hdd_error_occured(c);	// uses and preserves errno !!!
				mfs_arg_errlog_silent(LOG_WARNING,"truncate_chunk: file:%s - read error",c->filename);
				hdd_io_end(c);
				hdd_chunk_release(c);
				return ERROR_IO;
			}
			hdd_stats_read(blocksize);
#ifdef PRESERVE_BLOCK
			}	// closes "if (c->blockno!=...)" opened in the PRESERVE_BLOCK branch above
			// zero the dropped tail of the buffered block and remember which block it holds
			memset(c->block+blocksize,0,MFSBLOCKSIZE-blocksize);
			c->blockno = blockpos>>MFSBLOCKBITS;
			// presumably the CRC of the kept bytes followed by MFSBLOCKSIZE-blocksize zero bytes - confirm in crc.h
			i = mycrc32_zeroexpanded(0,c->block,blocksize,MFSBLOCKSIZE-blocksize);
#else /* PRESERVE_BLOCK */
			i = mycrc32_zeroexpanded(0,blockbuffer,blocksize,MFSBLOCKSIZE-blocksize);
#endif /* PRESERVE_BLOCK */
			// store the recomputed CRC of the last block in the in-memory CRC table
			ptr = (c->crc)+(4*blocknum);
			put32bit(&ptr,i);
			c->crcchanged = 1;
		}
	}
	if (c->blocks != blocks) {
		// file size changed - flag the owning folder for refresh
		zassert(pthread_mutex_lock(&folderlock));
		c->owner->needrefresh = 1;
		zassert(pthread_mutex_unlock(&folderlock));
	}
	c->blocks = blocks;
	status = hdd_io_end(c);
	if (status!=STATUS_OK) {
		hdd_error_occured(c);	// uses and preserves errno !!!
	}
	hdd_chunk_release(c);
	return status;
}
3417 
3418 static int hdd_int_duptrunc(uint64_t chunkid,uint32_t version,uint32_t newversion,uint64_t copychunkid,uint32_t copyversion,uint32_t length) {
3419 	folder *f;
3420 	uint32_t filenameleng;
3421 	char *newfilename;
3422 	uint8_t *ptr,vbuff[4];
3423 	uint16_t block;
3424 	uint16_t blocks;
3425 	int32_t retsize;
3426 	uint32_t crc;
3427 	int status;
3428 	chunk *c,*oc;
3429 #ifdef PRESERVE_BLOCK
3430 	uint8_t hdrbuffer[CHUNKHDRSIZE];
3431 #else /* PRESERVE_BLOCK */
3432 	uint8_t *blockbuffer,*hdrbuffer;
3433 	blockbuffer = pthread_getspecific(blockbufferkey);
3434 	if (blockbuffer==NULL) {
3435 # ifdef MMAP_ALLOC
3436 		blockbuffer = mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
3437 # else
3438 		blockbuffer = malloc(MFSBLOCKSIZE);
3439 # endif
3440 		passert(blockbuffer);
3441 		zassert(pthread_setspecific(blockbufferkey,blockbuffer));
3442 	}
3443 	hdrbuffer = pthread_getspecific(hdrbufferkey);
3444 	if (hdrbuffer==NULL) {
3445 		hdrbuffer = malloc(CHUNKHDRSIZE);
3446 		passert(hdrbuffer);
3447 		zassert(pthread_setspecific(hdrbufferkey,hdrbuffer));
3448 	}
3449 #endif /* PRESERVE_BLOCK */
3450 
3451 	if (length>MFSCHUNKSIZE) {
3452 		return ERROR_WRONGSIZE;
3453 	}
3454 	oc = hdd_chunk_find(chunkid);
3455 	if (oc==NULL) {
3456 		return ERROR_NOCHUNK;
3457 	}
3458 	if (oc->version!=version && version>0) {
3459 		hdd_chunk_release(oc);
3460 		return ERROR_WRONGVERSION;
3461 	}
3462 	if (copyversion==0) {
3463 		copyversion = newversion;
3464 	}
3465 	zassert(pthread_mutex_lock(&folderlock));
3466 	f = hdd_getfolder();
3467 	if (f==NULL) {
3468 		zassert(pthread_mutex_unlock(&folderlock));
3469 		hdd_chunk_release(oc);
3470 		return ERROR_NOSPACE;
3471 	}
3472 	c = hdd_chunk_create(f,copychunkid,copyversion);
3473 	zassert(pthread_mutex_unlock(&folderlock));
3474 	if (c==NULL) {
3475 		hdd_chunk_release(oc);
3476 		return ERROR_CHUNKEXIST;
3477 	}
3478 
3479 	if (newversion!=version) {
3480 		filenameleng = strlen(oc->filename);
3481 		if (oc->filename[filenameleng-13]=='_') {	// new file name format
3482 			newfilename = malloc(filenameleng+1);
3483 			passert(newfilename);
3484 			memcpy(newfilename,c->filename,filenameleng+1);
3485 			sprintf(newfilename+filenameleng-12,"%08"PRIX32".mfs",newversion);
3486 			if (rename(oc->filename,newfilename)<0) {
3487 				hdd_error_occured(oc);	// uses and preserves errno !!!
3488 				mfs_arg_errlog_silent(LOG_WARNING,"duplicate_chunk: file:%s - rename error",oc->filename);
3489 				free(newfilename);
3490 				hdd_chunk_delete(c);
3491 				hdd_chunk_release(oc);
3492 				return ERROR_IO;
3493 			}
3494 			free(oc->filename);
3495 			oc->filename = newfilename;
3496 		}
3497 		status = hdd_io_begin(oc,0);
3498 		if (status!=STATUS_OK) {
3499 			hdd_error_occured(oc);	// uses and preserves errno !!!
3500 			hdd_chunk_delete(c);
3501 			hdd_chunk_release(oc);
3502 			return status;	//can't change file version
3503 		}
3504 		ptr = vbuff;
3505 		put32bit(&ptr,newversion);
3506 #ifdef USE_PIO
3507 		if (pwrite(oc->fd,vbuff,4,16)!=4) {
3508 #else /* USE_PIO */
3509 		lseek(oc->fd,16,SEEK_SET);
3510 		if (write(oc->fd,vbuff,4)!=4) {
3511 #endif /* USE_PIO */
3512 			hdd_error_occured(oc);	// uses and preserves errno !!!
3513 			mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - write error",c->filename);
3514 			hdd_chunk_delete(c);
3515 			hdd_io_end(oc);
3516 			hdd_chunk_release(oc);
3517 			return ERROR_IO;
3518 		}
3519 		hdd_stats_write(4);
3520 		oc->version = newversion;
3521 	} else {
3522 		status = hdd_io_begin(oc,0);
3523 		if (status!=STATUS_OK) {
3524 			hdd_error_occured(oc);	// uses and preserves errno !!!
3525 			hdd_chunk_delete(c);
3526 			hdd_report_damaged_chunk(chunkid);
3527 			hdd_chunk_release(oc);
3528 			return status;
3529 		}
3530 	}
3531 	status = hdd_io_begin(c,1);
3532 	if (status!=STATUS_OK) {
3533 		hdd_error_occured(c);	// uses and preserves errno !!!
3534 		hdd_chunk_delete(c);
3535 		hdd_io_end(oc);
3536 		hdd_chunk_release(oc);
3537 		return status;
3538 	}
3539 	blocks = ((length+MFSBLOCKMASK)>>MFSBLOCKBITS);
3540 	memset(hdrbuffer,0,CHUNKHDRSIZE);
3541 	memcpy(hdrbuffer,MFSSIGNATURE "C 1.0",8);
3542 	ptr = hdrbuffer+8;
3543 	put64bit(&ptr,copychunkid);
3544 	put32bit(&ptr,copyversion);
3545 	memcpy(hdrbuffer+1024,oc->crc,4096);
3546 // do not write header yet - only seek to apriopriate position
3547 	lseek(c->fd,CHUNKHDRSIZE,SEEK_SET);
3548 #ifndef PRESERVE_BLOCK
3549 	lseek(oc->fd,CHUNKHDRSIZE,SEEK_SET);
3550 #endif /* PRESERVE_BLOCK */
3551 	if (blocks>oc->blocks) { // expanding
3552 		for (block=0 ; block<oc->blocks ; block++) {
3553 #ifdef PRESERVE_BLOCK
3554 			if (oc->blockno==block) {
3555 				memcpy(c->block,oc->block,MFSBLOCKSIZE);
3556 				retsize = MFSBLOCKSIZE;
3557 			} else {
3558 #ifdef USE_PIO
3559 				retsize = pread(oc->fd,c->block,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS));
3560 #else /* USE_PIO */
3561 				lseek(oc->fd,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS),SEEK_SET);
3562 				retsize = read(oc->fd,c->block,MFSBLOCKSIZE);
3563 #endif /* USE_PIO */
3564 			}
3565 #else /* PRESERVE_BLOCK */
3566 			retsize = read(oc->fd,blockbuffer,MFSBLOCKSIZE);
3567 #endif /* PRESERVE_BLOCK */
3568 			if (retsize!=MFSBLOCKSIZE) {
3569 				hdd_error_occured(oc);	// uses and preserves errno !!!
3570 				mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data read error",oc->filename);
3571 				hdd_io_end(c);
3572 				unlink(c->filename);
3573 				hdd_chunk_delete(c);
3574 				hdd_io_end(oc);
3575 				hdd_report_damaged_chunk(chunkid);
3576 				hdd_chunk_release(oc);
3577 				return ERROR_IO;
3578 			}
3579 #ifdef PRESERVE_BLOCK
3580 			if (oc->blockno!=block) {
3581 				hdd_stats_read(MFSBLOCKSIZE);
3582 			}
3583 			retsize = write(c->fd,c->block,MFSBLOCKSIZE);
3584 #else /* PRESERVE_BLOCK */
3585 			hdd_stats_read(MFSBLOCKSIZE);
3586 			retsize = write(c->fd,blockbuffer,MFSBLOCKSIZE);
3587 #endif /* PRESERVE_BLOCK */
3588 			if (retsize!=MFSBLOCKSIZE) {
3589 				hdd_error_occured(c);	// uses and preserves errno !!!
3590 				mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data write error",c->filename);
3591 				hdd_io_end(c);
3592 				unlink(c->filename);
3593 				hdd_chunk_delete(c);
3594 				hdd_io_end(oc);
3595 				hdd_chunk_release(oc);
3596 				return ERROR_IO;
3597 			}
3598 			hdd_stats_write(MFSBLOCKSIZE);
3599 #ifdef PRESERVE_BLOCK
3600 			c->blockno = block;
3601 #endif /* PRESERVE_BLOCK */
3602 		}
3603 		if (ftruncate(c->fd,CHUNKHDRSIZE+(((uint32_t)blocks)<<MFSBLOCKBITS))<0) {
3604 			hdd_error_occured(c);	// uses and preserves errno !!!
3605 			mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - ftruncate error",c->filename);
3606 			hdd_io_end(c);
3607 			unlink(c->filename);
3608 			hdd_chunk_delete(c);
3609 			hdd_io_end(oc);
3610 			hdd_chunk_release(oc);
3611 			return ERROR_IO;	//write error
3612 		}
3613 		ptr = hdrbuffer+CHUNKHDRCRC+4*(oc->blocks);
3614 		for (block=oc->blocks ; block<blocks ; block++) {
3615 			put32bit(&ptr,emptyblockcrc);
3616 		}
3617 	} else { // shrinking
3618 		uint32_t blocksize = (length&MFSBLOCKMASK);
3619 		if (blocksize==0) { // aligned shring
3620 			for (block=0 ; block<blocks ; block++) {
3621 #ifdef PRESERVE_BLOCK
3622 				if (oc->blockno==block) {
3623 					memcpy(c->block,oc->block,MFSBLOCKSIZE);
3624 					retsize = MFSBLOCKSIZE;
3625 				} else {
3626 #ifdef USE_PIO
3627 					retsize = pread(oc->fd,c->block,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS));
3628 #else /* USE_PIO */
3629 					lseek(oc->fd,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS),SEEK_SET);
3630 					retsize = read(oc->fd,c->block,MFSBLOCKSIZE);
3631 #endif /* USE_PIO */
3632 				}
3633 #else /* PRESERVE_BLOCK */
3634 				retsize = read(oc->fd,blockbuffer,MFSBLOCKSIZE);
3635 #endif /* PRESERVE_BLOCK */
3636 				if (retsize!=MFSBLOCKSIZE) {
3637 					hdd_error_occured(oc);	// uses and preserves errno !!!
3638 					mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data read error",oc->filename);
3639 					hdd_io_end(c);
3640 					unlink(c->filename);
3641 					hdd_chunk_delete(c);
3642 					hdd_io_end(oc);
3643 					hdd_report_damaged_chunk(chunkid);
3644 					hdd_chunk_release(oc);
3645 					return ERROR_IO;
3646 				}
3647 #ifdef PRESERVE_BLOCK
3648 				if (oc->blockno!=block) {
3649 					hdd_stats_read(MFSBLOCKSIZE);
3650 				}
3651 				retsize = write(c->fd,c->block,MFSBLOCKSIZE);
3652 #else /* PRESERVE_BLOCK */
3653 				hdd_stats_read(MFSBLOCKSIZE);
3654 				retsize = write(c->fd,blockbuffer,MFSBLOCKSIZE);
3655 #endif /* PRESERVE_BLOCK */
3656 				if (retsize!=MFSBLOCKSIZE) {
3657 					hdd_error_occured(c);	// uses and preserves errno !!!
3658 					mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data write error",c->filename);
3659 					hdd_io_end(c);
3660 					unlink(c->filename);
3661 					hdd_chunk_delete(c);
3662 					hdd_io_end(oc);
3663 					hdd_chunk_release(oc);
3664 					return ERROR_IO;
3665 				}
3666 				hdd_stats_write(MFSBLOCKSIZE);
3667 #ifdef PRESERVE_BLOCK
3668 				c->blockno = block;
3669 #endif /* PRESERVE_BLOCK */
3670 			}
3671 		} else { // misaligned shrink
3672 			for (block=0 ; block<blocks-1 ; block++) {
3673 #ifdef PRESERVE_BLOCK
3674 				if (oc->blockno==block) {
3675 					memcpy(c->block,oc->block,MFSBLOCKSIZE);
3676 					retsize = MFSBLOCKSIZE;
3677 				} else {
3678 #ifdef USE_PIO
3679 					retsize = pread(oc->fd,c->block,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS));
3680 #else /* USE_PIO */
3681 					lseek(oc->fd,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS),SEEK_SET);
3682 					retsize = read(oc->fd,c->block,MFSBLOCKSIZE);
3683 #endif /* USE_PIO */
3684 				}
3685 #else /* PRESERVE_BLOCK */
3686 				retsize = read(oc->fd,blockbuffer,MFSBLOCKSIZE);
3687 #endif /* PRESERVE_BLOCK */
3688 				if (retsize!=MFSBLOCKSIZE) {
3689 					hdd_error_occured(oc);	// uses and preserves errno !!!
3690 					mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data read error",oc->filename);
3691 					hdd_io_end(c);
3692 					unlink(c->filename);
3693 					hdd_chunk_delete(c);
3694 					hdd_io_end(oc);
3695 					hdd_report_damaged_chunk(chunkid);
3696 					hdd_chunk_release(oc);
3697 					return ERROR_IO;
3698 				}
3699 #ifdef PRESERVE_BLOCK
3700 				if (oc->blockno!=block) {
3701 					hdd_stats_read(MFSBLOCKSIZE);
3702 				}
3703 				retsize = write(c->fd,c->block,MFSBLOCKSIZE);
3704 #else /* PRESERVE_BLOCK */
3705 				hdd_stats_read(MFSBLOCKSIZE);
3706 				retsize = write(c->fd,blockbuffer,MFSBLOCKSIZE);
3707 #endif /* PRESERVE_BLOCK */
3708 				if (retsize!=MFSBLOCKSIZE) {
3709 					hdd_error_occured(c);	// uses and preserves errno !!!
3710 					mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data write error",c->filename);
3711 					hdd_io_end(c);
3712 					unlink(c->filename);
3713 					hdd_chunk_delete(c);
3714 					hdd_io_end(oc);
3715 					hdd_chunk_release(oc);
3716 					return ERROR_IO;	//write error
3717 				}
3718 				hdd_stats_write(MFSBLOCKSIZE);
3719 			}
3720 			block = blocks-1;
3721 #ifdef PRESERVE_BLOCK
3722 			if (oc->blockno==block) {
3723 				memcpy(c->block,oc->block,blocksize);
3724 				retsize = blocksize;
3725 			} else {
3726 #ifdef USE_PIO
3727 				retsize = pread(oc->fd,c->block,blocksize,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS));
3728 #else /* USE_PIO */
3729 				lseek(oc->fd,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS),SEEK_SET);
3730 				retsize = read(oc->fd,c->block,blocksize);
3731 #endif /* USE_PIO */
3732 			}
3733 #else /* PRESERVE_BLOCK */
3734 			retsize = read(oc->fd,blockbuffer,blocksize);
3735 #endif /* PRESERVE_BLOCK */
3736 			if (retsize!=(signed)blocksize) {
3737 				hdd_error_occured(oc);	// uses and preserves errno !!!
3738 				mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data read error",oc->filename);
3739 				hdd_io_end(c);
3740 				unlink(c->filename);
3741 				hdd_chunk_delete(c);
3742 				hdd_io_end(oc);
3743 				hdd_report_damaged_chunk(chunkid);
3744 				hdd_chunk_release(oc);
3745 				return ERROR_IO;
3746 			}
3747 #ifdef PRESERVE_BLOCK
3748 			if (oc->blockno!=block) {
3749 				hdd_stats_read(blocksize);
3750 			}
3751 			memset(c->block+blocksize,0,MFSBLOCKSIZE-blocksize);
3752 			retsize = write(c->fd,c->block,MFSBLOCKSIZE);
3753 #else /* PRESERVE_BLOCK */
3754 			hdd_stats_read(blocksize);
3755 			memset(blockbuffer+blocksize,0,MFSBLOCKSIZE-blocksize);
3756 			retsize = write(c->fd,blockbuffer,MFSBLOCKSIZE);
3757 #endif /* PRESERVE_BLOCK */
3758 			if (retsize!=MFSBLOCKSIZE) {
3759 				hdd_error_occured(c);	// uses and preserves errno !!!
3760 				mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data write error",c->filename);
3761 				hdd_io_end(c);
3762 				unlink(c->filename);
3763 				hdd_chunk_delete(c);
3764 				hdd_io_end(oc);
3765 				hdd_chunk_release(oc);
3766 				return ERROR_IO;
3767 			}
3768 			hdd_stats_write(MFSBLOCKSIZE);
3769 			ptr = hdrbuffer+CHUNKHDRCRC+4*(blocks-1);
3770 #ifdef PRESERVE_BLOCK
3771 			crc = mycrc32_zeroexpanded(0,c->block,blocksize,MFSBLOCKSIZE-blocksize);
3772 #else /* PRESERVE_BLOCK */
3773 			crc = mycrc32_zeroexpanded(0,blockbuffer,blocksize,MFSBLOCKSIZE-blocksize);
3774 #endif /* PRESERVE_BLOCK */
3775 			put32bit(&ptr,crc);
3776 #ifdef PRESERVE_BLOCK
3777 			c->blockno = block;
3778 #endif /* PRESERVE_BLOCK */
3779 		}
3780 	}
3781 // and now write header
3782 	memcpy(c->crc,hdrbuffer+1024,4096);
3783 	lseek(c->fd,0,SEEK_SET);
3784 	if (write(c->fd,hdrbuffer,CHUNKHDRSIZE)!=CHUNKHDRSIZE) {
3785 		hdd_error_occured(c);	// uses and preserves errno !!!
3786 		mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - hdr write error",c->filename);
3787 		hdd_io_end(c);
3788 		unlink(c->filename);
3789 		hdd_chunk_delete(c);
3790 		hdd_io_end(oc);
3791 		hdd_chunk_release(oc);
3792 		return ERROR_IO;
3793 	}
3794 	hdd_stats_write(CHUNKHDRSIZE);
3795 	status = hdd_io_end(oc);
3796 	if (status!=STATUS_OK) {
3797 		hdd_error_occured(oc);	// uses and preserves errno !!!
3798 		hdd_io_end(c);
3799 		unlink(c->filename);
3800 		hdd_chunk_delete(c);
3801 		hdd_report_damaged_chunk(chunkid);
3802 		hdd_chunk_release(oc);
3803 		return status;
3804 	}
3805 	status = hdd_io_end(c);
3806 	if (status!=STATUS_OK) {
3807 		hdd_error_occured(c);	// uses and preserves errno !!!
3808 		unlink(c->filename);
3809 		hdd_chunk_delete(c);
3810 		hdd_chunk_release(oc);
3811 		return status;
3812 	}
3813 	c->blocks = blocks;
3814 	zassert(pthread_mutex_lock(&folderlock));
3815 	c->owner->needrefresh = 1;
3816 	zassert(pthread_mutex_unlock(&folderlock));
3817 	hdd_chunk_release(c);
3818 	hdd_chunk_release(oc);
3819 	return STATUS_OK;
3820 }
3821 
3822 static int hdd_int_delete(uint64_t chunkid,uint32_t version) {
3823 	chunk *c;
3824 	c = hdd_chunk_find(chunkid);
3825 	if (c==NULL) {
3826 		return ERROR_NOCHUNK;
3827 	}
3828 	if (c->version!=version && version>0) {
3829 		hdd_chunk_release(c);
3830 		return ERROR_WRONGVERSION;
3831 	}
3832 	if (unlink(c->filename)<0) {
3833 		if (errno!=ENOENT) {
3834 			hdd_error_occured(c);	// uses and preserves errno !!!
3835 			mfs_arg_errlog_silent(LOG_WARNING,"delete_chunk: file:%s - unlink error",c->filename);
3836 			hdd_chunk_release(c);
3837 			return ERROR_IO;
3838 		} else {
3839 			mfs_arg_errlog_silent(LOG_WARNING,"delete_chunk: file:%s - chunk already deleted !!!",c->filename);
3840 		}
3841 	} else {
3842 		zassert(pthread_mutex_lock(&folderlock));
3843 		c->owner->needrefresh = 1;
3844 		zassert(pthread_mutex_unlock(&folderlock));
3845 	}
3846 	hdd_chunk_delete(c);
3847 	return STATUS_OK;
3848 }
3849 
3850 /* all chunk operations in one call */
3851 // newversion>0 && length==0xFFFFFFFF && copychunkid==0   -> change version
// newversion>0 && length==0xFFFFFFFF && copychunkid>0    -> duplicate
3853 // newversion>0 && length<=MFSCHUNKSIZE && copychunkid==0    -> truncate
3854 // newversion>0 && length<=MFSCHUNKSIZE && copychunkid>0     -> duplicate and truncate
3855 // newversion==0 && length==0                             -> delete
3856 // newversion==0 && length==1                             -> create
3857 // newversion==0 && length==2                             -> check chunk contents
3858 int hdd_chunkop(uint64_t chunkid,uint32_t version,uint32_t newversion,uint64_t copychunkid,uint32_t copyversion,uint32_t length) {
3859 	zassert(pthread_mutex_lock(&statslock));
3860 	if (newversion>0) {
3861 		if (length==0xFFFFFFFF) {
3862 			if (copychunkid==0) {
3863 				stats_version++;
3864 			} else {
3865 				stats_duplicate++;
3866 			}
3867 		} else if (length<=MFSCHUNKSIZE) {
3868 			if (copychunkid==0) {
3869 				stats_truncate++;
3870 			} else {
3871 				stats_duptrunc++;
3872 			}
3873 		}
3874 	} else {
3875 		if (length==0) {
3876 			stats_delete++;
3877 		} else if (length==1) {
3878 			stats_create++;
3879 		} else if (length==2) {
3880 			stats_test++;
3881 		}
3882 	}
3883 	zassert(pthread_mutex_unlock(&statslock));
3884 	if (newversion>0) {
3885 		if (length==0xFFFFFFFF) {
3886 			if (copychunkid==0) {
3887 				return hdd_int_version(chunkid,version,newversion);
3888 			} else {
3889 				return hdd_int_duplicate(chunkid,version,newversion,copychunkid,copyversion);
3890 			}
3891 		} else if (length<=MFSCHUNKSIZE) {
3892 			if (copychunkid==0) {
3893 				return hdd_int_truncate(chunkid,version,newversion,length);
3894 			} else {
3895 				return hdd_int_duptrunc(chunkid,version,newversion,copychunkid,copyversion,length);
3896 			}
3897 		} else {
3898 			return ERROR_EINVAL;
3899 		}
3900 	} else {
3901 		if (length==0) {
3902 			return hdd_int_delete(chunkid,version);
3903 		} else if (length==1) {
3904 			return hdd_int_create(chunkid,version);
3905 		} else if (length==2) {
3906 			return hdd_int_test(chunkid,version);
3907 		} else {
3908 			return ERROR_EINVAL;
3909 		}
3910 	}
3911 }
3912 
3913 chunk* hdd_random_chunk(folder *f) {
3914 	uint32_t try;
3915 	uint32_t pos;
3916 	chunk *c;
3917 	zassert(pthread_mutex_lock(&folderlock));
3918 	zassert(pthread_mutex_lock(&hashlock));
3919 	if (f->chunkcount>0) {
3920 		for (try=0 ; try<RANDOM_CHUNK_RETRIES ; try++) {
3921 			pos = rndu32_ranged(f->chunkcount);
3922 			if (f->chunktab[pos]->state==CH_AVAIL) {
3923 				c = f->chunktab[pos];
3924 				c->state = CH_LOCKED;
3925 				zassert(pthread_mutex_unlock(&hashlock));
3926 				zassert(pthread_mutex_unlock(&folderlock));
3927 				if (c->validattr==0) {
3928 					if (hdd_chunk_getattr(c)) {
3929 						hdd_report_damaged_chunk(c->chunkid);
3930 						unlink(c->filename);
3931 						hdd_chunk_delete(c);
3932 					} else {
3933 						return c;
3934 					}
3935 				} else {
3936 					return c;
3937 				}
3938 				zassert(pthread_mutex_lock(&folderlock));
3939 				zassert(pthread_mutex_lock(&hashlock));
3940 			}
3941 		}
3942 	}
3943 	zassert(pthread_mutex_unlock(&hashlock));
3944 	zassert(pthread_mutex_unlock(&folderlock));
3945 	return NULL;
3946 }
3947 
3948 int hdd_int_move(folder *fsrc,folder *fdst) {
3949 	uint8_t *wptr;
3950 	const uint8_t *rptr;
3951 	uint16_t block;
3952 	uint32_t bcrc;
3953 	int32_t retsize;
3954 	int status;
3955 	int error;
3956 	char *tmp_filename;
3957 	char *new_filename;
3958 	uint32_t leng;
3959 	int new_fd;
3960 	chunk *c;
3961 	uint64_t ts,te;
3962 #ifdef PRESERVE_BLOCK
3963 	uint8_t hdrbuffer[CHUNKHDRSIZE];
3964 #else /* PRESERVE_BLOCK */
3965 	uint8_t *blockbuffer,*hdrbuffer;
3966 	blockbuffer = pthread_getspecific(blockbufferkey);
3967 	if (blockbuffer==NULL) {
3968 # ifdef MMAP_ALLOC
3969 		blockbuffer = mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
3970 # else
3971 		blockbuffer = malloc(MFSBLOCKSIZE);
3972 # endif
3973 		passert(blockbuffer);
3974 		zassert(pthread_setspecific(blockbufferkey,blockbuffer));
3975 	}
3976 	hdrbuffer = pthread_getspecific(hdrbufferkey);
3977 	if (hdrbuffer==NULL) {
3978 		hdrbuffer = malloc(CHUNKHDRSIZE);
3979 		passert(hdrbuffer);
3980 		zassert(pthread_setspecific(hdrbufferkey,hdrbuffer));
3981 	}
3982 #endif /* PRESERVE_BLOCK */
3983 
3984 	c = hdd_random_chunk(fsrc);
3985 	if (c==NULL) {
3986 		syslog(LOG_NOTICE,"move chunk %s -> %s (can't find valid chunk to move)",fsrc->path,fdst->path);
3987 		return ERROR_NOCHUNK;
3988 	}
3989 	syslog(LOG_NOTICE,"move chunk %s -> %s (chunk: %016"PRIX64"_%08"PRIX32")",fsrc->path,fdst->path,c->chunkid,c->version);
3990 	status = hdd_io_begin(c,0);
3991 	if (status!=STATUS_OK) {
3992 		hdd_error_occured(c);
3993 		hdd_report_damaged_chunk(c->chunkid);
3994 		hdd_chunk_release(c);
3995 		return status;
3996 	}
3997 
3998 	/* create tmp file name */
3999 	leng = strlen(fdst->path);
4000 	tmp_filename = malloc(leng+7);
4001 	passert(tmp_filename);
4002 	memcpy(tmp_filename,fdst->path,leng);
4003 	memcpy(tmp_filename+leng,"reptmp",7);
4004 
4005 	/* create new file */
4006 	new_fd = open(tmp_filename,O_RDWR | O_TRUNC | O_CREAT,0666);
4007 	if (new_fd<0) {
4008 		mfs_arg_errlog_silent(LOG_WARNING,"move_chunk: file:%s - hdr open error",tmp_filename);
4009 		hdd_io_end(c);
4010 		hdd_chunk_release(c);
4011 		free(tmp_filename);
4012 		return ERROR_IO;
4013 	}
4014 
4015 	memset(hdrbuffer,0,CHUNKHDRSIZE);
4016 	memcpy(hdrbuffer,MFSSIGNATURE "C 1.0",8);
4017 	wptr = hdrbuffer+8;
4018 	put64bit(&wptr,c->chunkid);
4019 	put32bit(&wptr,c->version);
4020 	memcpy(hdrbuffer+1024,c->crc,4096);
4021 	if (write(new_fd,hdrbuffer,CHUNKHDRSIZE)!=CHUNKHDRSIZE) {
4022 		mfs_arg_errlog_silent(LOG_WARNING,"move_chunk: file:%s - hdr write error",tmp_filename);
4023 		close(new_fd);
4024 		unlink(tmp_filename);
4025 		hdd_io_end(c);
4026 		hdd_chunk_release(c);
4027 		free(tmp_filename);
4028 		return ERROR_IO;
4029 	}
4030 	hdd_stats_write(CHUNKHDRSIZE);
4031 	lseek(c->fd,CHUNKHDRSIZE,SEEK_SET);
4032 	rptr = c->crc;
4033 	for (block=0 ; block<c->blocks ; block++) {
4034 		ts = monotonic_nseconds();
4035 #ifdef PRESERVE_BLOCK
4036 		retsize = read(c->fd,c->block,MFSBLOCKSIZE);
4037 #else /* PRESERVE_BLOCK */
4038 		retsize = read(c->fd,blockbuffer,MFSBLOCKSIZE);
4039 #endif /* PRESERVE_BLOCK */
4040 		error = errno;
4041 		te = monotonic_nseconds();
4042 		hdd_stats_dataread(fsrc,MFSBLOCKSIZE,te-ts);
4043 		if (retsize!=MFSBLOCKSIZE) {
4044 			errno = error;
4045 			hdd_error_occured(c);	// uses and preserves errno !!!
4046 			mfs_arg_errlog_silent(LOG_WARNING,"move_chunk: file:%s - data read error",c->filename);
4047 			close(new_fd);
4048 			unlink(tmp_filename);
4049 			hdd_io_end(c);
4050 			hdd_report_damaged_chunk(c->chunkid);
4051 			hdd_chunk_release(c);
4052 			free(tmp_filename);
4053 			return ERROR_IO;
4054 		}
4055 		hdd_stats_read(MFSBLOCKSIZE);
4056 #ifdef PRESERVE_BLOCK
4057 		c->blockno = block;
4058 #endif
4059 		bcrc = get32bit(&rptr);
4060 #ifdef PRESERVE_BLOCK
4061 		if (bcrc!=mycrc32(0,c->block,MFSBLOCKSIZE)) {
4062 #else /* PRESERVE_BLOCK */
4063 		if (bcrc!=mycrc32(0,blockbuffer,MFSBLOCKSIZE)) {
4064 #endif /* PRESERVE_BLOCK */
4065 			errno = 0;	// set anything to errno
4066 			hdd_error_occured(c);	// uses and preserves errno !!!
4067 			syslog(LOG_WARNING,"move_chunk: file:%s - crc error",c->filename);
4068 			close(new_fd);
4069 			unlink(tmp_filename);
4070 			hdd_io_end(c);
4071 			hdd_chunk_release(c);
4072 			free(tmp_filename);
4073 			return ERROR_CRC;
4074 		}
4075 		ts = monotonic_nseconds();
4076 #ifdef PRESERVE_BLOCK
4077 		retsize = write(new_fd,c->block,MFSBLOCKSIZE);
4078 #else /* PRESERVE_BLOCK */
4079 		retsize = write(new_fd,blockbuffer,MFSBLOCKSIZE);
4080 #endif /* PRESERVE_BLOCK */
4081 		te = monotonic_nseconds();
4082 		hdd_stats_datawrite(fdst,MFSBLOCKSIZE,te-ts);
4083 		if (retsize!=MFSBLOCKSIZE) {
4084 			mfs_arg_errlog_silent(LOG_WARNING,"move_chunk: file:%s - data write error",tmp_filename);
4085 			close(new_fd);
4086 			unlink(tmp_filename);
4087 			hdd_io_end(c);
4088 			hdd_chunk_release(c);
4089 			free(tmp_filename);
4090 			return ERROR_IO;	//write error
4091 		}
4092 		hdd_stats_write(MFSBLOCKSIZE);
4093 	}
4094 	status = hdd_io_end(c);
4095 	if (status!=STATUS_OK) {
4096 		hdd_error_occured(c);	// uses and preserves errno !!!
4097 		close(new_fd);
4098 		unlink(tmp_filename);
4099 		hdd_report_damaged_chunk(c->chunkid);
4100 		hdd_chunk_release(c);
4101 		free(tmp_filename);
4102 		return status;
4103 	}
4104 
4105 	/* create new file name */
4106 	new_filename = malloc(leng+39);
4107 	passert(new_filename);
4108 	memcpy(new_filename,fdst->path,leng);
4109 	sprintf(new_filename+leng,"%02X/chunk_%016"PRIX64"_%08"PRIX32".mfs",(unsigned int)(c->chunkid&255),c->chunkid,c->version);
4110 
4111 	if (rename(tmp_filename,new_filename)<0) {
4112 		mfs_arg_errlog_silent(LOG_WARNING,"move_chunk: file:%s->%s - rename error",tmp_filename,new_filename);
4113 		close(new_fd);
4114 		unlink(tmp_filename);
4115 		hdd_chunk_release(c);
4116 		free(tmp_filename);
4117 		free(new_filename);
4118 		return ERROR_IO;
4119 	}
4120 
4121 	if (c->fd>=0) {
4122 		close(c->fd);
4123 		c->fd = new_fd;
4124 	} else {
4125 		close(new_fd);
4126 	}
4127 
4128 	unlink(c->filename);
4129 	free(c->filename);
4130 	free(tmp_filename);
4131 	c->filename = new_filename;
4132 	zassert(pthread_mutex_lock(&folderlock));
4133 	fsrc->needrefresh = 1;
4134 	fdst->needrefresh = 1;
4135 	hdd_remove_chunk_from_folder(c,fsrc);
4136 	hdd_add_chunk_to_folder(c,fdst);
4137 	zassert(pthread_mutex_unlock(&folderlock));
4138 	zassert(pthread_mutex_lock(&testlock));
4139 	hdd_remove_chunk_from_test_chain(c,fsrc);
4140 	hdd_add_chunk_to_test_chain(c,fdst);
4141 	zassert(pthread_mutex_unlock(&testlock));
4142 	hdd_chunk_release(c);
4143 	return STATUS_OK;
4144 }
4145 
/*
 * Rebalance thread body - runs until 'term' is set and then returns 'arg'.
 *
 * Each pass of the main loop:
 *  - sleeps one second and retries when folderactions is zero, when less than
 *    60 seconds passed since the previous rebalance finished, or when
 *    HDDRebalancePerc is zero,
 *  - marks folders (f->tmpbalancemode) as REBALANCE_SRC / REBALANCE_DST, either
 *    because of forced modes (REBALANCE_FORCE_SRC / REBALANCE_FORCE_DST) or
 *    because their usage differs from the average usage,
 *  - picks one source and one destination folder (the candidates with the
 *    smallest 'err' value) and moves a single chunk with hdd_int_move(),
 *  - when HDDRebalancePerc is below 100, sleeps after the move so that the move
 *    itself takes about that percentage of the elapsed time.
 */
void* hdd_rebalance_thread(void *arg) {
	folder *f,*fdst,*fsrc;
	double aboveminerr,belowminerr,err,expdist;
	double usage;
	double avgusage;
	double rebalancediff;
	uint32_t avgcount;
	uint32_t belowcnt;
	uint32_t abovecnt;
	uint64_t belowsum;
	uint64_t abovesum;
	uint8_t changed;
	uint8_t rebalance_servers;	// bit mask: 1 - some destination found, 2 - some source found
	uint8_t rebalance_is_on;	// set after a move attempt, cleared when no src/dst pair is found
	double rebalance_finished;	// monotonic time of the pass in which the last rebalance ended
	double monotonic_time;
	uint32_t perc;
	uint64_t st,en;

	rebalance_is_on = 0;
	rebalance_finished = 0;
	for (;;) {
		// HDDRebalancePerc is read under testlock
		zassert(pthread_mutex_lock(&testlock));
		perc = HDDRebalancePerc;
		zassert(pthread_mutex_unlock(&testlock));
		zassert(pthread_mutex_lock(&termlock));
		if (term) {
			zassert(pthread_mutex_unlock(&termlock));
			return arg;
		}
		zassert(pthread_mutex_unlock(&termlock));

		monotonic_time = monotonic_seconds();
		zassert(pthread_mutex_lock(&folderlock));
		// nothing to do: folder actions disabled, still within 60 seconds after the last rebalance, or rebalance turned off
		if (folderactions==0 || (rebalance_finished + 60.0) > monotonic_time || perc==0) {
			zassert(pthread_mutex_unlock(&folderlock));
			sleep(1);
			continue;
		}
		// check REBALANCE_FORCE_SRC and REBALANCE_FORCE_DST
		abovecnt = 0;
		belowcnt = 0;
		avgcount = 0;
		changed = 0;
		// count usable folders by balance mode, refresh their usage when needed and clear temporary marks of all folders
		for (f=folderhead ; f ; f=f->next) {
			if (f->damaged==0 && f->toremove==0 && f->todel==0 && f->scanstate==SCST_WORKING && f->total>0) {
//				if (f->needrefresh || (f->lastrefresh<monotonic_time && rebalance_is_on)) {
				if (f->needrefresh || rebalance_is_on) {
					hdd_refresh_usage(f);
					f->needrefresh = 0;
					f->lastrefresh = monotonic_time;
					changed = 1;
				}
				if (f->balancemode==REBALANCE_FORCE_SRC) {
					abovecnt++;
				} else if (f->balancemode==REBALANCE_FORCE_DST) {
					belowcnt++;
				} else {
					avgcount++;
				}
			}
			f->tmpbalancemode = REBALANCE_NONE;
		}
		rebalance_servers = 0;
		if ((abovecnt>0 && (belowcnt+avgcount)>0) || (belowcnt>0 && (abovecnt+avgcount)>0)) { // force data movement
			for (f=folderhead ; f ; f=f->next) {
				if (f->damaged==0 && f->toremove==0 && f->todel==0 && f->scanstate==SCST_WORKING && f->total>0) {
					usage = f->total-f->avail;
					usage /= f->total;
					if (abovecnt==0) {
						// only forced destinations exist - every other folder holding chunks becomes a source
						if (f->balancemode==REBALANCE_FORCE_DST && usage<REBALANCE_DST_MAX_USAGE) {
							f->tmpbalancemode = REBALANCE_DST;
							rebalance_servers |= 1;
						} else if (f->chunkcount>0) {
							f->tmpbalancemode = REBALANCE_SRC;
							rebalance_servers |= 2;
						}
					} else if (belowcnt==0) {
						// only forced sources exist - every other folder that is not too full becomes a destination
						if (f->balancemode==REBALANCE_FORCE_SRC && f->chunkcount>0) {
							f->tmpbalancemode = REBALANCE_SRC;
							rebalance_servers |= 2;
						} else if (usage<REBALANCE_DST_MAX_USAGE) {
							f->tmpbalancemode = REBALANCE_DST;
							rebalance_servers |= 1;
						}
					} else {
						// both kinds exist - move data only between forced sources and forced destinations
						if (f->balancemode==REBALANCE_FORCE_DST && usage<REBALANCE_DST_MAX_USAGE) {
							f->tmpbalancemode = REBALANCE_DST;
							rebalance_servers |= 1;
						} else if (f->balancemode==REBALANCE_FORCE_SRC && f->chunkcount>0) {
							f->tmpbalancemode = REBALANCE_SRC;
							rebalance_servers |= 2;
						}
					}
				}
			}
		} else { // usage rebalance
			// allowed distance from the average usage; halved while a rebalance is already running
			rebalancediff = REBALANCE_DIFF_MAX;
			if (rebalance_is_on) {
				rebalancediff /= 2.0;
			}
			// average usage of all usable folders bigger than REBALANCE_TOTAL_MIN
			avgusage = 0.0;
			avgcount = 0;
			for (f=folderhead ; f ; f=f->next) {
				if (f->damaged==0 && f->toremove==0 && f->todel==0 && f->scanstate==SCST_WORKING && f->total>REBALANCE_TOTAL_MIN) {
					usage = f->total-f->avail;
					usage /= f->total;
					avgusage += usage;
					avgcount++;
				}
			}
			if (avgcount>0) {
				avgusage /= avgcount;
				// count folders lying below / above the allowed band around the average
				belowcnt = 0;
				belowsum = 0;
				abovecnt = 0;
				abovesum = 0;
				for (f=folderhead ; f ; f=f->next) {
					if (f->damaged==0 && f->toremove==0 && f->todel==0 && f->scanstate==SCST_WORKING && f->total>REBALANCE_TOTAL_MIN) {
						usage = f->total-f->avail;
						usage /= f->total;
						if (usage < avgusage - rebalancediff) {
							belowcnt++;
							belowsum+=f->total;
						} else if (usage > avgusage + rebalancediff) {
							abovecnt++;
							abovesum+=f->total;
						}
					}
				}
				if (abovecnt>0 || belowcnt>0) {
					// when one side of the band is empty, folders inside the band are used for that side
					for (f=folderhead ; f ; f=f->next) {
						if (f->damaged==0 && f->toremove==0 && f->todel==0 && f->scanstate==SCST_WORKING && f->total>REBALANCE_TOTAL_MIN) {
							usage = f->total-f->avail;
							usage /= f->total;
							if ((((usage < avgusage - rebalancediff) && belowcnt>0) || ((usage <= avgusage + rebalancediff) && belowcnt==0)) && usage<REBALANCE_DST_MAX_USAGE) {
								f->tmpbalancemode = REBALANCE_DST;
								rebalance_servers |= 1;
							} else if ((((usage > avgusage + rebalancediff) && abovecnt>0) || ((usage >= avgusage - rebalancediff) && abovecnt==0)) && f->chunkcount>0) {
								f->tmpbalancemode = REBALANCE_SRC;
								rebalance_servers |= 2;
							}
						}
					}
				}
			}
		}
		fdst = NULL;
		fsrc = NULL;
		if (rebalance_servers==3) {	// at least one source and one destination have been marked
			// total sizes of all marked destinations (belowsum) and sources (abovesum)
			belowcnt = 0;
			belowsum = 0;
			abovecnt = 0;
			abovesum = 0;
			for (f=folderhead ; f ; f=f->next) {
				if (f->tmpbalancemode == REBALANCE_DST) {
					belowcnt++;
					belowsum+=f->total;
				} else if (f->tmpbalancemode == REBALANCE_SRC) {
					abovecnt++;
					abovesum+=f->total;
				}
			}
			// choose the destination and the source with the smallest 'err' value
			aboveminerr = 0.0;
			belowminerr = 0.0;
			for (f=folderhead ; f ; f=f->next) {
				if (f->tmpbalancemode == REBALANCE_DST) {
					f->write_dist++;
					if (f->write_first) {
						err = 1.0;
					} else {
						expdist = belowsum;
						expdist /= f->total;
						err = (expdist + f->write_corr) / f->write_dist;
					}
					if (fdst==NULL || err<belowminerr) {
						belowminerr = err;
						fdst = f;
					}
				} else if (f->tmpbalancemode == REBALANCE_SRC) {
					f->read_dist++;
					if (f->read_first) {
						err = 1.0;
					} else {
						expdist = abovesum;
						expdist /= f->total;
						err = (expdist + f->read_corr) / f->read_dist;
					}
					if (fsrc==NULL || err<aboveminerr) {
						aboveminerr = err;
						fsrc = f;
					}
				}
			}
		}
		if (fdst && fsrc) {
//			syslog(LOG_NOTICE,"debug: move %s -> %s",fsrc->path,fdst->path);
			// update correction values and reset distance counters of the chosen folders
			if (fsrc->read_first) {
				fsrc->read_first = 0;
			} else {
				expdist = abovesum;
				expdist /= fsrc->total;
				fsrc->read_corr += expdist - fsrc->read_dist;
			}
			fsrc->read_dist = 0;
			if (fdst->write_first) {
				fdst->write_first = 0;
			} else {
				expdist = belowsum;
				expdist /= fdst->total;
				fdst->write_corr += expdist - fdst->write_dist;
			}
			fdst->write_dist = 0;
			fsrc->rebalance_in_progress = 1;
			fdst->rebalance_in_progress = 1;
			zassert(pthread_mutex_unlock(&folderlock));
			if (changed) {
				zassert(pthread_mutex_lock(&dclock));
				hddspacechanged = 1;
				zassert(pthread_mutex_unlock(&dclock));
			}
			// the move itself is done without folderlock; its status is ignored here
			st = monotonic_useconds();
			(void)hdd_int_move(fsrc,fdst);
			en = monotonic_useconds();
			zassert(pthread_mutex_lock(&folderlock));
			fsrc->rebalance_in_progress = 0;
			fdst->rebalance_in_progress = 0;
			fdst->rebalance_last_usec = en;
			zassert(pthread_mutex_unlock(&folderlock));
			rebalance_is_on = 1;
			// throttle: sleep for elapsed*100/perc-elapsed microseconds
			if (perc<100 && en>st) {
				en -= st;
				st = en;
				en *= 100;
				en /= perc;
				en -= st;
				if (en>0) {
					portable_usleep(en);
				}
			}
		} else {
			zassert(pthread_mutex_unlock(&folderlock));
			if (changed) {
				zassert(pthread_mutex_lock(&dclock));
				hddspacechanged = 1;
				zassert(pthread_mutex_unlock(&dclock));
			}
			// rebalance has just ended - reset source statistics of all folders and remember the time
			if (rebalance_is_on) {
				zassert(pthread_mutex_lock(&folderlock));
				for (f=folderhead ; f ; f=f->next) {
					f->read_dist = 0;
					f->read_first = 1;
					f->read_corr = 0.0;
				}
				zassert(pthread_mutex_unlock(&folderlock));
				rebalance_finished = monotonic_time;
			}
			rebalance_is_on = 0;
			sleep(1);
		}
	}
	return arg;
}
4409 
4410 void* hdd_tester_thread(void* arg) {
4411 	folder *f,*of;
4412 	chunk *c;
4413 	uint64_t chunkid;
4414 	uint32_t version;
4415 	uint32_t freq;
4416 	uint32_t cnt;
4417 	uint64_t st,en;
4418 	char *path;
4419 
4420 	f = folderhead;
4421 	freq = HDDTestFreq;
4422 	cnt = 0;
4423 	for (;;) {
4424 		st = monotonic_useconds();
4425 		path = NULL;
4426 		chunkid = 0;
4427 		version = 0;
4428 		zassert(pthread_mutex_lock(&folderlock));
4429 		zassert(pthread_mutex_lock(&hashlock));
4430 		zassert(pthread_mutex_lock(&testlock));
4431 		if (testerreset) {
4432 			testerreset = 0;
4433 			f = folderhead;
4434 			freq = HDDTestFreq;
4435 			cnt = 0;
4436 		}
4437 		cnt++;
4438 		if (cnt<freq || freq==0 || folderactions==0 || folderhead==NULL) {
4439 			path = NULL;
4440 		} else {
4441 			cnt = 0;
4442 			of = f;
4443 			do {
4444 				f = f->next;
4445 				if (f==NULL) {
4446 					f = folderhead;
4447 				}
4448 			} while ((f->damaged || f->todel || f->toremove || f->scanstate!=SCST_WORKING) && of!=f);
4449 			if (of==f && (f->damaged || f->todel || f->toremove || f->scanstate!=SCST_WORKING)) {	// all folders are unavailable
4450 				path = NULL;
4451 			} else {
4452 				c = f->testhead;
4453 				if (c && c->state==CH_AVAIL) {
4454 					chunkid = c->chunkid;
4455 					version = c->version;
4456 					path = strdup(c->filename);
4457 					passert(path);
4458 				}
4459 			}
4460 		}
4461 		zassert(pthread_mutex_unlock(&testlock));
4462 		zassert(pthread_mutex_unlock(&hashlock));
4463 		zassert(pthread_mutex_unlock(&folderlock));
4464 		if (path) {
4465 //			syslog(LOG_NOTICE,"testing chunk: %s",path);
4466 			(void)hdd_int_test(chunkid,version); // ignore status here - hdd_int_test on error does everything itself
4467 			free(path);
4468 		}
4469 		zassert(pthread_mutex_lock(&termlock));
4470 		if (term) {
4471 			zassert(pthread_mutex_unlock(&termlock));
4472 			return arg;
4473 		}
4474 		zassert(pthread_mutex_unlock(&termlock));
4475 		en = monotonic_useconds();
4476 		if (en>st) {
4477 			en-=st;
4478 			if (en<1000000) {
4479 				portable_usleep(1000000-en);
4480 			}
4481 		}
4482 	}
4483 	return arg;
4484 }
4485 
4486 void hdd_testshuffle(folder *f) {
4487 	uint32_t i,j,chunksno;
4488 	chunk **csorttab,*c;
4489 	zassert(pthread_mutex_lock(&testlock));
4490 	chunksno = 0;
4491 	for (c=f->testhead ; c ; c=c->testnext) {
4492 		chunksno++;
4493 	}
4494 	if (chunksno>0) {
4495 		csorttab = malloc(sizeof(chunk*)*chunksno);
4496 		passert(csorttab);
4497 		chunksno = 0;
4498 		for (c=f->testhead ; c ; c=c->testnext) {
4499 			csorttab[chunksno++] = c;
4500 		}
4501 		if (chunksno>1) {
4502 			for (i=0 ; i<chunksno-1 ; i++) {
4503 				j = i+rndu32_ranged(chunksno-i);
4504 				if (j!=i) {
4505 					c = csorttab[i];
4506 					csorttab[i] = csorttab[j];
4507 					csorttab[j] = c;
4508 				}
4509 			}
4510 		}
4511 	} else {
4512 		csorttab = NULL;
4513 	}
4514 	f->testhead = NULL;
4515 	f->testtail = &(f->testhead);
4516 	for (i=0 ; i<chunksno ; i++) {
4517 		c = csorttab[i];
4518 		c->testnext = NULL;
4519 		c->testprev = f->testtail;
4520 		*(c->testprev) = c;
4521 		f->testtail = &(c->testnext);
4522 	}
4523 	if (csorttab) {
4524 		free(csorttab);
4525 	}
4526 	zassert(pthread_mutex_unlock(&testlock));
4527 }
4528 
4529 /*
4530 int hdd_testcompare(const void *a,const void *b) {
4531 	chunk const* *aa = (chunk const* *)a;
4532 	chunk const* *bb = (chunk const* *)b;
4533 	return (**aa).testtime - (**bb).testtime;
4534 }
4535 
4536 void hdd_testsort(folder *f) {
4537 	uint32_t i,chunksno;
4538 	chunk **csorttab,*c;
4539 	zassert(pthread_mutex_lock(&testlock));
4540 	chunksno = 0;
4541 	for (c=f->testhead ; c ; c=c->testnext) {
4542 		chunksno++;
4543 	}
4544 	if (chunksno>0) {
4545 		csorttab = malloc(sizeof(chunk*)*chunksno);
4546 		passert(csorttab);
4547 		chunksno = 0;
4548 		for (c=f->testhead ; c ; c=c->testnext) {
4549 			csorttab[chunksno++] = c;
4550 		}
4551 		qsort(csorttab,chunksno,sizeof(chunk*),hdd_testcompare);
4552 	} else {
4553 		csorttab = NULL;
4554 	}
4555 	f->testhead = NULL;
4556 	f->testtail = &(f->testhead);
4557 	for (i=0 ; i<chunksno ; i++) {
4558 		c = csorttab[i];
4559 		c->testnext = NULL;
4560 		c->testprev = f->testtail;
4561 		*(c->testprev) = c;
4562 		f->testtail = &(c->testnext);
4563 	}
4564 	if (csorttab) {
4565 		free(csorttab);
4566 	}
4567 	zassert(pthread_mutex_unlock(&testlock));
4568 }
4569 */
4570 
4571 /* initialization */
4572 
/*
 * Parse a chunk file name of the form
 * "chunk_<16 upper-case hex digits>_<8 upper-case hex digits>.mfs".
 * On success the decoded values are stored in *chunkid and *version and 0 is
 * returned; on any mismatch -1 is returned and both outputs are left untouched.
 */
static inline int hdd_check_filename(const char *fname,uint64_t *chunkid,uint32_t *version) {
	uint64_t idacc;
	uint32_t veracc;
	uint32_t pos;
	uint8_t nibble;
	char sym;

	if (strncmp(fname,"chunk_",6)!=0) {
		return -1;
	}
	idacc = 0;
	veracc = 0;
	// positions 6..21 hold the chunk id, 22 the separator, 23..30 the version;
	// a name that ends too early is rejected here, because '\0' is neither a hex digit nor '_'
	for (pos=6 ; pos<31 ; pos++) {
		sym = fname[pos];
		if (pos==22) {
			if (sym!='_') {
				return -1;
			}
			continue;
		}
		if (sym>='0' && sym<='9') {
			nibble = sym-'0';
		} else if (sym>='A' && sym<='F') {
			nibble = 10+(sym-'A');
		} else {
			return -1;
		}
		if (pos<22) {
			idacc = (idacc<<4)|nibble;
		} else {
			veracc = (veracc<<4)|nibble;
		}
	}
	if (strcmp(fname+31,".mfs")!=0) {
		return -1;
	}
	*chunkid = idacc;
	*version = veracc;
	return 0;
}
4618 
4619 static inline void hdd_add_chunk(folder *f,const char *fullname,uint64_t chunkid,uint32_t version,uint8_t todel) {
4620 	struct stat sb;
4621 	folder *prevf,*currf;
4622 	chunk *c;
4623 	uint16_t blocks;
4624 	uint8_t validattr;
4625 
4626 	if (f->sizelimit) {
4627 		if (stat(fullname,&sb)<0) {
4628 			if (f->todel<2) {
4629 				unlink(fullname);
4630 			}
4631 			return;
4632 		}
4633 		if ((sb.st_mode & S_IFMT) != S_IFREG) {
4634 			mfs_arg_syslog(LOG_WARNING,"%s: is not regular file",fullname);
4635 			return;
4636 		}
4637 		if (sb.st_size<CHUNKHDRSIZE || sb.st_size>(CHUNKHDRSIZE+MFSCHUNKSIZE) || ((sb.st_size-CHUNKHDRSIZE)&MFSBLOCKMASK)!=0) {
4638 			if (f->todel<2) {
4639 				unlink(fullname);	// remove wrong chunk
4640 			}
4641 			return;
4642 		}
4643 		blocks = (sb.st_size - CHUNKHDRSIZE) / MFSBLOCKSIZE;
4644 		validattr = 1;
4645 	} else {
4646 		blocks = 0;
4647 		validattr = 0;
4648 	}
4649 	prevf = NULL;
4650 	currf = f;
4651 	c = hdd_chunk_get(chunkid,CH_NEW_AUTO);
4652 	if (c==NULL) { // already have this chunk, but with error state - try, to create new one
4653 		c = hdd_chunk_get(chunkid,CH_NEW_AUTO);
4654 		if (c==NULL) {
4655 			mfs_arg_syslog(LOG_WARNING,"can't create chunk record for file: %s",fullname);
4656 			return;
4657 		}
4658 	}
4659 	if (c->filename!=NULL) {	// already have this chunk
4660 		if (version <= c->version) {	// current chunk is older
4661 			if (todel<2) { // this is R/W fs?
4662 				unlink(fullname); // if yes then remove file
4663 			}
4664 			currf = NULL;
4665 		} else { // current chunk is better, so use it, and clear older one
4666 			prevf = c->owner;
4667 			if (c->todel<2) { // current chunk is on R/W fs?
4668 				unlink(c->filename); // if yes then remove file
4669 			}
4670 			free(c->filename);
4671 			c->filename = strdup(fullname);
4672 			passert(c->filename);
4673 			c->version = version;
4674 			c->blocks = blocks;
4675 			c->validattr = validattr;
4676 			c->todel = todel;
4677 //			c->testtime = (sb.st_atime>sb.st_mtime)?sb.st_atime:sb.st_mtime;
4678 			zassert(pthread_mutex_lock(&testlock));
4679 			hdd_remove_chunk_from_test_chain(c,prevf);
4680 			hdd_add_chunk_to_test_chain(c,currf);
4681 			zassert(pthread_mutex_unlock(&testlock));
4682 		}
4683 	} else {
4684 		c->filename = strdup(fullname);
4685 		passert(c->filename);
4686 		c->version = version;
4687 		c->blocks = blocks;
4688 		c->validattr = validattr;
4689 		c->todel = todel;
4690 //		c->testtime = (sb.st_atime>sb.st_mtime)?sb.st_atime:sb.st_mtime;
4691 		zassert(pthread_mutex_lock(&testlock));
4692 		hdd_add_chunk_to_test_chain(c,currf);
4693 		zassert(pthread_mutex_unlock(&testlock));
4694 		hdd_report_new_chunk(c->chunkid,c->version|(todel?0x80000000:0));
4695 	}
4696 	zassert(pthread_mutex_lock(&folderlock));
4697 	if (prevf) {
4698 		hdd_remove_chunk_from_folder(c,prevf);
4699 	}
4700 	if (currf) {
4701 		hdd_add_chunk_to_folder(c,currf);
4702 	}
4703 	zassert(pthread_mutex_unlock(&folderlock));
4704 	hdd_chunk_release(c);
4705 }
4706 
4707 void* hdd_folder_scan(void *arg) {
4708 	folder *f = (folder*)arg;
4709 	DIR *dd;
4710 	struct dirent *de,*destorage;
4711 	uint16_t subf;
4712 	char *fullname,*oldfullname;
4713 	uint16_t plen,oldplen;
4714 	uint64_t namechunkid;
4715 	uint32_t nameversion;
4716 	uint32_t tcheckcnt;
4717 	uint8_t scanterm,todel;
4718 //	uint8_t progressreportmode;
4719 	uint8_t lastperc,currentperc;
4720 	uint32_t lasttime,currenttime,begintime;
4721 
4722 	begintime = time(NULL);
4723 
4724 	zassert(pthread_mutex_lock(&folderlock));
4725 	todel = f->todel;
4726 	hdd_refresh_usage(f);
4727 //	progressreportmode = wait_for_scan;
4728 	zassert(pthread_mutex_unlock(&folderlock));
4729 
4730 	plen = strlen(f->path);
4731 	oldplen = plen;
4732 
4733 	fullname = malloc(plen+39);
4734 	passert(fullname);
4735 
4736 	memcpy(fullname,f->path,plen);
4737 	fullname[plen]='\0';
4738 	if (todel==0) {
4739 		mkdir(fullname,0755);
4740 	}
4741 
4742 	fullname[plen++]='_';
4743 	fullname[plen++]='_';
4744 	fullname[plen++]='/';
4745 	fullname[plen]='\0';
4746 
4747 	/* size of name added to size of structure because on some os'es d_name has size of 1 byte */
4748 	destorage = (struct dirent*)malloc(sizeof(struct dirent)+pathconf(f->path,_PC_NAME_MAX)+1);
4749 	passert(destorage);
4750 
4751 	scanterm = 0;
4752 
4753 	zassert(pthread_mutex_lock(&dclock));
4754 	hddspacechanged = 1;
4755 	zassert(pthread_mutex_unlock(&dclock));
4756 
4757 	if (todel==0) {
4758 		for (subf=0 ; subf<256 ; subf++) {
4759 			fullname[plen-3]="0123456789ABCDEF"[subf>>4];
4760 			fullname[plen-2]="0123456789ABCDEF"[subf&15];
4761 			mkdir(fullname,0755);
4762 		}
4763 
4764 /* move chunks from "X/name" to "XX/name" */
4765 
4766 		oldfullname = malloc(oldplen+38);
4767 		passert(oldfullname);
4768 		memcpy(oldfullname,f->path,oldplen);
4769 		oldfullname[oldplen++]='_';
4770 		oldfullname[oldplen++]='/';
4771 		oldfullname[oldplen]='\0';
4772 
4773 		for (subf=0 ; subf<16 ; subf++) {
4774 			oldfullname[oldplen-2]="0123456789ABCDEF"[subf];
4775 			oldfullname[oldplen]='\0';
4776 			dd = opendir(oldfullname);
4777 			if (dd==NULL) {
4778 				continue;
4779 			}
4780 			while (readdir_r(dd,destorage,&de)==0 && de!=NULL) {
4781 				if (hdd_check_filename(de->d_name,&namechunkid,&nameversion)<0) {
4782 					continue;
4783 				}
4784 				memcpy(oldfullname+oldplen,de->d_name,36);
4785 				memcpy(fullname+plen,de->d_name,36);
4786 				fullname[plen-3]="0123456789ABCDEF"[(namechunkid>>4)&15];
4787 				fullname[plen-2]="0123456789ABCDEF"[namechunkid&15];
4788 				rename(oldfullname,fullname);
4789 			}
4790 			oldfullname[oldplen]='\0';
4791 			rmdir(oldfullname);
4792 			closedir(dd);
4793 		}
4794 		free(oldfullname);
4795 
4796 	}
4797 /* scan new file names */
4798 
4799 	tcheckcnt = 0;
4800 	lastperc = 0;
4801 	lasttime = time(NULL);
4802 	for (subf=0 ; subf<256 && scanterm==0 ; subf++) {
4803 		fullname[plen-3]="0123456789ABCDEF"[subf>>4];
4804 		fullname[plen-2]="0123456789ABCDEF"[subf&15];
4805 		fullname[plen]='\0';
4806 //		mkdir(fullname,0755);
4807 		dd = opendir(fullname);
4808 		if (dd) {
4809 			while (readdir_r(dd,destorage,&de)==0 && de!=NULL && scanterm==0) {
4810 //#warning debug
4811 //				portable_usleep(100000);
4812 //
4813 				if (hdd_check_filename(de->d_name,&namechunkid,&nameversion)<0) {
4814 					continue;
4815 				}
4816 				memcpy(fullname+plen,de->d_name,36);
4817 				hdd_add_chunk(f,fullname,namechunkid,nameversion,todel);
4818 				tcheckcnt++;
4819 				if (tcheckcnt>=1000) {
4820 					zassert(pthread_mutex_lock(&folderlock));
4821 					if (f->scanstate==SCST_SCANTERMINATE) {
4822 						scanterm = 1;
4823 					}
4824 					zassert(pthread_mutex_unlock(&folderlock));
4825 					// portable_usleep(100000); - slow down scanning (also change 1000 in 'if' to something much smaller) - for tests
4826 					tcheckcnt = 0;
4827 				}
4828 			}
4829 			closedir(dd);
4830 		}
4831 		currenttime = time(NULL);
4832 		currentperc = ((subf*100.0)/256.0);
4833 		if (currentperc>lastperc && currenttime>lasttime) {
4834 			lastperc=currentperc;
4835 			lasttime=currenttime;
4836 			zassert(pthread_mutex_lock(&folderlock));
4837 			f->scanprogress = currentperc;
4838 			zassert(pthread_mutex_unlock(&folderlock));
4839 			zassert(pthread_mutex_lock(&dclock));
4840 			hddspacechanged = 1; // report chunk count to master
4841 			zassert(pthread_mutex_unlock(&dclock));
4842 			syslog(LOG_NOTICE,"scanning folder %s: %"PRIu8"%% (%"PRIu32"s)",f->path,lastperc,currenttime-begintime);
4843 		}
4844 	}
4845 	free(fullname);
4846 	free(destorage);
4847 //	fprintf(stderr,"hdd space manager: %s: %"PRIu32" chunks found\n",f->path,f->chunkcount);
4848 
4849 	hdd_testshuffle(f);
4850 
4851 	zassert(pthread_mutex_lock(&folderlock));
4852 	if (f->scanstate==SCST_SCANTERMINATE) {
4853 		syslog(LOG_NOTICE,"scanning folder %s: interrupted",f->path);
4854 	} else {
4855 		syslog(LOG_NOTICE,"scanning folder %s: complete (%"PRIu32"s)",f->path,(uint32_t)(time(NULL))-begintime);
4856 	}
4857 	f->scanstate = SCST_SCANFINISHED;
4858 	f->scanprogress = 100;
4859 	zassert(pthread_mutex_unlock(&folderlock));
4860 	return NULL;
4861 }
4862 
4863 void* hdd_folders_thread(void *arg) {
4864 	for (;;) {
4865 		hdd_check_folders();
4866 		zassert(pthread_mutex_lock(&termlock));
4867 		if (term) {
4868 			zassert(pthread_mutex_unlock(&termlock));
4869 			return arg;
4870 		}
4871 		zassert(pthread_mutex_unlock(&termlock));
4872 		sleep(1);
4873 	}
4874 	return arg;
4875 }
4876 
4877 void* hdd_delayed_thread(void *arg) {
4878 	for (;;) {
4879 		hdd_delayed_ops();
4880 		zassert(pthread_mutex_lock(&termlock));
4881 		if (term) {
4882 			zassert(pthread_mutex_unlock(&termlock));
4883 			return arg;
4884 		}
4885 		zassert(pthread_mutex_unlock(&termlock));
4886 		sleep(DELAYEDSTEP);
4887 	}
4888 	return arg;
4889 }
4890 
4891 #ifndef PRESERVE_BLOCK
4892 # ifdef MMAP_ALLOC
4893 void hdd_blockbuffer_free(void *addr) {
4894 	munmap(addr,MFSBLOCKSIZE);
4895 }
4896 # endif
4897 #endif
4898 
4899 void hdd_term(void) {
4900 	uint32_t i;
4901 	folder *f,*fn;
4902 	chunk *c,*cn;
4903 	dopchunk *dc,*dcn;
4904 	cntcond *cc,*ccn;
4905 	lostchunk *lc,*lcn;
4906 	newchunk *nc,*ncn;
4907 	damagedchunk *dmc,*dmcn;
4908 
4909 	zassert(pthread_mutex_lock(&termlock));
4910 	i = term; // if term is non zero here then it means that threads have not been started, so do not join with them
4911 	term = 1;
4912 	zassert(pthread_mutex_unlock(&termlock));
4913 	if (i==0) {
4914 		zassert(pthread_join(testerthread,NULL));
4915 		zassert(pthread_join(foldersthread,NULL));
4916 		zassert(pthread_join(rebalancethread,NULL));
4917 		zassert(pthread_join(delayedthread,NULL));
4918 	}
4919 	zassert(pthread_mutex_lock(&folderlock));
4920 	i = 0;
4921 	for (f=folderhead ; f ; f=f->next) {
4922 		if (f->scanstate==SCST_SCANINPROGRESS) {
4923 			f->scanstate = SCST_SCANTERMINATE;
4924 		}
4925 		if (f->scanstate==SCST_SCANTERMINATE || f->scanstate==SCST_SCANFINISHED) {
4926 			i++;
4927 		}
4928 	}
4929 	zassert(pthread_mutex_unlock(&folderlock));
4930 //	syslog(LOG_NOTICE,"waiting for scanning threads (%"PRIu32")",i);
4931 	while (i>0) {
4932 		portable_usleep(10000); // not very elegant solution.
4933 		zassert(pthread_mutex_lock(&folderlock));
4934 		for (f=folderhead ; f ; f=f->next) {
4935 			if (f->scanstate==SCST_SCANFINISHED) {
4936 				zassert(pthread_join(f->scanthread,NULL));
4937 				f->scanstate = SCST_WORKING;	// any state - to prevent calling pthread_join again
4938 				i--;
4939 			}
4940 		}
4941 		zassert(pthread_mutex_unlock(&folderlock));
4942 	}
4943 	for (i=0 ; i<HASHSIZE ; i++) {
4944 		for (c=hashtab[i] ; c ; c=cn) {
4945 			cn = c->next;
4946 			if (c->state==CH_AVAIL) {
4947 				if (c->crcchanged) {
4948 					syslog(LOG_WARNING,"hdd_term: CRC not flushed - writing now");
4949 					if (chunk_writecrc(c)!=STATUS_OK) {
4950 						mfs_arg_errlog_silent(LOG_WARNING,"hdd_term: file:%s - write error",c->filename);
4951 					}
4952 				}
4953 				if (c->fd>=0) {
4954 					close(c->fd);
4955 					hdd_open_files_handle(OF_AFTER_CLOSE);
4956 				}
4957 				if (c->crc!=NULL) {
4958 #ifdef MMAP_ALLOC
4959 					munmap((void*)(c->crc),4096);
4960 #else
4961 					free(c->crc);
4962 #endif
4963 				}
4964 #ifdef PRESERVE_BLOCK
4965 				if (c->block!=NULL) {
4966 # ifdef MMAP_ALLOC
4967 					munmap((void*)(c->block),MFSBLOCKSIZE);
4968 # else
4969 					free(c->block);
4970 # endif
4971 				}
4972 #endif /* PRESERVE_BLOCK */
4973 				if (c->filename) {
4974 					free(c->filename);
4975 				}
4976 				free(c);
4977 			} else {
4978 				syslog(LOG_WARNING,"hdd_term: locked chunk !!!");
4979 			}
4980 		}
4981 	}
4982 	for (f=folderhead ; f ; f=fn) {
4983 		fn = f->next;
4984 		if (f->lfd>=0) {
4985 			close(f->lfd);
4986 		}
4987 		if (f->chunktab) {
4988 			free(f->chunktab);
4989 		}
4990 		free(f->path);
4991 		free(f);
4992 	}
4993 	for (i=0 ; i<DHASHSIZE ; i++) {
4994 		for (dc=dophashtab[i] ; dc ; dc=dcn) {
4995 			dcn = dc->next;
4996 			free(dc);
4997 		}
4998 	}
4999 	for (dc=newdopchunks ; dc ; dc=dcn) {
5000 		dcn = dc->next;
5001 		free(dc);
5002 	}
5003 	for (cc=cclist ; cc ; cc=ccn) {
5004 		ccn = cc->next;
5005 		if (cc->wcnt) {
5006 			syslog(LOG_WARNING,"hddspacemgr (atexit): used cond !!!");
5007 		} else {
5008 			zassert(pthread_cond_destroy(&(cc->cond)));
5009 		}
5010 		free(cc);
5011 	}
5012 	for (nc=newchunks ; nc ; nc=ncn) {
5013 		ncn = nc->next;
5014 		free(nc);
5015 	}
5016 	for (lc=lostchunks ; lc ; lc=lcn) {
5017 		lcn = lc->next;
5018 		free(lc);
5019 	}
5020 	for (dmc=damagedchunks ; dmc ; dmc=dmcn) {
5021 		dmcn = dmc->next;
5022 		free(dmc);
5023 	}
5024 }
5025 
/*
 * Parses a size written as text into number of bytes.
 *
 * Accepted format: NUMBER[UNIT][B]
 *   NUMBER - decimal digits with optional fraction ("12", "1.5", ".5");
 *            a '.' has to be followed by at least one digit
 *   UNIT   - decimal:  k, M, G, T, P, E      (10^3 ... 10^18)
 *            binary:   Ki, Mi, Gi, Ti, Pi, Ei (2^10 ... 2^60)
 *   B      - optional, ignored
 * Nothing may follow the unit. Note that "K" and "ki" are not accepted.
 *
 * str - text to parse (NUL terminated)
 * ret - place for the result; written only on success
 *
 * returns:
 *    1 - success, *ret holds the size rounded to the nearest byte
 *   -1 - syntax error
 *   -2 - value doesn't fit in 64 bits
 */
int hdd_size_parse(const char *str,uint64_t *ret) {
	uint64_t val,frac,fracdiv,rval;
	uint32_t digit;
	double drval,mult;
	int f,ff,ovf;

	val=0;
	frac=0;
	fracdiv=1;
	f=0;	// integer part has at least one digit
	ff=0;	// fraction has at least one digit
	ovf=0;	// integer part alone doesn't fit in 64 bits
	while (*str>='0' && *str<='9') {
		f=1;
		digit = (uint32_t)(*str-'0');
		if (val>(UINT64_MAX-digit)/10) {	// val*10+digit would wrap around
			ovf=1;
		} else {
			val = val*10+digit;
		}
		str++;
	}
	if (*str=='.') {	// accept format ".####" (without 0)
		str++;
		while (*str>='0' && *str<='9') {
			ff=1;
			// keep at most 18 fractional digits - it is more than double can hold
			// and it prevents 'fracdiv' from wrapping around (finally to zero)
			if (fracdiv<UINT64_C(1000000000000000000)) {
				fracdiv*=10;
				frac = frac*10+(uint64_t)(*str-'0');
			}
			str++;
		}
		if (ff==0) {	// if there was '.' expect number afterwards
			return -1;
		}
	} else if (f==0) {	// but not empty string
		return -1;
	}
	if (str[0]=='\0' || (str[0]=='B' && str[1]=='\0')) {
		mult=1.0;
	} else if (str[0]!='\0' && (str[1]=='\0' || (str[1]=='B' && str[2]=='\0'))) {
		switch(str[0]) {
		case 'k':
			mult=1e3;
			break;
		case 'M':
			mult=1e6;
			break;
		case 'G':
			mult=1e9;
			break;
		case 'T':
			mult=1e12;
			break;
		case 'P':
			mult=1e15;
			break;
		case 'E':
			mult=1e18;
			break;
		default:
			return -1;
		}
	} else if (str[0]!='\0' && str[1]=='i' && (str[2]=='\0' || (str[2]=='B' && str[3]=='\0'))) {
		switch(str[0]) {
		case 'K':
			mult=1024.0;
			break;
		case 'M':
			mult=1048576.0;
			break;
		case 'G':
			mult=1073741824.0;
			break;
		case 'T':
			mult=1099511627776.0;
			break;
		case 'P':
			mult=1125899906842624.0;
			break;
		case 'E':
			mult=1152921504606846976.0;
			break;
		default:
			return -1;
		}
	} else {
		return -1;
	}
	if (ovf) {
		return -2;
	}
	if (frac==0 && mult==1.0) {
		// plain number of bytes - no need to go through double (exact up to 2^64-1)
		*ret = val;
		return 1;
	}
	drval = ((double)frac/(double)fracdiv+(double)val)*mult;
	// 2^64 - the smallest value that can't be converted (2^64-1 is not representable as double,
	// so comparing with '18446744073709551615.0' in fact compares with 2^64 and lets it through)
	if (drval>=18446744073709551616.0) {
		return -2;
	}
	// round half up; 'drval' is not negative and already known to be in range
	rval = (uint64_t)drval;
	if (drval-(double)rval>=0.5) {
		rval++;
	}
	*ret = rval;
	return 1;
}
5113 
5114 int hdd_parseline(char *hddcfgline) {
5115 	uint32_t l,p;
5116 	int lfd,td,im,bm;
5117 	int mfd;
5118 	char *pptr;
5119 	char *lockfname;
5120 	char *metaidfname;
5121 	struct stat sb;
5122 	folder *f;
5123 	uint8_t lockneeded;
5124 	uint8_t cannotbeused;
5125 	uint64_t limit;
5126 	uint64_t metaid;
5127 	uint8_t lmode;
5128 
5129 	if (hddcfgline[0]=='#') {
5130 		return 0;
5131 	}
5132 	l = strlen(hddcfgline);
5133 	while (l>0 && (hddcfgline[l-1]=='\r' || hddcfgline[l-1]=='\n' || hddcfgline[l-1]==' ' || hddcfgline[l-1]=='\t')) {
5134 		l--;
5135 	}
5136 	if (l==0) {
5137 		return 0;
5138 	}
5139 	hddcfgline[l]='\0';
5140 	p = l;
5141 	while (p>0 && hddcfgline[p-1]!=' ' && hddcfgline[p-1]!='\t') {
5142 		p--;
5143 	}
5144 	lmode = 0;
5145 	if (p>0) {
5146 		if (hddcfgline[p]=='-') {
5147 			if (hdd_size_parse(hddcfgline+p+1,&limit)>=0) {
5148 				lmode = 1;
5149 			} else {
5150 				mfs_arg_syslog(LOG_WARNING,"size parse error, data: %s",hddcfgline+p);
5151 			}
5152 		} if ((hddcfgline[p]>='0' && hddcfgline[p]<='9') || hddcfgline[p]=='.') {
5153 			if (hdd_size_parse(hddcfgline+p,&limit)>=0) {
5154 				lmode = 2;
5155 			} else {
5156 				mfs_arg_syslog(LOG_WARNING,"size parse error, data: %s",hddcfgline+p);
5157 			}
5158 		}
5159 		if (lmode) {
5160 			l = p;
5161 			while (l>0 && (hddcfgline[l-1]==' ' || hddcfgline[l-1]=='\t')) {
5162 				l--;
5163 			}
5164 			if (l==0) {
5165 				return 0;
5166 			}
5167 		}
5168 	}
5169 	if (hddcfgline[l-1]!='/') {
5170 		hddcfgline[l]='/';
5171 		hddcfgline[l+1]='\0';
5172 		l++;
5173 	} else {
5174 		hddcfgline[l]='\0';
5175 	}
5176 	td = 0;
5177 	im = 0;
5178 	bm = REBALANCE_STD;
5179 	pptr = hddcfgline;
5180 	while (1) {
5181 		if (*pptr == '*') {
5182 			td = 1;
5183 		} else if (*pptr == '!') {
5184 			im = 1;
5185 		} else if (*pptr == '>') {
5186 			bm = REBALANCE_FORCE_DST;
5187 		} else if (*pptr == '<') {
5188 			bm = REBALANCE_FORCE_SRC;
5189 		} else {
5190 			break;
5191 		}
5192 		l--;
5193 		pptr++;
5194 	}
5195 
5196 	zassert(pthread_mutex_lock(&folderlock));
5197 	lockneeded = 1;
5198 	cannotbeused = 0;
5199 	for (f=folderhead ; f && lockneeded ; f=f->next) {
5200 		if (strcmp(f->path,pptr)==0) {
5201 			if (f->toremove==1) {
5202 				cannotbeused = 1;
5203 			} else {
5204 				lockneeded = 0;
5205 			}
5206 		}
5207 	}
5208 	zassert(pthread_mutex_unlock(&folderlock));
5209 
5210 	if (cannotbeused) {
5211 		mfs_arg_syslog(LOG_WARNING,"hdd space manager: drive '%s' is being removed and can not be added again while removing is in progress - try it again in couple of seconds",pptr);
5212 		return -1;
5213 	}
5214 
5215 	if (lmode==1) { // sanity checks
5216 		if (limit<0x4000000) {
5217 			mfs_arg_syslog(LOG_WARNING,"hdd space manager: limit on '%s' < chunk size - leaving so small space on hdd is not recommended",pptr);
5218 		} else {
5219 			struct statvfs fsinfo;
5220 
5221 			if (statvfs(pptr,&fsinfo)<0) {
5222 				mfs_arg_errlog(LOG_NOTICE,"hdd space manager: statvfs on '%s'",pptr);
5223 			} else {
5224 				uint64_t size = (uint64_t)(fsinfo.f_frsize)*(uint64_t)(fsinfo.f_blocks-(fsinfo.f_bfree-fsinfo.f_bavail));
5225 				if (limit > size) {
5226 					mfs_arg_syslog(LOG_WARNING,"hdd space manager: space to be left free on '%s' (%"PRIu64") is greater than real volume size (%"PRIu64") !!!",pptr,limit,size);
5227 				}
5228 			}
5229 		}
5230 	}
5231 	if (lmode==2) { // sanity checks
5232 		if (limit==0) {
5233 			mfs_arg_syslog(LOG_WARNING,"hdd space manager: limit on '%s' set to zero - using real volume size",pptr);
5234 			lmode = 0;
5235 		} else {
5236 			struct statvfs fsinfo;
5237 
5238 			if (statvfs(pptr,&fsinfo)<0) {
5239 				mfs_arg_errlog(LOG_NOTICE,"hdd space manager: statvfs on '%s'",pptr);
5240 			} else {
5241 				uint64_t size = (uint64_t)(fsinfo.f_frsize)*(uint64_t)(fsinfo.f_blocks-(fsinfo.f_bfree-fsinfo.f_bavail));
5242 				if (limit > size) {
5243 					mfs_arg_syslog(LOG_WARNING,"hdd space manager: limit on '%s' (%"PRIu64") is greater than real volume size (%"PRIu64") - using real volume size",pptr,limit,size);
5244 					lmode = 0;
5245 				}
5246 			}
5247 		}
5248 	}
5249 
5250 	metaid = masterconn_getmetaid();
5251 	metaidfname = (char*)malloc(l+8);
5252 	passert(metaidfname);
5253 	memcpy(metaidfname,pptr,l);
5254 	memcpy(metaidfname+l,".metaid",8);
5255 	mfd = open(metaidfname,O_RDONLY);
5256 	if (mfd>=0) {
5257 		uint64_t filemetaid;
5258 		uint8_t buff[8];
5259 		const uint8_t *rptr;
5260 		if (read(mfd,buff,8)==8) {
5261 			rptr = buff;
5262 			filemetaid = get64bit(&rptr);
5263 			if (filemetaid!=metaid) {
5264 				if (metaid>0) {
5265 					if (im==0) {
5266 						mfs_arg_syslog(LOG_ERR,"hdd space manager: wrong meta id in file '%s' (0x%016"PRIX64",expected:0x%016"PRIX64") - shouldn't use this drive - use '!' in drive definition to ignore this (dangerous)",metaidfname,filemetaid,metaid);
5267 					} else {
5268 						mfs_arg_syslog(LOG_ERR,"hdd space manager: wrong meta id in file '%s' (0x%016"PRIX64",expected:0x%016"PRIX64") - forced to use this drive",metaidfname,filemetaid,metaid);
5269 					}
5270 				} else {
5271 					if (im==0) {
5272 						mfs_arg_syslog(LOG_ERR,"hdd space manager: chunkserver without meta id shouldn't use drive with defined meta id (file: '%s') - use '!' in drive definition to ignore this (dangerous)",metaidfname);
5273 					} else {
5274 						mfs_arg_syslog(LOG_ERR,"hdd space manager: chunkserver without meta id shouldn't use drive with defined meta id (file: '%s') - forced to ignore",metaidfname);
5275 					}
5276 				}
5277 				close(mfd);
5278 				free(metaidfname);
5279 				if (im==0) {
5280 					return -1;
5281 				}
5282 			}
5283 			metaid = 0; // file exists and is correct (or forced do be ignored), so do not re create it
5284 		}
5285 		close(mfd);
5286 	}
5287 	free(metaidfname);
5288 	lockfname = (char*)malloc(l+6);
5289 	passert(lockfname);
5290 	memcpy(lockfname,pptr,l);
5291 	memcpy(lockfname+l,".lock",6);
5292 	lfd = open(lockfname,O_RDWR|O_CREAT|O_TRUNC,0640);
5293 	if (lfd<0 && errno==EROFS && td) {
5294 		lfd = open(lockfname,O_RDONLY); // prevents umounting
5295 		free(lockfname);
5296 		td = 2;
5297 	} else {
5298 		if (lfd<0) {
5299 			mfs_arg_errlog(LOG_ERR,"hdd space manager: can't create lock file '%s'",lockfname);
5300 			free(lockfname);
5301 			return -1;
5302 		}
5303 		if (lockneeded && lockf(lfd,F_TLOCK,0)<0) {
5304 			if (ERRNO_ERROR) {
5305 				mfs_arg_errlog(LOG_NOTICE,"hdd space manager: lockf '%s' error",lockfname);
5306 			} else {
5307 				mfs_arg_syslog(LOG_ERR,"hdd space manager: data folder '%s' already locked (used by another process)",pptr);
5308 			}
5309 			free(lockfname);
5310 			close(lfd);
5311 			return -1;
5312 		}
5313 		if (fstat(lfd,&sb)<0) {
5314 			mfs_arg_errlog(LOG_NOTICE,"hdd space manager: fstat '%s' error",lockfname);
5315 			free(lockfname);
5316 			close(lfd);
5317 			return -1;
5318 		}
5319 		free(lockfname);
5320 		if (lockneeded) {
5321 			zassert(pthread_mutex_lock(&folderlock));
5322 			for (f=folderhead ; f ; f=f->next) {
5323 				if (f->devid==sb.st_dev) {
5324 					if (f->lockinode==sb.st_ino) {
5325 						mfs_arg_syslog(LOG_ERR,"hdd space manager: data folders '%s' and '%s have the same lockfile !!!",pptr,f->path);
5326 						zassert(pthread_mutex_unlock(&folderlock));
5327 						close(lfd);
5328 						return -1;
5329 					} else {
5330 						mfs_arg_syslog(LOG_WARNING,"hdd space manager: data folders '%s' and '%s' are on the same physical device (could lead to unexpected behaviours)",pptr,f->path);
5331 					}
5332 				}
5333 			}
5334 			zassert(pthread_mutex_unlock(&folderlock));
5335 		}
5336 	}
5337 	if (im==0 && metaid>0) {
5338 		metaidfname = (char*)malloc(l+8);
5339 		passert(metaidfname);
5340 		memcpy(metaidfname,pptr,l);
5341 		memcpy(metaidfname+l,".metaid",8);
5342 		mfd = open(metaidfname,O_RDWR|O_CREAT|O_TRUNC,0640);
5343 		if (mfd>=0) {
5344 			uint8_t buff[8];
5345 			uint8_t *wptr;
5346 			wptr = buff;
5347 			put64bit(&wptr,metaid);
5348 			if (write(mfd,buff,8)!=8) {
5349 				mfs_errlog(LOG_WARNING,"hdd space manager: error writing meta id file");
5350 			}
5351 			close(mfd);
5352 		} else {
5353 			mfs_errlog(LOG_WARNING,"hdd space manager: error writing meta id file");
5354 		}
5355 		free(metaidfname);
5356 	}
5357 	zassert(pthread_mutex_lock(&folderlock));
5358 	for (f=folderhead ; f ; f=f->next) {
5359 		if (strcmp(f->path,pptr)==0) {
5360 			if (f->toremove==2) {
5361 				f->toremove = 0;
5362 			}
5363 			if (lmode==1) {
5364 				f->leavefree = limit;
5365 			} else {
5366 				f->leavefree = LeaveFree;
5367 			}
5368 			if (lmode==2) {
5369 				f->sizelimit = limit;
5370 			} else {
5371 				f->sizelimit = 0;
5372 			}
5373 			if (f->damaged) {
5374 				f->scanstate = SCST_SCANNEEDED;
5375 				f->scanprogress = 0;
5376 				f->damaged = 0;
5377 				f->avail = 0ULL;
5378 				f->total = 0ULL;
5379 				if (f->chunktab) {
5380 					free(f->chunktab);
5381 				}
5382 				f->chunkcount = 0;
5383 				f->chunktabsize = 0;
5384 				f->chunktab = NULL;
5385 				hdd_stats_clear(&(f->cstat));
5386 				for (l=0 ; l<STATSHISTORY ; l++) {
5387 					hdd_stats_clear(&(f->stats[l]));
5388 				}
5389 				f->statspos = 0;
5390 				for (l=0 ; l<LASTERRSIZE ; l++) {
5391 					f->lasterrtab[l].chunkid = 0ULL;
5392 					f->lasterrtab[l].timestamp = 0;
5393 					f->lasterrtab[l].monotonic_time = 0.0;
5394 					f->lasterrtab[l].errornumber = 0;
5395 				}
5396 				f->lasterrindx = 0;
5397 				f->lastrefresh = 0.0;
5398 				f->needrefresh = 1;
5399 			} else {
5400 				if ((f->todel==0 && td>0) || (f->todel>0 && td==0)) {
5401 					// the change is important - chunks need to be send to master again
5402 					f->scanstate = SCST_SENDNEEDED;
5403 				}
5404 			}
5405 			f->todel = td;
5406 			f->balancemode = bm;
5407 			zassert(pthread_mutex_unlock(&folderlock));
5408 			if (lfd>=0) {
5409 				close(lfd);
5410 			}
5411 			return 1;
5412 		}
5413 	}
5414 	f = (folder*)malloc(sizeof(folder));
5415 	passert(f);
5416 	f->todel = td;
5417 	f->balancemode = bm;
5418 	f->damaged = 0;
5419 	f->scanstate = SCST_SCANNEEDED;
5420 	f->scanprogress = 0;
5421 	f->path = strdup(pptr);
5422 	passert(f->path);
5423 	f->toremove = 0;
5424 	if (lmode==1) {
5425 		f->leavefree = limit;
5426 	} else {
5427 		f->leavefree = LeaveFree;
5428 	}
5429 	if (lmode==2) {
5430 		f->sizelimit = limit;
5431 	} else {
5432 		f->sizelimit = 0;
5433 	}
5434 	f->avail = 0ULL;
5435 	f->total = 0ULL;
5436 	f->chunkcount = 0;
5437 	f->chunktabsize = 0;
5438 	f->chunktab = NULL;
5439 	hdd_stats_clear(&(f->cstat));
5440 	for (l=0 ; l<STATSHISTORY ; l++) {
5441 		hdd_stats_clear(&(f->stats[l]));
5442 	}
5443 	f->statspos = 0;
5444 	for (l=0 ; l<LASTERRSIZE ; l++) {
5445 		f->lasterrtab[l].chunkid = 0ULL;
5446 		f->lasterrtab[l].timestamp = 0;
5447 		f->lasterrtab[l].monotonic_time = 0.0;
5448 		f->lasterrtab[l].errornumber = 0;
5449 	}
5450 	f->lasterrindx = 0;
5451 	f->lastrefresh = 0.0;
5452 	f->needrefresh = 1;
5453 	f->devid = sb.st_dev;
5454 	f->lockinode = sb.st_ino;
5455 	f->lfd = lfd;
5456 	f->testhead = NULL;
5457 	f->testtail = &(f->testhead);
5458 //	f->carry = (double)(random()&0x7FFFFFFF)/(double)(0x7FFFFFFF);
5459 	f->read_dist = 0;
5460 	f->write_dist = 0;
5461 	f->read_first = 1;
5462 	f->write_first = 1;
5463 	f->read_corr = 0.0;
5464 	f->write_corr = 0.0;
5465 	f->rebalance_in_progress = 0;
5466 	f->rebalance_last_usec = 0;
5467 	f->next = folderhead;
5468 	folderhead = f;
5469 	testerreset = 1;
5470 	zassert(pthread_mutex_unlock(&folderlock));
5471 	return 2;
5472 }
5473 
5474 int hdd_folders_reinit(void) {
5475 	folder *f;
5476 	FILE *fd;
5477 	char buff[1000];
5478 	char *hddfname;
5479 	int ret,datadef;
5480 
5481 	if (!cfg_isdefined("HDD_CONF_FILENAME")) {
5482 		hddfname = strdup(ETC_PATH "/mfs/mfshdd.cfg");
5483 		passert(hddfname);
5484 		fd = fopen(hddfname,"r");
5485 		if (!fd) {
5486 			free(hddfname);
5487 			hddfname = strdup(ETC_PATH "/mfshdd.cfg");
5488 			fd = fopen(hddfname,"r");
5489 			if (fd) {
5490 				mfs_syslog(LOG_WARNING,"default sysconf path has changed - please move mfshdd.cfg from "ETC_PATH"/ to "ETC_PATH"/mfs/");
5491 			}
5492 		}
5493 	} else {
5494 		hddfname = cfg_getstr("HDD_CONF_FILENAME",ETC_PATH "/mfs/mfshdd.cfg");
5495 		fd = fopen(hddfname,"r");
5496 	}
5497 
5498 	if (!fd) {
5499 		free(hddfname);
5500 		return -1;
5501 	}
5502 
5503 	ret = 0;
5504 
5505 	zassert(pthread_mutex_lock(&folderlock));
5506 	folderactions = 0; // stop folder actions
5507 	for (f=folderhead ; f ; f=f->next) {
5508 		if (f->toremove==0) {
5509 			f->toremove = 2;
5510 		}
5511 	}
5512 	zassert(pthread_mutex_unlock(&folderlock));
5513 
5514 	while (fgets(buff,999,fd)) {
5515 		buff[999] = 0;
5516 		if (hdd_parseline(buff)<0) {
5517 			ret = -1;
5518 		}
5519 
5520 	}
5521 	fclose(fd);
5522 
5523 	zassert(pthread_mutex_lock(&folderlock));
5524 	datadef = 0;
5525 	for (f=folderhead ; f ; f=f->next) {
5526 		if (f->toremove==0) {
5527 			datadef = 1;
5528 			if (f->scanstate==SCST_SCANNEEDED) {
5529 				syslog(LOG_NOTICE,"hdd space manager: folder %s will be scanned",f->path);
5530 			} else if (f->scanstate==SCST_SENDNEEDED) {
5531 				syslog(LOG_NOTICE,"hdd space manager: folder %s will be resend",f->path);
5532 			} else {
5533 				syslog(LOG_NOTICE,"hdd space manager: folder %s didn't change",f->path);
5534 			}
5535 		} else {
5536 			f->damaged = 0;
5537 			syslog(LOG_NOTICE,"hdd space manager: folder %s will be removed",f->path);
5538 		}
5539 	}
5540 	folderactions = 1; // continue folder actions
5541 	zassert(pthread_mutex_unlock(&folderlock));
5542 
5543 	if (datadef==0) {
5544 		mfs_arg_syslog(LOG_ERR,"hdd space manager: no hdd space defined in %s file",hddfname);
5545 		ret = -1;
5546 	}
5547 
5548 	free(hddfname);
5549 
5550 	return ret;
5551 }
5552 
/*
 * Info callback (registered in hdd_init with main_info_register).
 * Only forwards OF_INFO request to hdd_open_files_handle.
 */
void hdd_info(void) {
	hdd_open_files_handle(OF_INFO);
}
5556 
5557 void hdd_reload(void) {
5558 	char *LeaveFreeStr;
5559 
5560 	zassert(pthread_mutex_lock(&folderlock));
5561 	HDDErrorCount = cfg_getuint32("HDD_ERROR_TOLERANCE_COUNT",2);
5562 	if (HDDErrorCount<1) {
5563 		syslog(LOG_NOTICE,"hdd space manager: error tolerance count too small - changed to 1");
5564 		HDDErrorCount = 1;
5565 	} else if (HDDErrorCount>10) {
5566 		syslog(LOG_NOTICE,"hdd space manager: error tolerance count too big - changed to 10");
5567 		HDDErrorCount = 10;
5568 	}
5569 	HDDErrorTime = cfg_getuint32("HDD_ERROR_TOLERANCE_PERIOD",600);
5570 	if (HDDErrorTime<10) {
5571 		syslog(LOG_NOTICE,"hdd space manager: error tolerance period too small - changed to 10 seconds");
5572 		HDDErrorTime = 10;
5573 	} else if (HDDErrorTime>86400) {
5574 		syslog(LOG_NOTICE,"hdd space manager: error tolerance period too big - changed to 86400 seconds (1 day)");
5575 		HDDErrorTime = 86400;
5576 	}
5577 	zassert(pthread_mutex_unlock(&folderlock));
5578 	zassert(pthread_mutex_lock(&testlock));
5579 	HDDTestFreq = cfg_getuint32("HDD_TEST_FREQ",10);
5580 	HDDRebalancePerc = cfg_getuint32("HDD_REBALANCE_UTILIZATION",20);
5581 	if (HDDRebalancePerc>100) {
5582 		HDDRebalancePerc=100;
5583 	}
5584 	zassert(pthread_mutex_unlock(&testlock));
5585 	zassert(pthread_mutex_lock(&doplock));
5586 	DoFsyncBeforeClose = cfg_getuint8("HDD_FSYNC_BEFORE_CLOSE",0);
5587 	zassert(pthread_mutex_unlock(&doplock));
5588 
5589 	LeaveFreeStr = cfg_getstr("HDD_LEAVE_SPACE_DEFAULT","256MiB");
5590 	if (hdd_size_parse(LeaveFreeStr,&LeaveFree)<0) {
5591 		syslog(LOG_NOTICE,"hdd space manager: HDD_LEAVE_SPACE_DEFAULT parse error - left unchanged");
5592 	}
5593 	free(LeaveFreeStr);
5594 	if (LeaveFree<0x4000000) {
5595 		syslog(LOG_NOTICE,"hdd space manager: HDD_LEAVE_SPACE_DEFAULT < chunk size - leaving so small space on hdd is not recommended");
5596 	}
5597 
5598 	syslog(LOG_NOTICE,"reloading hdd data ...");
5599 	hdd_folders_reinit();
5600 }
5601 
/*
 * Second stage of initialization - starts background threads.
 * Clears 'term' (under termlock) and then creates four threads: tester,
 * folders, rebalance and delayed operations (hdd_term joins them only when
 * it finds 'term' equal to zero).
 * Thread creation errors are handled by zassert; function always returns 0.
 */
int hdd_late_init(void) {
	zassert(pthread_mutex_lock(&termlock));
	term = 0;
	zassert(pthread_mutex_unlock(&termlock));

	zassert(main_minthread_create(&testerthread,0,hdd_tester_thread,NULL));
	zassert(main_minthread_create(&foldersthread,0,hdd_folders_thread,NULL));
	zassert(main_minthread_create(&rebalancethread,0,hdd_rebalance_thread,NULL));
	zassert(main_minthread_create(&delayedthread,0,hdd_delayed_thread,NULL));
	return 0;
}
5613 
5614 int hdd_init(void) {
5615 	uint32_t hp;
5616 	folder *f;
5617 	char *LeaveFreeStr;
5618 
5619 	// this routine is called at the beginning from the main thread so no locks are necessary here
5620 	for (hp=0 ; hp<HASHSIZE ; hp++) {
5621 		hashtab[hp] = NULL;
5622 	}
5623 	for (hp=0 ; hp<DHASHSIZE ; hp++) {
5624 		dophashtab[hp] = NULL;
5625 	}
5626 
5627 #ifndef PRESERVE_BLOCK
5628 	zassert(pthread_key_create(&hdrbufferkey,free));
5629 # ifdef MMAP_ALLOC
5630 	zassert(pthread_key_create(&blockbufferkey,hdd_blockbuffer_free));
5631 # else
5632 	zassert(pthread_key_create(&blockbufferkey,free));
5633 # endif
5634 #endif /* PRESERVE_BLOCK */
5635 
5636 	emptyblockcrc = mycrc32_zeroblock(0,MFSBLOCKSIZE);
5637 
5638 	LeaveFreeStr = cfg_getstr("HDD_LEAVE_SPACE_DEFAULT","256MiB");
5639 	if (hdd_size_parse(LeaveFreeStr,&LeaveFree)<0) {
5640 		fprintf(stderr,"hdd space manager: HDD_LEAVE_SPACE_DEFAULT parse error - using default (256MiB)\n");
5641 		LeaveFree = 0x10000000;
5642 	}
5643 	free(LeaveFreeStr);
5644 	if (LeaveFree<0x4000000) {
5645 		fprintf(stderr,"hdd space manager: HDD_LEAVE_SPACE_DEFAULT < chunk size - leaving so small space on hdd is not recommended\n");
5646 	}
5647 
5648 	if (hdd_folders_reinit()<0) {
5649 		return -1;
5650 	}
5651 
5652 	hdd_open_files_handle(OF_INIT);
5653 
5654 	zassert(pthread_mutex_lock(&folderlock));
5655 	for (f=folderhead ; f ; f=f->next) {
5656 		fprintf(stderr,"hdd space manager: path to scan: %s\n",f->path);
5657 	}
5658 	zassert(pthread_mutex_unlock(&folderlock));
5659 	fprintf(stderr,"hdd space manager: start background hdd scanning (searching for available chunks)\n");
5660 
5661 	HDDErrorCount = cfg_getuint32("HDD_ERROR_TOLERANCE_COUNT",2);
5662 	if (HDDErrorCount<1) {
5663 		fprintf(stderr,"hdd space manager: error tolerance count too small - changed to 1\n");
5664 		HDDErrorCount = 2;
5665 	} else if (HDDErrorCount>10) {
5666 		fprintf(stderr,"hdd space manager: error tolerance count too big - changed to 10\n");
5667 		HDDErrorCount = 10;
5668 	}
5669 	HDDErrorTime = cfg_getuint32("HDD_ERROR_TOLERANCE_PERIOD",600);
5670 	if (HDDErrorTime<10) {
5671 		fprintf(stderr,"hdd space manager: error tolerance period too small - changed to 10 seconds\n");
5672 		HDDErrorTime = 10;
5673 	} else if (HDDErrorTime>86400) {
5674 		fprintf(stderr,"hdd space manager: error tolerance period too big - changed to 86400 seconds (1 day)\n");
5675 		HDDErrorTime = 86400;
5676 	}
5677 	HDDTestFreq = cfg_getuint32("HDD_TEST_FREQ",10);
5678 	HDDRebalancePerc = cfg_getuint32("HDD_REBALANCE_UTILIZATION",20);
5679 	if (HDDRebalancePerc>100) {
5680 		HDDRebalancePerc=100;
5681 	}
5682 
5683 	main_reload_register(hdd_reload);
5684 	main_time_register(60,0,hdd_diskinfo_movestats);
5685 	main_destruct_register(hdd_term);
5686 	main_info_register(hdd_info);
5687 
5688 	zassert(pthread_mutex_lock(&termlock));
5689 	term = 1;
5690 	zassert(pthread_mutex_unlock(&termlock));
5691 
5692 	return 0;
5693 }
5694