1 /*
2 * Copyright (C) 2016 Jakub Kruszona-Zawadzki, Core Technology Sp. z o.o.
3 *
4 * This file is part of MooseFS.
5 *
6 * MooseFS is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, version 2 (only).
9 *
10 * MooseFS is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with MooseFS; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1301, USA
18 * or visit http://www.gnu.org/licenses/gpl-2.0.html
19 */
20
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24
25 #define MMAP_ALLOC 1
26
27 // #include <execinfo.h> // for backtrace - debugs only
28 #include <inttypes.h>
29 #include <syslog.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <fcntl.h>
34 #include <string.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/statvfs.h>
38 #include <sys/time.h>
39 #include <sys/resource.h>
40 #include <time.h>
41 #include <dirent.h>
42 #include <errno.h>
43 #include <limits.h>
44 #include <math.h>
45 #include <pthread.h>
46 #ifdef MMAP_ALLOC
47 #include <sys/mman.h>
48 #endif
49
50 #include "MFSCommunication.h"
51 #include "cfg.h"
52 #include "datapack.h"
53 #include "crc.h"
54 #include "main.h"
55 #include "masterconn.h"
56 #include "slogger.h"
57 #include "massert.h"
58 #include "random.h"
59 #include "clocks.h"
60 #include "portable.h"
61 #include "sockets.h"
62
/* When defined, every chunk carries its own block buffer (block/blockno/blockto in struct chunk);
 * when undefined, the pthread keys hdrbufferkey/blockbufferkey are declared instead. */
63 #define PRESERVE_BLOCK 1
64
/* USE_PIO is defined only when the build configuration defines both HAVE_PREAD and HAVE_PWRITE. */
65 #if defined(HAVE_PREAD) && defined(HAVE_PWRITE)
66 #define USE_PIO 1
67 #endif
68
69 /* microseconds to wait after the last rebalance before choosing a disk for a new chunk */
70 #define REBALANCE_GRACE_PERIOD 10000000
71
/* NOTE(review): rebalance thresholds - their users are outside the visible part of the file; confirm units there. */
72 #define REBALANCE_TOTAL_MIN 1000000000
73 #define REBALANCE_DST_MAX_USAGE 0.99
74 #define REBALANCE_DIFF_MAX 0.01
75
76 /* every DELAYEDSTEP seconds the system scans the list of opened/crc_loaded chunks for chunks whose descriptor should be closed or whose crc should be freed */
77 #define DELAYEDSTEP 1
78
/* NOTE(review): presumably idle times (in seconds) after which an open descriptor / loaded crc is released - confirm in the delayed-ops thread. */
79 #define OPEN_DELAY 5
80 #define CRC_DELAY 100
81
82 #ifdef PRESERVE_BLOCK
83 #define BLOCK_DELAY 10
84 #endif
85
/* capacities of the id arrays inside struct lostchunk and struct newchunk */
86 #define LOSTCHUNKSBLOCKSIZE 1024
87 #define NEWCHUNKSBLOCKSIZE 4096
88
/* size of the header that precedes chunk data in a chunk file (used by hdd_chunk_getattr to validate file sizes) */
89 #define CHUNKHDRSIZE (1024+4*1024)
/* NOTE(review): presumably the offset of the crc table inside the header - confirm against the read/write code. */
90 #define CHUNKHDRCRC 1024
91
/* number of slots in folder.stats[] */
92 #define STATSHISTORY (24*60)
93
/* number of slots in folder.lasterrtab[] */
94 #define LASTERRSIZE 30
95
96 #define RANDOM_CHUNK_RETRIES 50
97
/* chunk hash table: HASHSIZE buckets, bucket index = low 24 bits of the chunk id */
98 #define HASHSIZE (0x1000000)
99 #define HASHPOS(chunkid) ((chunkid)&0xFFFFFF)
100
/* dophashtab: DHASHSIZE buckets, bucket index = low 6 bits of the chunk id */
101 #define DHASHSIZE 64
102 #define DHASHPOS(chunkid) ((chunkid)&0x3F)
103
/* cflag values for hdd_chunk_get: NONE - never create an entry; AUTO - create the entry when it is missing;
 * EXCLUSIVE - like AUTO, but return NULL when the chunk already exists in CH_AVAIL or CH_LOCKED state */
104 #define CH_NEW_NONE 0
105 #define CH_NEW_AUTO 1
106 #define CH_NEW_EXCLUSIVE 2
107
/* sentinel (not a real chunk pointer) returned by hdd_chunk_tryfind when the chunk is currently locked */
108 #define CHUNKLOCKED ((void*)1)
109
/*
 * Node of the singly linked list of chunks reported as damaged
 * (list head: damagedchunks).
 */
struct damagedchunk {
	uint64_t chunkid;		// id of the damaged chunk
	struct damagedchunk *next;	// next node, NULL at the end of the list
};
typedef struct damagedchunk damagedchunk;
114
115 typedef struct lostchunk {
116 uint64_t chunkidblock[LOSTCHUNKSBLOCKSIZE];
117 uint32_t chunksinblock;
118 struct lostchunk *next;
119 } lostchunk;
120
121 typedef struct newchunk {
122 uint64_t chunkidblock[NEWCHUNKSBLOCKSIZE];
123 uint32_t versionblock[NEWCHUNKSBLOCKSIZE];
124 uint32_t chunksinblock;
125 struct newchunk *next;
126 } newchunk;
127
/*
 * List node carrying a single chunk id; it is the element type of the
 * dophashtab buckets and of the newdopchunks list.
 */
struct dopchunk {
	uint64_t chunkid;	// chunk id stored in this node
	struct dopchunk *next;	// next node, NULL at the end of the list
};
typedef struct dopchunk dopchunk;
132
133 struct folder;
134
/*
 * Record of one I/O error; folders keep the most recent ones in
 * folder.lasterrtab.
 */
struct ioerror {
	uint64_t chunkid;	// chunk the error is attributed to
	uint32_t timestamp;	// sent to the master as a 32-bit value by the diskinfo functions
	double monotonic_time;	// NOTE(review): presumably a monotonic clock reading taken with the error - confirm
	int errornumber;	// NOTE(review): presumably the errno value - confirm at the place where it is filled
};
typedef struct ioerror ioerror;
141
/*
 * Condition variable with a waiter counter. Entries live on the cclist
 * list; hdd_chunk_get attaches one to a chunk (chunk.ccond) when a thread
 * has to wait for it and reuses entries whose wcnt is zero.
 */
struct _cntcond {
	pthread_cond_t cond;	// waited on together with hashlock
	uint32_t wcnt;		// number of threads currently waiting on cond
	struct _cntcond *next;	// next entry on cclist
};
typedef struct _cntcond cntcond;
147
148 typedef struct chunk {
149 char *filename;
150 uint64_t chunkid;
151 struct folder *owner;
152 uint32_t ownerindx;
153 uint32_t version;
154 uint16_t blocks;
155 uint16_t crcrefcount;
156 double opento;
157 double crcto;
158 unsigned crcchanged:1;
159 unsigned fsyncneeded:1;
160 #define CH_AVAIL 0
161 #define CH_LOCKED 1
162 #define CH_DELETED 2
163 #define CH_TOBEDELETED 3
164 unsigned state:2; // CH_AVAIL,CH_LOCKED,CH_DELETED
165 cntcond *ccond;
166 uint8_t *crc;
167 int fd;
168
169 #ifdef PRESERVE_BLOCK
170 double blockto;
171 uint8_t *block;
172 uint16_t blockno; // 0xFFFF == invalid
173 #endif
174 uint8_t validattr;
175 uint8_t todel;
176 // uint32_t testtime; // at start use max(atime,mtime) then every operation set it to current time
177 struct chunk *testnext,**testprev;
178 struct chunk *next;
179 } chunk;
180
/*
 * Set of I/O counters kept per folder (current interval and history).
 * The nsec* fields are divided by 1000 when packed by
 * hdd_stats_binary_pack.
 */
struct hddstats {
	uint64_t rbytes;	// bytes read
	uint64_t wbytes;	// bytes written
	uint64_t nsecreadsum;	// sum of read durations
	uint64_t nsecwritesum;	// sum of write durations
	uint64_t nsecfsyncsum;	// sum of fsync durations
	uint32_t rops;		// number of reads
	uint32_t wops;		// number of writes
	uint32_t fsyncops;	// number of fsyncs
	uint32_t nsecreadmax;	// longest single read
	uint32_t nsecwritemax;	// longest single write
	uint32_t nsecfsyncmax;	// longest single fsync
};
typedef struct hddstats hddstats;
194
195 typedef struct folder {
196 char *path;
197 #define SCST_SCANNEEDED 0
198 #define SCST_SCANINPROGRESS 1
199 #define SCST_SCANTERMINATE 2
200 #define SCST_SCANFINISHED 3
201 #define SCST_SENDNEEDED 4
202 #define SCST_WORKING 5
203 unsigned int scanstate:3;
204 unsigned int needrefresh:1;
205 unsigned int todel:2;
206 #define REBALANCE_STD 0
207 #define REBALANCE_FORCE_SRC 1
208 #define REBALANCE_FORCE_DST 2
209 unsigned int balancemode:2;
210 unsigned int damaged:1;
211 unsigned int toremove:2;
212 #define REBALANCE_NONE 0
213 #define REBALANCE_SRC 1
214 #define REBALANCE_DST 2
215 unsigned int tmpbalancemode:2;
216 uint8_t scanprogress;
217 uint64_t sizelimit;
218 uint64_t leavefree;
219 uint64_t avail;
220 uint64_t total;
221 hddstats cstat;
222 hddstats stats[STATSHISTORY];
223 uint32_t statspos;
224 ioerror lasterrtab[LASTERRSIZE];
225 struct chunk **chunktab;
226 uint32_t chunkcount;
227 uint32_t chunktabsize;
228 uint32_t lasterrindx;
229 double lastrefresh;
230 dev_t devid;
231 ino_t lockinode;
232 int lfd;
233 double read_corr;
234 double write_corr;
235 uint32_t read_dist;
236 uint32_t write_dist;
237 uint8_t read_first;
238 uint8_t write_first;
239 uint8_t rebalance_in_progress;
240 uint64_t rebalance_last_usec;
241 // double carry;
242 pthread_t scanthread;
243 struct chunk *testhead,**testtail;
244 struct folder *next;
245 } folder;
246
247 /*
248 typedef struct damaged {
249 char *path;
250 uint64_t avail;
251 uint64_t total;
252 ioerror lasterror;
253 uint32_t chunkcount;
254 struct damaged_disk *next;
255 } damaged;
256 */
257
/* Tunables with their compile-time defaults.
 * NOTE(review): presumably overwritten from the configuration - the code doing that is not visible here. */
258 static uint32_t HDDTestFreq = 10;
259 static uint32_t HDDRebalancePerc = 20;
260 static uint32_t HDDErrorCount = 2;
261 static uint32_t HDDErrorTime = 600;
262 static uint64_t LeaveFree;
263 static uint8_t DoFsyncBeforeClose = 0;
264
265 /* folders data */
266 static folder *folderhead = NULL;
267
268 /* chunk hash */
269 static chunk* hashtab[HASHSIZE];
270
271 /* extra chunk info */
272 static dopchunk *dophashtab[DHASHSIZE];
273 //static dopchunk *dopchunks = NULL;
274 static dopchunk *newdopchunks = NULL;
275
276 // master reports
277 static damagedchunk *damagedchunks = NULL;
278 static lostchunk *lostchunks = NULL;
279 static newchunk *newchunks = NULL;
280 static uint32_t errorcounter = 0;
281 static int hddspacechanged = 0;
282
283 static pthread_t rebalancethread,foldersthread,delayedthread,testerthread;
284 static uint8_t term = 0;
285 static uint8_t folderactions = 0;
286 static uint8_t testerreset = 0;
287 static pthread_mutex_t termlock = PTHREAD_MUTEX_INITIALIZER;
288
289 // stats_* counters below and the per-folder cstat / stats[] history (see hdd_stats_dataread, hdd_diskinfo_movestats)
290 static pthread_mutex_t statslock = PTHREAD_MUTEX_INITIALIZER;
291
292 // newdopchunks + dophashtab
293 static pthread_mutex_t doplock = PTHREAD_MUTEX_INITIALIZER;
294 static pthread_mutex_t ndoplock = PTHREAD_MUTEX_INITIALIZER;
295
296 // master reports = damaged chunks, lost chunks, new chunks, errorcounter, hddspacechanged
// the hdd_get_*_chunk_count functions return with this lock held; the matching hdd_get_*_chunk_data call releases it
297 static pthread_mutex_t dclock = PTHREAD_MUTEX_INITIALIZER;
298
299 // hashtab - only hash tab, chunks have their own separate locks
// also held while chunk->state / chunk->ccond are changed and while cclist is searched or extended (see hdd_chunk_get, hdd_chunk_release)
300 static pthread_mutex_t hashlock = PTHREAD_MUTEX_INITIALIZER;
301 static cntcond *cclist = NULL;
302
303 // folderhead + all data in structures
// hdd_diskinfo_v1_size / hdd_diskinfo_v2_size return with this lock held; the matching *_data call releases it
304 static pthread_mutex_t folderlock = PTHREAD_MUTEX_INITIALIZER;
305
306 // chunk tester
307 static pthread_mutex_t testlock = PTHREAD_MUTEX_INITIALIZER;
308
309 #ifndef PRESERVE_BLOCK
310 static pthread_key_t hdrbufferkey;
311 static pthread_key_t blockbufferkey;
312 #endif
313
314 /*
315 static uint8_t wait_for_scan = 0;
316 static uint32_t scanprogress;
317 static uint8_t scanprogresswaiting;
318 static pthread_cond_t scanprogresscond = PTHREAD_COND_INITIALIZER;
319 */
320
321 static uint32_t emptyblockcrc;
322
/* global I/O counters, read and reset by hdd_stats() under statslock */
323 static uint64_t stats_bytesr = 0;
324 static uint64_t stats_bytesw = 0;
325 static uint32_t stats_opr = 0;
326 static uint32_t stats_opw = 0;
/* NOTE(review): the data byte counters are 32-bit (hdd_stats() exports them through uint32_t pointers) while stats_bytesr/stats_bytesw are 64-bit */
327 static uint32_t stats_databytesr = 0;
328 static uint32_t stats_databytesw = 0;
329 static uint32_t stats_dataopr = 0;
330 static uint32_t stats_dataopw = 0;
331 static uint64_t stats_rtime = 0;
332 static uint64_t stats_wtime = 0;
333
/* per-operation counters, read and reset by hdd_op_stats() under statslock */
334 static uint32_t stats_create = 0;
335 static uint32_t stats_delete = 0;
336 static uint32_t stats_test = 0;
337 static uint32_t stats_version = 0;
338 static uint32_t stats_duplicate = 0;
339 static uint32_t stats_truncate = 0;
340 static uint32_t stats_duptrunc = 0;
341
/* Zero every counter of the statistics record pointed to by r. */
static inline void hdd_stats_clear(hddstats *r) {
	memset(r,0,sizeof(*r));
}
345
/*
 * Merge the counters of *src into *dst: byte counts, operation counts and
 * time sums are added; each of the three *max fields ends up holding the
 * larger of the two values.
 */
static inline void hdd_stats_add(hddstats *dst,hddstats *src) {
	// operation counters
	dst->rops = dst->rops + src->rops;
	dst->wops = dst->wops + src->wops;
	dst->fsyncops = dst->fsyncops + src->fsyncops;
	// byte counters
	dst->rbytes = dst->rbytes + src->rbytes;
	dst->wbytes = dst->wbytes + src->wbytes;
	// time sums
	dst->nsecreadsum = dst->nsecreadsum + src->nsecreadsum;
	dst->nsecwritesum = dst->nsecwritesum + src->nsecwritesum;
	dst->nsecfsyncsum = dst->nsecfsyncsum + src->nsecfsyncsum;
	// maxima
	dst->nsecreadmax = (src->nsecreadmax>dst->nsecreadmax) ? src->nsecreadmax : dst->nsecreadmax;
	dst->nsecwritemax = (src->nsecwritemax>dst->nsecwritemax) ? src->nsecwritemax : dst->nsecwritemax;
	dst->nsecfsyncmax = (src->nsecfsyncmax>dst->nsecfsyncmax) ? src->nsecfsyncmax : dst->nsecfsyncmax;
}
365
366 /* size: 64 */
hdd_stats_binary_pack(uint8_t ** buff,hddstats * r)367 static inline void hdd_stats_binary_pack(uint8_t **buff,hddstats *r) {
368 put64bit(buff,r->rbytes);
369 put64bit(buff,r->wbytes);
370 put64bit(buff,r->nsecreadsum/1000);
371 put64bit(buff,r->nsecwritesum/1000);
372 put64bit(buff,r->nsecfsyncsum/1000);
373 put32bit(buff,r->rops);
374 put32bit(buff,r->wops);
375 put32bit(buff,r->fsyncops);
376 put32bit(buff,r->nsecreadmax/1000);
377 put32bit(buff,r->nsecwritemax/1000);
378 put32bit(buff,r->nsecfsyncmax/1000);
379 }
380
381 /*
382 void printbacktrace(void) {
383 void* callstack[128];
384 int i, frames = backtrace(callstack, 128);
385 char** strs = backtrace_symbols(callstack, frames);
386 for (i=0 ; i<frames ; ++i) {
387 printf("%s\n", strs[i]);
388 }
389 free(strs);
390 }
391 */
hdd_report_damaged_chunk(uint64_t chunkid)392 void hdd_report_damaged_chunk(uint64_t chunkid) {
393 damagedchunk *dc;
394 zassert(pthread_mutex_lock(&dclock));
395 dc = malloc(sizeof(damagedchunk));
396 passert(dc);
397 dc->chunkid = chunkid;
398 dc->next = damagedchunks;
399 damagedchunks = dc;
400 zassert(pthread_mutex_unlock(&dclock));
401 }
402
hdd_get_damaged_chunk_count(void)403 uint32_t hdd_get_damaged_chunk_count(void) {
404 damagedchunk *dc;
405 uint32_t result;
406 zassert(pthread_mutex_lock(&dclock));
407 result = 0;
408 for (dc=damagedchunks ; dc ; dc=dc->next) {
409 result++;
410 }
411 return result;
412 }
413
hdd_get_damaged_chunk_data(uint8_t * buff)414 void hdd_get_damaged_chunk_data(uint8_t *buff) {
415 damagedchunk *dc,*ndc;
416 uint64_t chunkid;
417 if (buff) {
418 dc = damagedchunks;
419 while (dc) {
420 ndc = dc;
421 dc = dc->next;
422 chunkid = ndc->chunkid;
423 put64bit(&buff,chunkid);
424 free(ndc);
425 }
426 damagedchunks = NULL;
427 }
428 zassert(pthread_mutex_unlock(&dclock));
429 }
430
hdd_report_lost_chunk(uint64_t chunkid)431 void hdd_report_lost_chunk(uint64_t chunkid) {
432 lostchunk *lc;
433 zassert(pthread_mutex_lock(&dclock));
434 if (lostchunks && lostchunks->chunksinblock<LOSTCHUNKSBLOCKSIZE) {
435 lostchunks->chunkidblock[lostchunks->chunksinblock++] = chunkid;
436 } else {
437 lc = malloc(sizeof(lostchunk));
438 passert(lc);
439 lc->chunkidblock[0] = chunkid;
440 lc->chunksinblock = 1;
441 lc->next = lostchunks;
442 lostchunks = lc;
443 }
444 zassert(pthread_mutex_unlock(&dclock));
445 }
446
hdd_get_lost_chunk_count(uint32_t limit)447 uint32_t hdd_get_lost_chunk_count(uint32_t limit) {
448 lostchunk *lc;
449 uint32_t result;
450 zassert(pthread_mutex_lock(&dclock));
451 result = 0;
452 for (lc=lostchunks ; lc ; lc=lc->next) {
453 if (limit>lc->chunksinblock) {
454 limit -= lc->chunksinblock;
455 result += lc->chunksinblock;
456 }
457 }
458 return result;
459 }
460
hdd_get_lost_chunk_data(uint8_t * buff,uint32_t limit)461 void hdd_get_lost_chunk_data(uint8_t *buff,uint32_t limit) {
462 lostchunk *lc,**lcptr;
463 uint64_t chunkid;
464 uint32_t i;
465 if (buff) {
466 lcptr = &lostchunks;
467 while ((lc=*lcptr)) {
468 if (limit>lc->chunksinblock) {
469 for (i=0 ; i<lc->chunksinblock ; i++) {
470 chunkid = lc->chunkidblock[i];
471 put64bit(&buff,chunkid);
472 }
473 limit -= lc->chunksinblock;
474 *lcptr = lc->next;
475 free(lc);
476 } else {
477 lcptr = &(lc->next);
478 }
479 }
480 }
481 zassert(pthread_mutex_unlock(&dclock));
482 }
483
hdd_report_new_chunk(uint64_t chunkid,uint32_t version)484 void hdd_report_new_chunk(uint64_t chunkid,uint32_t version) {
485 newchunk *nc;
486 zassert(pthread_mutex_lock(&dclock));
487 if (newchunks && newchunks->chunksinblock<NEWCHUNKSBLOCKSIZE) {
488 newchunks->chunkidblock[newchunks->chunksinblock] = chunkid;
489 newchunks->versionblock[newchunks->chunksinblock] = version;
490 newchunks->chunksinblock++;
491 } else {
492 nc = malloc(sizeof(newchunk));
493 passert(nc);
494 nc->chunkidblock[0] = chunkid;
495 nc->versionblock[0] = version;
496 nc->chunksinblock = 1;
497 nc->next = newchunks;
498 newchunks = nc;
499 }
500 zassert(pthread_mutex_unlock(&dclock));
501 }
502
hdd_get_new_chunk_count(uint32_t limit)503 uint32_t hdd_get_new_chunk_count(uint32_t limit) {
504 newchunk *nc;
505 uint32_t result;
506 zassert(pthread_mutex_lock(&dclock));
507 result = 0;
508 for (nc=newchunks ; nc ; nc=nc->next) {
509 if (limit>nc->chunksinblock) {
510 limit -= nc->chunksinblock;
511 result += nc->chunksinblock;
512 }
513 }
514 return result;
515 }
516
hdd_get_new_chunk_data(uint8_t * buff,uint32_t limit)517 void hdd_get_new_chunk_data(uint8_t *buff,uint32_t limit) {
518 newchunk *nc,**ncptr;
519 uint64_t chunkid;
520 uint32_t version;
521 uint32_t i;
522 if (buff) {
523 ncptr = &newchunks;
524 while ((nc=*ncptr)) {
525 if (limit>nc->chunksinblock) {
526 for (i=0 ; i<nc->chunksinblock ; i++) {
527 chunkid = nc->chunkidblock[i];
528 version = nc->versionblock[i];
529 put64bit(&buff,chunkid);
530 put32bit(&buff,version);
531 }
532 limit -= nc->chunksinblock;
533 *ncptr = nc->next;
534 free(nc);
535 } else {
536 ncptr = &(nc->next);
537 }
538 }
539 }
540 zassert(pthread_mutex_unlock(&dclock));
541 }
542
hdd_errorcounter(void)543 uint32_t hdd_errorcounter(void) {
544 uint32_t result;
545 zassert(pthread_mutex_lock(&dclock));
546 result = errorcounter;
547 errorcounter = 0;
548 zassert(pthread_mutex_unlock(&dclock));
549 return result;
550 }
551
hdd_spacechanged(void)552 int hdd_spacechanged(void) {
553 uint32_t result;
554 zassert(pthread_mutex_lock(&dclock));
555 result = hddspacechanged;
556 hddspacechanged = 0;
557 zassert(pthread_mutex_unlock(&dclock));
558 return result;
559 }
560
hdd_stats(uint64_t * br,uint64_t * bw,uint32_t * opr,uint32_t * opw,uint32_t * dbr,uint32_t * dbw,uint32_t * dopr,uint32_t * dopw,uint64_t * rtime,uint64_t * wtime)561 void hdd_stats(uint64_t *br,uint64_t *bw,uint32_t *opr,uint32_t *opw,uint32_t *dbr,uint32_t *dbw,uint32_t *dopr,uint32_t *dopw,uint64_t *rtime,uint64_t *wtime) {
562 zassert(pthread_mutex_lock(&statslock));
563 *br = stats_bytesr;
564 *bw = stats_bytesw;
565 *opr = stats_opr;
566 *opw = stats_opw;
567 *dbr = stats_databytesr;
568 *dbw = stats_databytesw;
569 *dopr = stats_dataopr;
570 *dopw = stats_dataopw;
571 *rtime = stats_rtime;
572 *wtime = stats_wtime;
573 stats_bytesr = 0;
574 stats_bytesw = 0;
575 stats_opr = 0;
576 stats_opw = 0;
577 stats_databytesr = 0;
578 stats_databytesw = 0;
579 stats_dataopr = 0;
580 stats_dataopw = 0;
581 stats_rtime = 0;
582 stats_wtime = 0;
583 zassert(pthread_mutex_unlock(&statslock));
584 }
585
hdd_op_stats(uint32_t * op_create,uint32_t * op_delete,uint32_t * op_version,uint32_t * op_duplicate,uint32_t * op_truncate,uint32_t * op_duptrunc,uint32_t * op_test)586 void hdd_op_stats(uint32_t *op_create,uint32_t *op_delete,uint32_t *op_version,uint32_t *op_duplicate,uint32_t *op_truncate,uint32_t *op_duptrunc,uint32_t *op_test) {
587 zassert(pthread_mutex_lock(&statslock));
588 *op_create = stats_create;
589 *op_delete = stats_delete;
590 *op_version = stats_version;
591 *op_duplicate = stats_duplicate;
592 *op_truncate = stats_truncate;
593 *op_duptrunc = stats_duptrunc;
594 *op_test = stats_test;
595 stats_create = 0;
596 stats_delete = 0;
597 stats_version = 0;
598 stats_duplicate = 0;
599 stats_truncate = 0;
600 stats_duptrunc = 0;
601 stats_test = 0;
602 zassert(pthread_mutex_unlock(&statslock));
603 }
604
hdd_stats_read(uint32_t size)605 static inline void hdd_stats_read(uint32_t size) {
606 zassert(pthread_mutex_lock(&statslock));
607 stats_opr++;
608 stats_bytesr += size;
609 zassert(pthread_mutex_unlock(&statslock));
610 }
611
hdd_stats_write(uint32_t size)612 static inline void hdd_stats_write(uint32_t size) {
613 zassert(pthread_mutex_lock(&statslock));
614 stats_opw++;
615 stats_bytesw += size;
616 zassert(pthread_mutex_unlock(&statslock));
617 }
618
/*
 * Account one data read of `size` bytes that took `rtime` on folder f:
 * updates the global data-read counters and f->cstat under statslock.
 * Calls with rtime<=0 are ignored completely.
 * f->cstat.nsecreadmax is only 32 bits wide, so a duration that does not
 * fit is recorded as UINT32_MAX instead of being silently truncated to an
 * unrelated small value.
 */
static inline void hdd_stats_dataread(folder *f,uint32_t size,int64_t rtime) {
	uint32_t rtimemax;
	if (rtime<=0) {
		return;
	}
	rtimemax = (rtime>(int64_t)UINT32_MAX) ? UINT32_MAX : (uint32_t)rtime;
	zassert(pthread_mutex_lock(&statslock));
	stats_dataopr++;
	stats_databytesr += size;
	stats_rtime += rtime;
	f->cstat.rops++;
	f->cstat.rbytes += size;
	f->cstat.nsecreadsum += rtime;
	if (rtimemax>f->cstat.nsecreadmax) {
		f->cstat.nsecreadmax = rtimemax;
	}
	zassert(pthread_mutex_unlock(&statslock));
}
635
/*
 * Account one data write of `size` bytes that took `wtime` on folder f:
 * updates the global data-write counters and f->cstat under statslock.
 * Calls with wtime<=0 are ignored completely.
 * f->cstat.nsecwritemax is only 32 bits wide, so a duration that does not
 * fit is recorded as UINT32_MAX instead of being silently truncated to an
 * unrelated small value.
 */
static inline void hdd_stats_datawrite(folder *f,uint32_t size,int64_t wtime) {
	uint32_t wtimemax;
	if (wtime<=0) {
		return;
	}
	wtimemax = (wtime>(int64_t)UINT32_MAX) ? UINT32_MAX : (uint32_t)wtime;
	zassert(pthread_mutex_lock(&statslock));
	stats_dataopw++;
	stats_databytesw += size;
	stats_wtime += wtime;
	f->cstat.wops++;
	f->cstat.wbytes += size;
	f->cstat.nsecwritesum += wtime;
	if (wtimemax>f->cstat.nsecwritemax) {
		f->cstat.nsecwritemax = wtimemax;
	}
	zassert(pthread_mutex_unlock(&statslock));
}
652
/*
 * Account one fsync that took `fsynctime` on folder f: the time is added
 * to the global write time and to f->cstat under statslock.
 * Calls with fsynctime<=0 are ignored completely.
 * f->cstat.nsecfsyncmax is only 32 bits wide, so a duration that does not
 * fit is recorded as UINT32_MAX instead of being silently truncated to an
 * unrelated small value.
 */
static inline void hdd_stats_datafsync(folder *f,int64_t fsynctime) {
	uint32_t fsynctimemax;
	if (fsynctime<=0) {
		return;
	}
	fsynctimemax = (fsynctime>(int64_t)UINT32_MAX) ? UINT32_MAX : (uint32_t)fsynctime;
	zassert(pthread_mutex_lock(&statslock));
	stats_wtime += fsynctime;
	f->cstat.fsyncops++;
	f->cstat.nsecfsyncsum += fsynctime;
	if (fsynctimemax>f->cstat.nsecfsyncmax) {
		f->cstat.nsecfsyncmax = fsynctimemax;
	}
	zassert(pthread_mutex_unlock(&statslock));
}
666
hdd_diskinfo_v1_size()667 uint32_t hdd_diskinfo_v1_size() {
668 folder *f;
669 uint32_t s,sl;
670
671 s = 0;
672 zassert(pthread_mutex_lock(&folderlock));
673 for (f=folderhead ; f ; f=f->next ) {
674 sl = strlen(f->path);
675 if (sl>255) {
676 sl = 255;
677 }
678 s += 34+sl;
679 }
680 return s;
681 }
682
hdd_diskinfo_v1_data(uint8_t * buff)683 void hdd_diskinfo_v1_data(uint8_t *buff) {
684 folder *f;
685 uint32_t sl;
686 uint32_t ei;
687 if (buff) {
688 for (f=folderhead ; f ; f=f->next ) {
689 sl = strlen(f->path);
690 if (sl>255) {
691 put8bit(&buff,255);
692 memcpy(buff,"(...)",5);
693 memcpy(buff+5,f->path+(sl-250),250);
694 buff += 255;
695 } else {
696 put8bit(&buff,sl);
697 if (sl>0) {
698 memcpy(buff,f->path,sl);
699 buff += sl;
700 }
701 }
702 put8bit(&buff,((f->todel)?1:0)+((f->damaged)?2:0)+((f->scanstate==SCST_SCANINPROGRESS)?4:0));
703 ei = (f->lasterrindx+(LASTERRSIZE-1))%LASTERRSIZE;
704 put64bit(&buff,f->lasterrtab[ei].chunkid);
705 put32bit(&buff,f->lasterrtab[ei].timestamp);
706 put64bit(&buff,f->total-f->avail);
707 put64bit(&buff,f->total);
708 put32bit(&buff,f->chunkcount);
709 }
710 }
711 zassert(pthread_mutex_unlock(&folderlock));
712 }
713
hdd_diskinfo_v2_size()714 uint32_t hdd_diskinfo_v2_size() {
715 folder *f;
716 uint32_t s,sl;
717
718 s = 0;
719 zassert(pthread_mutex_lock(&folderlock));
720 for (f=folderhead ; f ; f=f->next ) {
721 sl = strlen(f->path);
722 if (sl>255) {
723 sl = 255;
724 }
725 s += 2+226+sl;
726 }
727 return s;
728 }
729
hdd_diskinfo_v2_data(uint8_t * buff)730 void hdd_diskinfo_v2_data(uint8_t *buff) {
731 folder *f;
732 hddstats s;
733 uint32_t sl;
734 uint32_t ei;
735 uint32_t pos;
736 if (buff) {
737 zassert(pthread_mutex_lock(&statslock));
738 for (f=folderhead ; f ; f=f->next ) {
739 sl = strlen(f->path);
740 if (sl>255) {
741 put16bit(&buff,226+255); // size of this entry
742 put8bit(&buff,255);
743 memcpy(buff,"(...)",5);
744 memcpy(buff+5,f->path+(sl-250),250);
745 buff += 255;
746 } else {
747 put16bit(&buff,226+sl); // size of this entry
748 put8bit(&buff,sl);
749 if (sl>0) {
750 memcpy(buff,f->path,sl);
751 buff += sl;
752 }
753 }
754 put8bit(&buff,((f->todel)?1:0)+((f->damaged)?2:0)+((f->scanstate==SCST_SCANINPROGRESS)?4:0));
755 ei = (f->lasterrindx+(LASTERRSIZE-1))%LASTERRSIZE;
756 put64bit(&buff,f->lasterrtab[ei].chunkid);
757 put32bit(&buff,f->lasterrtab[ei].timestamp);
758 if (f->scanstate==SCST_SCANINPROGRESS) {
759 put64bit(&buff,f->scanprogress);
760 put64bit(&buff,0);
761 } else {
762 put64bit(&buff,f->total-f->avail);
763 put64bit(&buff,f->total);
764 }
765 put32bit(&buff,f->chunkcount);
766 s = f->stats[f->statspos];
767 hdd_stats_binary_pack(&buff,&s); // 64B
768 for (pos=1 ; pos<60 ; pos++) {
769 hdd_stats_add(&s,&(f->stats[(f->statspos+pos)%STATSHISTORY]));
770 }
771 hdd_stats_binary_pack(&buff,&s); // 64B
772 for (pos=60 ; pos<24*60 ; pos++) {
773 hdd_stats_add(&s,&(f->stats[(f->statspos+pos)%STATSHISTORY]));
774 }
775 hdd_stats_binary_pack(&buff,&s); // 64B
776 }
777 zassert(pthread_mutex_unlock(&statslock));
778 }
779 zassert(pthread_mutex_unlock(&folderlock));
780 }
781
/* modes of hdd_open_files_handle */
#define OF_BEFORE_OPEN 0
#define OF_AFTER_CLOSE 1
#define OF_INIT 2
#define OF_INFO 3

/*
 * Limiter of simultaneously open chunk files. All state is kept in
 * function-local statics.
 *   OF_BEFORE_OPEN - blocks while `count` has reached `limit`, then takes one slot
 *   OF_AFTER_CLOSE - gives one slot back and wakes one waiting thread, if any
 *   OF_INIT        - sets `limit` to 2/3 of the soft RLIMIT_NOFILE value and logs it;
 *                    when getrlimit fails the previous limit is kept and a warning
 *                    is logged (the rlimit structure is not read in that case)
 *   OF_INFO        - logs the current count and limit
 * Any other mode is ignored.
 */
static inline void hdd_open_files_handle(uint8_t mode) {
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
	static uint32_t count = 0;
	static uint32_t limit = 500;
	static uint32_t waiting = 0;
	if (mode==OF_BEFORE_OPEN) { // before open
		zassert(pthread_mutex_lock(&lock));
		while (count >= limit) {
			waiting++;
			zassert(pthread_cond_wait(&cond,&lock));
		}
		count++;
		zassert(pthread_mutex_unlock(&lock));
	} else if (mode==OF_AFTER_CLOSE) { // after close
		zassert(pthread_mutex_lock(&lock));
		count--;
		if (waiting>0) {
			zassert(pthread_cond_signal(&cond));
			waiting--;
		}
		zassert(pthread_mutex_unlock(&lock));
	} else if (mode==OF_INIT) {
		struct rlimit rl;
		if (getrlimit(RLIMIT_NOFILE,&rl)<0) {
			// rl is undefined here - do not derive the limit from it
			syslog(LOG_WARNING,"hdd space manager: can't get open files limit - leaving open chunks limit at: %"PRIu32,limit);
		} else {
			limit = (rl.rlim_cur * 2) / 3;
			syslog(LOG_NOTICE,"hdd space manager: setting open chunks limit to: %"PRIu32,limit);
		}
	} else if (mode==OF_INFO) {
		uint32_t c;
		zassert(pthread_mutex_lock(&lock));
		c = count;
		zassert(pthread_mutex_unlock(&lock));
		syslog(LOG_NOTICE,"hdd space manager: open files: %"PRIu32"/%"PRIu32,c,limit);
	}
}
822
hdd_diskinfo_movestats(void)823 void hdd_diskinfo_movestats(void) {
824 folder *f;
825 zassert(pthread_mutex_lock(&folderlock));
826 zassert(pthread_mutex_lock(&statslock));
827 for (f=folderhead ; f ; f=f->next ) {
828 if (f->statspos==0) {
829 f->statspos = STATSHISTORY-1;
830 } else {
831 f->statspos--;
832 }
833 f->stats[f->statspos] = f->cstat;
834 hdd_stats_clear(&(f->cstat));
835 }
836 zassert(pthread_mutex_unlock(&statslock));
837 zassert(pthread_mutex_unlock(&folderlock));
838 }
839
840 // testlock:locked
hdd_remove_chunk_from_test_chain(chunk * c,folder * f)841 static inline void hdd_remove_chunk_from_test_chain(chunk *c,folder *f) {
842 *(c->testprev) = c->testnext;
843 if (c->testnext) {
844 c->testnext->testprev = c->testprev;
845 } else {
846 f->testtail = c->testprev;
847 }
848 c->testnext = NULL;
849 c->testprev = NULL;
850 }
851
852 // testlock:locked
hdd_add_chunk_to_test_chain(chunk * c,folder * f)853 static inline void hdd_add_chunk_to_test_chain(chunk *c,folder *f) {
854 c->testnext = NULL;
855 c->testprev = f->testtail;
856 *(c->testprev) = c;
857 f->testtail = &(c->testnext);
858 }
859
860 // folderlock:locked
hdd_remove_chunk_from_folder(chunk * c,folder * f)861 static inline void hdd_remove_chunk_from_folder(chunk *c,folder *f) {
862 f->chunkcount--;
863 f->chunktab[c->ownerindx] = f->chunktab[f->chunkcount];
864 f->chunktab[c->ownerindx]->ownerindx = c->ownerindx;
865 c->owner = NULL;
866 c->ownerindx = 0;
867 }
868
869 // folderlock:locked
hdd_add_chunk_to_folder(chunk * c,folder * f)870 static inline void hdd_add_chunk_to_folder(chunk *c,folder *f) {
871 if (f->chunkcount==f->chunktabsize) {
872 if (f->chunktabsize==0) {
873 f->chunktabsize=10000;
874 f->chunktab = malloc(sizeof(chunk*)*f->chunktabsize);
875 } else {
876 f->chunktabsize*=3;
877 f->chunktabsize/=2;
878 f->chunktab = realloc(f->chunktab,sizeof(chunk*)*f->chunktabsize);
879 }
880 passert(f->chunktab);
881 }
882 f->chunktab[f->chunkcount] = c;
883 c->owner = f;
884 c->ownerindx = f->chunkcount;
885 f->chunkcount++;
886 }
887
/*
 * Unlink chunk c from its hashtab bucket and destroy it: an open
 * descriptor is closed (and reported with OF_AFTER_CLOSE), the crc and
 * block buffers, the file name and the structure itself are released.
 * When c is not found in its bucket nothing is touched.
 * All callers in this file hold hashlock.
 */
static inline void hdd_chunk_remove(chunk *c) {
	chunk **link;

	link = &(hashtab[HASHPOS(c->chunkid)]);
	while (*link!=NULL && *link!=c) {
		link = &((*link)->next);
	}
	if (*link==NULL) {	// not in the table
		return;
	}
	*link = c->next;
	if (c->fd>=0) {
		close(c->fd);
		hdd_open_files_handle(OF_AFTER_CLOSE);
	}
	if (c->crc!=NULL) {
#ifdef MMAP_ALLOC
		munmap((void*)(c->crc),4096);
#else
		free(c->crc);
#endif
	}
#ifdef PRESERVE_BLOCK
	if (c->block!=NULL) {
# ifdef MMAP_ALLOC
		munmap((void*)(c->block),MFSBLOCKSIZE);
# else
		free(c->block);
# endif
	}
#endif /* PRESERVE_BLOCK */
	free(c->filename);	// free(NULL) is a no-op
	free(c);
}
924
/*
 * Give back a chunk obtained from hdd_chunk_get/hdd_chunk_tryfind.
 *   CH_LOCKED      -> CH_AVAIL; one waiting thread (if any) is woken up
 *   CH_TOBEDELETED -> with waiters: CH_DELETED and one of them is woken up;
 *                     without waiters: the chunk is removed and freed
 * Other states are left unchanged. Everything happens under hashlock.
 */
static void hdd_chunk_release(chunk *c) {
	zassert(pthread_mutex_lock(&hashlock));
	switch (c->state) {
	case CH_LOCKED:
		c->state = CH_AVAIL;
		if (c->ccond!=NULL) {
			zassert(pthread_cond_signal(&(c->ccond->cond)));
		}
		break;
	case CH_TOBEDELETED:
		if (c->ccond==NULL) {
			hdd_chunk_remove(c);
		} else {
			c->state = CH_DELETED;
			zassert(pthread_cond_signal(&(c->ccond->cond)));
		}
		break;
	default:
		break;
	}
	zassert(pthread_mutex_unlock(&hashlock));
}
947
/*
 * Validate the chunk file of c and derive c->blocks from its size.
 * fstat is used when the chunk is open (c->fd>=0), stat on c->filename
 * otherwise. The file has to be a regular file whose size is at least
 * CHUNKHDRSIZE, at most CHUNKHDRSIZE+MFSCHUNKSIZE and whose data part
 * passes the MFSBLOCKMASK check.
 * Returns 0 on success (c->blocks set, c->validattr set to 1), -1 otherwise
 * (c is not modified).
 */
static int hdd_chunk_getattr(chunk *c) {
	struct stat sb;
	int status;

	if (c->fd<0) {
		status = stat(c->filename,&sb);
	} else {
		status = fstat(c->fd,&sb);
	}
	if (status<0) {
		return -1;
	}
	if ((sb.st_mode & S_IFMT) != S_IFREG) {
		return -1;
	}
	if (sb.st_size<CHUNKHDRSIZE) {	// shorter than the header
		return -1;
	}
	if (sb.st_size>(CHUNKHDRSIZE+MFSCHUNKSIZE)) {	// longer than a full chunk
		return -1;
	}
	if (((sb.st_size-CHUNKHDRSIZE)&MFSBLOCKMASK)!=0) {	// data part is not a whole number of blocks
		return -1;
	}
	c->blocks = (sb.st_size - CHUNKHDRSIZE) / MFSBLOCKSIZE;
	c->validattr = 1;
	return 0;
}
970
hdd_chunk_tryfind(uint64_t chunkid)971 static chunk* hdd_chunk_tryfind(uint64_t chunkid) {
972 uint32_t hashpos = HASHPOS(chunkid);
973 chunk *c;
974 zassert(pthread_mutex_lock(&hashlock));
975 for (c=hashtab[hashpos] ; c && c->chunkid!=chunkid ; c=c->next) {}
976 if (c!=NULL) {
977 if (c->state==CH_LOCKED) {
978 c = CHUNKLOCKED;
979 } else if (c->state!=CH_AVAIL) {
980 c = NULL;
981 } else {
982 c->state = CH_LOCKED;
983 }
984 }
985 // if (c!=NULL && c!=CHUNKLOCKED) {
986 // syslog(LOG_WARNING,"hdd_chunk_tryfind returns chunk: %016"PRIX64" (c->state:%u)",c->chunkid,c->state);
987 // }
988 zassert(pthread_mutex_unlock(&hashlock));
989 return c;
990 }
991
992 static void hdd_chunk_delete(chunk *c);
993
hdd_chunk_get(uint64_t chunkid,uint8_t cflag)994 static chunk* hdd_chunk_get(uint64_t chunkid,uint8_t cflag) {
995 uint32_t hashpos = HASHPOS(chunkid);
996 chunk *c;
997 cntcond *cc;
998 zassert(pthread_mutex_lock(&hashlock));
999 for (c=hashtab[hashpos] ; c && c->chunkid!=chunkid ; c=c->next) {}
1000 if (c==NULL) {
1001 if (cflag!=CH_NEW_NONE) {
1002 c = malloc(sizeof(chunk));
1003 passert(c);
1004 c->chunkid = chunkid;
1005 c->version = 0;
1006 c->owner = NULL;
1007 c->filename = NULL;
1008 c->blocks = 0;
1009 c->crcrefcount = 0;
1010 c->opento = 0.0;
1011 c->crcto = 0.0;
1012 c->crcchanged = 0;
1013 c->fsyncneeded = 0;
1014 c->fd = -1;
1015 c->crc = NULL;
1016 c->state = CH_LOCKED;
1017 c->ccond = NULL;
1018 #ifdef PRESERVE_BLOCK
1019 c->blockto = 0.0;
1020 c->block = NULL;
1021 c->blockno = 0xFFFF;
1022 #endif
1023 c->validattr = 0;
1024 c->todel = 0;
1025 c->testnext = NULL;
1026 c->testprev = NULL;
1027 c->next = hashtab[hashpos];
1028 hashtab[hashpos] = c;
1029 }
1030 // syslog(LOG_WARNING,"hdd_chunk_get returns chunk: %016"PRIX64" (c->state:%u)",c->chunkid,c->state);
1031 zassert(pthread_mutex_unlock(&hashlock));
1032 return c;
1033 }
1034 if (cflag==CH_NEW_EXCLUSIVE) {
1035 if (c->state==CH_AVAIL || c->state==CH_LOCKED) {
1036 zassert(pthread_mutex_unlock(&hashlock));
1037 return NULL;
1038 }
1039 }
1040 for (;;) {
1041 switch (c->state) {
1042 case CH_AVAIL:
1043 c->state = CH_LOCKED;
1044 // syslog(LOG_WARNING,"hdd_chunk_get returns chunk: %016"PRIX64" (c->state:%u)",c->chunkid,c->state);
1045 zassert(pthread_mutex_unlock(&hashlock));
1046 if (c->validattr==0) {
1047 if (hdd_chunk_getattr(c)) {
1048 hdd_report_damaged_chunk(c->chunkid);
1049 unlink(c->filename);
1050 hdd_chunk_delete(c);
1051 return NULL;
1052 }
1053 }
1054 return c;
1055 case CH_DELETED:
1056 if (cflag!=CH_NEW_NONE) {
1057 if (c->fd>=0) {
1058 close(c->fd);
1059 hdd_open_files_handle(OF_AFTER_CLOSE);
1060 }
1061 if (c->crc!=NULL) {
1062 #ifdef MMAP_ALLOC
1063 munmap((void*)(c->crc),4096);
1064 #else
1065 free(c->crc);
1066 #endif
1067 }
1068 #ifdef PRESERVE_BLOCK
1069 if (c->block!=NULL) {
1070 # ifdef MMAP_ALLOC
1071 munmap((void*)(c->crc),MFSBLOCKSIZE);
1072 # else
1073 free(c->block);
1074 # endif
1075 }
1076 #endif /* PRESERVE_BLOCK */
1077 if (c->filename!=NULL) {
1078 free(c->filename);
1079 }
1080 c->version = 0;
1081 c->owner = NULL;
1082 c->filename = NULL;
1083 c->blocks = 0;
1084 c->crcrefcount = 0;
1085 c->opento = 0.0;
1086 c->crcto = 0.0;
1087 c->crcchanged = 0;
1088 c->fsyncneeded = 0;
1089 c->fd = -1;
1090 c->crc = NULL;
1091 #ifdef PRESERVE_BLOCK
1092 c->blockto = 0.0;
1093 c->block = NULL;
1094 c->blockno = 0xFFFF;
1095 #endif /* PRESERVE_BLOCK */
1096 c->validattr = 0;
1097 c->todel = 0;
1098 c->state = CH_LOCKED;
1099 // syslog(LOG_WARNING,"hdd_chunk_get returns chunk: %016"PRIX64" (c->state:%u)",c->chunkid,c->state);
1100 zassert(pthread_mutex_unlock(&hashlock));
1101 return c;
1102 }
1103 if (c->ccond==NULL) { // no more waiting threads - remove
1104 hdd_chunk_remove(c);
1105 } else { // there are waiting threads - wake them up
1106 // printf("wake up one thread waiting for DELETED chunk: %"PRIu64" on ccond:%p\n",c->chunkid,c->ccond);
1107 // printbacktrace();
1108 zassert(pthread_cond_signal(&(c->ccond->cond)));
1109 }
1110 zassert(pthread_mutex_unlock(&hashlock));
1111 return NULL;
1112 case CH_TOBEDELETED:
1113 case CH_LOCKED:
1114 if (c->ccond==NULL) {
1115 for (cc=cclist ; cc && cc->wcnt ; cc=cc->next) {}
1116 if (cc==NULL) {
1117 cc = malloc(sizeof(cntcond));
1118 passert(cc);
1119 zassert(pthread_cond_init(&(cc->cond),NULL));
1120 cc->wcnt = 0;
1121 cc->next = cclist;
1122 cclist = cc;
1123 }
1124 c->ccond = cc;
1125 }
1126 c->ccond->wcnt++;
1127 // printf("wait for %s chunk: %"PRIu64" on ccond:%p\n",(c->state==CH_LOCKED)?"LOCKED":"TOBEDELETED",c->chunkid,c->ccond);
1128 // printbacktrace();
1129 zassert(pthread_cond_wait(&(c->ccond->cond),&hashlock));
1130 // printf("%s chunk: %"PRIu64" woke up on ccond:%p\n",(c->state==CH_LOCKED)?"LOCKED":(c->state==CH_DELETED)?"DELETED":(c->state==CH_AVAIL)?"AVAIL":"TOBEDELETED",c->chunkid,c->ccond);
1131 c->ccond->wcnt--;
1132 if (c->ccond->wcnt==0) {
1133 c->ccond = NULL;
1134 }
1135 }
1136 }
1137 }
1138
/*
 * Detach chunk 'c' from its owning folder and from that folder's test chain,
 * then dispose of its hash entry.
 *
 * When c->ccond is set (threads are waiting on the chunk) the chunk is only
 * marked CH_DELETED and one waiter is signalled; otherwise the chunk is
 * handed to hdd_chunk_remove().
 *
 * folderlock, testlock and hashlock are taken one after another, each one
 * released before the next is acquired.
 */
static void hdd_chunk_delete(chunk *c) {
	folder *owner;

	/* step 1: folder bookkeeping */
	zassert(pthread_mutex_lock(&folderlock));
	owner = c->owner;
	hdd_remove_chunk_from_folder(c,owner);
	zassert(pthread_mutex_unlock(&folderlock));

	/* step 2: test chain */
	zassert(pthread_mutex_lock(&testlock));
	hdd_remove_chunk_from_test_chain(c,owner);
	zassert(pthread_mutex_unlock(&testlock));

	/* step 3: hash entry */
	zassert(pthread_mutex_lock(&hashlock));
	if (c->ccond==NULL) {
		hdd_chunk_remove(c);
	} else {
		c->state = CH_DELETED;
		zassert(pthread_cond_signal(&(c->ccond->cond)));
	}
	zassert(pthread_mutex_unlock(&hashlock));
}
1159
hdd_chunk_create(folder * f,uint64_t chunkid,uint32_t version)1160 static chunk* hdd_chunk_create(folder *f,uint64_t chunkid,uint32_t version) {
1161 uint32_t leng;
1162 chunk *c;
1163
1164 c = hdd_chunk_get(chunkid,CH_NEW_EXCLUSIVE);
1165 if (c==NULL) {
1166 return NULL;
1167 }
1168 c->version = version;
1169 leng = strlen(f->path);
1170 c->filename = malloc(leng+39);
1171 passert(c->filename);
1172 memcpy(c->filename,f->path,leng);
1173 // memcpy(c->filename+leng,"__/chunk_XXXXXXXXXXXXXXXX_XXXXXXXX.mfs");
1174 // c->filename[leng]="0123456789ABCDEF"[(chunkid>>4)&15];
1175 // c->filename[leng+1]="0123456789ABCDEF"[chunkid&15];
1176 // sprintf(c->filename+leng,"%c%c/chunk_%016"PRIX64"_%08"PRIX32".mfs","0123456789ABCDEF"[(chunkid>>4)&15],"0123456789ABCDEF"[chunkid&15],chunkid,version);
1177 sprintf(c->filename+leng,"%02X/chunk_%016"PRIX64"_%08"PRIX32".mfs",(unsigned int)(chunkid&255),chunkid,version);
1178 c->blocks = 0;
1179 c->validattr = 1;
1180 f->needrefresh = 1;
1181 hdd_add_chunk_to_folder(c,f);
1182 zassert(pthread_mutex_lock(&testlock));
1183 hdd_add_chunk_to_test_chain(c,f);
1184 zassert(pthread_mutex_unlock(&testlock));
1185 return c;
1186 }
1187
/* Shorthand for hdd_chunk_get() called with the CH_NEW_NONE flag. */
#define hdd_chunk_find(chunkid) hdd_chunk_get(chunkid,CH_NEW_NONE)
1189
/*
 * Move chunk 'c' to the end of its owner's test chain.
 * Nothing happens when the chunk has no successor in the chain (c->testnext
 * is NULL). The whole operation runs under testlock.
 */
static void hdd_chunk_testmove(chunk *c) {
	chunk *succ;
	folder *own;

	zassert(pthread_mutex_lock(&testlock));
	succ = c->testnext;
	if (succ!=NULL) {
		own = c->owner;
		/* unlink from the current position */
		*(c->testprev) = succ;
		succ->testprev = c->testprev;
		/* append behind the current tail */
		c->testnext = NULL;
		c->testprev = own->testtail;
		*(c->testprev) = c;
		own->testtail = &(c->testnext);
	}
	zassert(pthread_mutex_unlock(&testlock));
}
1203
// does not take folderlock - the caller must already hold it (hashlock and testlock are taken internally when a size limit is set)
/*
 * Recompute f->total and f->avail for folder 'f'.
 *
 * Size-limited folder (f->sizelimit!=0):
 *   usage is estimated from the chunks on the folder's test chain that are in
 *   state CH_AVAIL with valid attributes:
 *     calcsize = ((knownblocks*chunkcount/knowncount)*64 + chunkcount*5)*1024
 *   (presumably 64 KiB per block plus 5 KiB of per-chunk overhead - confirm).
 *   With no such chunk the estimate is 0. total is the limit itself and avail
 *   is what remains of the limit, clamped at 0.
 *   hashlock and then testlock are held while the chain is walked.
 *
 * Unlimited folder:
 *   values come from statvfs(f->path). total excludes the free blocks that
 *   are not available to unprivileged processes (f_bfree-f_bavail); avail is
 *   reduced by f->leavefree and clamped at 0. If statvfs() fails both values
 *   are set to 0.
 */
static inline void hdd_refresh_usage(folder *f) {
	if (f->sizelimit) {
		uint32_t knownblocks;
		uint32_t knowncount;
		uint64_t calcsize;
		chunk *c;
		knownblocks = 0;
		knowncount = 0;
		zassert(pthread_mutex_lock(&hashlock));
		zassert(pthread_mutex_lock(&testlock));
		for (c=f->testhead ; c ; c=c->testnext) {
			if (c->state==CH_AVAIL && c->validattr==1) {
				knowncount++;
				knownblocks+=c->blocks;
			}
		}
		zassert(pthread_mutex_unlock(&testlock));
		zassert(pthread_mutex_unlock(&hashlock));
		if (knowncount>0) {
			// extrapolate the sampled average to all chunks of the folder
			calcsize = knownblocks;
			calcsize *= f->chunkcount;
			calcsize /= knowncount;
			calcsize *= 64;
			calcsize += f->chunkcount*5;
			calcsize *= 1024;
		} else { // unknown result;
			calcsize = 0;
		}
		f->total = f->sizelimit;
		f->avail = (calcsize>f->sizelimit)?0:f->sizelimit-calcsize;
	} else {
		struct statvfs fsinfo;

		if (statvfs(f->path,&fsinfo)<0) {
			// fsinfo holds no valid data after a failed call - report an
			// empty folder and do not fall through to the code that reads it
			f->avail = 0ULL;
			f->total = 0ULL;
			return;
		}
		f->avail = (uint64_t)(fsinfo.f_frsize)*(uint64_t)(fsinfo.f_bavail);
		f->total = (uint64_t)(fsinfo.f_frsize)*(uint64_t)(fsinfo.f_blocks-(fsinfo.f_bfree-fsinfo.f_bavail));
		if (f->avail < f->leavefree) {
			f->avail = 0ULL;
		} else {
			f->avail -= f->leavefree;
		}
	}
}
1252
hdd_getfolder()1253 static inline folder* hdd_getfolder() {
1254 folder *f,*bf;
1255 double minerr,err,expdist;
1256 // double usage;
1257 uint64_t totalsum,good_totalsum;
1258 uint32_t folder_cnt,good_cnt;
1259 uint8_t onlygood;
1260 uint64_t usectime;
1261
1262 usectime = monotonic_useconds();
1263
1264 totalsum = 0;
1265 good_totalsum = 0;
1266 folder_cnt = 0;
1267 good_cnt = 0;
1268 onlygood = 0;
1269 for (f=folderhead ; f ; f=f->next) {
1270 if (f->damaged==0 && f->toremove==0 && f->todel==0 && f->scanstate==SCST_WORKING && f->total>0 && f->avail>0 && f->balancemode!=REBALANCE_FORCE_SRC) {
1271 if (usectime < f->rebalance_last_usec) { // wall clock move forward protection
1272 f->rebalance_last_usec = usectime;
1273 }
1274 // usage = f->total-f->avail;
1275 // usage /= f->total;
1276 if (f->rebalance_last_usec + REBALANCE_GRACE_PERIOD < usectime) {
1277 good_cnt++;
1278 good_totalsum += f->total;
1279 }
1280 folder_cnt++;
1281 totalsum += f->total;
1282 }
1283 }
1284 // syslog(LOG_NOTICE,"good_cnt: %"PRIu32" ; folder_cnt: %"PRIu32" ; good_totalsum:%"PRIu64" ; totalsum:%"PRIu64,good_cnt,folder_cnt,good_totalsum,totalsum);
1285 if (good_cnt * 3 >= folder_cnt * 2) {
1286 onlygood = 1;
1287 totalsum = good_totalsum;
1288 }
1289 bf = NULL;
1290 minerr = 0.0; // make some old compilers happy
1291 for (f=folderhead ; f ; f=f->next) {
1292 if (f->damaged==0 && f->toremove==0 && f->todel==0 && f->scanstate==SCST_WORKING && f->total>0 && f->avail>0 && f->balancemode!=REBALANCE_FORCE_SRC) {
1293 // usage = f->total-f->avail;
1294 // usage /= f->total;
1295 if (onlygood==0 || (f->rebalance_last_usec + REBALANCE_GRACE_PERIOD < usectime)) {
1296 f->write_dist++;
1297 if (f->write_first) {
1298 err = 1.0;
1299 } else {
1300 expdist = totalsum;
1301 expdist /= f->total;
1302 err = (expdist + f->write_corr) / f->write_dist;
1303 }
1304 if (bf==NULL || err<minerr) {
1305 minerr = err;
1306 bf = f;
1307 }
1308 }
1309 }
1310 }
1311 if (bf) {
1312 // syslog(LOG_NOTICE,"chosen: %s",bf->path);
1313 if (bf->write_first) {
1314 bf->write_first = 0;
1315 } else {
1316 expdist = totalsum;
1317 expdist /= bf->total;
1318 bf->write_corr += expdist - bf->write_dist;
1319 }
1320 bf->write_dist = 0;
1321 }
1322 return bf;
1323 }
1324 /*
1325 static inline folder* hdd_getfolder() {
1326 folder *f,*bf;
1327 double maxcarry;
1328 double minavail,maxavail;
1329 double s,d;
1330 double pavail;
1331 int ok;
1332 // uint64_t minavail;
1333
1334 minavail = 0.0;
1335 maxavail = 0.0;
1336 maxcarry = 1.0;
1337 bf = NULL;
1338 ok = 0;
1339 for (f=folderhead ; f ; f=f->next) {
1340 if (f->damaged || f->todel || f->total==0 || f->avail==0 || f->scanstate!=SCST_WORKING) {
1341 continue;
1342 }
1343 if (f->carry >= maxcarry) {
1344 maxcarry = f->carry;
1345 bf = f;
1346 }
1347 pavail = (double)(f->avail)/(double)(f->total);
1348 if (ok==0 || minavail>pavail) {
1349 minavail = pavail;
1350 ok = 1;
1351 }
1352 if (pavail>maxavail) {
1353 maxavail = pavail;
1354 }
1355 }
1356 if (bf) {
1357 bf->carry -= 1.0;
1358 return bf;
1359 }
1360 if (maxavail==0.0) { // no space
1361 return NULL;
1362 }
1363 if (maxavail<0.01) {
1364 s = 0.0;
1365 } else {
1366 s = minavail*0.8;
1367 if (s<0.01) {
1368 s = 0.01;
1369 }
1370 }
1371 d = maxavail-s;
1372 maxcarry = 1.0;
1373 for (f=folderhead ; f ; f=f->next) {
1374 if (f->damaged || f->todel || f->total==0 || f->avail==0 || f->scanstate!=SCST_WORKING) {
1375 continue;
1376 }
1377 pavail = (double)(f->avail)/(double)(f->total);
1378 if (pavail>s) {
1379 f->carry += ((pavail-s)/d);
1380 }
1381 if (f->carry >= maxcarry) {
1382 maxcarry = f->carry;
1383 bf = f;
1384 }
1385 }
1386 if (bf) { // should be always true
1387 bf->carry -= 1.0;
1388 }
1389 return bf;
1390 }
1391 */
/*
 * Walk the whole chunk hash table and process every chunk owned by folder 'f'.
 * Each such chunk first inherits the folder's 'todel' value.
 *
 * rmflag==0: the chunk is reported with hdd_report_new_chunk() (bit 31 of the
 *            version is set when the chunk is marked 'todel').
 * rmflag!=0: the chunk is reported with hdd_report_lost_chunk(); if it is in
 *            state CH_AVAIL it is unlinked from the hash, its descriptor, crc
 *            table, cached block and file name are released, it is taken off
 *            the test chain and freed. Chunks in any other state stay in place.
 *
 * Runs with hashlock and testlock held (taken in that order).
 *
 * Returns 1 when no chunk of the folder had to be left behind, 0 when at
 * least one chunk could not be removed (only possible with rmflag!=0).
 */
uint8_t hdd_senddata(folder *f,int rmflag) {
	uint32_t bucket;
	uint8_t ftodel;
	uint8_t removable;
	chunk **link,*c;

	ftodel = f->todel;
	removable = 1;
	zassert(pthread_mutex_lock(&hashlock));
	zassert(pthread_mutex_lock(&testlock));
	for (bucket=0 ; bucket<HASHSIZE ; bucket++) {
		link = &(hashtab[bucket]);
		while ((c=*link)!=NULL) {
			if (c->owner!=f) { // other folder's chunk
				link = &(c->next);
				continue;
			}
			c->todel = ftodel;
			if (rmflag==0) { // just announce the chunk
				hdd_report_new_chunk(c->chunkid,c->version|((c->todel)?0x80000000:0));
				link = &(c->next);
				continue;
			}
			hdd_report_lost_chunk(c->chunkid);
			if (c->state!=CH_AVAIL) { // in use - cannot be freed now
				removable = 0;
				link = &(c->next);
				continue;
			}
			/* unlink and release everything held by the chunk */
			*link = c->next;
			if (c->fd>=0) {
				close(c->fd);
				hdd_open_files_handle(OF_AFTER_CLOSE);
			}
			if (c->crc!=NULL) {
#ifdef MMAP_ALLOC
				munmap((void*)(c->crc),4096);
#else
				free(c->crc);
#endif
			}
#ifdef PRESERVE_BLOCK
			if (c->block!=NULL) {
# ifdef MMAP_ALLOC
				munmap((void*)(c->block),MFSBLOCKSIZE);
# else
				free(c->block);
# endif
			}
#endif /* PRESERVE_BLOCK */
			if (c->filename!=NULL) {
				free(c->filename);
			}
			hdd_remove_chunk_from_test_chain(c,c->owner);
			free(c);
		}
	}
	zassert(pthread_mutex_unlock(&testlock));
	zassert(pthread_mutex_unlock(&hashlock));
	return removable;
}
1453
1454 void* hdd_folder_scan(void *arg);
1455
/*
 * Periodic maintenance of the folder list. Does nothing when 'folderactions'
 * is 0. The whole work is done with folderlock held.
 *
 * Pass 1 - folders marked 'toremove' (and not being rebalanced):
 *   a running scan is asked to terminate, a finished scan thread is joined,
 *   and once the folder is in (or forced into) SCST_WORKING its chunks are
 *   reported lost through hdd_senddata(f,1). When that call reports that every
 *   chunk could be dropped, 'toremove' becomes 0 and the folder is either
 *   emptied (damaged folder - kept on the list) or unlinked and freed.
 *
 * Pass 2 - remaining folders:
 *   drives the scan state machine (start scan, join finished scan, send chunk
 *   list), counts recent EIO/EROFS/ENOENT errors and marks the folder damaged
 *   when there are more than HDDErrorCount of them within HDDErrorTime seconds,
 *   and refreshes usage figures when requested or older than 60 seconds.
 *
 * When anything changed, 'hddspacechanged' is raised under dclock.
 */
void hdd_check_folders(void) {
	folder *f,**fptr;
	uint32_t i;
	double monotonic_time;
	uint32_t err;
	uint8_t enoent;
	int changed;

	monotonic_time = monotonic_seconds();

	changed = 0;

	zassert(pthread_mutex_lock(&folderlock));
	if (folderactions==0) {
		zassert(pthread_mutex_unlock(&folderlock));
		return;
	}
	/* pass 1: handle folders that are being removed */
	fptr = &folderhead;
	while ((f=*fptr)) {
		if (f->toremove && f->rebalance_in_progress==0) {
			switch (f->scanstate) {
			case SCST_SCANINPROGRESS:
				// scan thread still running - request termination and come back later
				f->scanstate = SCST_SCANTERMINATE;
				break;
			case SCST_SCANFINISHED:
				zassert(pthread_join(f->scanthread,NULL));
				// intentional fall through
			case SCST_SENDNEEDED:
			case SCST_SCANNEEDED:
				f->scanstate = SCST_WORKING;
				// intentional fall through
			case SCST_WORKING:
				// toremove==2 is what pass 2 sets when it marks a folder damaged;
				// it is normalised to 1 before the chunks are reported lost
				if (f->toremove==2) {
					f->toremove = 1;
				}
				// non-zero result: no chunk of this folder had to be left behind
				if (hdd_senddata(f,1)) {
					f->toremove = 0;
				}
				changed = 1;
				break;
			}
			if (f->toremove==0) { // 0 here means 'removed', so delete it from data structures
				if (f->damaged) {
					// damaged folder stays on the list, only its chunk table is dropped;
					// fptr is not advanced here - the next iteration sees toremove==0
					// and moves on through the outer 'else' branch
					f->chunkcount = 0;
					f->chunktabsize = 0;
					if (f->chunktab) {
						free(f->chunktab);
					}
					f->chunktab = NULL;
				} else {
					// unlink from the list (fptr now refers to the successor) and free
					*fptr = f->next;
					syslog(LOG_NOTICE,"folder %s successfully removed",f->path);
					if (f->lfd>=0) {
						close(f->lfd);
					}
					if (f->chunktab) {
						free(f->chunktab);
					}
					free(f->path);
					free(f);
					testerreset = 1; // presumably makes the chunk tester restart - confirm
				}
			} else {
				fptr = &(f->next);
			}
		} else {
			fptr = &(f->next);
		}
	}
	/* pass 2: scan state machine, error accounting and usage refresh */
	for (f=folderhead ; f ; f=f->next) {
		if (f->damaged || f->toremove || (f->rebalance_in_progress==1 && f->scanstate!=SCST_WORKING)) {
			// such folders are skipped; a damaged, idle one still gets its usage refreshed once a minute
			if (f->damaged && f->toremove==0 && f->scanstate==SCST_WORKING && f->lastrefresh+60.0<monotonic_time) {
				hdd_refresh_usage(f);
				f->lastrefresh = monotonic_time;
				changed = 1;
			}
			continue;
		}
		switch (f->scanstate) {
		case SCST_SCANNEEDED:
			// start the scanning thread for this folder
			f->scanstate = SCST_SCANINPROGRESS;
			zassert(main_minthread_create(&(f->scanthread),0,hdd_folder_scan,f));
			break;
		case SCST_SCANFINISHED:
			// scanning thread is done - reap it and publish the folder
			zassert(pthread_join(f->scanthread,NULL));
			f->scanstate = SCST_WORKING;
			hdd_refresh_usage(f);
			f->needrefresh = 0;
			f->lastrefresh = monotonic_time;
			changed = 1;
			break;
		case SCST_SENDNEEDED:
			// report all chunks of the folder without removing anything
			hdd_senddata(f,0);
			f->scanstate = SCST_WORKING;
			hdd_refresh_usage(f);
			f->needrefresh = 0;
			f->lastrefresh = monotonic_time;
			changed = 1;
			break;
		case SCST_WORKING:
			// count EIO/EROFS/ENOENT entries recorded within the last HDDErrorTime seconds
			err = 0;
			enoent = 0;
			for (i=0 ; i<LASTERRSIZE; i++) {
				if (f->lasterrtab[i].monotonic_time+HDDErrorTime>=monotonic_time && (f->lasterrtab[i].errornumber==EIO || f->lasterrtab[i].errornumber==EROFS || f->lasterrtab[i].errornumber==ENOENT)) {
					err++;
					if (f->lasterrtab[i].errornumber==ENOENT) {
						enoent = 1;
					}
				}
			}
			if (err>HDDErrorCount && f->todel<2) {
				// too many errors: mark damaged and schedule removal (handled by pass 1 on a later call)
				syslog(LOG_WARNING,"%"PRIu32" errors occurred in %"PRIu32" seconds on folder: %s",err,HDDErrorTime,f->path);
				f->toremove = 2;
				f->damaged = 1;
				changed = 1;
			} else if (enoent && err>HDDErrorCount && f->todel>=2) {
				// folders with todel>=2 are only marked damaged, and only when ENOENT was among the errors
				syslog(LOG_WARNING,"%"PRIu32" errors occurred in %"PRIu32" seconds on folder: %s",err,HDDErrorTime,f->path);
				f->damaged = 1;
			} else if (f->needrefresh || f->lastrefresh+60.0<monotonic_time) {
				hdd_refresh_usage(f);
				f->needrefresh = 0;
				f->lastrefresh = monotonic_time;
				changed = 1;
			}
		}
	}
	zassert(pthread_mutex_unlock(&folderlock));
	if (changed) {
		zassert(pthread_mutex_lock(&dclock));
		hddspacechanged = 1;
		zassert(pthread_mutex_unlock(&dclock));
	}
}
1595
/*
 * Record an I/O error for the folder owning chunk 'c'.
 *
 * The errno value seen on entry is stored, together with the chunk id, the
 * wall-clock second and the monotonic time, in the next slot of the owner's
 * 'lasterrtab' ring (LASTERRSIZE entries, index kept in 'lasterrindx'); this
 * happens under folderlock. The global 'errorcounter' is then incremented
 * under dclock.
 *
 * errno is restored to its entry value before returning.
 */
static inline void hdd_error_occured(chunk *c) {
	int saved_errno = errno;
	struct timeval wallclock;
	folder *owner;
	uint32_t slot;

	zassert(pthread_mutex_lock(&folderlock));
	gettimeofday(&wallclock,NULL);
	owner = c->owner;
	slot = owner->lasterrindx;
	owner->lasterrtab[slot].chunkid = c->chunkid;
	owner->lasterrtab[slot].errornumber = saved_errno;
	owner->lasterrtab[slot].timestamp = wallclock.tv_sec;
	owner->lasterrtab[slot].monotonic_time = monotonic_seconds();
	slot = (slot+1)%LASTERRSIZE; // advance the ring index
	owner->lasterrindx = slot;
	zassert(pthread_mutex_unlock(&folderlock));

	zassert(pthread_mutex_lock(&dclock));
	errorcounter++;
	zassert(pthread_mutex_unlock(&dclock));

	errno = saved_errno;
}
1620
1621
1622 /* interface */
1623
/*
 * State shared by the hdd_get_chunks_* enumeration functions.
 */
/* A portion stops taking further hash buckets once it holds at least this
 * many chunks; a bucket is never split, so a portion may exceed the value. */
#define CHUNKS_CUT_COUNT 10000
/* index of the next hash bucket to be enumerated */
static uint32_t hdd_get_chunks_pos = 0;
/* signalled by hdd_get_chunks_end() when a partial-mode enumeration ends */
static pthread_cond_t hdd_get_chunks_cond = PTHREAD_COND_INITIALIZER;
/* incremented before each wait on the condition above, decremented per signal sent */
static uint8_t hdd_get_chunks_waiting = 0;
/* non-zero while an enumeration started with partialmode!=0 is in progress */
static uint8_t hdd_get_chunks_partialmode = 0;
1629
hdd_get_chunks_begin(uint8_t partialmode)1630 void hdd_get_chunks_begin(uint8_t partialmode) {
1631 zassert(pthread_mutex_lock(&hashlock));
1632 hdd_get_chunks_pos = 0;
1633 while (hdd_get_chunks_partialmode) {
1634 hdd_get_chunks_waiting++;
1635 zassert(pthread_cond_wait(&hdd_get_chunks_cond,&hashlock));
1636 }
1637 hdd_get_chunks_partialmode = partialmode;
1638 if (partialmode) {
1639 zassert(pthread_mutex_unlock(&hashlock));
1640 }
1641 }
1642
hdd_get_chunks_end()1643 void hdd_get_chunks_end() {
1644 if (hdd_get_chunks_partialmode) {
1645 zassert(pthread_mutex_lock(&hashlock));
1646 hdd_get_chunks_partialmode = 0;
1647 if (hdd_get_chunks_waiting) {
1648 zassert(pthread_cond_signal(&hdd_get_chunks_cond));
1649 hdd_get_chunks_waiting--;
1650 }
1651 }
1652 zassert(pthread_mutex_unlock(&hashlock));
1653 }
1654
hdd_get_chunks_next_list_count()1655 uint32_t hdd_get_chunks_next_list_count() {
1656 uint32_t res = 0;
1657 uint32_t i = 0;
1658 chunk *c;
1659 if (hdd_get_chunks_partialmode) {
1660 zassert(pthread_mutex_lock(&hashlock));
1661 }
1662 while (res<CHUNKS_CUT_COUNT && hdd_get_chunks_pos+i<HASHSIZE) {
1663 for (c=hashtab[hdd_get_chunks_pos+i] ; c ; c=c->next) {
1664 res++;
1665 }
1666 i++;
1667 }
1668 if (res==0 && hdd_get_chunks_partialmode) {
1669 zassert(pthread_mutex_unlock(&hashlock));
1670 }
1671 return res;
1672 }
1673
hdd_get_chunks_next_list_data(uint8_t * buff)1674 void hdd_get_chunks_next_list_data(uint8_t *buff) {
1675 uint32_t res = 0;
1676 uint32_t v;
1677 chunk *c;
1678 while (res<CHUNKS_CUT_COUNT && hdd_get_chunks_pos<HASHSIZE) {
1679 for (c=hashtab[hdd_get_chunks_pos] ; c ; c=c->next) {
1680 put64bit(&buff,c->chunkid);
1681 v = c->version;
1682 if (c->todel) {
1683 v |= 0x80000000;
1684 }
1685 put32bit(&buff,v);
1686 res++;
1687 }
1688 hdd_get_chunks_pos++;
1689 }
1690 if (hdd_get_chunks_partialmode) {
1691 zassert(pthread_mutex_unlock(&hashlock));
1692 }
1693 }
1694
1695 /*
1696 // for old register packets - deprecated
1697 uint32_t hdd_get_chunks_count() {
1698 uint32_t res = 0;
1699 uint32_t i;
1700 chunk *c;
1701 zassert(pthread_mutex_lock(&hashlock));
1702 for (i=0 ; i<HASHSIZE ; i++) {
1703 for (c=hashtab[i] ; c ; c=c->next) {
1704 res++;
1705 }
1706 }
1707 return res;
1708 }
1709
1710 void hdd_get_chunks_data(uint8_t *buff) {
1711 uint32_t i,v;
1712 chunk *c;
1713 if (buff) {
1714 for (i=0 ; i<HASHSIZE ; i++) {
1715 for (c=hashtab[i] ; c ; c=c->next) {
1716 put64bit(&buff,c->chunkid);
1717 v = c->version;
1718 if (c->owner->todel) {
1719 v |= 0x80000000;
1720 }
1721 put32bit(&buff,v);
1722 }
1723 }
1724 }
1725 }
1726 */
1727
1728 /*
1729 uint32_t get_changedchunkscount() {
1730 uint32_t res = 0;
1731 folder *f;
1732 chunk *c;
1733 if (somethingchanged==0) {
1734 return 0;
1735 }
1736 for (f=folderhead ; f ; f=f->next) {
1737 for (c=f->chunkhead ; c ; c=c->next) {
1738 if (c->lengthchanged) {
1739 res++;
1740 }
1741 }
1742 }
1743 return res;
1744 }
1745
1746 void fill_changedchunksinfo(uint8_t *buff) {
1747 folder *f;
1748 chunk *c;
1749 for (f=folderhead ; f ; f=f->next) {
1750 for (c=f->chunkhead ; c ; c=c->next) {
1751 if (c->lengthchanged) {
1752 put64bit(&buff,c->chunkid);
1753 put32bit(&buff,c->version);
1754 c->lengthchanged = 0;
1755 }
1756 }
1757 }
1758 somethingchanged = 0;
1759 }
1760 */
1761
hdd_get_space(uint64_t * usedspace,uint64_t * totalspace,uint32_t * chunkcount,uint64_t * tdusedspace,uint64_t * tdtotalspace,uint32_t * tdchunkcount)1762 void hdd_get_space(uint64_t *usedspace,uint64_t *totalspace,uint32_t *chunkcount,uint64_t *tdusedspace,uint64_t *tdtotalspace,uint32_t *tdchunkcount) {
1763 folder *f;
1764 uint64_t avail,total;
1765 uint64_t tdavail,tdtotal;
1766 uint32_t chunks,tdchunks;
1767 zassert(pthread_mutex_lock(&folderlock));
1768 avail = total = tdavail = tdtotal = 0ULL;
1769 chunks = tdchunks = 0;
1770 for (f=folderhead ; f ; f=f->next) {
1771 if (f->damaged || f->toremove) {
1772 continue;
1773 }
1774 if (f->todel==0) {
1775 if (f->scanstate==SCST_WORKING) {
1776 avail += f->avail;
1777 total += f->total;
1778 }
1779 chunks += f->chunkcount;
1780 } else {
1781 if (f->scanstate==SCST_WORKING) {
1782 tdavail += f->avail;
1783 tdtotal += f->total;
1784 }
1785 tdchunks += f->chunkcount;
1786 }
1787 }
1788 zassert(pthread_mutex_unlock(&folderlock));
1789 *usedspace = total-avail;
1790 *totalspace = total;
1791 *chunkcount = chunks;
1792 *tdusedspace = tdtotal-tdavail;
1793 *tdtotalspace = tdtotal;
1794 *tdchunkcount = tdchunks;
1795 }
1796
/*
 * Give chunk 'c' a fresh, zero-filled 4096-byte CRC table.
 * The table comes from an anonymous private mapping when MMAP_ALLOC is
 * defined and from malloc() otherwise; the result is checked with passert().
 */
static inline void chunk_emptycrc(chunk *c) {
#ifndef MMAP_ALLOC
	c->crc = (uint8_t*)malloc(4096);
#else
	c->crc = (uint8_t*)mmap(NULL,4096,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
#endif
	passert(c->crc);
	memset(c->crc,0,4096); // make valgrind happy
}
1806
/*
 * Load the CRC table of chunk 'c' from its already opened file (c->fd).
 *
 * The first 20 bytes of the file are read and verified: 8 bytes of signature
 * (MFSSIGNATURE "C 1.0"), then the chunk id (64 bit) and the version (32 bit),
 * which must match c->chunkid and c->version. After that a 4096-byte buffer is
 * allocated (mmap or malloc, depending on MMAP_ALLOC) and filled from file
 * offset CHUNKHDRCRC; the read is accounted with hdd_stats_read(4096).
 *
 * Returns STATUS_OK with errno set to 0, or ERROR_IO. On read failures errno
 * holds the value left by the failing call; on header mismatches errno is 0.
 * When the table itself cannot be read the buffer is released and c->crc is
 * reset to NULL.
 */
static inline int chunk_readcrc(chunk *c) {
	uint8_t header[20];
	const uint8_t *rptr;
	uint64_t hdrchunkid;
	uint32_t hdrversion;
	int got;

	/* header */
#ifdef USE_PIO
	if (pread(c->fd,header,20,0)!=20) {
#else /* USE_PIO */
	lseek(c->fd,0,SEEK_SET);
	if (read(c->fd,header,20)!=20) {
#endif /* USE_PIO */
		int errmem = errno;
		mfs_arg_errlog_silent(LOG_WARNING,"chunk_readcrc: file:%s - read error",c->filename);
		errno = errmem;
		return ERROR_IO;
	}
	if (memcmp(header,MFSSIGNATURE "C 1.0",8)!=0) {
		syslog(LOG_WARNING,"chunk_readcrc: file:%s - wrong header",c->filename);
		errno = 0;
		return ERROR_IO;
	}
	rptr = header+8;
	hdrchunkid = get64bit(&rptr);
	hdrversion = get32bit(&rptr);
	if (!(c->chunkid==hdrchunkid && c->version==hdrversion)) {
		syslog(LOG_WARNING,"chunk_readcrc: file:%s - wrong id/version in header (%016"PRIX64"_%08"PRIX32")",c->filename,hdrchunkid,hdrversion);
		errno = 0;
		return ERROR_IO;
	}

	/* crc table */
#ifdef MMAP_ALLOC
	c->crc = (uint8_t*)mmap(NULL,4096,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
#else
	c->crc = (uint8_t*)malloc(4096);
#endif
	passert(c->crc);
#ifdef USE_PIO
	got = pread(c->fd,c->crc,4096,CHUNKHDRCRC);
#else /* USE_PIO */
	lseek(c->fd,CHUNKHDRCRC,SEEK_SET);
	got = read(c->fd,c->crc,4096);
#endif /* USE_PIO */
	if (got==4096) {
		hdd_stats_read(4096);
		errno = 0;
		return STATUS_OK;
	}
	{
		int errmem = errno;
		mfs_arg_errlog_silent(LOG_WARNING,"chunk_readcrc: file:%s - read error",c->filename);
#ifdef MMAP_ALLOC
		munmap((void*)(c->crc),4096);
#else
		free(c->crc);
#endif
		c->crc = NULL;
		errno = errmem;
	}
	return ERROR_IO;
}
1870
/*
 * Release the 4096-byte CRC table of chunk 'c' (munmap or free, depending on
 * MMAP_ALLOC) and reset c->crc to NULL.
 */
static inline void chunk_freecrc(chunk *c) {
#ifndef MMAP_ALLOC
	free(c->crc);
#else
	munmap((void*)(c->crc),4096);
#endif
	c->crc = NULL;
}
1879
/*
 * Write the 4096-byte CRC table of chunk 'c' to its file at offset
 * CHUNKHDRCRC.
 *
 * Before writing, the owner's 'needrefresh' flag is raised under folderlock.
 * A successful write is accounted with hdd_stats_write(4096).
 *
 * Returns STATUS_OK, or ERROR_IO (errno left as set by the failed write) when
 * fewer than 4096 bytes were written.
 */
static inline int chunk_writecrc(chunk *c) {
	int wrote;

	zassert(pthread_mutex_lock(&folderlock));
	c->owner->needrefresh = 1;
	zassert(pthread_mutex_unlock(&folderlock));

#ifdef USE_PIO
	wrote = pwrite(c->fd,c->crc,4096,CHUNKHDRCRC);
#else /* USE_PIO */
	lseek(c->fd,CHUNKHDRCRC,SEEK_SET);
	wrote = write(c->fd,c->crc,4096);
#endif /* USE_PIO */
	if (wrote==4096) {
		hdd_stats_write(4096);
		return STATUS_OK;
	}
	{
		int errmem = errno;
		mfs_arg_errlog_silent(LOG_WARNING,"chunk_writecrc: file:%s - write error",c->filename);
		errno = errmem;
	}
	return ERROR_IO;
}
1900
hdd_test_show_chunks(void)1901 void hdd_test_show_chunks(void) {
1902 uint32_t hashpos;
1903 chunk *c;
1904 zassert(pthread_mutex_lock(&hashlock));
1905 for (hashpos=0 ; hashpos<HASHSIZE ; hashpos++) {
1906 for (c=hashtab[hashpos] ; c ; c=c->next) {
1907 printf("chunk id:%"PRIu64" version:%"PRIu32" state:%u\n",c->chunkid,c->version,c->state);
1908 }
1909 }
1910 zassert(pthread_mutex_unlock(&hashlock));
1911 }
1912
1913 #if 0
1914 void hdd_test_show_openedchunks(void) {
1915 dopchunk *cc,*tcc;
1916 uint32_t dhashpos;
1917 chunk *c;
1918 double now;
1919
1920 printf("lock doplock\n");
1921 if (pthread_mutex_lock(&doplock)<0) {
1922 printf("lock error: %u\n",errno);
1923 }
1924 printf("lock ndoplock\n");
1925 if (pthread_mutex_lock(&ndoplock)<0) {
1926 printf("lock error: %u\n",errno);
1927 }
1928 /* append new chunks */
1929 cc = newdopchunks;
1930 while (cc) {
1931 dhashpos = DHASHPOS(cc->chunkid);
1932 for (tcc=dophashtab[dhashpos] ; tcc && tcc->chunkid!=cc->chunkid ; tcc=tcc->next) {}
1933 if (tcc) { // found - ignore
1934 tcc = cc;
1935 cc = cc->next;
1936 free(tcc);
1937 } else { // not found - add
1938 tcc = cc;
1939 cc = cc->next;
1940 tcc->next = dophashtab[dhashpos];
1941 dophashtab[dhashpos] = tcc;
1942 }
1943 }
1944 newdopchunks = NULL;
1945 printf("unlock ndoplock\n");
1946 if (pthread_mutex_unlock(&ndoplock)<0) {
1947 printf("unlock error: %u\n",errno);
1948 }
1949 /* show all */
1950 now = monotonic_seconds();
1951 for (dhashpos=0 ; dhashpos<DHASHSIZE ; dhashpos++) {
1952 for (cc=dophashtab[dhashpos]; cc ; cc=cc->next) {
1953 c = hdd_chunk_find(cc->chunkid);
1954 if (c==NULL) { // no chunk - delete entry
1955 printf("id: %"PRIu64" - chunk doesn't exist\n",cc->chunkid);
1956 } else if (c->crcrefcount>0) { // io in progress - skip entry
1957 printf("id: %"PRIu64" - chunk in use (refcount:%u)\n",cc->chunkid,c->crcrefcount);
1958 hdd_chunk_release(c);
1959 } else {
1960 #ifdef PRESERVE_BLOCK
1961 double fdsec,crcsec,blocksec;
1962 fdsec = c->opento;
1963 crcsec = c->crcto;
1964 blocksec = c->blockto;
1965 if (fdsec>0.0) {
1966 fdsec -= now;
1967 }
1968 if (crcsec>0.0) {
1969 crcsec -= now;
1970 }
1971 if (blocksec>0.0) {
1972 blocksec -= now;
1973 }
1974 printf("id: %"PRIu64" - fd:%d (delay:%.3lfs) crc:%p (delay:%.3lfs) block:%p,blockno:%u (delay:%.3lfs)\n",cc->chunkid,c->fd,fdsec,(void*)(c->crc),crcsec,c->block,c->blockno,blocksec);
1975 #else /* PRESERVE_BLOCK */
1976 double fdsec,crcsec;
1977 fdsec = c->opento;
1978 crcsec = c->crcto;
1979 if (fdsec>0.0) {
1980 fdsec -= now;
1981 }
1982 if (crcsec>0.0) {
1983 crcsec -= now;
1984 }
1985 printf("id: %"PRIu64" - fd:%d (delay:%.3lfs) crc:%p (delay:%.3lfs)\n",cc->chunkid,c->fd,fdsec,(void*)(c->crc),crcsec);
1986 #endif /* PRESERVE_BLOCK */
1987 hdd_chunk_release(c);
1988 }
1989 }
1990 }
1991 printf("unlock doplock\n");
1992 if (pthread_mutex_unlock(&doplock)<0) {
1993 printf("unlock error: %u\n",errno);
1994 }
1995 }
1996 #endif
1997
/*
 * Perform postponed clean-up on chunks that were used for I/O.
 *
 * Step 1 (doplock + ndoplock): entries queued on 'newdopchunks' are merged
 * into the 'dophashtab' hash; entries for chunk ids already present are freed.
 *
 * Step 2 (doplock only): for every entry the chunk is looked up with
 * hdd_chunk_tryfind():
 *   - chunk does not exist          -> entry is dropped,
 *   - chunk is locked (CHUNKLOCKED) -> entry is kept for the next round,
 *   - I/O in progress (crcrefcount) -> chunk is released, entry is kept,
 *   - otherwise the chunk is fsync'ed if needed (and DoFsyncBeforeClose is
 *     set), and its cached block, file descriptor and CRC table are released
 *     one by one as their individual timeouts (blockto/opento/crcto) expire.
 *     When nothing is left to release the entry is dropped.
 */
void hdd_delayed_ops() {
	dopchunk **ccp,*cc,*tcc;
	uint32_t dhashpos;
	chunk *c;
	uint64_t ts,te;

	zassert(pthread_mutex_lock(&doplock));
	zassert(pthread_mutex_lock(&ndoplock));
	/* append new chunks */
	cc = newdopchunks;
	while (cc) {
		dhashpos = DHASHPOS(cc->chunkid);
		for (tcc=dophashtab[dhashpos] ; tcc && tcc->chunkid!=cc->chunkid ; tcc=tcc->next) {}
		if (tcc) { // found - ignore
			tcc = cc;
			cc = cc->next;
			free(tcc);
		} else { // not found - add
			tcc = cc;
			cc = cc->next;
			tcc->next = dophashtab[dhashpos];
			dophashtab[dhashpos] = tcc;
		}
	}
	newdopchunks = NULL;
	zassert(pthread_mutex_unlock(&ndoplock));
	/* check all */
	for (dhashpos=0 ; dhashpos<DHASHSIZE ; dhashpos++) {
		// ccp always points at the link that refers to the current entry,
		// so an entry can be unlinked by overwriting *ccp
		ccp = dophashtab+dhashpos;
		while ((cc=*ccp)) {
			c = hdd_chunk_tryfind(cc->chunkid);
			if (c==NULL) { // no chunk - delete entry
				*ccp = cc->next;
				free(cc);
			} else if (c==CHUNKLOCKED) { // locked chunk - just ignore
				ccp = &(cc->next);
			} else if (c->crcrefcount>0) { // io in progress - skip entry
				hdd_chunk_release(c);
				ccp = &(cc->next);
			} else {
				double now;
				// flush data to disk first; the time spent is reported to the folder statistics
				if (c->fsyncneeded && DoFsyncBeforeClose) {
					ts = monotonic_nseconds();
#ifdef F_FULLFSYNC
					if (fcntl(c->fd,F_FULLFSYNC)<0) {
						hdd_error_occured(c);
						mfs_arg_errlog_silent(LOG_WARNING,"hdd_delayed_ops: file:%s - fsync (via fcntl) error",c->filename);
						hdd_report_damaged_chunk(c->chunkid);
					}
#else
					if (fsync(c->fd)<0) {
						hdd_error_occured(c);
						mfs_arg_errlog_silent(LOG_WARNING,"hdd_delayed_ops: file:%s - fsync (direct call) error",c->filename);
						hdd_report_damaged_chunk(c->chunkid);
					}
#endif
					te = monotonic_nseconds();
					hdd_stats_datafsync(c->owner,te-ts);
					c->fsyncneeded = 0;
				}
				now = monotonic_seconds();
#ifdef PRESERVE_BLOCK
				// cached block - release when its timeout has passed
				if (c->block!=NULL && c->blockto<now) {
# ifdef MMAP_ALLOC
					munmap((void*)(c->block),MFSBLOCKSIZE);
# else
					free(c->block);
# endif
					c->block = NULL;
					c->blockno = 0xFFFF;
					c->blockto = 0.0;
				}
#endif /* PRESERVE_BLOCK */
				// file descriptor - close when its timeout has passed
				if (c->fd>=0 && c->opento<now) {
					if (close(c->fd)<0) {
						hdd_error_occured(c); // uses and preserves errno !!!
						mfs_arg_errlog_silent(LOG_WARNING,"hdd_delayed_ops: file:%s - close error",c->filename);
						hdd_report_damaged_chunk(c->chunkid);
					}
					c->fd = -1;
					c->opento = 0.0;
					hdd_open_files_handle(OF_AFTER_CLOSE);
				}
				// crc table - free when its timeout has passed
				if (c->crc!=NULL && c->crcto<now) {
					if (c->crcchanged) {
						// unwritten crc changes are discarded together with the table
						syslog(LOG_ERR,"serious error: crc changes lost (chunk:%016"PRIX64"_%08"PRIX32")",c->chunkid,c->version);
					}
					chunk_freecrc(c);
					c->crcto = 0.0;
				}
				// nothing cached any more - the entry is no longer needed
#ifdef PRESERVE_BLOCK
				if (c->fd<0 && c->crc==NULL && c->block==NULL) {
#else /* PRESERVE_BLOCK */
				if (c->fd<0 && c->crc==NULL) {
#endif /* PRESERVE_BLOCK */
					*ccp = cc->next;
					free(cc);
				} else {
					ccp = &(cc->next);
				}
				hdd_chunk_release(c);
			}
		}
	}
	zassert(pthread_mutex_unlock(&doplock));
}
2119
/*
 * Prepare chunk 'c' for I/O and take one I/O reference (c->crcrefcount).
 *
 * The chunk is first moved to the end of its folder's test chain. When no
 * other I/O reference exists, the resources needed for I/O are brought up:
 *   - the file is opened if c->fd<0: created/truncated when 'newflag' is set,
 *     otherwise opened read-write, or read-only when c->todel>=2,
 *   - the CRC table is created empty (newflag) or read from the file,
 *   - with PRESERVE_BLOCK the one-block cache buffer is allocated,
 *   - if the chunk held none of these resources before, an entry for it is
 *     queued on 'newdopchunks' (under ndoplock) for hdd_delayed_ops().
 *
 * Returns STATUS_OK (errno set to 0) or an error status with errno preserved
 * from the failing call. On error no I/O reference is taken.
 */
static int hdd_io_begin(chunk *c,int newflag) {
	dopchunk *cc;
	int status;
	int add;

	hdd_chunk_testmove(c);
	if (c->crcrefcount==0) {
		// 'add' - the chunk currently holds no delayed-release resources,
		// so it has to be registered for hdd_delayed_ops()
#ifdef PRESERVE_BLOCK
		add = (c->fd<0 && c->crc==NULL && c->block==NULL);
#else /* PRESERVE_BLOCK */
		add = (c->fd<0 && c->crc==NULL);
#endif /* PRESERVE_BLOCK */
		if (c->fd<0) {
			hdd_open_files_handle(OF_BEFORE_OPEN);
			if (newflag) {
				c->fd = open(c->filename,O_RDWR | O_TRUNC | O_CREAT,0666);
			} else {
				if (c->todel<2) {
					c->fd = open(c->filename,O_RDWR);
				} else {
					c->fd = open(c->filename,O_RDONLY);
				}
			}
			if (c->fd<0) {
				int errmem = errno;
				mfs_arg_errlog_silent(LOG_WARNING,"hdd_io_begin: file:%s - open error",c->filename);
				// balance the OF_BEFORE_OPEN notification made above
				hdd_open_files_handle(OF_AFTER_CLOSE);
				errno = errmem;
				return ERROR_IO;
			}
			c->fsyncneeded = 0;
		}
		if (c->crc==NULL) {
			if (newflag) {
				chunk_emptycrc(c);
			} else {
				status = chunk_readcrc(c);
				if (status!=STATUS_OK) {
					int errmem = errno;
					// the descriptor is closed only when the chunk would have been
					// newly registered; otherwise it is left open
					if (add) {
						close(c->fd);
						c->fd=-1;
						hdd_open_files_handle(OF_AFTER_CLOSE);
					}
					mfs_arg_errlog_silent(LOG_WARNING,"hdd_io_begin: file:%s - read error",c->filename);
					errno = errmem;
					return status;
				}
			}
			c->crcchanged = 0;
		}
#ifdef PRESERVE_BLOCK
		if (c->block==NULL) {
# ifdef MMAP_ALLOC
			c->block = (uint8_t*)mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
# else
			c->block = (uint8_t*)malloc(MFSBLOCKSIZE);
# endif
			passert(c->block);
			c->blockno = 0xFFFF; // no block cached yet
		}
#endif /* PRESERVE_BLOCK */
		if (add) {
			cc = malloc(sizeof(dopchunk));
			passert(cc);
			cc->chunkid = c->chunkid;
			zassert(pthread_mutex_lock(&ndoplock));
			cc->next = newdopchunks;
			newdopchunks = cc;
			zassert(pthread_mutex_unlock(&ndoplock));
		}
	}
	c->crcrefcount++;
	errno = 0;
	return STATUS_OK;
}
2200
2201 static int hdd_io_end(chunk *c) {
2202 int status;
2203 // uint64_t ts,te;
2204
2205 // sassert(c->state==CH_LOCKED||c->state==CH_TOBEDELETED);
2206
2207 // syslog(LOG_NOTICE,"chunk: %"PRIu64" - after io",c->chunkid);
2208 if (c->crcchanged) {
2209 status = chunk_writecrc(c);
2210 c->crcchanged = 0;
2211 if (status!=STATUS_OK) {
2212 int errmem = errno;
2213 mfs_arg_errlog_silent(LOG_WARNING,"hdd_io_end: file:%s - write error",c->filename);
2214 errno = errmem;
2215 return status;
2216 }
2217 c->fsyncneeded = 1;
2218 }
2219 c->crcrefcount--;
2220 if (c->crcrefcount==0) {
2221 double now = monotonic_seconds();
2222 /*
2223 if (OPEN_DELAY==0) {
2224 if (c->fsyncneeded) {
2225 ts = monotonic_nseconds();
2226 #ifdef F_FULLFSYNC
2227 if (fcntl(c->fd,F_FULLFSYNC)<0) {
2228 int errmem = errno;
2229 mfs_arg_errlog_silent(LOG_WARNING,"hdd_io_end: file:%s - fsync (via fcntl) error",c->filename);
2230 errno = errmem;
2231 return ERROR_IO;
2232 }
2233 #else
2234 if (fsync(c->fd)<0) {
2235 int errmem = errno;
2236 mfs_arg_errlog_silent(LOG_WARNING,"hdd_io_end: file:%s - fsync (direct call) error",c->filename);
2237 errno = errmem;
2238 return ERROR_IO;
2239 }
2240 #endif
2241 te = monotonic_nseconds();
2242 hdd_stats_datafsync(c->owner,te-ts);
2243 c->fsyncneeded = 0;
2244 }
2245 if (close(c->fd)<0) {
2246 int errmem = errno;
2247 c->fd = -1;
2248 mfs_arg_errlog_silent(LOG_WARNING,"hdd_io_end: file:%s - close error",c->filename);
2249 errno = errmem;
2250 return ERROR_IO;
2251 }
2252 c->fd = -1;
2253 } else {
2254 */
2255 c->opento = now + OPEN_DELAY;
2256 // }
2257 c->crcto = now + CRC_DELAY;
2258 #ifdef PRESERVE_BLOCK
2259 c->blockto = now + BLOCK_DELAY;
2260 #endif
2261 }
2262 errno = 0;
2263 return STATUS_OK;
2264 }
2265
2266
2267
2268
2269 /* I/O operations */
2270
2271 int hdd_open(uint64_t chunkid,uint32_t version) {
2272 int status;
2273 chunk *c;
2274 c = hdd_chunk_find(chunkid);
2275 if (c==NULL) {
2276 return ERROR_NOCHUNK;
2277 }
2278 if (c->version!=version && version>0) {
2279 hdd_chunk_release(c);
2280 return ERROR_WRONGVERSION;
2281 }
2282 status = hdd_io_begin(c,0);
2283 if (status!=STATUS_OK) {
2284 hdd_error_occured(c); // uses and preserves errno !!!
2285 hdd_report_damaged_chunk(chunkid);
2286 }
2287 hdd_chunk_release(c);
2288 // if (status==STATUS_OK) {
2289 // syslog(LOG_NOTICE,"chunk %08"PRIX64" opened",chunkid);
2290 // }
2291 return status;
2292 }
2293
2294 int hdd_close(uint64_t chunkid) {
2295 int status;
2296 chunk *c;
2297 c = hdd_chunk_find(chunkid);
2298 if (c==NULL) {
2299 return ERROR_NOCHUNK;
2300 }
2301 status = hdd_io_end(c);
2302 if (status!=STATUS_OK) {
2303 hdd_error_occured(c); // uses and preserves errno !!!
2304 hdd_report_damaged_chunk(chunkid);
2305 }
2306 hdd_chunk_release(c);
2307 // if (status==STATUS_OK) {
2308 // syslog(LOG_NOTICE,"chunk %08"PRIX64" closed",chunkid);
2309 // }
2310 return status;
2311 }
2312
2313 int hdd_read(uint64_t chunkid,uint32_t version,uint16_t blocknum,uint8_t *buffer,uint32_t offset,uint32_t size,uint8_t *crcbuff) {
2314 chunk *c;
2315 int ret;
2316 int error;
2317 const uint8_t *rcrcptr;
2318 uint32_t crc,bcrc,precrc,postcrc,combinedcrc;
2319 uint64_t ts,te;
2320 #ifndef PRESERVE_BLOCK
2321 uint8_t *blockbuffer;
2322 blockbuffer = pthread_getspecific(blockbufferkey);
2323 if (blockbuffer==NULL) {
2324 # ifdef MMAP_ALLOC
2325 blockbuffer = mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
2326 # else
2327 blockbuffer = malloc(MFSBLOCKSIZE);
2328 # endif
2329 passert(blockbuffer);
2330 zassert(pthread_setspecific(blockbufferkey,blockbuffer));
2331 }
2332 #endif /* PRESERVE_BLOCK */
2333 c = hdd_chunk_find(chunkid);
2334 if (c==NULL) {
2335 return ERROR_NOCHUNK;
2336 }
2337 if (c->version!=version && version>0) {
2338 hdd_chunk_release(c);
2339 return ERROR_WRONGVERSION;
2340 }
2341 if (blocknum>=MFSBLOCKSINCHUNK) {
2342 hdd_chunk_release(c);
2343 return ERROR_BNUMTOOBIG;
2344 }
2345 if (size>MFSBLOCKSIZE) {
2346 hdd_chunk_release(c);
2347 return ERROR_WRONGSIZE;
2348 }
2349 if ((offset>=MFSBLOCKSIZE) || (offset+size>MFSBLOCKSIZE)) {
2350 hdd_chunk_release(c);
2351 return ERROR_WRONGOFFSET;
2352 }
2353 if (blocknum>=c->blocks) {
2354 memset(buffer,0,size);
2355 if (size==MFSBLOCKSIZE) {
2356 crc = emptyblockcrc;
2357 } else {
2358 crc = mycrc32_zeroblock(0,size);
2359 }
2360 put32bit(&crcbuff,crc);
2361 hdd_chunk_release(c);
2362 return STATUS_OK;
2363 }
2364 if (offset==0 && size==MFSBLOCKSIZE) {
2365 #ifdef PRESERVE_BLOCK
2366 if (c->blockno==blocknum) {
2367 memcpy(buffer,c->block,MFSBLOCKSIZE);
2368 ret = MFSBLOCKSIZE;
2369 error = 0;
2370 } else {
2371 #endif /* PRESERVE_BLOCK */
2372 ts = monotonic_nseconds();
2373 #ifdef USE_PIO
2374 ret = pread(c->fd,buffer,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS));
2375 #else /* USE_PIO */
2376 lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS),SEEK_SET);
2377 ret = read(c->fd,buffer,MFSBLOCKSIZE);
2378 #endif /* USE_PIO */
2379 error = errno;
2380 te = monotonic_nseconds();
2381 hdd_stats_dataread(c->owner,MFSBLOCKSIZE,te-ts);
2382 #ifdef PRESERVE_BLOCK
2383 c->blockno = blocknum;
2384 memcpy(c->block,buffer,MFSBLOCKSIZE);
2385 }
2386 #endif /* PRESERVE_BLOCK */
2387 crc = mycrc32(0,buffer,MFSBLOCKSIZE);
2388 rcrcptr = (c->crc)+(4*blocknum);
2389 bcrc = get32bit(&rcrcptr);
2390 if (bcrc!=crc) {
2391 errno = error;
2392 hdd_error_occured(c); // uses and preserves errno !!!
2393 syslog(LOG_WARNING,"read_block_from_chunk: file:%s - crc error",c->filename);
2394 hdd_report_damaged_chunk(chunkid);
2395 hdd_chunk_release(c);
2396 return ERROR_CRC;
2397 }
2398 if (ret!=MFSBLOCKSIZE) {
2399 errno = error;
2400 hdd_error_occured(c); // uses and preserves errno !!!
2401 mfs_arg_errlog_silent(LOG_WARNING,"read_block_from_chunk: file:%s - read error",c->filename);
2402 hdd_report_damaged_chunk(chunkid);
2403 hdd_chunk_release(c);
2404 return ERROR_IO;
2405 }
2406 } else {
2407 #ifdef PRESERVE_BLOCK
2408 if (c->blockno != blocknum) {
2409 ts = monotonic_nseconds();
2410 #ifdef USE_PIO
2411 ret = pread(c->fd,c->block,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS));
2412 #else /* USE_PIO */
2413 lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS),SEEK_SET);
2414 ret = read(c->fd,c->block,MFSBLOCKSIZE);
2415 #endif /* USE_PIO */
2416 error = errno;
2417 te = monotonic_nseconds();
2418 hdd_stats_dataread(c->owner,MFSBLOCKSIZE,te-ts);
2419 c->blockno = blocknum;
2420 } else {
2421 ret = MFSBLOCKSIZE;
2422 error = 0;
2423 }
2424 precrc = mycrc32(0,c->block,offset);
2425 crc = mycrc32(0,c->block+offset,size);
2426 postcrc = mycrc32(0,c->block+offset+size,MFSBLOCKSIZE-(offset+size));
2427 #else /* PRESERVE_BLOCK */
2428 ts = monotonic_nseconds();
2429 #ifdef USE_PIO
2430 ret = pread(c->fd,blockbuffer,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS));
2431 #else /* USE_PIO */
2432 lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS),SEEK_SET);
2433 ret = read(c->fd,blockbuffer,MFSBLOCKSIZE);
2434 error = errno;
2435 #endif /* USE_PIO */
2436 te = monotonic_nseconds();
2437 hdd_stats_dataread(c->owner,MFSBLOCKSIZE,te-ts);
2438 // crc = mycrc32(0,blockbuffer+offset,size); // first calc crc for piece
2439 precrc = mycrc32(0,blockbuffer,offset);
2440 crc = mycrc32(0,blockbuffer+offset,size);
2441 postcrc = mycrc32(0,blockbuffer+offset+size,MFSBLOCKSIZE-(offset+size));
2442 #endif /* PRESERVE_BLOCK */
2443 if (offset==0) {
2444 combinedcrc = mycrc32_combine(crc,postcrc,MFSBLOCKSIZE-(offset+size));
2445 } else {
2446 combinedcrc = mycrc32_combine(precrc,crc,size);
2447 if ((offset+size)<MFSBLOCKSIZE) {
2448 combinedcrc = mycrc32_combine(combinedcrc,postcrc,MFSBLOCKSIZE-(offset+size));
2449 }
2450 }
2451 rcrcptr = (c->crc)+(4*blocknum);
2452 bcrc = get32bit(&rcrcptr);
2453 // if (bcrc!=mycrc32(0,blockbuffer,MFSBLOCKSIZE)) {
2454 if (bcrc!=combinedcrc) {
2455 errno = error;
2456 hdd_error_occured(c); // uses and preserves errno !!!
2457 syslog(LOG_WARNING,"read_block_from_chunk: file:%s - crc error",c->filename);
2458 hdd_report_damaged_chunk(chunkid);
2459 hdd_chunk_release(c);
2460 return ERROR_CRC;
2461 }
2462 if (ret!=MFSBLOCKSIZE) {
2463 errno = error;
2464 hdd_error_occured(c); // uses and preserves errno !!!
2465 mfs_arg_errlog_silent(LOG_WARNING,"read_block_from_chunk: file:%s - read error",c->filename);
2466 hdd_report_damaged_chunk(chunkid);
2467 hdd_chunk_release(c);
2468 return ERROR_IO;
2469 }
2470 #ifdef PRESERVE_BLOCK
2471 memcpy(buffer,c->block+offset,size);
2472 #else /* PRESERVE_BLOCK */
2473 memcpy(buffer,blockbuffer+offset,size);
2474 #endif /* PRESERVE_BLOCK */
2475 }
2476 put32bit(&crcbuff,crc);
2477 hdd_chunk_release(c);
2478 return STATUS_OK;
2479 }
2480
2481 int hdd_write(uint64_t chunkid,uint32_t version,uint16_t blocknum,const uint8_t *buffer,uint32_t offset,uint32_t size,const uint8_t *crcbuff) {
2482 chunk *c;
2483 int ret;
2484 int error;
2485 uint8_t *wcrcptr;
2486 const uint8_t *rcrcptr;
2487 uint32_t crc,bcrc,precrc,postcrc,combinedcrc,chcrc;
2488 uint32_t i;
2489 uint64_t ts,te;
2490 #ifndef PRESERVE_BLOCK
2491 uint8_t *blockbuffer;
2492 blockbuffer = pthread_getspecific(blockbufferkey);
2493 if (blockbuffer==NULL) {
2494 # ifdef MMAP_ALLOC
2495 blockbuffer = mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
2496 # else
2497 blockbuffer = malloc(MFSBLOCKSIZE);
2498 # endif
2499 passert(blockbuffer);
2500 zassert(pthread_setspecific(blockbufferkey,blockbuffer));
2501 }
2502 #endif /* PRESERVE_BLOCK */
2503 c = hdd_chunk_find(chunkid);
2504 if (c==NULL) {
2505 return ERROR_NOCHUNK;
2506 }
2507 if (c->version!=version && version>0) {
2508 hdd_chunk_release(c);
2509 return ERROR_WRONGVERSION;
2510 }
2511 if (blocknum>=MFSBLOCKSINCHUNK) {
2512 hdd_chunk_release(c);
2513 return ERROR_BNUMTOOBIG;
2514 }
2515 if (size>MFSBLOCKSIZE) {
2516 hdd_chunk_release(c);
2517 return ERROR_WRONGSIZE;
2518 }
2519 if ((offset>=MFSBLOCKSIZE) || (offset+size>MFSBLOCKSIZE)) {
2520 hdd_chunk_release(c);
2521 return ERROR_WRONGOFFSET;
2522 }
2523 crc = get32bit(&crcbuff);
2524 if (crc!=mycrc32(0,buffer,size)) {
2525 hdd_chunk_release(c);
2526 return ERROR_CRC;
2527 }
2528 if (offset==0 && size==MFSBLOCKSIZE) {
2529 if (blocknum>=c->blocks) {
2530 wcrcptr = (c->crc)+(4*(c->blocks));
2531 for (i=c->blocks ; i<blocknum ; i++) {
2532 put32bit(&wcrcptr,emptyblockcrc);
2533 }
2534 c->blocks = blocknum+1;
2535 }
2536 ts = monotonic_nseconds();
2537 #ifdef USE_PIO
2538 ret = pwrite(c->fd,buffer,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS));
2539 #else /* USE_PIO */
2540 lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS),SEEK_SET);
2541 ret = write(c->fd,buffer,MFSBLOCKSIZE);
2542 #endif /* USE_PIO */
2543 error = errno;
2544 te = monotonic_nseconds();
2545 hdd_stats_datawrite(c->owner,MFSBLOCKSIZE,te-ts);
2546 if (crc!=mycrc32(0,buffer,MFSBLOCKSIZE)) {
2547 errno = error;
2548 hdd_error_occured(c);
2549 syslog(LOG_WARNING,"write_block_to_chunk: file:%s - crc error",c->filename);
2550 hdd_report_damaged_chunk(chunkid);
2551 hdd_chunk_release(c);
2552 return ERROR_CRC;
2553 }
2554 wcrcptr = (c->crc)+(4*blocknum);
2555 put32bit(&wcrcptr,crc);
2556 c->crcchanged = 1;
2557 if (ret!=MFSBLOCKSIZE) {
2558 if (error==0 || error==EAGAIN) {
2559 error=ENOSPC;
2560 }
2561 errno = error;
2562 hdd_error_occured(c); // uses and preserves errno !!!
2563 mfs_arg_errlog_silent(LOG_WARNING,"write_block_to_chunk: file:%s - write error",c->filename);
2564 hdd_report_damaged_chunk(chunkid);
2565 hdd_chunk_release(c);
2566 return ERROR_IO;
2567 }
2568 #ifdef PRESERVE_BLOCK
2569 memcpy(c->block,buffer,MFSBLOCKSIZE);
2570 c->blockno = blocknum;
2571 #endif /* PRESERVE_BLOCK */
2572 } else {
2573 if (blocknum<c->blocks) {
2574 #ifdef PRESERVE_BLOCK
2575 if (c->blockno != blocknum) {
2576 ts = monotonic_nseconds();
2577 #ifdef USE_PIO
2578 ret = pread(c->fd,c->block,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS));
2579 #else /* USE_PIO */
2580 lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS),SEEK_SET);
2581 ret = read(c->fd,c->block,MFSBLOCKSIZE);
2582 #endif /* USE_PIO */
2583 error = errno;
2584 te = monotonic_nseconds();
2585 hdd_stats_dataread(c->owner,MFSBLOCKSIZE,te-ts);
2586 c->blockno = blocknum;
2587 } else {
2588 ret = MFSBLOCKSIZE;
2589 error = 0;
2590 }
2591 #else /* PRESERVE_BLOCK */
2592 ts = monotonic_nseconds();
2593 #ifdef USE_PIO
2594 ret = pread(c->fd,blockbuffer,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS));
2595 #else /* USE_PIO */
2596 lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS),SEEK_SET);
2597 ret = read(c->fd,blockbuffer,MFSBLOCKSIZE);
2598 #endif /* USE_PIO */
2599 error = errno;
2600 te = monotonic_nseconds();
2601 hdd_stats_dataread(c->owner,MFSBLOCKSIZE,te-ts);
2602 #endif /* PRESERVE_BLOCK */
2603 if (ret!=MFSBLOCKSIZE) {
2604 errno = error;
2605 hdd_error_occured(c); // uses and preserves errno !!!
2606 mfs_arg_errlog_silent(LOG_WARNING,"write_block_to_chunk: file:%s - read error",c->filename);
2607 hdd_report_damaged_chunk(chunkid);
2608 hdd_chunk_release(c);
2609 return ERROR_IO;
2610 }
2611 #ifdef PRESERVE_BLOCK
2612 precrc = mycrc32(0,c->block,offset);
2613 chcrc = mycrc32(0,c->block+offset,size);
2614 postcrc = mycrc32(0,c->block+offset+size,MFSBLOCKSIZE-(offset+size));
2615 #else /* PRESERVE_BLOCK */
2616 precrc = mycrc32(0,blockbuffer,offset);
2617 chcrc = mycrc32(0,blockbuffer+offset,size);
2618 postcrc = mycrc32(0,blockbuffer+offset+size,MFSBLOCKSIZE-(offset+size));
2619 #endif /* PRESERVE_BLOCK */
2620 if (offset==0) {
2621 combinedcrc = mycrc32_combine(chcrc,postcrc,MFSBLOCKSIZE-(offset+size));
2622 } else {
2623 combinedcrc = mycrc32_combine(precrc,chcrc,size);
2624 if ((offset+size)<MFSBLOCKSIZE) {
2625 combinedcrc = mycrc32_combine(combinedcrc,postcrc,MFSBLOCKSIZE-(offset+size));
2626 }
2627 }
2628 rcrcptr = (c->crc)+(4*blocknum);
2629 bcrc = get32bit(&rcrcptr);
2630 // if (bcrc!=mycrc32(0,blockbuffer,MFSBLOCKSIZE)) {
2631 if (bcrc!=combinedcrc) {
2632 errno = error;
2633 hdd_error_occured(c); // uses and preserves errno !!!
2634 syslog(LOG_WARNING,"write_block_to_chunk: file:%s - crc error",c->filename);
2635 hdd_report_damaged_chunk(chunkid);
2636 hdd_chunk_release(c);
2637 return ERROR_CRC;
2638 }
2639 } else {
2640 if (ftruncate(c->fd,CHUNKHDRSIZE+(((uint32_t)(blocknum+1))<<MFSBLOCKBITS))<0) {
2641 hdd_error_occured(c); // uses and preserves errno !!!
2642 mfs_arg_errlog_silent(LOG_WARNING,"write_block_to_chunk: file:%s - ftruncate error",c->filename);
2643 hdd_report_damaged_chunk(chunkid);
2644 hdd_chunk_release(c);
2645 return ERROR_IO;
2646 }
2647 wcrcptr = (c->crc)+(4*(c->blocks));
2648 for (i=c->blocks ; i<blocknum ; i++) {
2649 put32bit(&wcrcptr,emptyblockcrc);
2650 }
2651 c->blocks = blocknum+1;
2652 #ifdef PRESERVE_BLOCK
2653 memset(c->block,0,MFSBLOCKSIZE);
2654 c->blockno = blocknum;
2655 #else /* PRESERVE_BLOCK */
2656 memset(blockbuffer,0,MFSBLOCKSIZE);
2657 #endif /* PRESERVE_BLOCK */
2658 precrc = mycrc32_zeroblock(0,offset);
2659 postcrc = mycrc32_zeroblock(0,MFSBLOCKSIZE-(offset+size));
2660 }
2661 #ifdef PRESERVE_BLOCK
2662 memcpy(c->block+offset,buffer,size);
2663 ts = monotonic_nseconds();
2664 #ifdef USE_PIO
2665 ret = pwrite(c->fd,c->block+offset,size,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS)+offset);
2666 #else /* USE_PIO */
2667 lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS)+offset,SEEK_SET);
2668 ret = write(c->fd,c->block+offset,size);
2669 #endif /* USE_PIO */
2670 error = errno;
2671 te = monotonic_nseconds();
2672 hdd_stats_datawrite(c->owner,size,te-ts);
2673 chcrc = mycrc32(0,c->block+offset,size);
2674 #else /* PRESERVE_BLOCK */
2675 memcpy(blockbuffer+offset,buffer,size);
2676 ts = monotonic_nseconds();
2677 #ifdef USE_PIO
2678 ret = pwrite(c->fd,blockbuffer+offset,size,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS)+offset);
2679 #else /* USE_PIO */
2680 lseek(c->fd,CHUNKHDRSIZE+(((uint32_t)blocknum)<<MFSBLOCKBITS)+offset,SEEK_SET);
2681 ret = write(c->fd,blockbuffer+offset,size);
2682 #endif /* USE_PIO */
2683 error = errno;
2684 te = monotonic_nseconds();
2685 hdd_stats_datawrite(c->owner,size,te-ts);
2686 chcrc = mycrc32(0,blockbuffer+offset,size);
2687 #endif /* PRESERVE_BLOCK */
2688 if (offset==0) {
2689 combinedcrc = mycrc32_combine(chcrc,postcrc,MFSBLOCKSIZE-(offset+size));
2690 } else {
2691 combinedcrc = mycrc32_combine(precrc,chcrc,size);
2692 if ((offset+size)<MFSBLOCKSIZE) {
2693 combinedcrc = mycrc32_combine(combinedcrc,postcrc,MFSBLOCKSIZE-(offset+size));
2694 }
2695 }
2696 wcrcptr = (c->crc)+(4*blocknum);
2697 // bcrc = mycrc32(0,blockbuffer,MFSBLOCKSIZE);
2698 // put32bit(&wcrcptr,bcrc);
2699 put32bit(&wcrcptr,combinedcrc);
2700 c->crcchanged = 1;
2701 // if (crc!=mycrc32(0,blockbuffer+offset,size)) {
2702 if (crc!=chcrc) {
2703 errno = error;
2704 hdd_error_occured(c); // uses and preserves errno !!!
2705 syslog(LOG_WARNING,"write_block_to_chunk: file:%s - crc error",c->filename);
2706 hdd_report_damaged_chunk(chunkid);
2707 hdd_chunk_release(c);
2708 return ERROR_CRC;
2709 }
2710 if (ret!=(int)size) {
2711 if (error==0 || error==EAGAIN) {
2712 error=ENOSPC;
2713 }
2714 errno = error;
2715 hdd_error_occured(c); // uses and preserves errno !!!
2716 mfs_arg_errlog_silent(LOG_WARNING,"write_block_to_chunk: file:%s - write error",c->filename);
2717 hdd_report_damaged_chunk(chunkid);
2718 hdd_chunk_release(c);
2719 return ERROR_IO;
2720 }
2721 }
2722 //#warning TEST
2723 // if ((random()&0x1F)==0) {
2724 // syslog(LOG_NOTICE,"BAM BAM BAM");
2725 // portable_usleep(500000);
2726 // }
2727 hdd_chunk_release(c);
2728 return STATUS_OK;
2729 }
2730
2731
2732
2733 /* chunk info */
2734 /*
2735 int hdd_check_version(uint64_t chunkid,uint32_t version) {
2736 chunk *c;
2737 c = hdd_chunk_find(chunkid);
2738 if (c==NULL) {
2739 return ERROR_NOCHUNK;
2740 }
2741 if (c->version!=version && version>0) {
2742 hdd_chunk_release(c);
2743 return ERROR_WRONGVERSION;
2744 }
2745 hdd_chunk_release(c);
2746 return STATUS_OK;
2747 }
2748 */
2749 int hdd_get_blocks(uint64_t chunkid,uint32_t version,uint8_t *blocks_buff) {
2750 chunk *c;
2751 c = hdd_chunk_find(chunkid);
2752 if (c==NULL) {
2753 return ERROR_NOCHUNK;
2754 }
2755 if (c->version!=version && version>0) {
2756 hdd_chunk_release(c);
2757 return ERROR_WRONGVERSION;
2758 }
2759 put16bit(&blocks_buff,c->blocks);
2760 hdd_chunk_release(c);
2761 return STATUS_OK;
2762 }
2763
2764 int hdd_get_checksum(uint64_t chunkid,uint32_t version,uint8_t *checksum_buff) {
2765 int status;
2766 uint32_t i;
2767 uint32_t chksum;
2768 chunk *c;
2769 c = hdd_chunk_find(chunkid);
2770 if (c==NULL) {
2771 return ERROR_NOCHUNK;
2772 }
2773 if (c->version!=version && version>0) {
2774 hdd_chunk_release(c);
2775 return ERROR_WRONGVERSION;
2776 }
2777 status = hdd_io_begin(c,0);
2778 if (status!=STATUS_OK) {
2779 hdd_error_occured(c); // uses and preserves errno !!!
2780 hdd_report_damaged_chunk(chunkid);
2781 hdd_chunk_release(c);
2782 return status;
2783 }
2784 chksum = 1;
2785 for (i=0 ; i<1024 ; i++) {
2786 chksum *= 426265243;
2787 chksum ^= c->crc[i];
2788 }
2789 put32bit(&checksum_buff,chksum);
2790 status = hdd_io_end(c);
2791 if (status!=STATUS_OK) {
2792 hdd_error_occured(c); // uses and preserves errno !!!
2793 hdd_report_damaged_chunk(chunkid);
2794 hdd_chunk_release(c);
2795 return status;
2796 }
2797 hdd_chunk_release(c);
2798 return STATUS_OK;
2799 }
2800
2801 int hdd_get_checksum_tab(uint64_t chunkid,uint32_t version,uint8_t *checksum_tab) {
2802 int status;
2803 uint32_t i;
2804 chunk *c;
2805 c = hdd_chunk_find(chunkid);
2806 if (c==NULL) {
2807 return ERROR_NOCHUNK;
2808 }
2809 if (c->version!=version && version>0) {
2810 hdd_chunk_release(c);
2811 return ERROR_WRONGVERSION;
2812 }
2813 status = hdd_io_begin(c,0);
2814 if (status!=STATUS_OK) {
2815 hdd_error_occured(c); // uses and preserves errno !!!
2816 hdd_report_damaged_chunk(chunkid);
2817 hdd_chunk_release(c);
2818 return status;
2819 }
2820 for (i=0 ; i<1024 ; i++) {
2821 put32bit(&checksum_tab,c->crc[i]);
2822 }
2823 status = hdd_io_end(c);
2824 if (status!=STATUS_OK) {
2825 hdd_error_occured(c); // uses and preserves errno !!!
2826 hdd_report_damaged_chunk(chunkid);
2827 hdd_chunk_release(c);
2828 return status;
2829 }
2830 hdd_chunk_release(c);
2831 return STATUS_OK;
2832 }
2833
2834
2835
2836
2837
2838 /* chunk operations */
2839
2840 static int hdd_int_create(uint64_t chunkid,uint32_t version) {
2841 folder *f;
2842 chunk *c;
2843 int status;
2844 uint8_t *ptr;
2845 #ifdef PRESERVE_BLOCK
2846 uint8_t hdrbuffer[CHUNKHDRSIZE];
2847 #else /* PRESERVE_BLOCK */
2848 uint8_t *hdrbuffer;
2849 #endif /* PRESERVE_BLOCK */
2850
2851 zassert(pthread_mutex_lock(&folderlock));
2852 f = hdd_getfolder();
2853 if (f==NULL) {
2854 zassert(pthread_mutex_unlock(&folderlock));
2855 return ERROR_NOSPACE;
2856 }
2857 c = hdd_chunk_create(f,chunkid,version);
2858 zassert(pthread_mutex_unlock(&folderlock));
2859 if (c==NULL) {
2860 return ERROR_CHUNKEXIST;
2861 }
2862
2863 #ifndef PRESERVE_BLOCK
2864 hdrbuffer = pthread_getspecific(hdrbufferkey);
2865 if (hdrbuffer==NULL) {
2866 hdrbuffer = malloc(CHUNKHDRSIZE);
2867 passert(hdrbuffer);
2868 zassert(pthread_setspecific(hdrbufferkey,hdrbuffer));
2869 }
2870 #endif /* PRESERVE_BLOCK */
2871
2872 status = hdd_io_begin(c,1);
2873 if (status!=STATUS_OK) {
2874 hdd_error_occured(c); // uses and preserves errno !!!
2875 hdd_chunk_delete(c);
2876 return ERROR_IO;
2877 }
2878 memset(hdrbuffer,0,CHUNKHDRSIZE);
2879 memcpy(hdrbuffer,MFSSIGNATURE "C 1.0",8);
2880 ptr = hdrbuffer+8;
2881 put64bit(&ptr,chunkid);
2882 put32bit(&ptr,version);
2883 if (write(c->fd,hdrbuffer,CHUNKHDRSIZE)!=CHUNKHDRSIZE) {
2884 hdd_error_occured(c); // uses and preserves errno !!!
2885 mfs_arg_errlog_silent(LOG_WARNING,"create_newchunk: file:%s - write error",c->filename);
2886 hdd_io_end(c);
2887 unlink(c->filename);
2888 hdd_chunk_delete(c);
2889 return ERROR_IO;
2890 }
2891 hdd_stats_write(CHUNKHDRSIZE);
2892 status = hdd_io_end(c);
2893 if (status!=STATUS_OK) {
2894 hdd_error_occured(c); // uses and preserves errno !!!
2895 unlink(c->filename);
2896 hdd_chunk_delete(c);
2897 return status;
2898 }
2899 hdd_chunk_release(c);
2900 return STATUS_OK;
2901 }
2902
2903 static int hdd_int_test(uint64_t chunkid,uint32_t version) {
2904 const uint8_t *ptr;
2905 uint16_t block;
2906 uint32_t bcrc;
2907 int32_t retsize;
2908 int status;
2909 chunk *c;
2910 #ifndef PRESERVE_BLOCK
2911 uint8_t *blockbuffer;
2912 blockbuffer = pthread_getspecific(blockbufferkey);
2913 if (blockbuffer==NULL) {
2914 # ifdef MMAP_ALLOC
2915 blockbuffer = mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
2916 # else
2917 blockbuffer = malloc(MFSBLOCKSIZE);
2918 # endif
2919 passert(blockbuffer);
2920 zassert(pthread_setspecific(blockbufferkey,blockbuffer));
2921 }
2922 #endif /* PRESERVE_BLOCK */
2923 c = hdd_chunk_find(chunkid);
2924 if (c==NULL) {
2925 return ERROR_NOCHUNK;
2926 }
2927 if (c->version!=version && version>0) {
2928 hdd_chunk_release(c);
2929 return ERROR_WRONGVERSION;
2930 }
2931 status = hdd_io_begin(c,0);
2932 if (status!=STATUS_OK) {
2933 hdd_error_occured(c); // uses and preserves errno !!!
2934 hdd_chunk_release(c);
2935 return status;
2936 }
2937 lseek(c->fd,CHUNKHDRSIZE,SEEK_SET);
2938 ptr = c->crc;
2939 for (block=0 ; block<c->blocks ; block++) {
2940 #ifdef PRESERVE_BLOCK
2941 retsize = read(c->fd,c->block,MFSBLOCKSIZE);
2942 #else /* PRESERVE_BLOCK */
2943 retsize = read(c->fd,blockbuffer,MFSBLOCKSIZE);
2944 #endif /* PRESERVE_BLOCK */
2945 if (retsize!=MFSBLOCKSIZE) {
2946 hdd_error_occured(c); // uses and preserves errno !!!
2947 mfs_arg_errlog_silent(LOG_WARNING,"test_chunk: file:%s - data read error",c->filename);
2948 hdd_io_end(c);
2949 hdd_report_damaged_chunk(chunkid);
2950 hdd_chunk_release(c);
2951 return ERROR_IO;
2952 }
2953 hdd_stats_read(MFSBLOCKSIZE);
2954 #ifdef PRESERVE_BLOCK
2955 c->blockno = block;
2956 #endif
2957 bcrc = get32bit(&ptr);
2958 #ifdef PRESERVE_BLOCK
2959 if (bcrc!=mycrc32(0,c->block,MFSBLOCKSIZE)) {
2960 #else /* PRESERVE_BLOCK */
2961 if (bcrc!=mycrc32(0,blockbuffer,MFSBLOCKSIZE)) {
2962 #endif /* PRESERVE_BLOCK */
2963 errno = 0; // set anything to errno
2964 hdd_error_occured(c); // uses and preserves errno !!!
2965 syslog(LOG_WARNING,"test_chunk: file:%s - crc error",c->filename);
2966 hdd_io_end(c);
2967 hdd_report_damaged_chunk(chunkid);
2968 hdd_chunk_release(c);
2969 return ERROR_CRC;
2970 }
2971 }
2972 status = hdd_io_end(c);
2973 if (status!=STATUS_OK) {
2974 hdd_error_occured(c); // uses and preserves errno !!!
2975 hdd_report_damaged_chunk(chunkid);
2976 hdd_chunk_release(c);
2977 return status;
2978 }
2979 hdd_chunk_release(c);
2980 return STATUS_OK;
2981 }
2982
2983 static int hdd_int_duplicate(uint64_t chunkid,uint32_t version,uint32_t newversion,uint64_t copychunkid,uint32_t copyversion) {
2984 folder *f;
2985 uint32_t filenameleng;
2986 char *newfilename;
2987 uint8_t *ptr,vbuff[4];
2988 uint16_t block;
2989 int32_t retsize;
2990 int status;
2991 chunk *c,*oc;
2992 #ifdef PRESERVE_BLOCK
2993 uint8_t hdrbuffer[CHUNKHDRSIZE];
2994 #else /* PRESERVE_BLOCK */
2995 uint8_t *blockbuffer,*hdrbuffer;
2996 blockbuffer = pthread_getspecific(blockbufferkey);
2997 if (blockbuffer==NULL) {
2998 # ifdef MMAP_ALLOC
2999 blockbuffer = mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
3000 # else
3001 blockbuffer = malloc(MFSBLOCKSIZE);
3002 # endif
3003 passert(blockbuffer);
3004 zassert(pthread_setspecific(blockbufferkey,blockbuffer));
3005 }
3006 hdrbuffer = pthread_getspecific(hdrbufferkey);
3007 if (hdrbuffer==NULL) {
3008 hdrbuffer = malloc(CHUNKHDRSIZE);
3009 passert(hdrbuffer);
3010 zassert(pthread_setspecific(hdrbufferkey,hdrbuffer));
3011 }
3012 #endif /* PRESERVE_BLOCK */
3013
3014 oc = hdd_chunk_find(chunkid);
3015 if (oc==NULL) {
3016 return ERROR_NOCHUNK;
3017 }
3018 if (oc->version!=version && version>0) {
3019 hdd_chunk_release(oc);
3020 return ERROR_WRONGVERSION;
3021 }
3022 if (copyversion==0) {
3023 copyversion = newversion;
3024 }
3025 zassert(pthread_mutex_lock(&folderlock));
3026 f = hdd_getfolder();
3027 if (f==NULL) {
3028 zassert(pthread_mutex_unlock(&folderlock));
3029 hdd_chunk_release(oc);
3030 return ERROR_NOSPACE;
3031 }
3032 c = hdd_chunk_create(f,copychunkid,copyversion);
3033 zassert(pthread_mutex_unlock(&folderlock));
3034 if (c==NULL) {
3035 hdd_chunk_release(oc);
3036 return ERROR_CHUNKEXIST;
3037 }
3038
3039 if (newversion!=version) {
3040 filenameleng = strlen(oc->filename);
3041 if (oc->filename[filenameleng-13]=='_') { // new file name format
3042 newfilename = malloc(filenameleng+1);
3043 passert(newfilename);
3044 memcpy(newfilename,c->filename,filenameleng+1);
3045 sprintf(newfilename+filenameleng-12,"%08"PRIX32".mfs",newversion);
3046 if (rename(oc->filename,newfilename)<0) {
3047 hdd_error_occured(oc); // uses and preserves errno !!!
3048 mfs_arg_errlog_silent(LOG_WARNING,"duplicate_chunk: file:%s - rename error",oc->filename);
3049 free(newfilename);
3050 hdd_chunk_delete(c);
3051 hdd_chunk_release(oc);
3052 return ERROR_IO;
3053 }
3054 free(oc->filename);
3055 oc->filename = newfilename;
3056 }
3057 status = hdd_io_begin(oc,0);
3058 if (status!=STATUS_OK) {
3059 hdd_error_occured(oc); // uses and preserves errno !!!
3060 hdd_chunk_delete(c);
3061 hdd_chunk_release(oc);
3062 return status; //can't change file version
3063 }
3064 ptr = vbuff;
3065 put32bit(&ptr,newversion);
3066 #ifdef USE_PIO
3067 if (pwrite(oc->fd,vbuff,4,16)!=4) {
3068 #else /* USE_PIO */
3069 lseek(oc->fd,16,SEEK_SET);
3070 if (write(oc->fd,vbuff,4)!=4) {
3071 #endif /* USE_PIO */
3072 hdd_error_occured(oc); // uses and preserves errno !!!
3073 mfs_arg_errlog_silent(LOG_WARNING,"duplicate_chunk: file:%s - write error",c->filename);
3074 hdd_chunk_delete(c);
3075 hdd_io_end(oc);
3076 hdd_chunk_release(oc);
3077 return ERROR_IO;
3078 }
3079 hdd_stats_write(4);
3080 oc->version = newversion;
3081 } else {
3082 status = hdd_io_begin(oc,0);
3083 if (status!=STATUS_OK) {
3084 hdd_error_occured(oc); // uses and preserves errno !!!
3085 hdd_chunk_delete(c);
3086 hdd_report_damaged_chunk(chunkid);
3087 hdd_chunk_release(oc);
3088 return status;
3089 }
3090 }
3091 status = hdd_io_begin(c,1);
3092 if (status!=STATUS_OK) {
3093 hdd_error_occured(c); // uses and preserves errno !!!
3094 hdd_chunk_delete(c);
3095 hdd_io_end(oc);
3096 hdd_chunk_release(oc);
3097 return status;
3098 }
3099 memset(hdrbuffer,0,CHUNKHDRSIZE);
3100 memcpy(hdrbuffer,MFSSIGNATURE "C 1.0",8);
3101 ptr = hdrbuffer+8;
3102 put64bit(&ptr,copychunkid);
3103 put32bit(&ptr,copyversion);
3104 memcpy(c->crc,oc->crc,4096);
3105 memcpy(hdrbuffer+1024,oc->crc,4096);
3106 if (write(c->fd,hdrbuffer,CHUNKHDRSIZE)!=CHUNKHDRSIZE) {
3107 hdd_error_occured(c); // uses and preserves errno !!!
3108 mfs_arg_errlog_silent(LOG_WARNING,"duplicate_chunk: file:%s - hdr write error",c->filename);
3109 hdd_io_end(c);
3110 unlink(c->filename);
3111 hdd_chunk_delete(c);
3112 hdd_io_end(oc);
3113 hdd_chunk_release(oc);
3114 return ERROR_IO;
3115 }
3116 hdd_stats_write(CHUNKHDRSIZE);
3117 #ifndef PRESERVE_BLOCK
3118 lseek(oc->fd,CHUNKHDRSIZE,SEEK_SET);
3119 #endif /* PRESERVE_BLOCK */
3120 for (block=0 ; block<oc->blocks ; block++) {
3121 #ifdef PRESERVE_BLOCK
3122 if (oc->blockno==block) {
3123 memcpy(c->block,oc->block,MFSBLOCKSIZE);
3124 retsize = MFSBLOCKSIZE;
3125 } else {
3126 #ifdef USE_PIO
3127 retsize = pread(oc->fd,c->block,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS));
3128 #else /* USE_PIO */
3129 lseek(oc->fd,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS),SEEK_SET);
3130 retsize = read(oc->fd,c->block,MFSBLOCKSIZE);
3131 #endif /* USE_PIO */
3132 }
3133 #else /* PRESERVE_BLOCK */
3134 retsize = read(oc->fd,blockbuffer,MFSBLOCKSIZE);
3135 #endif /* PRESERVE_BLOCK */
3136 if (retsize!=MFSBLOCKSIZE) {
3137 hdd_error_occured(oc); // uses and preserves errno !!!
3138 mfs_arg_errlog_silent(LOG_WARNING,"duplicate_chunk: file:%s - data read error",oc->filename);
3139 hdd_io_end(c);
3140 unlink(c->filename);
3141 hdd_chunk_delete(c);
3142 hdd_io_end(oc);
3143 hdd_report_damaged_chunk(chunkid);
3144 hdd_chunk_release(oc);
3145 return ERROR_IO;
3146 }
3147 #ifdef PRESERVE_BLOCK
3148 if (oc->blockno!=block) {
3149 hdd_stats_read(MFSBLOCKSIZE);
3150 }
3151 retsize = write(c->fd,c->block,MFSBLOCKSIZE);
3152 #else /* PRESERVE_BLOCK */
3153 hdd_stats_read(MFSBLOCKSIZE);
3154 retsize = write(c->fd,blockbuffer,MFSBLOCKSIZE);
3155 #endif /* PRESERVE_BLOCK */
3156 if (retsize!=MFSBLOCKSIZE) {
3157 hdd_error_occured(c); // uses and preserves errno !!!
3158 mfs_arg_errlog_silent(LOG_WARNING,"duplicate_chunk: file:%s - data write error",c->filename);
3159 hdd_io_end(c);
3160 unlink(c->filename);
3161 hdd_chunk_delete(c);
3162 hdd_io_end(oc);
3163 hdd_chunk_release(oc);
3164 return ERROR_IO; //write error
3165 }
3166 hdd_stats_write(MFSBLOCKSIZE);
3167 #ifdef PRESERVE_BLOCK
3168 c->blockno = block;
3169 #endif /* PRESERVE_BLOCK */
3170 }
3171 status = hdd_io_end(oc);
3172 if (status!=STATUS_OK) {
3173 hdd_error_occured(oc); // uses and preserves errno !!!
3174 hdd_io_end(c);
3175 unlink(c->filename);
3176 hdd_chunk_delete(c);
3177 hdd_report_damaged_chunk(chunkid);
3178 hdd_chunk_release(oc);
3179 return status;
3180 }
3181 status = hdd_io_end(c);
3182 if (status!=STATUS_OK) {
3183 hdd_error_occured(c); // uses and preserves errno !!!
3184 unlink(c->filename);
3185 hdd_chunk_delete(c);
3186 hdd_chunk_release(oc);
3187 return status;
3188 }
3189 c->blocks = oc->blocks;
3190 zassert(pthread_mutex_lock(&folderlock));
3191 c->owner->needrefresh = 1;
3192 zassert(pthread_mutex_unlock(&folderlock));
3193 hdd_chunk_release(c);
3194 hdd_chunk_release(oc);
3195 return STATUS_OK;
3196 }
3197
3198 static int hdd_int_version(uint64_t chunkid,uint32_t version,uint32_t newversion) {
3199 int status;
3200 uint32_t filenameleng;
3201 char *newfilename;
3202 uint8_t *ptr,vbuff[4];
3203 chunk *c;
3204 c = hdd_chunk_find(chunkid);
3205 if (c==NULL) {
3206 return ERROR_NOCHUNK;
3207 }
3208 if (c->version!=version && version>0) {
3209 hdd_chunk_release(c);
3210 return ERROR_WRONGVERSION;
3211 }
3212 filenameleng = strlen(c->filename);
3213 if (c->filename[filenameleng-13]=='_') { // new file name format
3214 newfilename = malloc(filenameleng+1);
3215 passert(newfilename);
3216 memcpy(newfilename,c->filename,filenameleng+1);
3217 sprintf(newfilename+filenameleng-12,"%08"PRIX32".mfs",newversion);
3218 if (rename(c->filename,newfilename)<0) {
3219 hdd_error_occured(c); // uses and preserves errno !!!
3220 mfs_arg_errlog_silent(LOG_WARNING,"set_chunk_version: file:%s - rename error",c->filename);
3221 free(newfilename);
3222 hdd_chunk_release(c);
3223 return ERROR_IO;
3224 }
3225 free(c->filename);
3226 c->filename = newfilename;
3227 }
3228 status = hdd_io_begin(c,0);
3229 if (status!=STATUS_OK) {
3230 hdd_error_occured(c); // uses and preserves errno !!!
3231 mfs_arg_errlog_silent(LOG_WARNING,"set_chunk_version: file:%s - open error",c->filename);
3232 hdd_chunk_release(c);
3233 return status;
3234 }
3235 ptr = vbuff;
3236 put32bit(&ptr,newversion);
3237 #ifdef USE_PIO
3238 if (pwrite(c->fd,vbuff,4,16)!=4) {
3239 #else /* USE_PIO */
3240 lseek(c->fd,16,SEEK_SET);
3241 if (write(c->fd,vbuff,4)!=4) {
3242 #endif /* USE_PIO */
3243 hdd_error_occured(c); // uses and preserves errno !!!
3244 mfs_arg_errlog_silent(LOG_WARNING,"set_chunk_version: file:%s - write error",c->filename);
3245 hdd_io_end(c);
3246 hdd_chunk_release(c);
3247 return ERROR_IO;
3248 }
3249 hdd_stats_write(4);
3250 c->version = newversion;
3251 status = hdd_io_end(c);
3252 if (status!=STATUS_OK) {
3253 hdd_error_occured(c); // uses and preserves errno !!!
3254 }
3255 hdd_chunk_release(c);
3256 return status;
3257 }
3258
3259 static int hdd_int_truncate(uint64_t chunkid,uint32_t version,uint32_t newversion,uint32_t length) {
3260 int status;
3261 uint32_t filenameleng;
3262 char *newfilename;
3263 uint8_t *ptr,vbuff[4];
3264 chunk *c;
3265 uint32_t blocks;
3266 uint32_t i;
3267 #ifndef PRESERVE_BLOCK
3268 uint8_t *blockbuffer;
3269 blockbuffer = pthread_getspecific(blockbufferkey);
3270 if (blockbuffer==NULL) {
3271 # ifdef MMAP_ALLOC
3272 blockbuffer = mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
3273 # else
3274 blockbuffer = malloc(MFSBLOCKSIZE);
3275 # endif
3276 passert(blockbuffer);
3277 zassert(pthread_setspecific(blockbufferkey,blockbuffer));
3278 }
3279 #endif /* !PRESERVE_BLOCK */
3280 if (length>MFSCHUNKSIZE) {
3281 return ERROR_WRONGSIZE;
3282 }
3283 c = hdd_chunk_find(chunkid);
3284 // step 1 - change version
3285 if (c==NULL) {
3286 return ERROR_NOCHUNK;
3287 }
3288 if (c->version!=version && version>0) {
3289 hdd_chunk_release(c);
3290 return ERROR_WRONGVERSION;
3291 }
3292 filenameleng = strlen(c->filename);
3293 if (c->filename[filenameleng-13]=='_') { // new file name format
3294 newfilename = malloc(filenameleng+1);
3295 passert(newfilename);
3296 memcpy(newfilename,c->filename,filenameleng+1);
3297 sprintf(newfilename+filenameleng-12,"%08"PRIX32".mfs",newversion);
3298 if (rename(c->filename,newfilename)<0) {
3299 hdd_error_occured(c); // uses and preserves errno !!!
3300 mfs_arg_errlog_silent(LOG_WARNING,"truncate_chunk: file:%s - rename error",c->filename);
3301 free(newfilename);
3302 hdd_chunk_release(c);
3303 return ERROR_IO;
3304 }
3305 free(c->filename);
3306 c->filename = newfilename;
3307 }
3308 status = hdd_io_begin(c,0);
3309 if (status!=STATUS_OK) {
3310 hdd_error_occured(c); // uses and preserves errno !!!
3311 hdd_chunk_release(c);
3312 return status; //can't change file version
3313 }
3314 ptr = vbuff;
3315 put32bit(&ptr,newversion);
3316 #ifdef USE_PIO
3317 if (pwrite(c->fd,vbuff,4,16)!=4) {
3318 #else /* USE_PIO */
3319 lseek(c->fd,16,SEEK_SET);
3320 if (write(c->fd,vbuff,4)!=4) {
3321 #endif /* USE_PIO */
3322 hdd_error_occured(c); // uses and preserves errno !!!
3323 mfs_arg_errlog_silent(LOG_WARNING,"truncate_chunk: file:%s - write error",c->filename);
3324 hdd_io_end(c);
3325 hdd_chunk_release(c);
3326 return ERROR_IO;
3327 }
3328 hdd_stats_write(4);
3329 c->version = newversion;
3330 // step 2. truncate
3331 blocks = ((length+MFSBLOCKMASK)>>MFSBLOCKBITS);
3332 if (blocks>c->blocks) {
3333 if (ftruncate(c->fd,CHUNKHDRSIZE+(blocks<<MFSBLOCKBITS))<0) {
3334 hdd_error_occured(c); // uses and preserves errno !!!
3335 mfs_arg_errlog_silent(LOG_WARNING,"truncate_chunk: file:%s - ftruncate error",c->filename);
3336 hdd_io_end(c);
3337 hdd_chunk_release(c);
3338 return ERROR_IO;
3339 }
3340 ptr = (c->crc)+(4*(c->blocks));
3341 for (i=c->blocks ; i<blocks ; i++) {
3342 put32bit(&ptr,emptyblockcrc);
3343 }
3344 c->crcchanged = 1;
3345 } else {
3346 uint32_t blocknum = length>>MFSBLOCKBITS;
3347 uint32_t blockpos = length&MFSCHUNKBLOCKMASK;
3348 uint32_t blocksize = length&MFSBLOCKMASK;
3349 if (ftruncate(c->fd,CHUNKHDRSIZE+length)<0) {
3350 hdd_error_occured(c); // uses and preserves errno !!!
3351 mfs_arg_errlog_silent(LOG_WARNING,"truncate_chunk: file:%s - ftruncate error",c->filename);
3352 hdd_io_end(c);
3353 hdd_chunk_release(c);
3354 return ERROR_IO;
3355 }
3356 if (blocksize>0) {
3357 if (ftruncate(c->fd,CHUNKHDRSIZE+(blocks<<MFSBLOCKBITS))<0) {
3358 hdd_error_occured(c); // uses and preserves errno !!!
3359 mfs_arg_errlog_silent(LOG_WARNING,"truncate_chunk: file:%s - ftruncate error",c->filename);
3360 hdd_io_end(c);
3361 hdd_chunk_release(c);
3362 return ERROR_IO;
3363 }
3364 #ifdef PRESERVE_BLOCK
3365 if (c->blockno>=blocks) {
3366 c->blockno = 0xFFFF; // invalidate truncated block
3367 }
3368 if (c->blockno!=(blockpos>>MFSBLOCKBITS)) {
3369
3370 #ifdef USE_PIO
3371 if (pread(c->fd,c->block,blocksize,CHUNKHDRSIZE+blockpos)!=(signed)blocksize) {
3372 #else /* USE_PIO */
3373 lseek(c->fd,CHUNKHDRSIZE+blockpos,SEEK_SET);
3374 if (read(c->fd,c->block,blocksize)!=(signed)blocksize) {
3375 #endif /* USE_PIO */
3376 #else /* PRESERVE_BLOCK */
3377 #ifdef USE_PIO
3378 if (pread(c->fd,blockbuffer,blocksize,CHUNKHDRSIZE+blockpos)!=(signed)blocksize) {
3379 #else /* USE_PIO */
3380 lseek(c->fd,CHUNKHDRSIZE+blockpos,SEEK_SET);
3381 if (read(c->fd,blockbuffer,blocksize)!=(signed)blocksize) {
3382 #endif /* USE_PIO */
3383 #endif /* PRESERVE_BLOCK */
3384 hdd_error_occured(c); // uses and preserves errno !!!
3385 mfs_arg_errlog_silent(LOG_WARNING,"truncate_chunk: file:%s - read error",c->filename);
3386 hdd_io_end(c);
3387 hdd_chunk_release(c);
3388 return ERROR_IO;
3389 }
3390 hdd_stats_read(blocksize);
3391 #ifdef PRESERVE_BLOCK
3392 }
3393 memset(c->block+blocksize,0,MFSBLOCKSIZE-blocksize);
3394 c->blockno = blockpos>>MFSBLOCKBITS;
3395 i = mycrc32_zeroexpanded(0,c->block,blocksize,MFSBLOCKSIZE-blocksize);
3396 #else /* PRESERVE_BLOCK */
3397 i = mycrc32_zeroexpanded(0,blockbuffer,blocksize,MFSBLOCKSIZE-blocksize);
3398 #endif /* PRESERVE_BLOCK */
3399 ptr = (c->crc)+(4*blocknum);
3400 put32bit(&ptr,i);
3401 c->crcchanged = 1;
3402 }
3403 }
3404 if (c->blocks != blocks) {
3405 zassert(pthread_mutex_lock(&folderlock));
3406 c->owner->needrefresh = 1;
3407 zassert(pthread_mutex_unlock(&folderlock));
3408 }
3409 c->blocks = blocks;
3410 status = hdd_io_end(c);
3411 if (status!=STATUS_OK) {
3412 hdd_error_occured(c); // uses and preserves errno !!!
3413 }
3414 hdd_chunk_release(c);
3415 return status;
3416 }
3417
3418 static int hdd_int_duptrunc(uint64_t chunkid,uint32_t version,uint32_t newversion,uint64_t copychunkid,uint32_t copyversion,uint32_t length) {
3419 folder *f;
3420 uint32_t filenameleng;
3421 char *newfilename;
3422 uint8_t *ptr,vbuff[4];
3423 uint16_t block;
3424 uint16_t blocks;
3425 int32_t retsize;
3426 uint32_t crc;
3427 int status;
3428 chunk *c,*oc;
3429 #ifdef PRESERVE_BLOCK
3430 uint8_t hdrbuffer[CHUNKHDRSIZE];
3431 #else /* PRESERVE_BLOCK */
3432 uint8_t *blockbuffer,*hdrbuffer;
3433 blockbuffer = pthread_getspecific(blockbufferkey);
3434 if (blockbuffer==NULL) {
3435 # ifdef MMAP_ALLOC
3436 blockbuffer = mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
3437 # else
3438 blockbuffer = malloc(MFSBLOCKSIZE);
3439 # endif
3440 passert(blockbuffer);
3441 zassert(pthread_setspecific(blockbufferkey,blockbuffer));
3442 }
3443 hdrbuffer = pthread_getspecific(hdrbufferkey);
3444 if (hdrbuffer==NULL) {
3445 hdrbuffer = malloc(CHUNKHDRSIZE);
3446 passert(hdrbuffer);
3447 zassert(pthread_setspecific(hdrbufferkey,hdrbuffer));
3448 }
3449 #endif /* PRESERVE_BLOCK */
3450
3451 if (length>MFSCHUNKSIZE) {
3452 return ERROR_WRONGSIZE;
3453 }
3454 oc = hdd_chunk_find(chunkid);
3455 if (oc==NULL) {
3456 return ERROR_NOCHUNK;
3457 }
3458 if (oc->version!=version && version>0) {
3459 hdd_chunk_release(oc);
3460 return ERROR_WRONGVERSION;
3461 }
3462 if (copyversion==0) {
3463 copyversion = newversion;
3464 }
3465 zassert(pthread_mutex_lock(&folderlock));
3466 f = hdd_getfolder();
3467 if (f==NULL) {
3468 zassert(pthread_mutex_unlock(&folderlock));
3469 hdd_chunk_release(oc);
3470 return ERROR_NOSPACE;
3471 }
3472 c = hdd_chunk_create(f,copychunkid,copyversion);
3473 zassert(pthread_mutex_unlock(&folderlock));
3474 if (c==NULL) {
3475 hdd_chunk_release(oc);
3476 return ERROR_CHUNKEXIST;
3477 }
3478
3479 if (newversion!=version) {
3480 filenameleng = strlen(oc->filename);
3481 if (oc->filename[filenameleng-13]=='_') { // new file name format
3482 newfilename = malloc(filenameleng+1);
3483 passert(newfilename);
3484 memcpy(newfilename,c->filename,filenameleng+1);
3485 sprintf(newfilename+filenameleng-12,"%08"PRIX32".mfs",newversion);
3486 if (rename(oc->filename,newfilename)<0) {
3487 hdd_error_occured(oc); // uses and preserves errno !!!
3488 mfs_arg_errlog_silent(LOG_WARNING,"duplicate_chunk: file:%s - rename error",oc->filename);
3489 free(newfilename);
3490 hdd_chunk_delete(c);
3491 hdd_chunk_release(oc);
3492 return ERROR_IO;
3493 }
3494 free(oc->filename);
3495 oc->filename = newfilename;
3496 }
3497 status = hdd_io_begin(oc,0);
3498 if (status!=STATUS_OK) {
3499 hdd_error_occured(oc); // uses and preserves errno !!!
3500 hdd_chunk_delete(c);
3501 hdd_chunk_release(oc);
3502 return status; //can't change file version
3503 }
3504 ptr = vbuff;
3505 put32bit(&ptr,newversion);
3506 #ifdef USE_PIO
3507 if (pwrite(oc->fd,vbuff,4,16)!=4) {
3508 #else /* USE_PIO */
3509 lseek(oc->fd,16,SEEK_SET);
3510 if (write(oc->fd,vbuff,4)!=4) {
3511 #endif /* USE_PIO */
3512 hdd_error_occured(oc); // uses and preserves errno !!!
3513 mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - write error",c->filename);
3514 hdd_chunk_delete(c);
3515 hdd_io_end(oc);
3516 hdd_chunk_release(oc);
3517 return ERROR_IO;
3518 }
3519 hdd_stats_write(4);
3520 oc->version = newversion;
3521 } else {
3522 status = hdd_io_begin(oc,0);
3523 if (status!=STATUS_OK) {
3524 hdd_error_occured(oc); // uses and preserves errno !!!
3525 hdd_chunk_delete(c);
3526 hdd_report_damaged_chunk(chunkid);
3527 hdd_chunk_release(oc);
3528 return status;
3529 }
3530 }
3531 status = hdd_io_begin(c,1);
3532 if (status!=STATUS_OK) {
3533 hdd_error_occured(c); // uses and preserves errno !!!
3534 hdd_chunk_delete(c);
3535 hdd_io_end(oc);
3536 hdd_chunk_release(oc);
3537 return status;
3538 }
3539 blocks = ((length+MFSBLOCKMASK)>>MFSBLOCKBITS);
3540 memset(hdrbuffer,0,CHUNKHDRSIZE);
3541 memcpy(hdrbuffer,MFSSIGNATURE "C 1.0",8);
3542 ptr = hdrbuffer+8;
3543 put64bit(&ptr,copychunkid);
3544 put32bit(&ptr,copyversion);
3545 memcpy(hdrbuffer+1024,oc->crc,4096);
3546 // do not write header yet - only seek to apriopriate position
3547 lseek(c->fd,CHUNKHDRSIZE,SEEK_SET);
3548 #ifndef PRESERVE_BLOCK
3549 lseek(oc->fd,CHUNKHDRSIZE,SEEK_SET);
3550 #endif /* PRESERVE_BLOCK */
3551 if (blocks>oc->blocks) { // expanding
3552 for (block=0 ; block<oc->blocks ; block++) {
3553 #ifdef PRESERVE_BLOCK
3554 if (oc->blockno==block) {
3555 memcpy(c->block,oc->block,MFSBLOCKSIZE);
3556 retsize = MFSBLOCKSIZE;
3557 } else {
3558 #ifdef USE_PIO
3559 retsize = pread(oc->fd,c->block,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS));
3560 #else /* USE_PIO */
3561 lseek(oc->fd,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS),SEEK_SET);
3562 retsize = read(oc->fd,c->block,MFSBLOCKSIZE);
3563 #endif /* USE_PIO */
3564 }
3565 #else /* PRESERVE_BLOCK */
3566 retsize = read(oc->fd,blockbuffer,MFSBLOCKSIZE);
3567 #endif /* PRESERVE_BLOCK */
3568 if (retsize!=MFSBLOCKSIZE) {
3569 hdd_error_occured(oc); // uses and preserves errno !!!
3570 mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data read error",oc->filename);
3571 hdd_io_end(c);
3572 unlink(c->filename);
3573 hdd_chunk_delete(c);
3574 hdd_io_end(oc);
3575 hdd_report_damaged_chunk(chunkid);
3576 hdd_chunk_release(oc);
3577 return ERROR_IO;
3578 }
3579 #ifdef PRESERVE_BLOCK
3580 if (oc->blockno!=block) {
3581 hdd_stats_read(MFSBLOCKSIZE);
3582 }
3583 retsize = write(c->fd,c->block,MFSBLOCKSIZE);
3584 #else /* PRESERVE_BLOCK */
3585 hdd_stats_read(MFSBLOCKSIZE);
3586 retsize = write(c->fd,blockbuffer,MFSBLOCKSIZE);
3587 #endif /* PRESERVE_BLOCK */
3588 if (retsize!=MFSBLOCKSIZE) {
3589 hdd_error_occured(c); // uses and preserves errno !!!
3590 mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data write error",c->filename);
3591 hdd_io_end(c);
3592 unlink(c->filename);
3593 hdd_chunk_delete(c);
3594 hdd_io_end(oc);
3595 hdd_chunk_release(oc);
3596 return ERROR_IO;
3597 }
3598 hdd_stats_write(MFSBLOCKSIZE);
3599 #ifdef PRESERVE_BLOCK
3600 c->blockno = block;
3601 #endif /* PRESERVE_BLOCK */
3602 }
3603 if (ftruncate(c->fd,CHUNKHDRSIZE+(((uint32_t)blocks)<<MFSBLOCKBITS))<0) {
3604 hdd_error_occured(c); // uses and preserves errno !!!
3605 mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - ftruncate error",c->filename);
3606 hdd_io_end(c);
3607 unlink(c->filename);
3608 hdd_chunk_delete(c);
3609 hdd_io_end(oc);
3610 hdd_chunk_release(oc);
3611 return ERROR_IO; //write error
3612 }
3613 ptr = hdrbuffer+CHUNKHDRCRC+4*(oc->blocks);
3614 for (block=oc->blocks ; block<blocks ; block++) {
3615 put32bit(&ptr,emptyblockcrc);
3616 }
3617 } else { // shrinking
3618 uint32_t blocksize = (length&MFSBLOCKMASK);
3619 if (blocksize==0) { // aligned shring
3620 for (block=0 ; block<blocks ; block++) {
3621 #ifdef PRESERVE_BLOCK
3622 if (oc->blockno==block) {
3623 memcpy(c->block,oc->block,MFSBLOCKSIZE);
3624 retsize = MFSBLOCKSIZE;
3625 } else {
3626 #ifdef USE_PIO
3627 retsize = pread(oc->fd,c->block,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS));
3628 #else /* USE_PIO */
3629 lseek(oc->fd,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS),SEEK_SET);
3630 retsize = read(oc->fd,c->block,MFSBLOCKSIZE);
3631 #endif /* USE_PIO */
3632 }
3633 #else /* PRESERVE_BLOCK */
3634 retsize = read(oc->fd,blockbuffer,MFSBLOCKSIZE);
3635 #endif /* PRESERVE_BLOCK */
3636 if (retsize!=MFSBLOCKSIZE) {
3637 hdd_error_occured(oc); // uses and preserves errno !!!
3638 mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data read error",oc->filename);
3639 hdd_io_end(c);
3640 unlink(c->filename);
3641 hdd_chunk_delete(c);
3642 hdd_io_end(oc);
3643 hdd_report_damaged_chunk(chunkid);
3644 hdd_chunk_release(oc);
3645 return ERROR_IO;
3646 }
3647 #ifdef PRESERVE_BLOCK
3648 if (oc->blockno!=block) {
3649 hdd_stats_read(MFSBLOCKSIZE);
3650 }
3651 retsize = write(c->fd,c->block,MFSBLOCKSIZE);
3652 #else /* PRESERVE_BLOCK */
3653 hdd_stats_read(MFSBLOCKSIZE);
3654 retsize = write(c->fd,blockbuffer,MFSBLOCKSIZE);
3655 #endif /* PRESERVE_BLOCK */
3656 if (retsize!=MFSBLOCKSIZE) {
3657 hdd_error_occured(c); // uses and preserves errno !!!
3658 mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data write error",c->filename);
3659 hdd_io_end(c);
3660 unlink(c->filename);
3661 hdd_chunk_delete(c);
3662 hdd_io_end(oc);
3663 hdd_chunk_release(oc);
3664 return ERROR_IO;
3665 }
3666 hdd_stats_write(MFSBLOCKSIZE);
3667 #ifdef PRESERVE_BLOCK
3668 c->blockno = block;
3669 #endif /* PRESERVE_BLOCK */
3670 }
3671 } else { // misaligned shrink
3672 for (block=0 ; block<blocks-1 ; block++) {
3673 #ifdef PRESERVE_BLOCK
3674 if (oc->blockno==block) {
3675 memcpy(c->block,oc->block,MFSBLOCKSIZE);
3676 retsize = MFSBLOCKSIZE;
3677 } else {
3678 #ifdef USE_PIO
3679 retsize = pread(oc->fd,c->block,MFSBLOCKSIZE,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS));
3680 #else /* USE_PIO */
3681 lseek(oc->fd,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS),SEEK_SET);
3682 retsize = read(oc->fd,c->block,MFSBLOCKSIZE);
3683 #endif /* USE_PIO */
3684 }
3685 #else /* PRESERVE_BLOCK */
3686 retsize = read(oc->fd,blockbuffer,MFSBLOCKSIZE);
3687 #endif /* PRESERVE_BLOCK */
3688 if (retsize!=MFSBLOCKSIZE) {
3689 hdd_error_occured(oc); // uses and preserves errno !!!
3690 mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data read error",oc->filename);
3691 hdd_io_end(c);
3692 unlink(c->filename);
3693 hdd_chunk_delete(c);
3694 hdd_io_end(oc);
3695 hdd_report_damaged_chunk(chunkid);
3696 hdd_chunk_release(oc);
3697 return ERROR_IO;
3698 }
3699 #ifdef PRESERVE_BLOCK
3700 if (oc->blockno!=block) {
3701 hdd_stats_read(MFSBLOCKSIZE);
3702 }
3703 retsize = write(c->fd,c->block,MFSBLOCKSIZE);
3704 #else /* PRESERVE_BLOCK */
3705 hdd_stats_read(MFSBLOCKSIZE);
3706 retsize = write(c->fd,blockbuffer,MFSBLOCKSIZE);
3707 #endif /* PRESERVE_BLOCK */
3708 if (retsize!=MFSBLOCKSIZE) {
3709 hdd_error_occured(c); // uses and preserves errno !!!
3710 mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data write error",c->filename);
3711 hdd_io_end(c);
3712 unlink(c->filename);
3713 hdd_chunk_delete(c);
3714 hdd_io_end(oc);
3715 hdd_chunk_release(oc);
3716 return ERROR_IO; //write error
3717 }
3718 hdd_stats_write(MFSBLOCKSIZE);
3719 }
3720 block = blocks-1;
3721 #ifdef PRESERVE_BLOCK
3722 if (oc->blockno==block) {
3723 memcpy(c->block,oc->block,blocksize);
3724 retsize = blocksize;
3725 } else {
3726 #ifdef USE_PIO
3727 retsize = pread(oc->fd,c->block,blocksize,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS));
3728 #else /* USE_PIO */
3729 lseek(oc->fd,CHUNKHDRSIZE+(((uint32_t)block)<<MFSBLOCKBITS),SEEK_SET);
3730 retsize = read(oc->fd,c->block,blocksize);
3731 #endif /* USE_PIO */
3732 }
3733 #else /* PRESERVE_BLOCK */
3734 retsize = read(oc->fd,blockbuffer,blocksize);
3735 #endif /* PRESERVE_BLOCK */
3736 if (retsize!=(signed)blocksize) {
3737 hdd_error_occured(oc); // uses and preserves errno !!!
3738 mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data read error",oc->filename);
3739 hdd_io_end(c);
3740 unlink(c->filename);
3741 hdd_chunk_delete(c);
3742 hdd_io_end(oc);
3743 hdd_report_damaged_chunk(chunkid);
3744 hdd_chunk_release(oc);
3745 return ERROR_IO;
3746 }
3747 #ifdef PRESERVE_BLOCK
3748 if (oc->blockno!=block) {
3749 hdd_stats_read(blocksize);
3750 }
3751 memset(c->block+blocksize,0,MFSBLOCKSIZE-blocksize);
3752 retsize = write(c->fd,c->block,MFSBLOCKSIZE);
3753 #else /* PRESERVE_BLOCK */
3754 hdd_stats_read(blocksize);
3755 memset(blockbuffer+blocksize,0,MFSBLOCKSIZE-blocksize);
3756 retsize = write(c->fd,blockbuffer,MFSBLOCKSIZE);
3757 #endif /* PRESERVE_BLOCK */
3758 if (retsize!=MFSBLOCKSIZE) {
3759 hdd_error_occured(c); // uses and preserves errno !!!
3760 mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - data write error",c->filename);
3761 hdd_io_end(c);
3762 unlink(c->filename);
3763 hdd_chunk_delete(c);
3764 hdd_io_end(oc);
3765 hdd_chunk_release(oc);
3766 return ERROR_IO;
3767 }
3768 hdd_stats_write(MFSBLOCKSIZE);
3769 ptr = hdrbuffer+CHUNKHDRCRC+4*(blocks-1);
3770 #ifdef PRESERVE_BLOCK
3771 crc = mycrc32_zeroexpanded(0,c->block,blocksize,MFSBLOCKSIZE-blocksize);
3772 #else /* PRESERVE_BLOCK */
3773 crc = mycrc32_zeroexpanded(0,blockbuffer,blocksize,MFSBLOCKSIZE-blocksize);
3774 #endif /* PRESERVE_BLOCK */
3775 put32bit(&ptr,crc);
3776 #ifdef PRESERVE_BLOCK
3777 c->blockno = block;
3778 #endif /* PRESERVE_BLOCK */
3779 }
3780 }
3781 // and now write header
3782 memcpy(c->crc,hdrbuffer+1024,4096);
3783 lseek(c->fd,0,SEEK_SET);
3784 if (write(c->fd,hdrbuffer,CHUNKHDRSIZE)!=CHUNKHDRSIZE) {
3785 hdd_error_occured(c); // uses and preserves errno !!!
3786 mfs_arg_errlog_silent(LOG_WARNING,"duptrunc_chunk: file:%s - hdr write error",c->filename);
3787 hdd_io_end(c);
3788 unlink(c->filename);
3789 hdd_chunk_delete(c);
3790 hdd_io_end(oc);
3791 hdd_chunk_release(oc);
3792 return ERROR_IO;
3793 }
3794 hdd_stats_write(CHUNKHDRSIZE);
3795 status = hdd_io_end(oc);
3796 if (status!=STATUS_OK) {
3797 hdd_error_occured(oc); // uses and preserves errno !!!
3798 hdd_io_end(c);
3799 unlink(c->filename);
3800 hdd_chunk_delete(c);
3801 hdd_report_damaged_chunk(chunkid);
3802 hdd_chunk_release(oc);
3803 return status;
3804 }
3805 status = hdd_io_end(c);
3806 if (status!=STATUS_OK) {
3807 hdd_error_occured(c); // uses and preserves errno !!!
3808 unlink(c->filename);
3809 hdd_chunk_delete(c);
3810 hdd_chunk_release(oc);
3811 return status;
3812 }
3813 c->blocks = blocks;
3814 zassert(pthread_mutex_lock(&folderlock));
3815 c->owner->needrefresh = 1;
3816 zassert(pthread_mutex_unlock(&folderlock));
3817 hdd_chunk_release(c);
3818 hdd_chunk_release(oc);
3819 return STATUS_OK;
3820 }
3821
3822 static int hdd_int_delete(uint64_t chunkid,uint32_t version) {
3823 chunk *c;
3824 c = hdd_chunk_find(chunkid);
3825 if (c==NULL) {
3826 return ERROR_NOCHUNK;
3827 }
3828 if (c->version!=version && version>0) {
3829 hdd_chunk_release(c);
3830 return ERROR_WRONGVERSION;
3831 }
3832 if (unlink(c->filename)<0) {
3833 if (errno!=ENOENT) {
3834 hdd_error_occured(c); // uses and preserves errno !!!
3835 mfs_arg_errlog_silent(LOG_WARNING,"delete_chunk: file:%s - unlink error",c->filename);
3836 hdd_chunk_release(c);
3837 return ERROR_IO;
3838 } else {
3839 mfs_arg_errlog_silent(LOG_WARNING,"delete_chunk: file:%s - chunk already deleted !!!",c->filename);
3840 }
3841 } else {
3842 zassert(pthread_mutex_lock(&folderlock));
3843 c->owner->needrefresh = 1;
3844 zassert(pthread_mutex_unlock(&folderlock));
3845 }
3846 hdd_chunk_delete(c);
3847 return STATUS_OK;
3848 }
3849
3850 /* all chunk operations in one call */
3851 // newversion>0 && length==0xFFFFFFFF && copychunkid==0 -> change version
3852 // newversion>0 && length==0xFFFFFFFF && copycnunkid>0 -> duplicate
3853 // newversion>0 && length<=MFSCHUNKSIZE && copychunkid==0 -> truncate
3854 // newversion>0 && length<=MFSCHUNKSIZE && copychunkid>0 -> duplicate and truncate
3855 // newversion==0 && length==0 -> delete
3856 // newversion==0 && length==1 -> create
3857 // newversion==0 && length==2 -> check chunk contents
3858 int hdd_chunkop(uint64_t chunkid,uint32_t version,uint32_t newversion,uint64_t copychunkid,uint32_t copyversion,uint32_t length) {
3859 zassert(pthread_mutex_lock(&statslock));
3860 if (newversion>0) {
3861 if (length==0xFFFFFFFF) {
3862 if (copychunkid==0) {
3863 stats_version++;
3864 } else {
3865 stats_duplicate++;
3866 }
3867 } else if (length<=MFSCHUNKSIZE) {
3868 if (copychunkid==0) {
3869 stats_truncate++;
3870 } else {
3871 stats_duptrunc++;
3872 }
3873 }
3874 } else {
3875 if (length==0) {
3876 stats_delete++;
3877 } else if (length==1) {
3878 stats_create++;
3879 } else if (length==2) {
3880 stats_test++;
3881 }
3882 }
3883 zassert(pthread_mutex_unlock(&statslock));
3884 if (newversion>0) {
3885 if (length==0xFFFFFFFF) {
3886 if (copychunkid==0) {
3887 return hdd_int_version(chunkid,version,newversion);
3888 } else {
3889 return hdd_int_duplicate(chunkid,version,newversion,copychunkid,copyversion);
3890 }
3891 } else if (length<=MFSCHUNKSIZE) {
3892 if (copychunkid==0) {
3893 return hdd_int_truncate(chunkid,version,newversion,length);
3894 } else {
3895 return hdd_int_duptrunc(chunkid,version,newversion,copychunkid,copyversion,length);
3896 }
3897 } else {
3898 return ERROR_EINVAL;
3899 }
3900 } else {
3901 if (length==0) {
3902 return hdd_int_delete(chunkid,version);
3903 } else if (length==1) {
3904 return hdd_int_create(chunkid,version);
3905 } else if (length==2) {
3906 return hdd_int_test(chunkid,version);
3907 } else {
3908 return ERROR_EINVAL;
3909 }
3910 }
3911 }
3912
3913 chunk* hdd_random_chunk(folder *f) {
3914 uint32_t try;
3915 uint32_t pos;
3916 chunk *c;
3917 zassert(pthread_mutex_lock(&folderlock));
3918 zassert(pthread_mutex_lock(&hashlock));
3919 if (f->chunkcount>0) {
3920 for (try=0 ; try<RANDOM_CHUNK_RETRIES ; try++) {
3921 pos = rndu32_ranged(f->chunkcount);
3922 if (f->chunktab[pos]->state==CH_AVAIL) {
3923 c = f->chunktab[pos];
3924 c->state = CH_LOCKED;
3925 zassert(pthread_mutex_unlock(&hashlock));
3926 zassert(pthread_mutex_unlock(&folderlock));
3927 if (c->validattr==0) {
3928 if (hdd_chunk_getattr(c)) {
3929 hdd_report_damaged_chunk(c->chunkid);
3930 unlink(c->filename);
3931 hdd_chunk_delete(c);
3932 } else {
3933 return c;
3934 }
3935 } else {
3936 return c;
3937 }
3938 zassert(pthread_mutex_lock(&folderlock));
3939 zassert(pthread_mutex_lock(&hashlock));
3940 }
3941 }
3942 }
3943 zassert(pthread_mutex_unlock(&hashlock));
3944 zassert(pthread_mutex_unlock(&folderlock));
3945 return NULL;
3946 }
3947
3948 int hdd_int_move(folder *fsrc,folder *fdst) {
3949 uint8_t *wptr;
3950 const uint8_t *rptr;
3951 uint16_t block;
3952 uint32_t bcrc;
3953 int32_t retsize;
3954 int status;
3955 int error;
3956 char *tmp_filename;
3957 char *new_filename;
3958 uint32_t leng;
3959 int new_fd;
3960 chunk *c;
3961 uint64_t ts,te;
3962 #ifdef PRESERVE_BLOCK
3963 uint8_t hdrbuffer[CHUNKHDRSIZE];
3964 #else /* PRESERVE_BLOCK */
3965 uint8_t *blockbuffer,*hdrbuffer;
3966 blockbuffer = pthread_getspecific(blockbufferkey);
3967 if (blockbuffer==NULL) {
3968 # ifdef MMAP_ALLOC
3969 blockbuffer = mmap(NULL,MFSBLOCKSIZE,PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,-1,0);
3970 # else
3971 blockbuffer = malloc(MFSBLOCKSIZE);
3972 # endif
3973 passert(blockbuffer);
3974 zassert(pthread_setspecific(blockbufferkey,blockbuffer));
3975 }
3976 hdrbuffer = pthread_getspecific(hdrbufferkey);
3977 if (hdrbuffer==NULL) {
3978 hdrbuffer = malloc(CHUNKHDRSIZE);
3979 passert(hdrbuffer);
3980 zassert(pthread_setspecific(hdrbufferkey,hdrbuffer));
3981 }
3982 #endif /* PRESERVE_BLOCK */
3983
3984 c = hdd_random_chunk(fsrc);
3985 if (c==NULL) {
3986 syslog(LOG_NOTICE,"move chunk %s -> %s (can't find valid chunk to move)",fsrc->path,fdst->path);
3987 return ERROR_NOCHUNK;
3988 }
3989 syslog(LOG_NOTICE,"move chunk %s -> %s (chunk: %016"PRIX64"_%08"PRIX32")",fsrc->path,fdst->path,c->chunkid,c->version);
3990 status = hdd_io_begin(c,0);
3991 if (status!=STATUS_OK) {
3992 hdd_error_occured(c);
3993 hdd_report_damaged_chunk(c->chunkid);
3994 hdd_chunk_release(c);
3995 return status;
3996 }
3997
3998 /* create tmp file name */
3999 leng = strlen(fdst->path);
4000 tmp_filename = malloc(leng+7);
4001 passert(tmp_filename);
4002 memcpy(tmp_filename,fdst->path,leng);
4003 memcpy(tmp_filename+leng,"reptmp",7);
4004
4005 /* create new file */
4006 new_fd = open(tmp_filename,O_RDWR | O_TRUNC | O_CREAT,0666);
4007 if (new_fd<0) {
4008 mfs_arg_errlog_silent(LOG_WARNING,"move_chunk: file:%s - hdr open error",tmp_filename);
4009 hdd_io_end(c);
4010 hdd_chunk_release(c);
4011 free(tmp_filename);
4012 return ERROR_IO;
4013 }
4014
4015 memset(hdrbuffer,0,CHUNKHDRSIZE);
4016 memcpy(hdrbuffer,MFSSIGNATURE "C 1.0",8);
4017 wptr = hdrbuffer+8;
4018 put64bit(&wptr,c->chunkid);
4019 put32bit(&wptr,c->version);
4020 memcpy(hdrbuffer+1024,c->crc,4096);
4021 if (write(new_fd,hdrbuffer,CHUNKHDRSIZE)!=CHUNKHDRSIZE) {
4022 mfs_arg_errlog_silent(LOG_WARNING,"move_chunk: file:%s - hdr write error",tmp_filename);
4023 close(new_fd);
4024 unlink(tmp_filename);
4025 hdd_io_end(c);
4026 hdd_chunk_release(c);
4027 free(tmp_filename);
4028 return ERROR_IO;
4029 }
4030 hdd_stats_write(CHUNKHDRSIZE);
4031 lseek(c->fd,CHUNKHDRSIZE,SEEK_SET);
4032 rptr = c->crc;
4033 for (block=0 ; block<c->blocks ; block++) {
4034 ts = monotonic_nseconds();
4035 #ifdef PRESERVE_BLOCK
4036 retsize = read(c->fd,c->block,MFSBLOCKSIZE);
4037 #else /* PRESERVE_BLOCK */
4038 retsize = read(c->fd,blockbuffer,MFSBLOCKSIZE);
4039 #endif /* PRESERVE_BLOCK */
4040 error = errno;
4041 te = monotonic_nseconds();
4042 hdd_stats_dataread(fsrc,MFSBLOCKSIZE,te-ts);
4043 if (retsize!=MFSBLOCKSIZE) {
4044 errno = error;
4045 hdd_error_occured(c); // uses and preserves errno !!!
4046 mfs_arg_errlog_silent(LOG_WARNING,"move_chunk: file:%s - data read error",c->filename);
4047 close(new_fd);
4048 unlink(tmp_filename);
4049 hdd_io_end(c);
4050 hdd_report_damaged_chunk(c->chunkid);
4051 hdd_chunk_release(c);
4052 free(tmp_filename);
4053 return ERROR_IO;
4054 }
4055 hdd_stats_read(MFSBLOCKSIZE);
4056 #ifdef PRESERVE_BLOCK
4057 c->blockno = block;
4058 #endif
4059 bcrc = get32bit(&rptr);
4060 #ifdef PRESERVE_BLOCK
4061 if (bcrc!=mycrc32(0,c->block,MFSBLOCKSIZE)) {
4062 #else /* PRESERVE_BLOCK */
4063 if (bcrc!=mycrc32(0,blockbuffer,MFSBLOCKSIZE)) {
4064 #endif /* PRESERVE_BLOCK */
4065 errno = 0; // set anything to errno
4066 hdd_error_occured(c); // uses and preserves errno !!!
4067 syslog(LOG_WARNING,"move_chunk: file:%s - crc error",c->filename);
4068 close(new_fd);
4069 unlink(tmp_filename);
4070 hdd_io_end(c);
4071 hdd_chunk_release(c);
4072 free(tmp_filename);
4073 return ERROR_CRC;
4074 }
4075 ts = monotonic_nseconds();
4076 #ifdef PRESERVE_BLOCK
4077 retsize = write(new_fd,c->block,MFSBLOCKSIZE);
4078 #else /* PRESERVE_BLOCK */
4079 retsize = write(new_fd,blockbuffer,MFSBLOCKSIZE);
4080 #endif /* PRESERVE_BLOCK */
4081 te = monotonic_nseconds();
4082 hdd_stats_datawrite(fdst,MFSBLOCKSIZE,te-ts);
4083 if (retsize!=MFSBLOCKSIZE) {
4084 mfs_arg_errlog_silent(LOG_WARNING,"move_chunk: file:%s - data write error",tmp_filename);
4085 close(new_fd);
4086 unlink(tmp_filename);
4087 hdd_io_end(c);
4088 hdd_chunk_release(c);
4089 free(tmp_filename);
4090 return ERROR_IO; //write error
4091 }
4092 hdd_stats_write(MFSBLOCKSIZE);
4093 }
4094 status = hdd_io_end(c);
4095 if (status!=STATUS_OK) {
4096 hdd_error_occured(c); // uses and preserves errno !!!
4097 close(new_fd);
4098 unlink(tmp_filename);
4099 hdd_report_damaged_chunk(c->chunkid);
4100 hdd_chunk_release(c);
4101 free(tmp_filename);
4102 return status;
4103 }
4104
4105 /* create new file name */
4106 new_filename = malloc(leng+39);
4107 passert(new_filename);
4108 memcpy(new_filename,fdst->path,leng);
4109 sprintf(new_filename+leng,"%02X/chunk_%016"PRIX64"_%08"PRIX32".mfs",(unsigned int)(c->chunkid&255),c->chunkid,c->version);
4110
4111 if (rename(tmp_filename,new_filename)<0) {
4112 mfs_arg_errlog_silent(LOG_WARNING,"move_chunk: file:%s->%s - rename error",tmp_filename,new_filename);
4113 close(new_fd);
4114 unlink(tmp_filename);
4115 hdd_chunk_release(c);
4116 free(tmp_filename);
4117 free(new_filename);
4118 return ERROR_IO;
4119 }
4120
4121 if (c->fd>=0) {
4122 close(c->fd);
4123 c->fd = new_fd;
4124 } else {
4125 close(new_fd);
4126 }
4127
4128 unlink(c->filename);
4129 free(c->filename);
4130 free(tmp_filename);
4131 c->filename = new_filename;
4132 zassert(pthread_mutex_lock(&folderlock));
4133 fsrc->needrefresh = 1;
4134 fdst->needrefresh = 1;
4135 hdd_remove_chunk_from_folder(c,fsrc);
4136 hdd_add_chunk_to_folder(c,fdst);
4137 zassert(pthread_mutex_unlock(&folderlock));
4138 zassert(pthread_mutex_lock(&testlock));
4139 hdd_remove_chunk_from_test_chain(c,fsrc);
4140 hdd_add_chunk_to_test_chain(c,fdst);
4141 zassert(pthread_mutex_unlock(&testlock));
4142 hdd_chunk_release(c);
4143 return STATUS_OK;
4144 }
4145
4146 void* hdd_rebalance_thread(void *arg) {
4147 folder *f,*fdst,*fsrc;
4148 double aboveminerr,belowminerr,err,expdist;
4149 double usage;
4150 double avgusage;
4151 double rebalancediff;
4152 uint32_t avgcount;
4153 uint32_t belowcnt;
4154 uint32_t abovecnt;
4155 uint64_t belowsum;
4156 uint64_t abovesum;
4157 uint8_t changed;
4158 uint8_t rebalance_servers;
4159 uint8_t rebalance_is_on;
4160 double rebalance_finished;
4161 double monotonic_time;
4162 uint32_t perc;
4163 uint64_t st,en;
4164
4165 rebalance_is_on = 0;
4166 rebalance_finished = 0;
4167 for (;;) {
4168 zassert(pthread_mutex_lock(&testlock));
4169 perc = HDDRebalancePerc;
4170 zassert(pthread_mutex_unlock(&testlock));
4171 zassert(pthread_mutex_lock(&termlock));
4172 if (term) {
4173 zassert(pthread_mutex_unlock(&termlock));
4174 return arg;
4175 }
4176 zassert(pthread_mutex_unlock(&termlock));
4177
4178 monotonic_time = monotonic_seconds();
4179 zassert(pthread_mutex_lock(&folderlock));
4180 if (folderactions==0 || (rebalance_finished + 60.0) > monotonic_time || perc==0) {
4181 zassert(pthread_mutex_unlock(&folderlock));
4182 sleep(1);
4183 continue;
4184 }
4185 // check REBALANCE_FORCE_SRC and REBALANCE_FORCE_DST
4186 abovecnt = 0;
4187 belowcnt = 0;
4188 avgcount = 0;
4189 changed = 0;
4190 for (f=folderhead ; f ; f=f->next) {
4191 if (f->damaged==0 && f->toremove==0 && f->todel==0 && f->scanstate==SCST_WORKING && f->total>0) {
4192 // if (f->needrefresh || (f->lastrefresh<monotonic_time && rebalance_is_on)) {
4193 if (f->needrefresh || rebalance_is_on) {
4194 hdd_refresh_usage(f);
4195 f->needrefresh = 0;
4196 f->lastrefresh = monotonic_time;
4197 changed = 1;
4198 }
4199 if (f->balancemode==REBALANCE_FORCE_SRC) {
4200 abovecnt++;
4201 } else if (f->balancemode==REBALANCE_FORCE_DST) {
4202 belowcnt++;
4203 } else {
4204 avgcount++;
4205 }
4206 }
4207 f->tmpbalancemode = REBALANCE_NONE;
4208 }
4209 rebalance_servers = 0;
4210 if ((abovecnt>0 && (belowcnt+avgcount)>0) || (belowcnt>0 && (abovecnt+avgcount)>0)) { // force data movement
4211 for (f=folderhead ; f ; f=f->next) {
4212 if (f->damaged==0 && f->toremove==0 && f->todel==0 && f->scanstate==SCST_WORKING && f->total>0) {
4213 usage = f->total-f->avail;
4214 usage /= f->total;
4215 if (abovecnt==0) {
4216 if (f->balancemode==REBALANCE_FORCE_DST && usage<REBALANCE_DST_MAX_USAGE) {
4217 f->tmpbalancemode = REBALANCE_DST;
4218 rebalance_servers |= 1;
4219 } else if (f->chunkcount>0) {
4220 f->tmpbalancemode = REBALANCE_SRC;
4221 rebalance_servers |= 2;
4222 }
4223 } else if (belowcnt==0) {
4224 if (f->balancemode==REBALANCE_FORCE_SRC && f->chunkcount>0) {
4225 f->tmpbalancemode = REBALANCE_SRC;
4226 rebalance_servers |= 2;
4227 } else if (usage<REBALANCE_DST_MAX_USAGE) {
4228 f->tmpbalancemode = REBALANCE_DST;
4229 rebalance_servers |= 1;
4230 }
4231 } else {
4232 if (f->balancemode==REBALANCE_FORCE_DST && usage<REBALANCE_DST_MAX_USAGE) {
4233 f->tmpbalancemode = REBALANCE_DST;
4234 rebalance_servers |= 1;
4235 } else if (f->balancemode==REBALANCE_FORCE_SRC && f->chunkcount>0) {
4236 f->tmpbalancemode = REBALANCE_SRC;
4237 rebalance_servers |= 2;
4238 }
4239 }
4240 }
4241 }
4242 } else { // usage rebalance
4243 rebalancediff = REBALANCE_DIFF_MAX;
4244 if (rebalance_is_on) {
4245 rebalancediff /= 2.0;
4246 }
4247 avgusage = 0.0;
4248 avgcount = 0;
4249 for (f=folderhead ; f ; f=f->next) {
4250 if (f->damaged==0 && f->toremove==0 && f->todel==0 && f->scanstate==SCST_WORKING && f->total>REBALANCE_TOTAL_MIN) {
4251 usage = f->total-f->avail;
4252 usage /= f->total;
4253 avgusage += usage;
4254 avgcount++;
4255 }
4256 }
4257 if (avgcount>0) {
4258 avgusage /= avgcount;
4259 belowcnt = 0;
4260 belowsum = 0;
4261 abovecnt = 0;
4262 abovesum = 0;
4263 for (f=folderhead ; f ; f=f->next) {
4264 if (f->damaged==0 && f->toremove==0 && f->todel==0 && f->scanstate==SCST_WORKING && f->total>REBALANCE_TOTAL_MIN) {
4265 usage = f->total-f->avail;
4266 usage /= f->total;
4267 if (usage < avgusage - rebalancediff) {
4268 belowcnt++;
4269 belowsum+=f->total;
4270 } else if (usage > avgusage + rebalancediff) {
4271 abovecnt++;
4272 abovesum+=f->total;
4273 }
4274 }
4275 }
4276 if (abovecnt>0 || belowcnt>0) {
4277 for (f=folderhead ; f ; f=f->next) {
4278 if (f->damaged==0 && f->toremove==0 && f->todel==0 && f->scanstate==SCST_WORKING && f->total>REBALANCE_TOTAL_MIN) {
4279 usage = f->total-f->avail;
4280 usage /= f->total;
4281 if ((((usage < avgusage - rebalancediff) && belowcnt>0) || ((usage <= avgusage + rebalancediff) && belowcnt==0)) && usage<REBALANCE_DST_MAX_USAGE) {
4282 f->tmpbalancemode = REBALANCE_DST;
4283 rebalance_servers |= 1;
4284 } else if ((((usage > avgusage + rebalancediff) && abovecnt>0) || ((usage >= avgusage - rebalancediff) && abovecnt==0)) && f->chunkcount>0) {
4285 f->tmpbalancemode = REBALANCE_SRC;
4286 rebalance_servers |= 2;
4287 }
4288 }
4289 }
4290 }
4291 }
4292 }
4293 fdst = NULL;
4294 fsrc = NULL;
4295 if (rebalance_servers==3) {
4296 belowcnt = 0;
4297 belowsum = 0;
4298 abovecnt = 0;
4299 abovesum = 0;
4300 for (f=folderhead ; f ; f=f->next) {
4301 if (f->tmpbalancemode == REBALANCE_DST) {
4302 belowcnt++;
4303 belowsum+=f->total;
4304 } else if (f->tmpbalancemode == REBALANCE_SRC) {
4305 abovecnt++;
4306 abovesum+=f->total;
4307 }
4308 }
4309 aboveminerr = 0.0;
4310 belowminerr = 0.0;
4311 for (f=folderhead ; f ; f=f->next) {
4312 if (f->tmpbalancemode == REBALANCE_DST) {
4313 f->write_dist++;
4314 if (f->write_first) {
4315 err = 1.0;
4316 } else {
4317 expdist = belowsum;
4318 expdist /= f->total;
4319 err = (expdist + f->write_corr) / f->write_dist;
4320 }
4321 if (fdst==NULL || err<belowminerr) {
4322 belowminerr = err;
4323 fdst = f;
4324 }
4325 } else if (f->tmpbalancemode == REBALANCE_SRC) {
4326 f->read_dist++;
4327 if (f->read_first) {
4328 err = 1.0;
4329 } else {
4330 expdist = abovesum;
4331 expdist /= f->total;
4332 err = (expdist + f->read_corr) / f->read_dist;
4333 }
4334 if (fsrc==NULL || err<aboveminerr) {
4335 aboveminerr = err;
4336 fsrc = f;
4337 }
4338 }
4339 }
4340 }
4341 if (fdst && fsrc) {
4342 // syslog(LOG_NOTICE,"debug: move %s -> %s",fsrc->path,fdst->path);
4343 if (fsrc->read_first) {
4344 fsrc->read_first = 0;
4345 } else {
4346 expdist = abovesum;
4347 expdist /= fsrc->total;
4348 fsrc->read_corr += expdist - fsrc->read_dist;
4349 }
4350 fsrc->read_dist = 0;
4351 if (fdst->write_first) {
4352 fdst->write_first = 0;
4353 } else {
4354 expdist = belowsum;
4355 expdist /= fdst->total;
4356 fdst->write_corr += expdist - fdst->write_dist;
4357 }
4358 fdst->write_dist = 0;
4359 fsrc->rebalance_in_progress = 1;
4360 fdst->rebalance_in_progress = 1;
4361 zassert(pthread_mutex_unlock(&folderlock));
4362 if (changed) {
4363 zassert(pthread_mutex_lock(&dclock));
4364 hddspacechanged = 1;
4365 zassert(pthread_mutex_unlock(&dclock));
4366 }
4367 st = monotonic_useconds();
4368 (void)hdd_int_move(fsrc,fdst);
4369 en = monotonic_useconds();
4370 zassert(pthread_mutex_lock(&folderlock));
4371 fsrc->rebalance_in_progress = 0;
4372 fdst->rebalance_in_progress = 0;
4373 fdst->rebalance_last_usec = en;
4374 zassert(pthread_mutex_unlock(&folderlock));
4375 rebalance_is_on = 1;
4376 if (perc<100 && en>st) {
4377 en -= st;
4378 st = en;
4379 en *= 100;
4380 en /= perc;
4381 en -= st;
4382 if (en>0) {
4383 portable_usleep(en);
4384 }
4385 }
4386 } else {
4387 zassert(pthread_mutex_unlock(&folderlock));
4388 if (changed) {
4389 zassert(pthread_mutex_lock(&dclock));
4390 hddspacechanged = 1;
4391 zassert(pthread_mutex_unlock(&dclock));
4392 }
4393 if (rebalance_is_on) {
4394 zassert(pthread_mutex_lock(&folderlock));
4395 for (f=folderhead ; f ; f=f->next) {
4396 f->read_dist = 0;
4397 f->read_first = 1;
4398 f->read_corr = 0.0;
4399 }
4400 zassert(pthread_mutex_unlock(&folderlock));
4401 rebalance_finished = monotonic_time;
4402 }
4403 rebalance_is_on = 0;
4404 sleep(1);
4405 }
4406 }
4407 return arg;
4408 }
4409
4410 void* hdd_tester_thread(void* arg) {
4411 folder *f,*of;
4412 chunk *c;
4413 uint64_t chunkid;
4414 uint32_t version;
4415 uint32_t freq;
4416 uint32_t cnt;
4417 uint64_t st,en;
4418 char *path;
4419
4420 f = folderhead;
4421 freq = HDDTestFreq;
4422 cnt = 0;
4423 for (;;) {
4424 st = monotonic_useconds();
4425 path = NULL;
4426 chunkid = 0;
4427 version = 0;
4428 zassert(pthread_mutex_lock(&folderlock));
4429 zassert(pthread_mutex_lock(&hashlock));
4430 zassert(pthread_mutex_lock(&testlock));
4431 if (testerreset) {
4432 testerreset = 0;
4433 f = folderhead;
4434 freq = HDDTestFreq;
4435 cnt = 0;
4436 }
4437 cnt++;
4438 if (cnt<freq || freq==0 || folderactions==0 || folderhead==NULL) {
4439 path = NULL;
4440 } else {
4441 cnt = 0;
4442 of = f;
4443 do {
4444 f = f->next;
4445 if (f==NULL) {
4446 f = folderhead;
4447 }
4448 } while ((f->damaged || f->todel || f->toremove || f->scanstate!=SCST_WORKING) && of!=f);
4449 if (of==f && (f->damaged || f->todel || f->toremove || f->scanstate!=SCST_WORKING)) { // all folders are unavailable
4450 path = NULL;
4451 } else {
4452 c = f->testhead;
4453 if (c && c->state==CH_AVAIL) {
4454 chunkid = c->chunkid;
4455 version = c->version;
4456 path = strdup(c->filename);
4457 passert(path);
4458 }
4459 }
4460 }
4461 zassert(pthread_mutex_unlock(&testlock));
4462 zassert(pthread_mutex_unlock(&hashlock));
4463 zassert(pthread_mutex_unlock(&folderlock));
4464 if (path) {
4465 // syslog(LOG_NOTICE,"testing chunk: %s",path);
4466 (void)hdd_int_test(chunkid,version); // ignore status here - hdd_int_test on error does everything itself
4467 free(path);
4468 }
4469 zassert(pthread_mutex_lock(&termlock));
4470 if (term) {
4471 zassert(pthread_mutex_unlock(&termlock));
4472 return arg;
4473 }
4474 zassert(pthread_mutex_unlock(&termlock));
4475 en = monotonic_useconds();
4476 if (en>st) {
4477 en-=st;
4478 if (en<1000000) {
4479 portable_usleep(1000000-en);
4480 }
4481 }
4482 }
4483 return arg;
4484 }
4485
4486 void hdd_testshuffle(folder *f) {
4487 uint32_t i,j,chunksno;
4488 chunk **csorttab,*c;
4489 zassert(pthread_mutex_lock(&testlock));
4490 chunksno = 0;
4491 for (c=f->testhead ; c ; c=c->testnext) {
4492 chunksno++;
4493 }
4494 if (chunksno>0) {
4495 csorttab = malloc(sizeof(chunk*)*chunksno);
4496 passert(csorttab);
4497 chunksno = 0;
4498 for (c=f->testhead ; c ; c=c->testnext) {
4499 csorttab[chunksno++] = c;
4500 }
4501 if (chunksno>1) {
4502 for (i=0 ; i<chunksno-1 ; i++) {
4503 j = i+rndu32_ranged(chunksno-i);
4504 if (j!=i) {
4505 c = csorttab[i];
4506 csorttab[i] = csorttab[j];
4507 csorttab[j] = c;
4508 }
4509 }
4510 }
4511 } else {
4512 csorttab = NULL;
4513 }
4514 f->testhead = NULL;
4515 f->testtail = &(f->testhead);
4516 for (i=0 ; i<chunksno ; i++) {
4517 c = csorttab[i];
4518 c->testnext = NULL;
4519 c->testprev = f->testtail;
4520 *(c->testprev) = c;
4521 f->testtail = &(c->testnext);
4522 }
4523 if (csorttab) {
4524 free(csorttab);
4525 }
4526 zassert(pthread_mutex_unlock(&testlock));
4527 }
4528
4529 /*
4530 int hdd_testcompare(const void *a,const void *b) {
4531 chunk const* *aa = (chunk const* *)a;
4532 chunk const* *bb = (chunk const* *)b;
4533 return (**aa).testtime - (**bb).testtime;
4534 }
4535
4536 void hdd_testsort(folder *f) {
4537 uint32_t i,chunksno;
4538 chunk **csorttab,*c;
4539 zassert(pthread_mutex_lock(&testlock));
4540 chunksno = 0;
4541 for (c=f->testhead ; c ; c=c->testnext) {
4542 chunksno++;
4543 }
4544 if (chunksno>0) {
4545 csorttab = malloc(sizeof(chunk*)*chunksno);
4546 passert(csorttab);
4547 chunksno = 0;
4548 for (c=f->testhead ; c ; c=c->testnext) {
4549 csorttab[chunksno++] = c;
4550 }
4551 qsort(csorttab,chunksno,sizeof(chunk*),hdd_testcompare);
4552 } else {
4553 csorttab = NULL;
4554 }
4555 f->testhead = NULL;
4556 f->testtail = &(f->testhead);
4557 for (i=0 ; i<chunksno ; i++) {
4558 c = csorttab[i];
4559 c->testnext = NULL;
4560 c->testprev = f->testtail;
4561 *(c->testprev) = c;
4562 f->testtail = &(c->testnext);
4563 }
4564 if (csorttab) {
4565 free(csorttab);
4566 }
4567 zassert(pthread_mutex_unlock(&testlock));
4568 }
4569 */
4570
4571 /* initialization */
4572
/*
 * Reads exactly 'digits' hexadecimal digits (only 0-9 and upper case A-F are accepted)
 * from 'str' and stores the number in '*value'.
 * Returns 0 on success, -1 on first wrong character ('*value' is not changed then).
 */
static inline int hdd_filename_hexfield(const char *str,uint32_t digits,uint64_t *value) {
	uint64_t acc;
	uint32_t pos;
	char ch;

	acc = 0;
	for (pos=0 ; pos<digits ; pos++) {
		ch = str[pos];
		if (ch>='0' && ch<='9') {
			acc = (acc<<4) | (uint64_t)(ch-'0');
		} else if (ch>='A' && ch<='F') {
			acc = (acc<<4) | (uint64_t)(ch-'A'+10);
		} else {
			return -1;
		}
	}
	*value = acc;
	return 0;
}

/*
 * Checks if 'fname' has the form "chunk_XXXXXXXXXXXXXXXX_YYYYYYYY.mfs"
 * (16 hex digits of chunk id, 8 hex digits of version, upper case only).
 * Returns 0 and fills '*chunkid' and '*version' when name is correct,
 * returns -1 otherwise (outputs are not modified then).
 * Fields are checked from left to right, so characters after the first
 * mismatch (including end of string) are never read.
 */
static inline int hdd_check_filename(const char *fname,uint64_t *chunkid,uint32_t *version) {
	uint64_t idfield,versionfield;

	if (strncmp(fname,"chunk_",6)!=0) {
		return -1;
	}
	if (hdd_filename_hexfield(fname+6,16,&idfield)<0) {
		return -1;
	}
	if (fname[22]!='_') {
		return -1;
	}
	if (hdd_filename_hexfield(fname+23,8,&versionfield)<0) {
		return -1;
	}
	if (strcmp(fname+31,".mfs")!=0) {
		return -1;
	}
	*chunkid = idfield;
	*version = (uint32_t)versionfield;
	return 0;
}
4618
4619 static inline void hdd_add_chunk(folder *f,const char *fullname,uint64_t chunkid,uint32_t version,uint8_t todel) {
4620 struct stat sb;
4621 folder *prevf,*currf;
4622 chunk *c;
4623 uint16_t blocks;
4624 uint8_t validattr;
4625
4626 if (f->sizelimit) {
4627 if (stat(fullname,&sb)<0) {
4628 if (f->todel<2) {
4629 unlink(fullname);
4630 }
4631 return;
4632 }
4633 if ((sb.st_mode & S_IFMT) != S_IFREG) {
4634 mfs_arg_syslog(LOG_WARNING,"%s: is not regular file",fullname);
4635 return;
4636 }
4637 if (sb.st_size<CHUNKHDRSIZE || sb.st_size>(CHUNKHDRSIZE+MFSCHUNKSIZE) || ((sb.st_size-CHUNKHDRSIZE)&MFSBLOCKMASK)!=0) {
4638 if (f->todel<2) {
4639 unlink(fullname); // remove wrong chunk
4640 }
4641 return;
4642 }
4643 blocks = (sb.st_size - CHUNKHDRSIZE) / MFSBLOCKSIZE;
4644 validattr = 1;
4645 } else {
4646 blocks = 0;
4647 validattr = 0;
4648 }
4649 prevf = NULL;
4650 currf = f;
4651 c = hdd_chunk_get(chunkid,CH_NEW_AUTO);
4652 if (c==NULL) { // already have this chunk, but with error state - try, to create new one
4653 c = hdd_chunk_get(chunkid,CH_NEW_AUTO);
4654 if (c==NULL) {
4655 mfs_arg_syslog(LOG_WARNING,"can't create chunk record for file: %s",fullname);
4656 return;
4657 }
4658 }
4659 if (c->filename!=NULL) { // already have this chunk
4660 if (version <= c->version) { // current chunk is older
4661 if (todel<2) { // this is R/W fs?
4662 unlink(fullname); // if yes then remove file
4663 }
4664 currf = NULL;
4665 } else { // current chunk is better, so use it, and clear older one
4666 prevf = c->owner;
4667 if (c->todel<2) { // current chunk is on R/W fs?
4668 unlink(c->filename); // if yes then remove file
4669 }
4670 free(c->filename);
4671 c->filename = strdup(fullname);
4672 passert(c->filename);
4673 c->version = version;
4674 c->blocks = blocks;
4675 c->validattr = validattr;
4676 c->todel = todel;
4677 // c->testtime = (sb.st_atime>sb.st_mtime)?sb.st_atime:sb.st_mtime;
4678 zassert(pthread_mutex_lock(&testlock));
4679 hdd_remove_chunk_from_test_chain(c,prevf);
4680 hdd_add_chunk_to_test_chain(c,currf);
4681 zassert(pthread_mutex_unlock(&testlock));
4682 }
4683 } else {
4684 c->filename = strdup(fullname);
4685 passert(c->filename);
4686 c->version = version;
4687 c->blocks = blocks;
4688 c->validattr = validattr;
4689 c->todel = todel;
4690 // c->testtime = (sb.st_atime>sb.st_mtime)?sb.st_atime:sb.st_mtime;
4691 zassert(pthread_mutex_lock(&testlock));
4692 hdd_add_chunk_to_test_chain(c,currf);
4693 zassert(pthread_mutex_unlock(&testlock));
4694 hdd_report_new_chunk(c->chunkid,c->version|(todel?0x80000000:0));
4695 }
4696 zassert(pthread_mutex_lock(&folderlock));
4697 if (prevf) {
4698 hdd_remove_chunk_from_folder(c,prevf);
4699 }
4700 if (currf) {
4701 hdd_add_chunk_to_folder(c,currf);
4702 }
4703 zassert(pthread_mutex_unlock(&folderlock));
4704 hdd_chunk_release(c);
4705 }
4706
4707 void* hdd_folder_scan(void *arg) {
4708 folder *f = (folder*)arg;
4709 DIR *dd;
4710 struct dirent *de,*destorage;
4711 uint16_t subf;
4712 char *fullname,*oldfullname;
4713 uint16_t plen,oldplen;
4714 uint64_t namechunkid;
4715 uint32_t nameversion;
4716 uint32_t tcheckcnt;
4717 uint8_t scanterm,todel;
4718 // uint8_t progressreportmode;
4719 uint8_t lastperc,currentperc;
4720 uint32_t lasttime,currenttime,begintime;
4721
4722 begintime = time(NULL);
4723
4724 zassert(pthread_mutex_lock(&folderlock));
4725 todel = f->todel;
4726 hdd_refresh_usage(f);
4727 // progressreportmode = wait_for_scan;
4728 zassert(pthread_mutex_unlock(&folderlock));
4729
4730 plen = strlen(f->path);
4731 oldplen = plen;
4732
4733 fullname = malloc(plen+39);
4734 passert(fullname);
4735
4736 memcpy(fullname,f->path,plen);
4737 fullname[plen]='\0';
4738 if (todel==0) {
4739 mkdir(fullname,0755);
4740 }
4741
4742 fullname[plen++]='_';
4743 fullname[plen++]='_';
4744 fullname[plen++]='/';
4745 fullname[plen]='\0';
4746
4747 /* size of name added to size of structure because on some os'es d_name has size of 1 byte */
4748 destorage = (struct dirent*)malloc(sizeof(struct dirent)+pathconf(f->path,_PC_NAME_MAX)+1);
4749 passert(destorage);
4750
4751 scanterm = 0;
4752
4753 zassert(pthread_mutex_lock(&dclock));
4754 hddspacechanged = 1;
4755 zassert(pthread_mutex_unlock(&dclock));
4756
4757 if (todel==0) {
4758 for (subf=0 ; subf<256 ; subf++) {
4759 fullname[plen-3]="0123456789ABCDEF"[subf>>4];
4760 fullname[plen-2]="0123456789ABCDEF"[subf&15];
4761 mkdir(fullname,0755);
4762 }
4763
4764 /* move chunks from "X/name" to "XX/name" */
4765
4766 oldfullname = malloc(oldplen+38);
4767 passert(oldfullname);
4768 memcpy(oldfullname,f->path,oldplen);
4769 oldfullname[oldplen++]='_';
4770 oldfullname[oldplen++]='/';
4771 oldfullname[oldplen]='\0';
4772
4773 for (subf=0 ; subf<16 ; subf++) {
4774 oldfullname[oldplen-2]="0123456789ABCDEF"[subf];
4775 oldfullname[oldplen]='\0';
4776 dd = opendir(oldfullname);
4777 if (dd==NULL) {
4778 continue;
4779 }
4780 while (readdir_r(dd,destorage,&de)==0 && de!=NULL) {
4781 if (hdd_check_filename(de->d_name,&namechunkid,&nameversion)<0) {
4782 continue;
4783 }
4784 memcpy(oldfullname+oldplen,de->d_name,36);
4785 memcpy(fullname+plen,de->d_name,36);
4786 fullname[plen-3]="0123456789ABCDEF"[(namechunkid>>4)&15];
4787 fullname[plen-2]="0123456789ABCDEF"[namechunkid&15];
4788 rename(oldfullname,fullname);
4789 }
4790 oldfullname[oldplen]='\0';
4791 rmdir(oldfullname);
4792 closedir(dd);
4793 }
4794 free(oldfullname);
4795
4796 }
4797 /* scan new file names */
4798
4799 tcheckcnt = 0;
4800 lastperc = 0;
4801 lasttime = time(NULL);
4802 for (subf=0 ; subf<256 && scanterm==0 ; subf++) {
4803 fullname[plen-3]="0123456789ABCDEF"[subf>>4];
4804 fullname[plen-2]="0123456789ABCDEF"[subf&15];
4805 fullname[plen]='\0';
4806 // mkdir(fullname,0755);
4807 dd = opendir(fullname);
4808 if (dd) {
4809 while (readdir_r(dd,destorage,&de)==0 && de!=NULL && scanterm==0) {
4810 //#warning debug
4811 // portable_usleep(100000);
4812 //
4813 if (hdd_check_filename(de->d_name,&namechunkid,&nameversion)<0) {
4814 continue;
4815 }
4816 memcpy(fullname+plen,de->d_name,36);
4817 hdd_add_chunk(f,fullname,namechunkid,nameversion,todel);
4818 tcheckcnt++;
4819 if (tcheckcnt>=1000) {
4820 zassert(pthread_mutex_lock(&folderlock));
4821 if (f->scanstate==SCST_SCANTERMINATE) {
4822 scanterm = 1;
4823 }
4824 zassert(pthread_mutex_unlock(&folderlock));
4825 // portable_usleep(100000); - slow down scanning (also change 1000 in 'if' to something much smaller) - for tests
4826 tcheckcnt = 0;
4827 }
4828 }
4829 closedir(dd);
4830 }
4831 currenttime = time(NULL);
4832 currentperc = ((subf*100.0)/256.0);
4833 if (currentperc>lastperc && currenttime>lasttime) {
4834 lastperc=currentperc;
4835 lasttime=currenttime;
4836 zassert(pthread_mutex_lock(&folderlock));
4837 f->scanprogress = currentperc;
4838 zassert(pthread_mutex_unlock(&folderlock));
4839 zassert(pthread_mutex_lock(&dclock));
4840 hddspacechanged = 1; // report chunk count to master
4841 zassert(pthread_mutex_unlock(&dclock));
4842 syslog(LOG_NOTICE,"scanning folder %s: %"PRIu8"%% (%"PRIu32"s)",f->path,lastperc,currenttime-begintime);
4843 }
4844 }
4845 free(fullname);
4846 free(destorage);
4847 // fprintf(stderr,"hdd space manager: %s: %"PRIu32" chunks found\n",f->path,f->chunkcount);
4848
4849 hdd_testshuffle(f);
4850
4851 zassert(pthread_mutex_lock(&folderlock));
4852 if (f->scanstate==SCST_SCANTERMINATE) {
4853 syslog(LOG_NOTICE,"scanning folder %s: interrupted",f->path);
4854 } else {
4855 syslog(LOG_NOTICE,"scanning folder %s: complete (%"PRIu32"s)",f->path,(uint32_t)(time(NULL))-begintime);
4856 }
4857 f->scanstate = SCST_SCANFINISHED;
4858 f->scanprogress = 100;
4859 zassert(pthread_mutex_unlock(&folderlock));
4860 return NULL;
4861 }
4862
4863 void* hdd_folders_thread(void *arg) {
4864 for (;;) {
4865 hdd_check_folders();
4866 zassert(pthread_mutex_lock(&termlock));
4867 if (term) {
4868 zassert(pthread_mutex_unlock(&termlock));
4869 return arg;
4870 }
4871 zassert(pthread_mutex_unlock(&termlock));
4872 sleep(1);
4873 }
4874 return arg;
4875 }
4876
4877 void* hdd_delayed_thread(void *arg) {
4878 for (;;) {
4879 hdd_delayed_ops();
4880 zassert(pthread_mutex_lock(&termlock));
4881 if (term) {
4882 zassert(pthread_mutex_unlock(&termlock));
4883 return arg;
4884 }
4885 zassert(pthread_mutex_unlock(&termlock));
4886 sleep(DELAYEDSTEP);
4887 }
4888 return arg;
4889 }
4890
#if !defined(PRESERVE_BLOCK) && defined(MMAP_ALLOC)
/* unmaps MFSBLOCKSIZE bytes starting at 'addr' ; compiled only when PRESERVE_BLOCK is not defined and MMAP_ALLOC is defined */
void hdd_blockbuffer_free(void *addr) {
	(void)munmap(addr,MFSBLOCKSIZE);
}
#endif
4898
4899 void hdd_term(void) {
4900 uint32_t i;
4901 folder *f,*fn;
4902 chunk *c,*cn;
4903 dopchunk *dc,*dcn;
4904 cntcond *cc,*ccn;
4905 lostchunk *lc,*lcn;
4906 newchunk *nc,*ncn;
4907 damagedchunk *dmc,*dmcn;
4908
4909 zassert(pthread_mutex_lock(&termlock));
4910 i = term; // if term is non zero here then it means that threads have not been started, so do not join with them
4911 term = 1;
4912 zassert(pthread_mutex_unlock(&termlock));
4913 if (i==0) {
4914 zassert(pthread_join(testerthread,NULL));
4915 zassert(pthread_join(foldersthread,NULL));
4916 zassert(pthread_join(rebalancethread,NULL));
4917 zassert(pthread_join(delayedthread,NULL));
4918 }
4919 zassert(pthread_mutex_lock(&folderlock));
4920 i = 0;
4921 for (f=folderhead ; f ; f=f->next) {
4922 if (f->scanstate==SCST_SCANINPROGRESS) {
4923 f->scanstate = SCST_SCANTERMINATE;
4924 }
4925 if (f->scanstate==SCST_SCANTERMINATE || f->scanstate==SCST_SCANFINISHED) {
4926 i++;
4927 }
4928 }
4929 zassert(pthread_mutex_unlock(&folderlock));
4930 // syslog(LOG_NOTICE,"waiting for scanning threads (%"PRIu32")",i);
4931 while (i>0) {
4932 portable_usleep(10000); // not very elegant solution.
4933 zassert(pthread_mutex_lock(&folderlock));
4934 for (f=folderhead ; f ; f=f->next) {
4935 if (f->scanstate==SCST_SCANFINISHED) {
4936 zassert(pthread_join(f->scanthread,NULL));
4937 f->scanstate = SCST_WORKING; // any state - to prevent calling pthread_join again
4938 i--;
4939 }
4940 }
4941 zassert(pthread_mutex_unlock(&folderlock));
4942 }
4943 for (i=0 ; i<HASHSIZE ; i++) {
4944 for (c=hashtab[i] ; c ; c=cn) {
4945 cn = c->next;
4946 if (c->state==CH_AVAIL) {
4947 if (c->crcchanged) {
4948 syslog(LOG_WARNING,"hdd_term: CRC not flushed - writing now");
4949 if (chunk_writecrc(c)!=STATUS_OK) {
4950 mfs_arg_errlog_silent(LOG_WARNING,"hdd_term: file:%s - write error",c->filename);
4951 }
4952 }
4953 if (c->fd>=0) {
4954 close(c->fd);
4955 hdd_open_files_handle(OF_AFTER_CLOSE);
4956 }
4957 if (c->crc!=NULL) {
4958 #ifdef MMAP_ALLOC
4959 munmap((void*)(c->crc),4096);
4960 #else
4961 free(c->crc);
4962 #endif
4963 }
4964 #ifdef PRESERVE_BLOCK
4965 if (c->block!=NULL) {
4966 # ifdef MMAP_ALLOC
4967 munmap((void*)(c->block),MFSBLOCKSIZE);
4968 # else
4969 free(c->block);
4970 # endif
4971 }
4972 #endif /* PRESERVE_BLOCK */
4973 if (c->filename) {
4974 free(c->filename);
4975 }
4976 free(c);
4977 } else {
4978 syslog(LOG_WARNING,"hdd_term: locked chunk !!!");
4979 }
4980 }
4981 }
4982 for (f=folderhead ; f ; f=fn) {
4983 fn = f->next;
4984 if (f->lfd>=0) {
4985 close(f->lfd);
4986 }
4987 if (f->chunktab) {
4988 free(f->chunktab);
4989 }
4990 free(f->path);
4991 free(f);
4992 }
4993 for (i=0 ; i<DHASHSIZE ; i++) {
4994 for (dc=dophashtab[i] ; dc ; dc=dcn) {
4995 dcn = dc->next;
4996 free(dc);
4997 }
4998 }
4999 for (dc=newdopchunks ; dc ; dc=dcn) {
5000 dcn = dc->next;
5001 free(dc);
5002 }
5003 for (cc=cclist ; cc ; cc=ccn) {
5004 ccn = cc->next;
5005 if (cc->wcnt) {
5006 syslog(LOG_WARNING,"hddspacemgr (atexit): used cond !!!");
5007 } else {
5008 zassert(pthread_cond_destroy(&(cc->cond)));
5009 }
5010 free(cc);
5011 }
5012 for (nc=newchunks ; nc ; nc=ncn) {
5013 ncn = nc->next;
5014 free(nc);
5015 }
5016 for (lc=lostchunks ; lc ; lc=lcn) {
5017 lcn = lc->next;
5018 free(lc);
5019 }
5020 for (dmc=damagedchunks ; dmc ; dmc=dmcn) {
5021 dmcn = dmc->next;
5022 free(dmc);
5023 }
5024 }
5025
5026 int hdd_size_parse(const char *str,uint64_t *ret) {
5027 uint64_t val,frac,fracdiv;
5028 double drval,mult;
5029 int f;
5030 val=0;
5031 frac=0;
5032 fracdiv=1;
5033 f=0;
5034 while (*str>='0' && *str<='9') {
5035 f=1;
5036 val*=10;
5037 val+=(*str-'0');
5038 str++;
5039 }
5040 if (*str=='.') { // accept format ".####" (without 0)
5041 str++;
5042 while (*str>='0' && *str<='9') {
5043 fracdiv*=10;
5044 frac*=10;
5045 frac+=(*str-'0');
5046 str++;
5047 }
5048 if (fracdiv==1) { // if there was '.' expect number afterwards
5049 return -1;
5050 }
5051 } else if (f==0) { // but not empty string
5052 return -1;
5053 }
5054 if (str[0]=='\0' || (str[0]=='B' && str[1]=='\0')) {
5055 mult=1.0;
5056 } else if (str[0]!='\0' && (str[1]=='\0' || (str[1]=='B' && str[2]=='\0'))) {
5057 switch(str[0]) {
5058 case 'k':
5059 mult=1e3;
5060 break;
5061 case 'M':
5062 mult=1e6;
5063 break;
5064 case 'G':
5065 mult=1e9;
5066 break;
5067 case 'T':
5068 mult=1e12;
5069 break;
5070 case 'P':
5071 mult=1e15;
5072 break;
5073 case 'E':
5074 mult=1e18;
5075 break;
5076 default:
5077 return -1;
5078 }
5079 } else if (str[0]!='\0' && str[1]=='i' && (str[2]=='\0' || (str[2]=='B' && str[3]=='\0'))) {
5080 switch(str[0]) {
5081 case 'K':
5082 mult=1024.0;
5083 break;
5084 case 'M':
5085 mult=1048576.0;
5086 break;
5087 case 'G':
5088 mult=1073741824.0;
5089 break;
5090 case 'T':
5091 mult=1099511627776.0;
5092 break;
5093 case 'P':
5094 mult=1125899906842624.0;
5095 break;
5096 case 'E':
5097 mult=1152921504606846976.0;
5098 break;
5099 default:
5100 return -1;
5101 }
5102 } else {
5103 return -1;
5104 }
5105 drval = round(((double)frac/(double)fracdiv+(double)val)*mult);
5106 if (drval>18446744073709551615.0) {
5107 return -2;
5108 } else {
5109 *ret = drval;
5110 }
5111 return 1;
5112 }
5113
5114 int hdd_parseline(char *hddcfgline) {
5115 uint32_t l,p;
5116 int lfd,td,im,bm;
5117 int mfd;
5118 char *pptr;
5119 char *lockfname;
5120 char *metaidfname;
5121 struct stat sb;
5122 folder *f;
5123 uint8_t lockneeded;
5124 uint8_t cannotbeused;
5125 uint64_t limit;
5126 uint64_t metaid;
5127 uint8_t lmode;
5128
5129 if (hddcfgline[0]=='#') {
5130 return 0;
5131 }
5132 l = strlen(hddcfgline);
5133 while (l>0 && (hddcfgline[l-1]=='\r' || hddcfgline[l-1]=='\n' || hddcfgline[l-1]==' ' || hddcfgline[l-1]=='\t')) {
5134 l--;
5135 }
5136 if (l==0) {
5137 return 0;
5138 }
5139 hddcfgline[l]='\0';
5140 p = l;
5141 while (p>0 && hddcfgline[p-1]!=' ' && hddcfgline[p-1]!='\t') {
5142 p--;
5143 }
5144 lmode = 0;
5145 if (p>0) {
5146 if (hddcfgline[p]=='-') {
5147 if (hdd_size_parse(hddcfgline+p+1,&limit)>=0) {
5148 lmode = 1;
5149 } else {
5150 mfs_arg_syslog(LOG_WARNING,"size parse error, data: %s",hddcfgline+p);
5151 }
5152 } if ((hddcfgline[p]>='0' && hddcfgline[p]<='9') || hddcfgline[p]=='.') {
5153 if (hdd_size_parse(hddcfgline+p,&limit)>=0) {
5154 lmode = 2;
5155 } else {
5156 mfs_arg_syslog(LOG_WARNING,"size parse error, data: %s",hddcfgline+p);
5157 }
5158 }
5159 if (lmode) {
5160 l = p;
5161 while (l>0 && (hddcfgline[l-1]==' ' || hddcfgline[l-1]=='\t')) {
5162 l--;
5163 }
5164 if (l==0) {
5165 return 0;
5166 }
5167 }
5168 }
5169 if (hddcfgline[l-1]!='/') {
5170 hddcfgline[l]='/';
5171 hddcfgline[l+1]='\0';
5172 l++;
5173 } else {
5174 hddcfgline[l]='\0';
5175 }
5176 td = 0;
5177 im = 0;
5178 bm = REBALANCE_STD;
5179 pptr = hddcfgline;
5180 while (1) {
5181 if (*pptr == '*') {
5182 td = 1;
5183 } else if (*pptr == '!') {
5184 im = 1;
5185 } else if (*pptr == '>') {
5186 bm = REBALANCE_FORCE_DST;
5187 } else if (*pptr == '<') {
5188 bm = REBALANCE_FORCE_SRC;
5189 } else {
5190 break;
5191 }
5192 l--;
5193 pptr++;
5194 }
5195
5196 zassert(pthread_mutex_lock(&folderlock));
5197 lockneeded = 1;
5198 cannotbeused = 0;
5199 for (f=folderhead ; f && lockneeded ; f=f->next) {
5200 if (strcmp(f->path,pptr)==0) {
5201 if (f->toremove==1) {
5202 cannotbeused = 1;
5203 } else {
5204 lockneeded = 0;
5205 }
5206 }
5207 }
5208 zassert(pthread_mutex_unlock(&folderlock));
5209
5210 if (cannotbeused) {
5211 mfs_arg_syslog(LOG_WARNING,"hdd space manager: drive '%s' is being removed and can not be added again while removing is in progress - try it again in couple of seconds",pptr);
5212 return -1;
5213 }
5214
5215 if (lmode==1) { // sanity checks
5216 if (limit<0x4000000) {
5217 mfs_arg_syslog(LOG_WARNING,"hdd space manager: limit on '%s' < chunk size - leaving so small space on hdd is not recommended",pptr);
5218 } else {
5219 struct statvfs fsinfo;
5220
5221 if (statvfs(pptr,&fsinfo)<0) {
5222 mfs_arg_errlog(LOG_NOTICE,"hdd space manager: statvfs on '%s'",pptr);
5223 } else {
5224 uint64_t size = (uint64_t)(fsinfo.f_frsize)*(uint64_t)(fsinfo.f_blocks-(fsinfo.f_bfree-fsinfo.f_bavail));
5225 if (limit > size) {
5226 mfs_arg_syslog(LOG_WARNING,"hdd space manager: space to be left free on '%s' (%"PRIu64") is greater than real volume size (%"PRIu64") !!!",pptr,limit,size);
5227 }
5228 }
5229 }
5230 }
5231 if (lmode==2) { // sanity checks
5232 if (limit==0) {
5233 mfs_arg_syslog(LOG_WARNING,"hdd space manager: limit on '%s' set to zero - using real volume size",pptr);
5234 lmode = 0;
5235 } else {
5236 struct statvfs fsinfo;
5237
5238 if (statvfs(pptr,&fsinfo)<0) {
5239 mfs_arg_errlog(LOG_NOTICE,"hdd space manager: statvfs on '%s'",pptr);
5240 } else {
5241 uint64_t size = (uint64_t)(fsinfo.f_frsize)*(uint64_t)(fsinfo.f_blocks-(fsinfo.f_bfree-fsinfo.f_bavail));
5242 if (limit > size) {
5243 mfs_arg_syslog(LOG_WARNING,"hdd space manager: limit on '%s' (%"PRIu64") is greater than real volume size (%"PRIu64") - using real volume size",pptr,limit,size);
5244 lmode = 0;
5245 }
5246 }
5247 }
5248 }
5249
5250 metaid = masterconn_getmetaid();
5251 metaidfname = (char*)malloc(l+8);
5252 passert(metaidfname);
5253 memcpy(metaidfname,pptr,l);
5254 memcpy(metaidfname+l,".metaid",8);
5255 mfd = open(metaidfname,O_RDONLY);
5256 if (mfd>=0) {
5257 uint64_t filemetaid;
5258 uint8_t buff[8];
5259 const uint8_t *rptr;
5260 if (read(mfd,buff,8)==8) {
5261 rptr = buff;
5262 filemetaid = get64bit(&rptr);
5263 if (filemetaid!=metaid) {
5264 if (metaid>0) {
5265 if (im==0) {
5266 mfs_arg_syslog(LOG_ERR,"hdd space manager: wrong meta id in file '%s' (0x%016"PRIX64",expected:0x%016"PRIX64") - shouldn't use this drive - use '!' in drive definition to ignore this (dangerous)",metaidfname,filemetaid,metaid);
5267 } else {
5268 mfs_arg_syslog(LOG_ERR,"hdd space manager: wrong meta id in file '%s' (0x%016"PRIX64",expected:0x%016"PRIX64") - forced to use this drive",metaidfname,filemetaid,metaid);
5269 }
5270 } else {
5271 if (im==0) {
5272 mfs_arg_syslog(LOG_ERR,"hdd space manager: chunkserver without meta id shouldn't use drive with defined meta id (file: '%s') - use '!' in drive definition to ignore this (dangerous)",metaidfname);
5273 } else {
5274 mfs_arg_syslog(LOG_ERR,"hdd space manager: chunkserver without meta id shouldn't use drive with defined meta id (file: '%s') - forced to ignore",metaidfname);
5275 }
5276 }
5277 close(mfd);
5278 free(metaidfname);
5279 if (im==0) {
5280 return -1;
5281 }
5282 }
5283 metaid = 0; // file exists and is correct (or forced do be ignored), so do not re create it
5284 }
5285 close(mfd);
5286 }
5287 free(metaidfname);
5288 lockfname = (char*)malloc(l+6);
5289 passert(lockfname);
5290 memcpy(lockfname,pptr,l);
5291 memcpy(lockfname+l,".lock",6);
5292 lfd = open(lockfname,O_RDWR|O_CREAT|O_TRUNC,0640);
5293 if (lfd<0 && errno==EROFS && td) {
5294 lfd = open(lockfname,O_RDONLY); // prevents umounting
5295 free(lockfname);
5296 td = 2;
5297 } else {
5298 if (lfd<0) {
5299 mfs_arg_errlog(LOG_ERR,"hdd space manager: can't create lock file '%s'",lockfname);
5300 free(lockfname);
5301 return -1;
5302 }
5303 if (lockneeded && lockf(lfd,F_TLOCK,0)<0) {
5304 if (ERRNO_ERROR) {
5305 mfs_arg_errlog(LOG_NOTICE,"hdd space manager: lockf '%s' error",lockfname);
5306 } else {
5307 mfs_arg_syslog(LOG_ERR,"hdd space manager: data folder '%s' already locked (used by another process)",pptr);
5308 }
5309 free(lockfname);
5310 close(lfd);
5311 return -1;
5312 }
5313 if (fstat(lfd,&sb)<0) {
5314 mfs_arg_errlog(LOG_NOTICE,"hdd space manager: fstat '%s' error",lockfname);
5315 free(lockfname);
5316 close(lfd);
5317 return -1;
5318 }
5319 free(lockfname);
5320 if (lockneeded) {
5321 zassert(pthread_mutex_lock(&folderlock));
5322 for (f=folderhead ; f ; f=f->next) {
5323 if (f->devid==sb.st_dev) {
5324 if (f->lockinode==sb.st_ino) {
5325 mfs_arg_syslog(LOG_ERR,"hdd space manager: data folders '%s' and '%s have the same lockfile !!!",pptr,f->path);
5326 zassert(pthread_mutex_unlock(&folderlock));
5327 close(lfd);
5328 return -1;
5329 } else {
5330 mfs_arg_syslog(LOG_WARNING,"hdd space manager: data folders '%s' and '%s' are on the same physical device (could lead to unexpected behaviours)",pptr,f->path);
5331 }
5332 }
5333 }
5334 zassert(pthread_mutex_unlock(&folderlock));
5335 }
5336 }
5337 if (im==0 && metaid>0) {
5338 metaidfname = (char*)malloc(l+8);
5339 passert(metaidfname);
5340 memcpy(metaidfname,pptr,l);
5341 memcpy(metaidfname+l,".metaid",8);
5342 mfd = open(metaidfname,O_RDWR|O_CREAT|O_TRUNC,0640);
5343 if (mfd>=0) {
5344 uint8_t buff[8];
5345 uint8_t *wptr;
5346 wptr = buff;
5347 put64bit(&wptr,metaid);
5348 if (write(mfd,buff,8)!=8) {
5349 mfs_errlog(LOG_WARNING,"hdd space manager: error writing meta id file");
5350 }
5351 close(mfd);
5352 } else {
5353 mfs_errlog(LOG_WARNING,"hdd space manager: error writing meta id file");
5354 }
5355 free(metaidfname);
5356 }
5357 zassert(pthread_mutex_lock(&folderlock));
5358 for (f=folderhead ; f ; f=f->next) {
5359 if (strcmp(f->path,pptr)==0) {
5360 if (f->toremove==2) {
5361 f->toremove = 0;
5362 }
5363 if (lmode==1) {
5364 f->leavefree = limit;
5365 } else {
5366 f->leavefree = LeaveFree;
5367 }
5368 if (lmode==2) {
5369 f->sizelimit = limit;
5370 } else {
5371 f->sizelimit = 0;
5372 }
5373 if (f->damaged) {
5374 f->scanstate = SCST_SCANNEEDED;
5375 f->scanprogress = 0;
5376 f->damaged = 0;
5377 f->avail = 0ULL;
5378 f->total = 0ULL;
5379 if (f->chunktab) {
5380 free(f->chunktab);
5381 }
5382 f->chunkcount = 0;
5383 f->chunktabsize = 0;
5384 f->chunktab = NULL;
5385 hdd_stats_clear(&(f->cstat));
5386 for (l=0 ; l<STATSHISTORY ; l++) {
5387 hdd_stats_clear(&(f->stats[l]));
5388 }
5389 f->statspos = 0;
5390 for (l=0 ; l<LASTERRSIZE ; l++) {
5391 f->lasterrtab[l].chunkid = 0ULL;
5392 f->lasterrtab[l].timestamp = 0;
5393 f->lasterrtab[l].monotonic_time = 0.0;
5394 f->lasterrtab[l].errornumber = 0;
5395 }
5396 f->lasterrindx = 0;
5397 f->lastrefresh = 0.0;
5398 f->needrefresh = 1;
5399 } else {
5400 if ((f->todel==0 && td>0) || (f->todel>0 && td==0)) {
5401 // the change is important - chunks need to be send to master again
5402 f->scanstate = SCST_SENDNEEDED;
5403 }
5404 }
5405 f->todel = td;
5406 f->balancemode = bm;
5407 zassert(pthread_mutex_unlock(&folderlock));
5408 if (lfd>=0) {
5409 close(lfd);
5410 }
5411 return 1;
5412 }
5413 }
5414 f = (folder*)malloc(sizeof(folder));
5415 passert(f);
5416 f->todel = td;
5417 f->balancemode = bm;
5418 f->damaged = 0;
5419 f->scanstate = SCST_SCANNEEDED;
5420 f->scanprogress = 0;
5421 f->path = strdup(pptr);
5422 passert(f->path);
5423 f->toremove = 0;
5424 if (lmode==1) {
5425 f->leavefree = limit;
5426 } else {
5427 f->leavefree = LeaveFree;
5428 }
5429 if (lmode==2) {
5430 f->sizelimit = limit;
5431 } else {
5432 f->sizelimit = 0;
5433 }
5434 f->avail = 0ULL;
5435 f->total = 0ULL;
5436 f->chunkcount = 0;
5437 f->chunktabsize = 0;
5438 f->chunktab = NULL;
5439 hdd_stats_clear(&(f->cstat));
5440 for (l=0 ; l<STATSHISTORY ; l++) {
5441 hdd_stats_clear(&(f->stats[l]));
5442 }
5443 f->statspos = 0;
5444 for (l=0 ; l<LASTERRSIZE ; l++) {
5445 f->lasterrtab[l].chunkid = 0ULL;
5446 f->lasterrtab[l].timestamp = 0;
5447 f->lasterrtab[l].monotonic_time = 0.0;
5448 f->lasterrtab[l].errornumber = 0;
5449 }
5450 f->lasterrindx = 0;
5451 f->lastrefresh = 0.0;
5452 f->needrefresh = 1;
5453 f->devid = sb.st_dev;
5454 f->lockinode = sb.st_ino;
5455 f->lfd = lfd;
5456 f->testhead = NULL;
5457 f->testtail = &(f->testhead);
5458 // f->carry = (double)(random()&0x7FFFFFFF)/(double)(0x7FFFFFFF);
5459 f->read_dist = 0;
5460 f->write_dist = 0;
5461 f->read_first = 1;
5462 f->write_first = 1;
5463 f->read_corr = 0.0;
5464 f->write_corr = 0.0;
5465 f->rebalance_in_progress = 0;
5466 f->rebalance_last_usec = 0;
5467 f->next = folderhead;
5468 folderhead = f;
5469 testerreset = 1;
5470 zassert(pthread_mutex_unlock(&folderlock));
5471 return 2;
5472 }
5473
5474 int hdd_folders_reinit(void) {
5475 folder *f;
5476 FILE *fd;
5477 char buff[1000];
5478 char *hddfname;
5479 int ret,datadef;
5480
5481 if (!cfg_isdefined("HDD_CONF_FILENAME")) {
5482 hddfname = strdup(ETC_PATH "/mfs/mfshdd.cfg");
5483 passert(hddfname);
5484 fd = fopen(hddfname,"r");
5485 if (!fd) {
5486 free(hddfname);
5487 hddfname = strdup(ETC_PATH "/mfshdd.cfg");
5488 fd = fopen(hddfname,"r");
5489 if (fd) {
5490 mfs_syslog(LOG_WARNING,"default sysconf path has changed - please move mfshdd.cfg from "ETC_PATH"/ to "ETC_PATH"/mfs/");
5491 }
5492 }
5493 } else {
5494 hddfname = cfg_getstr("HDD_CONF_FILENAME",ETC_PATH "/mfs/mfshdd.cfg");
5495 fd = fopen(hddfname,"r");
5496 }
5497
5498 if (!fd) {
5499 free(hddfname);
5500 return -1;
5501 }
5502
5503 ret = 0;
5504
5505 zassert(pthread_mutex_lock(&folderlock));
5506 folderactions = 0; // stop folder actions
5507 for (f=folderhead ; f ; f=f->next) {
5508 if (f->toremove==0) {
5509 f->toremove = 2;
5510 }
5511 }
5512 zassert(pthread_mutex_unlock(&folderlock));
5513
5514 while (fgets(buff,999,fd)) {
5515 buff[999] = 0;
5516 if (hdd_parseline(buff)<0) {
5517 ret = -1;
5518 }
5519
5520 }
5521 fclose(fd);
5522
5523 zassert(pthread_mutex_lock(&folderlock));
5524 datadef = 0;
5525 for (f=folderhead ; f ; f=f->next) {
5526 if (f->toremove==0) {
5527 datadef = 1;
5528 if (f->scanstate==SCST_SCANNEEDED) {
5529 syslog(LOG_NOTICE,"hdd space manager: folder %s will be scanned",f->path);
5530 } else if (f->scanstate==SCST_SENDNEEDED) {
5531 syslog(LOG_NOTICE,"hdd space manager: folder %s will be resend",f->path);
5532 } else {
5533 syslog(LOG_NOTICE,"hdd space manager: folder %s didn't change",f->path);
5534 }
5535 } else {
5536 f->damaged = 0;
5537 syslog(LOG_NOTICE,"hdd space manager: folder %s will be removed",f->path);
5538 }
5539 }
5540 folderactions = 1; // continue folder actions
5541 zassert(pthread_mutex_unlock(&folderlock));
5542
5543 if (datadef==0) {
5544 mfs_arg_syslog(LOG_ERR,"hdd space manager: no hdd space defined in %s file",hddfname);
5545 ret = -1;
5546 }
5547
5548 free(hddfname);
5549
5550 return ret;
5551 }
5552
/*
 * Info callback (registered with main_info_register() in hdd_init()).
 * Delegates to hdd_open_files_handle() with the OF_INFO request.
 */
void hdd_info(void) {
	hdd_open_files_handle(OF_INFO);
}
5556
5557 void hdd_reload(void) {
5558 char *LeaveFreeStr;
5559
5560 zassert(pthread_mutex_lock(&folderlock));
5561 HDDErrorCount = cfg_getuint32("HDD_ERROR_TOLERANCE_COUNT",2);
5562 if (HDDErrorCount<1) {
5563 syslog(LOG_NOTICE,"hdd space manager: error tolerance count too small - changed to 1");
5564 HDDErrorCount = 1;
5565 } else if (HDDErrorCount>10) {
5566 syslog(LOG_NOTICE,"hdd space manager: error tolerance count too big - changed to 10");
5567 HDDErrorCount = 10;
5568 }
5569 HDDErrorTime = cfg_getuint32("HDD_ERROR_TOLERANCE_PERIOD",600);
5570 if (HDDErrorTime<10) {
5571 syslog(LOG_NOTICE,"hdd space manager: error tolerance period too small - changed to 10 seconds");
5572 HDDErrorTime = 10;
5573 } else if (HDDErrorTime>86400) {
5574 syslog(LOG_NOTICE,"hdd space manager: error tolerance period too big - changed to 86400 seconds (1 day)");
5575 HDDErrorTime = 86400;
5576 }
5577 zassert(pthread_mutex_unlock(&folderlock));
5578 zassert(pthread_mutex_lock(&testlock));
5579 HDDTestFreq = cfg_getuint32("HDD_TEST_FREQ",10);
5580 HDDRebalancePerc = cfg_getuint32("HDD_REBALANCE_UTILIZATION",20);
5581 if (HDDRebalancePerc>100) {
5582 HDDRebalancePerc=100;
5583 }
5584 zassert(pthread_mutex_unlock(&testlock));
5585 zassert(pthread_mutex_lock(&doplock));
5586 DoFsyncBeforeClose = cfg_getuint8("HDD_FSYNC_BEFORE_CLOSE",0);
5587 zassert(pthread_mutex_unlock(&doplock));
5588
5589 LeaveFreeStr = cfg_getstr("HDD_LEAVE_SPACE_DEFAULT","256MiB");
5590 if (hdd_size_parse(LeaveFreeStr,&LeaveFree)<0) {
5591 syslog(LOG_NOTICE,"hdd space manager: HDD_LEAVE_SPACE_DEFAULT parse error - left unchanged");
5592 }
5593 free(LeaveFreeStr);
5594 if (LeaveFree<0x4000000) {
5595 syslog(LOG_NOTICE,"hdd space manager: HDD_LEAVE_SPACE_DEFAULT < chunk size - leaving so small space on hdd is not recommended");
5596 }
5597
5598 syslog(LOG_NOTICE,"reloading hdd data ...");
5599 hdd_folders_reinit();
5600 }
5601
/*
 * Second stage of initialisation: starts the background threads.
 *
 * Resets the 'term' flag to 0 (under termlock) and then creates, in this
 * order, the tester, folders, rebalance and delayed threads using
 * main_minthread_create(). Each call is wrapped in zassert().
 *
 * Always returns 0.
 */
int hdd_late_init(void) {
	// hdd_init() leaves term set to 1 - clear it before the threads are started
	zassert(pthread_mutex_lock(&termlock));
	term = 0;
	zassert(pthread_mutex_unlock(&termlock));

	zassert(main_minthread_create(&testerthread,0,hdd_tester_thread,NULL));
	zassert(main_minthread_create(&foldersthread,0,hdd_folders_thread,NULL));
	zassert(main_minthread_create(&rebalancethread,0,hdd_rebalance_thread,NULL));
	zassert(main_minthread_create(&delayedthread,0,hdd_delayed_thread,NULL));
	return 0;
}
5613
5614 int hdd_init(void) {
5615 uint32_t hp;
5616 folder *f;
5617 char *LeaveFreeStr;
5618
5619 // this routine is called at the beginning from the main thread so no locks are necessary here
5620 for (hp=0 ; hp<HASHSIZE ; hp++) {
5621 hashtab[hp] = NULL;
5622 }
5623 for (hp=0 ; hp<DHASHSIZE ; hp++) {
5624 dophashtab[hp] = NULL;
5625 }
5626
5627 #ifndef PRESERVE_BLOCK
5628 zassert(pthread_key_create(&hdrbufferkey,free));
5629 # ifdef MMAP_ALLOC
5630 zassert(pthread_key_create(&blockbufferkey,hdd_blockbuffer_free));
5631 # else
5632 zassert(pthread_key_create(&blockbufferkey,free));
5633 # endif
5634 #endif /* PRESERVE_BLOCK */
5635
5636 emptyblockcrc = mycrc32_zeroblock(0,MFSBLOCKSIZE);
5637
5638 LeaveFreeStr = cfg_getstr("HDD_LEAVE_SPACE_DEFAULT","256MiB");
5639 if (hdd_size_parse(LeaveFreeStr,&LeaveFree)<0) {
5640 fprintf(stderr,"hdd space manager: HDD_LEAVE_SPACE_DEFAULT parse error - using default (256MiB)\n");
5641 LeaveFree = 0x10000000;
5642 }
5643 free(LeaveFreeStr);
5644 if (LeaveFree<0x4000000) {
5645 fprintf(stderr,"hdd space manager: HDD_LEAVE_SPACE_DEFAULT < chunk size - leaving so small space on hdd is not recommended\n");
5646 }
5647
5648 if (hdd_folders_reinit()<0) {
5649 return -1;
5650 }
5651
5652 hdd_open_files_handle(OF_INIT);
5653
5654 zassert(pthread_mutex_lock(&folderlock));
5655 for (f=folderhead ; f ; f=f->next) {
5656 fprintf(stderr,"hdd space manager: path to scan: %s\n",f->path);
5657 }
5658 zassert(pthread_mutex_unlock(&folderlock));
5659 fprintf(stderr,"hdd space manager: start background hdd scanning (searching for available chunks)\n");
5660
5661 HDDErrorCount = cfg_getuint32("HDD_ERROR_TOLERANCE_COUNT",2);
5662 if (HDDErrorCount<1) {
5663 fprintf(stderr,"hdd space manager: error tolerance count too small - changed to 1\n");
5664 HDDErrorCount = 2;
5665 } else if (HDDErrorCount>10) {
5666 fprintf(stderr,"hdd space manager: error tolerance count too big - changed to 10\n");
5667 HDDErrorCount = 10;
5668 }
5669 HDDErrorTime = cfg_getuint32("HDD_ERROR_TOLERANCE_PERIOD",600);
5670 if (HDDErrorTime<10) {
5671 fprintf(stderr,"hdd space manager: error tolerance period too small - changed to 10 seconds\n");
5672 HDDErrorTime = 10;
5673 } else if (HDDErrorTime>86400) {
5674 fprintf(stderr,"hdd space manager: error tolerance period too big - changed to 86400 seconds (1 day)\n");
5675 HDDErrorTime = 86400;
5676 }
5677 HDDTestFreq = cfg_getuint32("HDD_TEST_FREQ",10);
5678 HDDRebalancePerc = cfg_getuint32("HDD_REBALANCE_UTILIZATION",20);
5679 if (HDDRebalancePerc>100) {
5680 HDDRebalancePerc=100;
5681 }
5682
5683 main_reload_register(hdd_reload);
5684 main_time_register(60,0,hdd_diskinfo_movestats);
5685 main_destruct_register(hdd_term);
5686 main_info_register(hdd_info);
5687
5688 zassert(pthread_mutex_lock(&termlock));
5689 term = 1;
5690 zassert(pthread_mutex_unlock(&termlock));
5691
5692 return 0;
5693 }
5694