1 /**************************************************************************************************
2  * IOWOW library
3  *
4  * MIT License
5  *
6  * Copyright (c) 2012-2021 Softmotions Ltd <info@softmotions.com>
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a copy
9  * of this software and associated documentation files (the "Software"), to deal
10  * in the Software without restriction, including without limitation the rights
11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12  * copies of the Software, and to permit persons to whom the Software is
13  * furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in all
16  * copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24  * SOFTWARE.
25  *************************************************************************************************/
26 
27 #include "iwcfg.h"
28 #include "platform/iwp.h"
29 #include "log/iwlog.h"
30 #include "iwfsmfile.h"
31 #include "utils/iwutils.h"
32 #include "utils/kbtree.h"
33 #include "utils/iwbits.h"
34 
35 #include <pthread.h>
36 
37 typedef struct IWFS_FSM_IMPL FSM;
38 
39 void iwfs_fsmdbg_dump_fsm_tree(IWFS_FSM *f, const char *hdr);
40 
41 /**
42  * Free-space blocks-tree key.
43  */
44 typedef struct {
45   uint32_t off;
46   uint32_t len;
47 } FSMBK;
48 
49 /** Additional options for `_fsm_set_bit_status_lw` routine */
50 typedef uint8_t fsm_bmopts_t;
51 
52 /** No options. */
53 #define FSM_BM_NONE ((fsm_bmopts_t) 0x00U)
54 
55 /** Do not modify bitmap. */
56 #define FSM_BM_DRY_RUN ((fsm_bmopts_t) 0x01U)
57 
58 /** Perform strict checking of bitmap consistency */
59 #define FSM_BM_STRICT ((fsm_bmopts_t) 0x02U)
60 
61 /* Maximum size of block: 1Mb */
62 #define FSM_MAX_BLOCK_POW 20
63 
64 /* Maximum number of records used in allocation statistics */
65 #define FSM_MAX_STATS_COUNT 0x0000ffff
66 
67 #define FSM_ENSURE_OPEN(impl_)                                                                          \
68   if (!(impl_) || !(impl_)->f) return IW_ERROR_INVALID_STATE;
69 
70 #define FSM_ENSURE_OPEN2(f_)                                                                             \
71   if (!(f_) || !(f_)->impl) return IW_ERROR_INVALID_STATE;
72 
73 #define FSMBK_OFFSET(b_) ((b_)->off)
74 
75 #define FSMBK_LENGTH(b_) ((b_)->len)
76 
77 ////////////////////////////////////////////////////////////////////////////////////////////////////
78 
79 IW_INLINE int _fsm_cmp_ptr(const FSMBK *a, const FSMBK *b);
80 
81 #define _fsm_cmp(a_, b_) (_fsm_cmp_ptr(&(a_), &(b_)))
82 
83 // -V:KBTREE_INIT:522, 641
84 KBTREE_INIT(fsm, FSMBK, _fsm_cmp)
85 
86 struct IWFS_FSM_IMPL {
87   IWFS_EXT  pool;                 /**< Underlying rwl file. */
88   uint64_t  bmlen;                /**< Free-space bitmap block length in bytes. */
89   uint64_t  bmoff;                /**< Free-space bitmap block offset in bytes. */
90   uint64_t  lfbkoff;              /**< Offset in blocks of free block chunk with the largest offset. */
91   uint64_t  lfbklen;              /**< Length in blocks of free block chunk with the largest offset. */
92   uint64_t  crzsum;               /**< Cumulative sum all allocated blocks */
93   uint64_t  crzvar;               /**< Record sizes standard variance (deviation^2 * N) */
94   uint32_t  hdrlen;               /**< Length of custom file header */
95   uint32_t  crznum;               /**< Number of all allocated continuous areas acquired by `allocate` */
96   IWFS_FSM *f;                    /**< Self reference. */
97   IWDLSNR  *dlsnr;                /**< Data events listener */
98   kbtree_t(fsm) * fsm;            /**< Free-space tree */
99   pthread_rwlock_t *ctlrwlk;      /**< Methods RW lock */
100   size_t aunit;                   /**< System allocation unit size.
101                                        - Page size on *NIX
102                                        - Minimal allocation unit for WIN32 */
103   iwfs_fsm_openflags oflags;      /**< Operation mode flags. */
104   iwfs_omode omode;               /**< Open mode. */
105   uint8_t    bpow;                /**< Block size power for 2 */
106   bool       mmap_all;            /**< Mmap all file data */
107   iwfs_ext_mmap_opts_t mmap_opts; /**< Defaul mmap options used in `add_mmap` */
108 };
109 
110 static iwrc _fsm_ensure_size_lw(FSM *impl, off_t size);
111 
112 ////////////////////////////////////////////////////////////////////////////////////////////////////
113 
114 IW_INLINE int _fsm_cmp_ptr(const FSMBK *a, const FSMBK *b) {
115   int ret = ((FSMBK_LENGTH(b) < FSMBK_LENGTH(a)) - (FSMBK_LENGTH(a) < FSMBK_LENGTH(b)));
116   if (ret) {
117     return ret;
118   } else {
119     return ((FSMBK_OFFSET(b) < FSMBK_OFFSET(a)) - (FSMBK_OFFSET(a) < FSMBK_OFFSET(b)));
120   }
121 }
122 
123 IW_INLINE iwrc _fsm_ctrl_wlock(FSM *impl) {
124   int rci = impl->ctlrwlk ? pthread_rwlock_wrlock(impl->ctlrwlk) : 0;
125   return (rci ? iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci) : 0);
126 }
127 
128 IW_INLINE iwrc _fsm_ctrl_rlock(FSM *impl) {
129   int rci = impl->ctlrwlk ? pthread_rwlock_rdlock(impl->ctlrwlk) : 0;
130   return (rci ? iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci) : 0);
131 }
132 
133 IW_INLINE iwrc _fsm_ctrl_unlock(FSM *impl) {
134   int rci = impl->ctlrwlk ? pthread_rwlock_unlock(impl->ctlrwlk) : 0;
135   return (rci ? iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci) : 0);
136 }
137 
138 IW_INLINE iwrc _fsm_bmptr(FSM *impl, uint64_t **bmptr) {
139   size_t sp;
140   uint8_t *mm;
141   *bmptr = 0;
142   // get mmap pointer without locked
143   iwrc rc = impl->pool.probe_mmap(&impl->pool, impl->mmap_all ? 0 : impl->bmoff, &mm, &sp);
144   RCRET(rc);
145   if (impl->mmap_all) {
146     if (sp < impl->bmoff + impl->bmlen) {
147       return IWFS_ERROR_NOT_MMAPED;
148     }
149     *bmptr = (uint64_t*) (mm + impl->bmoff);
150   } else {
151     if (sp < impl->bmlen) {
152       return IWFS_ERROR_NOT_MMAPED;
153     }
154     *bmptr = (uint64_t*) mm;
155   }
156   return 0;
157 }
158 
159 /**
160  * @brief Init the given @a bk key
161  *        with given @a offset
162  *        and @a length values.
163  */
164 IW_INLINE WUR iwrc _fsm_init_fbk(FSMBK *bk, uint64_t offset_blk, uint64_t len_blk) {
165   if (  (offset_blk > ((uint32_t) -1))
166      || (len_blk > ((uint32_t) -1))) {
167     return IW_ERROR_OVERFLOW;
168   }
169   bk->off = (uint32_t) offset_blk;
170   bk->len = (uint32_t) len_blk;
171   return 0;
172 }
173 
174 /**
175  * @brief Remove free space block from the fsm tree.
176  * @param impl `FSM`
177  * @param offset_blk Offset block number
178  * @param length_blk Number of blocks
179  */
180 IW_INLINE iwrc _fsm_del_fbk(FSM *impl, uint64_t offset_blk, uint64_t length_blk) {
181   FSMBK fbk;
182   assert(length_blk);
183   iwrc rc = _fsm_init_fbk(&fbk, offset_blk, length_blk);
184   RCRET(rc);
185 #ifndef NDEBUG
186   int s2, s1 = kb_size(impl->fsm);
187 #endif
188   kb_delp(fsm, impl->fsm, &fbk);
189 #ifndef NDEBUG
190   s2 = kb_size(impl->fsm);
191   assert(s2 < s1);
192 #endif
193   if (FSMBK_OFFSET(&fbk) == impl->lfbkoff) {
194     impl->lfbkoff = 0;
195     impl->lfbklen = 0;
196   }
197   return 0;
198 }
199 
200 /**
201  * @brief Deregister free-block chunk from the fsm tree.
202  * @param impl `FSM`
203  * @param fbk `FSMBK` Fsm tree key structure.
204  */
205 IW_INLINE void _fsm_del_fbk2(FSM *impl, const FSMBK fbk) {
206 #ifndef NDEBUG
207   int s2, s1 = kb_size(impl->fsm);
208 #endif
209   kb_delp(fsm, impl->fsm, &fbk);
210 #ifndef NDEBUG
211   s2 = kb_size(impl->fsm);
212   assert(s2 < s1);
213 #endif
214   if (FSMBK_OFFSET(&fbk) == impl->lfbkoff) {
215     impl->lfbkoff = 0;
216     impl->lfbklen = 0;
217   }
218 }
219 
220 /**
221  * @brief Register free space block in the fsm tree.
222  * @param impl `FSM`
223  * @param offset_blk Offset block number
224  * @param length_blk Number of blocks
225  */
226 IW_INLINE iwrc _fsm_put_fbk(FSM *impl, uint64_t offset_blk, uint64_t length_blk) {
227   FSMBK fbk;
228   assert(length_blk);
229   iwrc rc = _fsm_init_fbk(&fbk, offset_blk, length_blk);
230   RCRET(rc);
231   kb_putp(fsm, impl->fsm, &fbk);
232   if (offset_blk + length_blk >= impl->lfbkoff + impl->lfbklen) {
233     impl->lfbkoff = offset_blk;
234     impl->lfbklen = length_blk;
235   }
236   return 0;
237 }
238 
239 /**
240  * @brief Get the nearest free-space block.
241  * @param impl `FSM`
242  * @param offset_blk Desired offset in number of blocks.
243  * @param length_blk Desired free area size specified in blocks.
244  * @param opts Allocation opts
245  * @return `0` if matching block is not found.
246  */
247 IW_INLINE FSMBK *_fsm_find_matching_fblock_lw(
248   FSM            *impl,
249   uint64_t        offset_blk,
250   uint64_t        length_blk,
251   iwfs_fsm_aflags opts) {
252   FSMBK k, *uk, *lk;
253   iwrc rc = _fsm_init_fbk(&k, offset_blk, length_blk);
254   if (rc) {
255     iwlog_ecode_error3(rc);
256     return 0;
257   }
258 
259   kb_intervalp(fsm, impl->fsm, &k, &lk, &uk);
260 
261   uint64_t lklength = lk ? FSMBK_LENGTH(lk) : 0;
262   uint64_t uklength = uk ? FSMBK_LENGTH(uk) : 0;
263   if (lklength == length_blk) {
264     return lk;
265   } else if (uklength == length_blk) {
266     return uk;
267   }
268   if (lklength > uklength) {
269     return (lklength > length_blk) ? lk : 0;
270   } else {
271     return (uklength > length_blk) ? uk : 0;
272   }
273 }
274 
275 /**
276  * @brief Set the allocation bits in the fsm bitmap.
277  *
278  * @param impl
279  * @param offset_bits Bit offset in the bitmap.
280  * @param length_bits Number of bits to set
281  * @param bit_status  If `1` bits will be set to `1` otherwise `0`
282  * @param opts        Operation options
283  */
284 static iwrc _fsm_set_bit_status_lw(
285   FSM               *impl,
286   const uint64_t     offset_bits,
287   const uint64_t     length_bits_,
288   const int          bit_status,
289   const fsm_bmopts_t opts) {
290 
291   iwrc rc;
292   size_t sp;
293   uint8_t *mm;
294   register int64_t length_bits = length_bits_;
295   register uint64_t *p, set_mask;
296   uint64_t bend = offset_bits + length_bits;
297   int set_bits;
298 
299   if (bend < offset_bits) { // overflow
300     return IW_ERROR_OUT_OF_BOUNDS;
301   }
302   assert(impl->bmlen * 8 >= offset_bits + length_bits);
303   if (impl->bmlen * 8 < offset_bits + length_bits) {
304     return IWFS_ERROR_FSM_SEGMENTATION;
305   }
306   if (impl->mmap_all) {
307     rc = impl->pool.probe_mmap(&impl->pool, 0, &mm, &sp);
308     RCRET(rc);
309     if (sp < impl->bmoff + impl->bmlen) {
310       return IWFS_ERROR_NOT_MMAPED;
311     } else {
312       mm += impl->bmoff;
313     }
314   } else {
315     rc = impl->pool.probe_mmap(&impl->pool, impl->bmoff, &mm, &sp);
316     RCRET(rc);
317     if (sp < impl->bmlen) {
318       return IWFS_ERROR_NOT_MMAPED;
319     }
320   }
321   p = ((uint64_t*) mm) + offset_bits / 64;
322   set_bits = 64 - (offset_bits & (64 - 1)); // NOLINT
323   set_mask = (~((uint64_t) 0) << (offset_bits & (64 - 1)));
324 
325 #ifdef IW_BIGENDIAN
326   while (length_bits - set_bits >= 0) {
327     uint64_t pv = *p;
328     pv = IW_ITOHLL(pv);
329     if (bit_status) {
330       if ((opts & FSM_BM_STRICT) && (pv & set_mask)) {
331         rc = IWFS_ERROR_FSM_SEGMENTATION;
332       }
333       if ((opts & FSM_BM_DRY_RUN) == 0) {
334         pv |= set_mask;
335         *p = IW_HTOILL(pv);
336       }
337     } else {
338       if ((opts & FSM_BM_STRICT) && ((pv & set_mask) != set_mask)) {
339         rc = IWFS_ERROR_FSM_SEGMENTATION;
340       }
341       if ((opts & FSM_BM_DRY_RUN) == 0) {
342         pv &= ~set_mask;
343         *p = IW_HTOILL(pv);
344       }
345     }
346     length_bits -= set_bits;
347     set_bits = 64;
348     set_mask = ~((uint64_t) 0);
349     ++p;
350   }
351   if (length_bits) {
352     uint64_t pv = *p;
353     pv = IW_ITOHLL(pv);
354     set_mask &= (bend & (64 - 1)) ? ((((uint64_t) 1) << (bend & (64 - 1))) - 1) : ~((uint64_t) 0);
355     if (bit_status) {
356       if ((opts & FSM_BM_STRICT) && (pv & set_mask)) {
357         rc = IWFS_ERROR_FSM_SEGMENTATION;
358       }
359       if ((opts & FSM_BM_DRY_RUN) == 0) {
360         pv |= set_mask;
361         *p = IW_HTOILL(pv);
362       }
363     } else {
364       if ((opts & FSM_BM_STRICT) && ((pv & set_mask) != set_mask)) {
365         rc = IWFS_ERROR_FSM_SEGMENTATION;
366       }
367       if ((opts & FSM_BM_DRY_RUN) == 0) {
368         pv &= ~set_mask;
369         *p = IW_HTOILL(pv);
370       }
371     }
372   }
373 #else
374   while (length_bits - set_bits >= 0) {
375     if (bit_status) {
376       if ((opts & FSM_BM_STRICT) && (*p & set_mask)) {
377         rc = IWFS_ERROR_FSM_SEGMENTATION;
378       }
379       if ((opts & FSM_BM_DRY_RUN) == 0) {
380         *p |= set_mask;
381       }
382     } else {
383       if ((opts & FSM_BM_STRICT) && ((*p & set_mask) != set_mask)) {
384         rc = IWFS_ERROR_FSM_SEGMENTATION;
385       }
386       if ((opts & FSM_BM_DRY_RUN) == 0) {
387         *p &= ~set_mask;
388       }
389     }
390     length_bits -= set_bits;
391     set_bits = 64;
392     set_mask = ~((uint64_t) 0);
393     ++p;
394   }
395   if (length_bits) {
396     set_mask &= (bend & (64 - 1)) ? ((((uint64_t) 1) << (bend & (64 - 1))) - 1) : ~((uint64_t) 0);
397     if (bit_status) {
398       if ((opts & FSM_BM_STRICT) && (*p & set_mask)) {
399         rc = IWFS_ERROR_FSM_SEGMENTATION;
400       }
401       if ((opts & FSM_BM_DRY_RUN) == 0) {
402         *p |= set_mask;
403       }
404     } else {
405       if ((opts & FSM_BM_STRICT) && ((*p & set_mask) != set_mask)) {
406         rc = IWFS_ERROR_FSM_SEGMENTATION;
407       }
408       if ((opts & FSM_BM_DRY_RUN) == 0) {
409         *p &= ~set_mask;
410       }
411     }
412   }
413 #endif
414   if (!rc && impl->dlsnr) {
415     uint64_t so = offset_bits / 8;
416     uint64_t lb = length_bits_ + offset_bits % 8;
417     uint64_t dl = lb / 8;
418     if (lb % 8) {
419       ++dl;
420     }
421     rc = impl->dlsnr->onwrite(impl->dlsnr, impl->bmoff + so, mm + so, dl, 0);
422   }
423   return rc;
424 }
425 
426 /**
427  *  @brief Allocate a continuous segment of blocks with page aligned offset.
428  *
429  *  @param impl `FSM`
430  *  @param length_blk Desired segment length in blocks.
431  *  @param [in,out] offset_blk Allocated segment offset in blocks will be stored into.
432                     It also specified the desired segment offset to provide
433  *                  allocation locality.
434  *  @param [out] olength_blk Assigned segment length in blocks.
435  *  @param  max_offset_blk Maximal offset of allocated block.
436  *  @param opts Allocation options.
437  */
438 static iwrc _fsm_blk_allocate_aligned_lw(
439   FSM                  *impl,
440   const uint64_t        length_blk,
441   uint64_t             *offset_blk,
442   uint64_t             *olength_blk,
443   const uint64_t        max_offset_blk,
444   const iwfs_fsm_aflags opts) {
445   FSMBK *nk;
446   fsm_bmopts_t bopts = FSM_BM_NONE;
447   size_t aunit_blk = (impl->aunit >> impl->bpow);
448 
449   assert(impl && impl->fsm && length_blk > 0);
450   if (impl->oflags & IWFSM_STRICT) {
451     bopts |= FSM_BM_STRICT;
452   }
453   *olength_blk = 0;
454   *offset_blk = 0;
455 
456   /* First attempt */
457   nk = _fsm_find_matching_fblock_lw(impl, 0, length_blk + aunit_blk, opts);
458   if (!nk) {
459     nk = _fsm_find_matching_fblock_lw(impl, 0, length_blk, opts);
460     if (!nk) {
461       return IWFS_ERROR_NO_FREE_SPACE;
462     }
463   }
464   uint64_t akoff = FSMBK_OFFSET(nk);
465   uint64_t aklen = FSMBK_LENGTH(nk);
466   uint64_t noff = IW_ROUNDUP(akoff, aunit_blk);
467 
468   if ((noff <= max_offset_blk) && (noff < aklen + akoff) && (aklen - (noff - akoff) >= length_blk)) {
469     _fsm_del_fbk(impl, akoff, aklen);
470     aklen = aklen - (noff - akoff);
471     if (noff > akoff) {
472       _fsm_put_fbk(impl, akoff, noff - akoff);
473     }
474     if (aklen > length_blk) {
475       _fsm_put_fbk(impl, noff + length_blk, aklen - length_blk);
476     }
477     *offset_blk = noff;
478     *olength_blk = length_blk;
479     return _fsm_set_bit_status_lw(impl, noff, length_blk, 1, bopts);
480   }
481 
482   aklen = 0;
483   akoff = UINT64_MAX;
484   /* full scan */
485 #define _fsm_traverse(k)                                                                                     \
486   {                                                                                                          \
487     uint64_t koff = FSMBK_OFFSET(k);                                                                         \
488     uint64_t klen = FSMBK_LENGTH(k);                                                                         \
489     if (koff < akoff) {                                                                                      \
490       noff = IW_ROUNDUP(koff, aunit_blk);                                                                    \
491       if (noff <= max_offset_blk && (noff < klen + koff) && (klen - (noff - koff) >= length_blk)) {          \
492         akoff = koff;                                                                                        \
493         aklen = klen;                                                                                        \
494       }                                                                                                      \
495     }                                                                                                        \
496   }
497   //-V:__kb_traverse:576, 701, 619, 769, 522
498   __kb_traverse(FSMBK, impl->fsm, _fsm_traverse);
499 #undef _fsm_traverse
500 
501   if (akoff == UINT64_MAX) {
502     return IWFS_ERROR_NO_FREE_SPACE;
503   }
504   _fsm_del_fbk(impl, akoff, aklen);
505   noff = IW_ROUNDUP(akoff, aunit_blk);
506   aklen = aklen - (noff - akoff);
507   if (noff > akoff) {
508     _fsm_put_fbk(impl, akoff, noff - akoff);
509   }
510   if (aklen > length_blk) {
511     _fsm_put_fbk(impl, noff + length_blk, aklen - length_blk);
512   }
513   *offset_blk = noff;
514   *olength_blk = length_blk;
515   return _fsm_set_bit_status_lw(impl, noff, length_blk, 1, bopts);
516 }
517 
518 /**
519  * @brief Load existing bitmap area into free-space search tree.
520  * @param impl  `FSM`
521  * @param bm    Bitmap area start ptr
522  * @param len   Bitmap area length in bytes.
523  */
524 static void _fsm_load_fsm_lw(FSM *impl, const uint8_t *bm, uint64_t len) {
525   uint64_t cbnum = 0, fbklength = 0, fbkoffset = 0;
526   if (impl->fsm) {
527     // -V:kb_destroy:701, 769
528     kb_destroy(fsm, impl->fsm);
529   }
530   impl->fsm = kb_init(fsm, KB_DEFAULT_SIZE);
531   for (uint64_t b = 0; b < len; ++b) {
532     register uint8_t bb = bm[b];
533     if (bb == 0) {
534       fbklength += 8;
535       cbnum += 8;
536     } else if (bb == 0xffU) {
537       if (fbklength) {
538         fbkoffset = cbnum - fbklength;
539         _fsm_put_fbk(impl, fbkoffset, fbklength);
540         fbklength = 0;
541       }
542       cbnum += 8;
543     } else {
544       for (int i = 0; i < 8; ++i, ++cbnum) {
545         if (bb & (1U << i)) {
546           if (fbklength) {
547             fbkoffset = cbnum - fbklength;
548             _fsm_put_fbk(impl, fbkoffset, fbklength);
549             fbklength = 0;
550           }
551         } else {
552           ++fbklength;
553         }
554       }
555     }
556   }
557   if (fbklength > 0) {
558     fbkoffset = len * 8 - fbklength;
559     _fsm_put_fbk(impl, fbkoffset, fbklength);
560   }
561 }
562 
563 /**
564  * @brief Flush a current `iwfsmfile` metadata into the file header.
565  * @param impl
566  * @param is_sync If `1` perform mmap sync.
567  * @return
568  */
569 static iwrc _fsm_write_meta_lw(FSM *impl) {
570   uint64_t llv;
571   size_t wlen;
572   uint32_t sp = 0, lv;
573   uint8_t hdr[IWFSM_CUSTOM_HDR_DATA_OFFSET] = { 0 };
574 
575   /*
576       [FSM_CTL_MAGICK u32][block pow u8]
577       [bmoffset u64][bmlength u64]
578       [u64 crzsum][u32 crznum][u64 crszvar][u256 reserved]
579       [custom header size u32][custom header data...]
580       [fsm data...]
581    */
582 
583   /* magic */
584   lv = IW_HTOIL(IWFSM_MAGICK);
585   assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
586   memcpy(hdr + sp, &lv, sizeof(lv));
587   sp += sizeof(lv);
588 
589   /* block pow */
590   static_assert(sizeof(impl->bpow) == 1, "sizeof(impl->bpow) == 1");
591   assert(sp + sizeof(impl->bpow) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
592   memcpy(hdr + sp, &impl->bpow, sizeof(impl->bpow));
593   sp += sizeof(impl->bpow);
594 
595   /* fsm bitmap block offset */
596   llv = impl->bmoff;
597   llv = IW_HTOILL(llv);
598   assert(sp + sizeof(llv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
599   memcpy(hdr + sp, &llv, sizeof(llv));
600   sp += sizeof(llv);
601 
602   /* fsm bitmap block length */
603   llv = impl->bmlen;
604   llv = IW_HTOILL(llv);
605   assert(sp + sizeof(llv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
606   memcpy(hdr + sp, &llv, sizeof(llv));
607   sp += sizeof(llv);
608 
609   /* Cumulative sum of record sizes acquired by `allocate` */
610   llv = impl->crzsum;
611   llv = IW_HTOILL(llv);
612   assert(sp + sizeof(llv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
613   memcpy(hdr + sp, &llv, sizeof(llv));
614   sp += sizeof(llv);
615 
616   /* Cumulative number of records acquired by `allocated` */
617   lv = impl->crznum;
618   lv = IW_HTOIL(lv);
619   assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
620   memcpy(hdr + sp, &lv, sizeof(lv));
621   sp += sizeof(lv);
622 
623   /* Record sizes standard variance (deviation^2 * N) */
624   llv = impl->crzvar;
625   llv = IW_HTOILL(llv);
626   assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
627   memcpy(hdr + sp, &llv, sizeof(llv));
628   sp += sizeof(llv);
629 
630   /* Reserved */
631   sp += 32;
632 
633   /* Size of header */
634   lv = impl->hdrlen;
635   lv = IW_HTOIL(lv);
636   assert(sp + sizeof(lv) <= IWFSM_CUSTOM_HDR_DATA_OFFSET);
637   memcpy(hdr + sp, &lv, sizeof(lv));
638   sp += sizeof(lv);
639 
640   assert(sp == IWFSM_CUSTOM_HDR_DATA_OFFSET);
641   return impl->pool.write(&impl->pool, 0, hdr, IWFSM_CUSTOM_HDR_DATA_OFFSET, &wlen);
642 }
643 
644 /**
645  * @brief Search for the first next set bit position
646  *        starting from the specified offset bit (INCLUDED).
647  */
648 static uint64_t _fsm_find_next_set_bit(
649   const uint64_t   *addr,
650   register uint64_t offset_bit,
651   const uint64_t    max_offset_bit,
652   int              *found) {
653   *found = 0;
654   register uint64_t bit, size;
655   register const uint64_t *p = addr + offset_bit / 64;
656   if (offset_bit >= max_offset_bit) {
657     return 0;
658   }
659   bit = offset_bit & (64 - 1);
660   offset_bit -= bit;
661   size = max_offset_bit - offset_bit;
662 
663 #ifdef IW_BIGENDIAN
664   uint64_t pv = *p;
665   if (bit) {
666     pv = IW_ITOHLL(pv) & (~((uint64_t) 0) << bit);
667     if (pv) {
668       pv = iwbits_find_first_sbit64(pv);
669       if (pv >= size) {
670         return 0;
671       } else {
672         *found = 1;
673         return offset_bit + pv;
674       }
675     }
676     if (size <= 64) {
677       return 0;
678     }
679     offset_bit += 64;
680     size -= 64;
681     ++p;
682   }
683   while (size & ~(64 - 1)) {
684     pv = *(p++);
685     if (pv) {
686       *found = 1;
687       return offset_bit + iwbits_find_first_sbit64(IW_ITOHLL(pv));
688     }
689     offset_bit += 64;
690     size -= 64;
691   }
692   if (!size) {
693     return 0;
694   }
695   pv = *p;
696   pv = IW_ITOHLL(pv) & (~((uint64_t) 0) >> (64 - size));
697   if (pv) {
698     *found = 1;
699     return offset_bit + iwbits_find_first_sbit64(pv);
700   } else {
701     return 0;
702   }
703 #else
704   register uint64_t tmp;
705   if (bit) {
706     tmp = *p & (~((uint64_t) 0) << bit);
707     if (tmp) {
708       tmp = iwbits_find_first_sbit64(tmp);
709       if (tmp >= size) {
710         return 0;
711       } else {
712         *found = 1;
713         return offset_bit + tmp;
714       }
715     }
716     if (size <= 64) {
717       return 0;
718     }
719     offset_bit += 64;
720     size -= 64;
721     ++p;
722   }
723   while (size & ~(64 - 1)) {
724     if ((tmp = *(p++))) {
725       *found = 1;
726       return offset_bit + iwbits_find_first_sbit64(tmp);
727     }
728     offset_bit += 64;
729     size -= 64;
730   }
731   if (!size) {
732     return 0;
733   }
734   tmp = (*p) & (~((uint64_t) 0) >> (64 - size));
735   if (tmp) {
736     *found = 1;
737     return offset_bit + iwbits_find_first_sbit64(tmp);
738   } else {
739     return 0;
740   }
741 #endif
742 }
743 
744 /**
745  * @brief Search for the first previous set bit position
746  *        starting from the specified offset_bit (EXCLUDED).
747  */
748 static uint64_t _fsm_find_prev_set_bit(
749   const uint64_t   *addr,
750   register uint64_t offset_bit,
751   const uint64_t    min_offset_bit,
752   int              *found) {
753   register const uint64_t *p;
754   register uint64_t tmp, bit, size;
755   *found = 0;
756   if (min_offset_bit >= offset_bit) {
757     return 0;
758   }
759   size = offset_bit - min_offset_bit;
760   bit = offset_bit & (64 - 1);
761   p = addr + offset_bit / 64;
762 
763 #ifdef IW_BIGENDIAN
764   uint64_t pv;
765   if (bit) {
766     pv = *p;
767     pv = (iwbits_reverse_64(IW_ITOHLL(pv)) >> (64 - bit));
768     if (pv) {
769       pv = iwbits_find_first_sbit64(pv);
770       if (pv >= size) {
771         return 0;
772       } else {
773         *found = 1;
774         assert(offset_bit > pv);
775         return offset_bit > pv ? offset_bit - pv - 1 : 0;
776       }
777     }
778     offset_bit -= bit;
779     size -= bit;
780   }
781   while (size & ~(64 - 1)) {
782     if (*(--p)) {
783       pv = *p;
784       *found = 1;
785       tmp = iwbits_find_first_sbit64(iwbits_reverse_64(IW_ITOHLL(pv)));
786       assert(offset_bit > tmp);
787       return offset_bit > tmp ? offset_bit - tmp - 1 : 0;
788     }
789     offset_bit -= 64;
790     size -= 64;
791   }
792   if (size == 0) {
793     return 0;
794   }
795   pv = *(--p);
796   tmp = iwbits_reverse_64(IW_ITOHLL(pv)) & ((((uint64_t) 1) << size) - 1);
797 #else
798   if (bit) {
799     tmp = (iwbits_reverse_64(*p) >> (64 - bit));
800     if (tmp) {
801       tmp = iwbits_find_first_sbit64(tmp);
802       if (tmp >= size) {
803         return 0;
804       } else {
805         *found = 1;
806         assert(offset_bit > tmp);
807         return offset_bit > tmp ? offset_bit - tmp - 1 : 0;
808       }
809     }
810     offset_bit -= bit;
811     size -= bit;
812   }
813   while (size & ~(64 - 1)) {
814     if (*(--p)) {
815       *found = 1;
816       tmp = iwbits_find_first_sbit64(iwbits_reverse_64(*p));
817       assert(offset_bit > tmp);
818       return offset_bit > tmp ? offset_bit - tmp - 1 : 0;
819     }
820     offset_bit -= 64;
821     size -= 64;
822   }
823   if (size == 0) {
824     return 0;
825   }
826   tmp = iwbits_reverse_64(*(--p)) & ((((uint64_t) 1) << size) - 1);
827 #endif
828   if (tmp) {
829     uint64_t tmp2;
830     *found = 1;
831     tmp2 = iwbits_find_first_sbit64(tmp);
832     assert(offset_bit > tmp2);
833     return offset_bit > tmp2 ? offset_bit - tmp2 - 1 : 0;
834   } else {
835     return 0;
836   }
837 }
838 
839 /**
840  * @brief Return a previously allocated blocks
841  *        back into the free-blocks pool.
842  *
843  * @param impl `FSM`
844  * @param offset_blk Starting block number of the specified range.
845  * @param length_blk Range size in blocks.
846  */
847 static iwrc _fsm_blk_deallocate_lw(
848   FSM           *impl,
849   const uint64_t offset_blk,
850   const uint64_t length_blk) {
851   iwrc rc;
852   uint64_t *bmptr;
853   uint64_t left, right;
854   int hasleft = 0, hasright = 0;
855   uint64_t key_offset = offset_blk, key_length = length_blk;
856   uint64_t rm_offset = 0, rm_length = 0;
857   uint64_t lfbkoff = impl->lfbkoff;
858   uint64_t end_offset_blk = offset_blk + length_blk;
859   fsm_bmopts_t bopts = FSM_BM_NONE;
860 
861 
862   if (impl->oflags & IWFSM_STRICT) {
863     bopts |= FSM_BM_STRICT;
864   }
865   rc = _fsm_set_bit_status_lw(impl, offset_blk, length_blk, 0, bopts);
866   RCRET(rc);
867 
868   rc = _fsm_bmptr(impl, &bmptr);
869   RCRET(rc);
870 
871   /* Merge with neighborhoods */
872   left = _fsm_find_prev_set_bit(bmptr, offset_blk, 0, &hasleft);
873   if (lfbkoff && (lfbkoff == end_offset_blk)) {
874     right = lfbkoff + impl->lfbklen;
875     hasright = 1;
876   } else {
877     uint64_t maxoff = lfbkoff ? lfbkoff : (impl->bmlen << 3);
878     right = _fsm_find_next_set_bit(bmptr, end_offset_blk, maxoff, &hasright);
879   }
880 
881   if (hasleft) {
882     if (offset_blk > left + 1) {
883       left += 1;
884       rm_offset = left;
885       rm_length = offset_blk - left;
886       IWRC(_fsm_del_fbk(impl, rm_offset, rm_length), rc);
887       key_offset = rm_offset;
888       key_length += rm_length;
889     }
890   } else if (offset_blk > 0) { /* zero start */
891     rm_offset = 0;
892     rm_length = offset_blk;
893     IWRC(_fsm_del_fbk(impl, rm_offset, rm_length), rc);
894     key_offset = rm_offset;
895     key_length += rm_length;
896   }
897   if (hasright && (right > end_offset_blk)) {
898     rm_offset = end_offset_blk;
899     rm_length = right - end_offset_blk;
900     _fsm_del_fbk(impl, rm_offset, rm_length);
901     key_length += rm_length;
902   }
903   IWRC(_fsm_put_fbk(impl, key_offset, key_length), rc);
904   return rc;
905 }
906 
907 /**
908  * @brief Initialize a new free-space bitmap area.
909  *
910  * If bitmap exists, its content will be moved into newly created area.
911  * Blocks from the previous bitmap are will disposed and deallocated.
912  *
913  * @param impl `FSM`
914  * @param bmoff Byte offset of the new bitmap. Value must be page aligned.
915  * @param bmlen Byte length of the new bitmap. Value must be page aligned.
916                 Its length must not be lesser than length of old bitmap.
917  */
918 static iwrc _fsm_init_lw(FSM *impl, uint64_t bmoff, uint64_t bmlen) {
919   iwrc rc;
920   uint8_t *mm, *mm2;
921   size_t sp, sp2;
922   uint64_t old_bmoff, old_bmlen;
923   IWFS_EXT *pool = &impl->pool;
924 
925   if ((bmlen & ((1U << impl->bpow) - 1)) || (bmoff & ((1U << impl->bpow) - 1)) || (bmoff & (impl->aunit - 1))) {
926     return IWFS_ERROR_RANGE_NOT_ALIGNED;
927   }
928   if (bmlen < impl->bmlen) {
929     rc = IW_ERROR_INVALID_ARGS;
930     iwlog_ecode_error(rc, "Length of the newly initiated bitmap area (bmlen): %" PRIu64
931                       " must not be lesser than the current bitmap area length %" PRIu64 "",
932                       bmlen, impl->bmlen);
933     return rc;
934   }
935   if (bmlen * 8 < ((bmoff + bmlen) >> impl->bpow) + 1) {
936     rc = IW_ERROR_INVALID_ARGS;
937     iwlog_ecode_error(rc, "Length of the newly initiated bitmap area (bmlen): %" PRIu64
938                       " is not enough to handle bitmap itself and the file header area.",
939                       bmlen);
940     return rc;
941   }
942   rc = _fsm_ensure_size_lw(impl, bmoff + bmlen);
943   RCRET(rc);
944   if (impl->mmap_all) {
945     // get mmap area without locking, since we ensured what pool file will not be remapped
946     rc = pool->probe_mmap(pool, 0, &mm, &sp);
947     RCRET(rc);
948     if (sp < bmoff + bmlen) {
949       return IWFS_ERROR_NOT_MMAPED;
950     } else {
951       mm += bmoff;
952     }
953   } else {
954     // get mmap area without locking, since we ensured what pool file will not be remapped
955     rc = pool->probe_mmap(pool, bmoff, &mm, &sp);
956     RCRET(rc);
957     if (sp < bmlen) {
958       return IWFS_ERROR_NOT_MMAPED;
959     }
960   }
961   if (impl->bmlen) {
962     /* We have an old active bitmap. Lets copy its content to the new location.*/
963     if (IW_RANGES_OVERLAP(impl->bmoff, impl->bmoff + impl->bmlen, bmoff, bmoff + bmlen)) {
964       iwlog_ecode_error2(rc, "New and old bitmap areas are overlaped");
965       return IW_ERROR_INVALID_ARGS;
966     }
967     if (impl->mmap_all) {
968       mm2 = mm - bmoff + impl->bmoff;
969     } else {
970       rc = pool->probe_mmap(pool, impl->bmoff, &mm2, &sp2);
971       if (!rc && (sp2 < impl->bmlen)) {
972         rc = IWFS_ERROR_NOT_MMAPED;
973       }
974       if (rc) {
975         iwlog_ecode_error2(rc, "Old bitmap area is not mmaped");
976         return rc;
977       }
978     }
979     assert(!((impl->bmlen - bmlen) & ((1U << impl->bpow) - 1)));
980     if (impl->dlsnr) {
981       rc = impl->dlsnr->onwrite(impl->dlsnr, bmoff, mm2, impl->bmlen, 0);
982       RCRET(rc);
983     }
984     memcpy(mm, mm2, impl->bmlen);
985     if (bmlen > impl->bmlen) {
986       memset(mm + impl->bmlen, 0, bmlen - impl->bmlen);
987       if (impl->dlsnr) {
988         rc = impl->dlsnr->onset(impl->dlsnr, bmoff + impl->bmlen, 0, bmlen - impl->bmlen, 0);
989         RCRET(rc);
990       }
991     }
992   } else {
993     mm2 = 0;
994     memset(mm, 0, bmlen);
995     if (impl->dlsnr) {
996       rc = impl->dlsnr->onset(impl->dlsnr, bmoff, 0, bmlen, 0);
997       RCRET(rc);
998     }
999   }
1000 
1001   /* Backup the previous bitmap range */
1002   old_bmlen = impl->bmlen;
1003   old_bmoff = impl->bmoff;
1004   impl->bmoff = bmoff;
1005   impl->bmlen = bmlen;
1006 
1007   rc = _fsm_set_bit_status_lw(impl, (bmoff >> impl->bpow), (bmlen >> impl->bpow), 1, FSM_BM_NONE);
1008   RCGO(rc, rollback);
1009   if (!old_bmlen) { /* First time initialization */
1010     /* Header allocation */
1011     rc = _fsm_set_bit_status_lw(impl, 0, (impl->hdrlen >> impl->bpow), 1, FSM_BM_NONE);
1012     RCGO(rc, rollback);
1013   }
1014 
1015   /* Reload fsm tree */
1016   _fsm_load_fsm_lw(impl, mm, bmlen);
1017 
1018   /* Flush new meta */
1019   rc = _fsm_write_meta_lw(impl);
1020   RCGO(rc, rollback);
1021 
1022   rc = pool->sync(pool, IWFS_FDATASYNC);
1023   RCGO(rc, rollback);
1024 
1025   if (old_bmlen) {
1026     /* Now we are save to deallocate the old bitmap */
1027     rc = _fsm_blk_deallocate_lw(impl, (old_bmoff >> impl->bpow), (old_bmlen >> impl->bpow));
1028     if (!impl->mmap_all) {
1029       pool->remove_mmap(pool, old_bmoff);
1030     }
1031   }
1032   return rc;
1033 
1034 rollback:
1035   /* try to rollback previous bitmap state */
1036   impl->bmoff = old_bmoff;
1037   impl->bmlen = old_bmlen;
1038   if (old_bmlen && mm2) {
1039     _fsm_load_fsm_lw(impl, mm2, old_bmlen);
1040   }
1041   pool->sync(pool, IWFS_FDATASYNC);
1042   return rc;
1043 }
1044 
1045 /**
1046  * @brief Resize bitmap area.
1047  * @param impl `FSM`
1048  * @param size New size of bitmap area in bytes.
1049  */
1050 static iwrc _fsm_resize_fsm_bitmap_lw(FSM *impl, uint64_t size) {
1051   iwrc rc;
1052   uint64_t bmoffset = 0, bmlen, sp;
1053   IWFS_EXT *pool = &impl->pool;
1054 
1055   if (impl->bmlen >= size) {
1056     return 0;
1057   }
1058   bmlen = IW_ROUNDUP(size, impl->aunit); /* align to the system page size. */
1059   rc = _fsm_blk_allocate_aligned_lw(
1060     impl, (bmlen >> impl->bpow), &bmoffset, &sp, UINT64_MAX,
1061     IWFSM_ALLOC_NO_STATS | IWFSM_ALLOC_NO_EXTEND | IWFSM_ALLOC_NO_OVERALLOCATE);
1062   if (!rc) {
1063     bmoffset = bmoffset << impl->bpow;
1064     bmlen = sp << impl->bpow;
1065   } else if (rc == IWFS_ERROR_NO_FREE_SPACE) {
1066     bmoffset = impl->bmlen * (1 << impl->bpow) * 8;
1067     bmoffset = IW_ROUNDUP(bmoffset, impl->aunit);
1068   }
1069   if (!impl->mmap_all) {
1070     rc = pool->add_mmap(pool, bmoffset, bmlen, impl->mmap_opts);
1071     RCRET(rc);
1072   }
1073   rc = _fsm_init_lw(impl, bmoffset, bmlen);
1074   if (rc && !impl->mmap_all) {
1075     pool->remove_mmap(pool, bmoffset);
1076   }
1077   return rc;
1078 }
1079 
1080 /**
1081  * @brief Allocate a continuous segment of blocks.
1082  *
1083  * @param impl `FSM`
1084  * @param length_blk Desired segment length in blocks.
1085  * @param [in,out] offset_blk Allocated segment offset in blocks will be stored into.
1086  *                It also specified the desired segment offset to provide allocation locality.
1087  * @param [out] olength_blk Assigned segment length in blocks.
1088  * @param opts
1089  */
1090 static iwrc _fsm_blk_allocate_lw(
1091   FSM            *impl,
1092   uint64_t        length_blk,
1093   uint64_t       *offset_blk,
1094   uint64_t       *olength_blk,
1095   iwfs_fsm_aflags opts) {
1096   iwrc rc;
1097   FSMBK *nk;
1098   fsm_bmopts_t bopts = FSM_BM_NONE;
1099 
1100   if (opts & IWFSM_ALLOC_PAGE_ALIGNED) {
1101     while (1) {
1102       rc = _fsm_blk_allocate_aligned_lw(impl, length_blk, offset_blk, olength_blk, UINT64_MAX, opts);
1103       if (rc == IWFS_ERROR_NO_FREE_SPACE) {
1104         if (opts & IWFSM_ALLOC_NO_EXTEND) {
1105           return IWFS_ERROR_NO_FREE_SPACE;
1106         }
1107         rc = _fsm_resize_fsm_bitmap_lw(impl, impl->bmlen << 1);
1108         RCRET(rc);
1109         continue;
1110       }
1111       if (!rc && (opts & IWFSM_SOLID_ALLOCATED_SPACE)) {
1112         uint64_t bs = *offset_blk;
1113         int64_t bl = *olength_blk;
1114         rc = _fsm_ensure_size_lw(impl, (bs << impl->bpow) + (bl << impl->bpow));
1115       }
1116       return rc;
1117     }
1118   }
1119 
1120   *olength_blk = length_blk;
1121 
1122 start:
1123   nk = _fsm_find_matching_fblock_lw(impl, *offset_blk, length_blk, opts);
1124   if (nk) { /* use existing free space block */
1125     uint64_t nlength = FSMBK_LENGTH(nk);
1126     *offset_blk = FSMBK_OFFSET(nk);
1127     assert(kb_get(fsm, impl->fsm, *nk));
1128 
1129     _fsm_del_fbk2(impl, *nk);
1130 
1131     if (nlength > length_blk) { /* re-save rest of free-space */
1132       if (!(opts & IWFSM_ALLOC_NO_OVERALLOCATE) && impl->crznum) {
1133         /* todo use lognormal distribution? */
1134         double_t d = ((double_t) impl->crzsum / (double_t) impl->crznum)        /*avg*/
1135                      - (nlength - length_blk);                                  /*rest blk size*/
1136         double_t s = ((double_t) impl->crzvar / (double_t) impl->crznum) * 6.0; /* blk size dispersion * 6 */
1137         if ((s > 1) && (d > 0) && (d * d > s)) {
1138           /* its better to attach rest of block to
1139              the record */
1140           *olength_blk = nlength;
1141         } else {
1142           _fsm_put_fbk(impl, (*offset_blk + length_blk), (nlength - length_blk));
1143         }
1144       } else {
1145         _fsm_put_fbk(impl, (*offset_blk + length_blk), (nlength - length_blk));
1146       }
1147     }
1148   } else {
1149     if (opts & IWFSM_ALLOC_NO_EXTEND) {
1150       return IWFS_ERROR_NO_FREE_SPACE;
1151     }
1152     rc = _fsm_resize_fsm_bitmap_lw(impl, impl->bmlen << 1);
1153     RCRET(rc);
1154     goto start;
1155   }
1156 
1157   if (impl->oflags & IWFSM_STRICT) {
1158     bopts |= FSM_BM_STRICT;
1159   }
1160   rc = _fsm_set_bit_status_lw(impl, *offset_blk, *olength_blk, 1, bopts);
1161   if (!rc && !(opts & IWFSM_ALLOC_NO_STATS)) {
1162     double_t avg;
1163     /* Update allocation statistics */
1164     if (impl->crznum > FSM_MAX_STATS_COUNT) {
1165       impl->crznum = 0;
1166       impl->crzsum = 0;
1167       impl->crzvar = 0;
1168     }
1169     ++impl->crznum;
1170     impl->crzsum += length_blk;
1171     avg = (double_t) impl->crzsum / (double_t) impl->crznum; /* average */
1172     impl->crzvar
1173       += (uint64_t) (((double_t) length_blk - avg) * ((double_t) length_blk - avg) + 0.5L); /* variance */
1174   }
1175   if (!rc && (opts & IWFSM_SOLID_ALLOCATED_SPACE)) {
1176     uint64_t bs = *offset_blk;
1177     int64_t bl = *olength_blk;
1178     rc = _fsm_ensure_size_lw(impl, (bs << impl->bpow) + (bl << impl->bpow));
1179   }
1180   if (!rc && (opts & IWFSM_SYNC_BMAP)) {
1181     uint64_t *bmptr;
1182     if (!_fsm_bmptr(impl, &bmptr)) {
1183       IWFS_EXT *pool = &impl->pool;
1184       rc = pool->sync_mmap(pool, impl->bmoff, IWFS_SYNCDEFAULT);
1185     }
1186   }
1187   return rc;
1188 }
1189 
1190 /**
1191  * @brief Remove all free blocks from the and of file and trim its size.
1192  */
1193 static iwrc _fsm_trim_tail_lw(FSM *impl) {
1194   iwrc rc;
1195   int hasleft;
1196   uint64_t length, lastblk, *bmptr;
1197   IWFS_EXT_STATE fstate;
1198   uint64_t offset = 0;
1199 
1200   if (!(impl->omode & IWFS_OWRITE)) {
1201     return 0;
1202   }
1203   /* find free space for fsm with lesser offset than actual */
1204   rc = _fsm_blk_allocate_aligned_lw(
1205     impl, (impl->bmlen >> impl->bpow), &offset, &length, (impl->bmoff >> impl->bpow),
1206     IWFSM_ALLOC_NO_EXTEND | IWFSM_ALLOC_NO_OVERALLOCATE | IWFSM_ALLOC_NO_STATS);
1207 
1208   if (rc && (rc != IWFS_ERROR_NO_FREE_SPACE)) {
1209     return rc;
1210   }
1211   if (rc) {
1212     rc = 0;
1213   } else if ((offset << impl->bpow) < impl->bmoff) {
1214     offset = offset << impl->bpow;
1215     length = length << impl->bpow;
1216     assert(offset != impl->bmoff);
1217     impl->pool.add_mmap(&impl->pool, offset, length, impl->mmap_opts);
1218     rc = _fsm_init_lw(impl, offset, length);
1219     RCGO(rc, finish);
1220   } else {
1221     /* shoud never be reached */
1222     assert(0);
1223     rc = _fsm_blk_deallocate_lw(impl, offset, length);
1224     RCGO(rc, finish);
1225   }
1226 
1227   rc = _fsm_bmptr(impl, &bmptr); // -V519
1228   RCGO(rc, finish);
1229 
1230   lastblk = (impl->bmoff + impl->bmlen) >> impl->bpow;
1231   offset = _fsm_find_prev_set_bit(bmptr, (impl->bmlen << 3), lastblk, &hasleft);
1232   if (hasleft) {
1233     lastblk = offset + 1;
1234   }
1235   rc = impl->pool.state(&impl->pool, &fstate);
1236   if (!rc && (fstate.fsize > (lastblk << impl->bpow))) {
1237     rc = impl->pool.truncate(&impl->pool, lastblk << impl->bpow);
1238   }
1239 
1240 finish:
1241   return rc;
1242 }
1243 
1244 static iwrc _fsm_init_impl(FSM *impl, const IWFS_FSM_OPTS *opts) {
1245   impl->oflags = opts->oflags;
1246   impl->aunit = iwp_alloc_unit();
1247   impl->bpow = opts->bpow;
1248   impl->mmap_all = opts->mmap_all;
1249   if (!impl->bpow) {
1250     impl->bpow = 6;  // 64bit block
1251   } else if (impl->bpow > FSM_MAX_BLOCK_POW) {
1252     return IWFS_ERROR_INVALID_BLOCK_SIZE;
1253   } else if ((1U << impl->bpow) > impl->aunit) {
1254     return IWFS_ERROR_PLATFORM_PAGE;
1255   }
1256   return 0;
1257 }
1258 
1259 static iwrc _fsm_init_locks(FSM *impl, const IWFS_FSM_OPTS *opts) {
1260   if (opts->oflags & IWFSM_NOLOCKS) {
1261     impl->ctlrwlk = 0;
1262     return 0;
1263   }
1264   impl->ctlrwlk = calloc(1, sizeof(*impl->ctlrwlk));
1265   if (!impl->ctlrwlk) {
1266     return iwrc_set_errno(IW_ERROR_ALLOC, errno);
1267   }
1268   int rci = pthread_rwlock_init(impl->ctlrwlk, 0);
1269   if (rci) {
1270     free(impl->ctlrwlk);
1271     impl->ctlrwlk = 0;
1272     return iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci);
1273   }
1274   return 0;
1275 }
1276 
1277 static iwrc _fsm_destroy_locks(FSM *impl) {
1278   if (!impl->ctlrwlk) {
1279     return 0;
1280   }
1281   iwrc rc = 0;
1282   int rci = pthread_rwlock_destroy(impl->ctlrwlk);
1283   if (rci) {
1284     IWRC(iwrc_set_errno(IW_ERROR_THREADING_ERRNO, rci), rc);
1285   }
1286   free(impl->ctlrwlk);
1287   impl->ctlrwlk = 0;
1288   return rc;
1289 }
1290 
1291 static iwrc _fsm_read_meta_lr(FSM *impl) {
1292   iwrc rc;
1293   uint32_t lv;
1294   uint64_t llv;
1295   size_t sp, rp = 0;
1296   uint8_t hdr[IWFSM_CUSTOM_HDR_DATA_OFFSET] = { 0 };
1297 
1298   /*
1299       [FSM_CTL_MAGICK u32][block pow u8]
1300       [bmoffset u64][bmlength u64]
1301       [u64 crzsum][u32 crznum][u64 crszvar][u256 reserved]
1302       [custom header size u32][custom header data...]
1303       [fsm data...]
1304    */
1305 
1306   rc = impl->pool.read(&impl->pool, 0, hdr, IWFSM_CUSTOM_HDR_DATA_OFFSET, &sp);
1307   if (rc) {
1308     iwlog_ecode_error3(rc);
1309     return rc;
1310   }
1311 
1312   /* Magic */
1313   memcpy(&lv, hdr + rp, sizeof(lv)); // -V512
1314   lv = IW_ITOHL(lv);
1315   if (lv != IWFSM_MAGICK) {
1316     rc = IWFS_ERROR_INVALID_FILEMETA;
1317     iwlog_ecode_error2(rc, "Invalid file magic number");
1318     return rc;
1319   }
1320   rp += sizeof(lv);
1321 
1322   /* Block pow */
1323   memcpy(&impl->bpow, hdr + rp, sizeof(impl->bpow));
1324   rp += sizeof(impl->bpow);
1325 
1326   if (impl->bpow > FSM_MAX_BLOCK_POW) {
1327     rc = IWFS_ERROR_INVALID_FILEMETA;
1328     iwlog_ecode_error(rc, "Invalid file blocks pow: %u", impl->bpow);
1329     return rc;
1330   }
1331   if ((1U << impl->bpow) > impl->aunit) {
1332     rc = IWFS_ERROR_PLATFORM_PAGE;
1333     iwlog_ecode_error(rc, "Block size: %u must not be greater than system page size: %zu",
1334                       (1U << impl->bpow), impl->aunit);
1335   }
1336 
1337   /* Free-space bitmap offset */
1338   memcpy(&llv, hdr + rp, sizeof(llv));
1339   llv = IW_ITOHLL(llv);
1340   impl->bmoff = llv;
1341   rp += sizeof(llv);
1342 
1343   /* Free-space bitmap length */
1344   memcpy(&llv, hdr + rp, sizeof(llv));
1345   llv = IW_ITOHLL(llv);
1346   impl->bmlen = llv;
1347   if (llv & (64 - 1)) {
1348     rc = IWFS_ERROR_INVALID_FILEMETA;
1349     iwlog_ecode_error(rc, "Free-space bitmap length is not 64bit aligned: %" PRIuMAX "", impl->bmlen);
1350   }
1351   rp += sizeof(llv);
1352 
1353   /* Cumulative sum of record sizes acquired by `allocate` */
1354   memcpy(&llv, hdr + rp, sizeof(llv));
1355   llv = IW_ITOHLL(llv);
1356   impl->crzsum = llv;
1357   rp += sizeof(llv);
1358 
1359   /* Cumulative number of records acquired by `allocated` */
1360   memcpy(&lv, hdr + rp, sizeof(lv));
1361   lv = IW_ITOHL(lv);
1362   impl->crznum = lv;
1363   rp += sizeof(lv);
1364 
1365   /* Record sizes standard variance (deviation^2 * N) */
1366   memcpy(&llv, hdr + rp, sizeof(llv));
1367   llv = IW_ITOHLL(llv);
1368   impl->crzvar = llv;
1369   rp += sizeof(llv);
1370 
1371   /* Reserved */
1372   rp += 32;
1373 
1374   /* Header size */
1375   memcpy(&lv, hdr + rp, sizeof(lv));
1376   lv = IW_ITOHL(lv);
1377   impl->hdrlen = lv;
1378   rp += sizeof(lv);
1379 
1380   assert(rp == IWFSM_CUSTOM_HDR_DATA_OFFSET);
1381   return rc;
1382 }
1383 
1384 static iwrc _fsm_init_new_lw(FSM *impl, const IWFS_FSM_OPTS *opts) {
1385   FSM_ENSURE_OPEN(impl);
1386   iwrc rc;
1387   uint64_t bmlen, bmoff;
1388   IWFS_EXT *pool = &impl->pool;
1389   assert(impl->aunit && impl->bpow);
1390 
1391   impl->hdrlen = opts->hdrlen + IWFSM_CUSTOM_HDR_DATA_OFFSET;
1392   impl->hdrlen = IW_ROUNDUP(impl->hdrlen, 1ULL << impl->bpow);
1393   bmlen = opts->bmlen > 0 ? IW_ROUNDUP(opts->bmlen, impl->aunit) : impl->aunit;
1394   bmoff = IW_ROUNDUP(impl->hdrlen, impl->aunit);
1395 
1396   if (impl->mmap_all) {
1397     /* mmap whole file */
1398     rc = pool->add_mmap(pool, 0, SIZE_T_MAX, impl->mmap_opts);
1399     RCRET(rc);
1400   } else {
1401     /* mmap header */
1402     rc = pool->add_mmap(pool, 0, impl->hdrlen, impl->mmap_opts);
1403     RCRET(rc);
1404     /* mmap the fsm bitmap index */
1405     rc = pool->add_mmap(pool, bmoff, bmlen, impl->mmap_opts);
1406     RCRET(rc);
1407   }
1408   return _fsm_init_lw(impl, bmoff, bmlen);
1409 }
1410 
1411 static iwrc _fsm_init_existing_lw(FSM *impl) {
1412   FSM_ENSURE_OPEN(impl);
1413   iwrc rc;
1414   size_t sp;
1415   uint8_t *mm;
1416   IWFS_EXT *pool = &impl->pool;
1417 
1418   rc = _fsm_read_meta_lr(impl);
1419   RCGO(rc, finish);
1420 
1421   if (impl->mmap_all) {
1422     /* mmap the whole file */
1423     rc = pool->add_mmap(pool, 0, SIZE_T_MAX, impl->mmap_opts);
1424     RCGO(rc, finish);
1425     rc = pool->probe_mmap(pool, 0, &mm, &sp);
1426     RCGO(rc, finish);
1427     if (sp < impl->bmoff + impl->bmlen) {
1428       rc = IWFS_ERROR_NOT_MMAPED;
1429       goto finish;
1430     } else {
1431       mm += impl->bmoff;
1432     }
1433   } else {
1434     /* mmap the header of file */
1435     rc = pool->add_mmap(pool, 0, impl->hdrlen, impl->mmap_opts);
1436     RCGO(rc, finish);
1437     /* mmap the fsm bitmap index */
1438     rc = pool->add_mmap(pool, impl->bmoff, impl->bmlen, impl->mmap_opts);
1439     RCGO(rc, finish);
1440     rc = pool->probe_mmap(pool, impl->bmoff, &mm, &sp);
1441     RCGO(rc, finish);
1442     if (sp < impl->bmlen) {
1443       rc = IWFS_ERROR_NOT_MMAPED;
1444       goto finish;
1445     }
1446   }
1447 
1448   _fsm_load_fsm_lw(impl, mm, impl->bmlen);
1449 
1450 finish:
1451   return rc;
1452 }
1453 
1454 /**
1455  * @brief Check if all blocks within the specified range have been `allocated`.
1456  *
1457  * @param impl `FSM`
1458  * @param offset_blk Starting block number of the specified range.
1459  * @param length_blk Range size in blocks.
1460  * @param [out] ret Checking result.
1461  */
1462 static iwrc _fsm_is_fully_allocated_lr(FSM *impl, uint64_t offset_blk, uint64_t length_blk, int *ret) {
1463   uint64_t end = offset_blk + length_blk;
1464   *ret = 1;
1465   if ((length_blk < 1) || (end < offset_blk) || (end > (impl->bmlen << 3))) {
1466     *ret = 0;
1467     return 0;
1468   }
1469   iwrc rc = _fsm_set_bit_status_lw(impl, offset_blk, length_blk, 0, FSM_BM_DRY_RUN | FSM_BM_STRICT);
1470   if (rc == IWFS_ERROR_FSM_SEGMENTATION) {
1471     *ret = 0;
1472     return 0;
1473   }
1474   return rc;
1475 }
1476 
1477 /*************************************************************************************************
1478 *                                  Public API *
1479 *************************************************************************************************/
1480 
1481 static iwrc _fsm_write(struct IWFS_FSM *f, off_t off, const void *buf, size_t siz, size_t *sp) {
1482   FSM_ENSURE_OPEN2(f);
1483   FSM *impl = f->impl;
1484   iwrc rc = _fsm_ctrl_rlock(impl);
1485   RCRET(rc);
1486   if (impl->oflags & IWFSM_STRICT) {
1487     int allocated = 0;
1488     IWRC(_fsm_is_fully_allocated_lr(impl,
1489                                     (uint64_t) off >> impl->bpow,
1490                                     IW_ROUNDUP(siz, 1ULL << impl->bpow) >> impl->bpow,
1491                                     &allocated), rc);
1492     if (!rc) {
1493       if (!allocated) {
1494         rc = IWFS_ERROR_FSM_SEGMENTATION;
1495       } else {
1496         rc = impl->pool.write(&impl->pool, off, buf, siz, sp);
1497       }
1498     }
1499   } else {
1500     rc = impl->pool.write(&impl->pool, off, buf, siz, sp);
1501   }
1502   _fsm_ctrl_unlock(impl);
1503   return rc;
1504 }
1505 
1506 static iwrc _fsm_read(struct IWFS_FSM *f, off_t off, void *buf, size_t siz, size_t *sp) {
1507   FSM_ENSURE_OPEN2(f);
1508   FSM *impl = f->impl;
1509   iwrc rc = _fsm_ctrl_rlock(impl);
1510   RCRET(rc);
1511   if (impl->oflags & IWFSM_STRICT) {
1512     int allocated = 0;
1513     IWRC(_fsm_is_fully_allocated_lr(impl, (uint64_t) off >> impl->bpow,
1514                                     IW_ROUNDUP(siz, 1ULL << impl->bpow) >> impl->bpow,
1515                                     &allocated), rc);
1516     if (!rc) {
1517       if (!allocated) {
1518         rc = IWFS_ERROR_FSM_SEGMENTATION;
1519       } else {
1520         rc = impl->pool.read(&impl->pool, off, buf, siz, sp);
1521       }
1522     }
1523   } else {
1524     rc = impl->pool.read(&impl->pool, off, buf, siz, sp);
1525   }
1526   _fsm_ctrl_unlock(impl);
1527   return rc;
1528 }
1529 
1530 static iwrc _fsm_sync(struct IWFS_FSM *f, iwfs_sync_flags flags) {
1531   FSM_ENSURE_OPEN2(f);
1532   iwrc rc = _fsm_ctrl_rlock(f->impl);
1533   RCRET(rc);
1534   IWRC(_fsm_write_meta_lw(f->impl), rc);
1535   IWRC(f->impl->pool.sync(&f->impl->pool, flags), rc);
1536   IWRC(_fsm_ctrl_unlock(f->impl), rc);
1537   return rc;
1538 }
1539 
1540 static iwrc _fsm_close(struct IWFS_FSM *f) {
1541   if (!f || !f->impl) {
1542     return 0;
1543   }
1544   FSM *impl = f->impl;
1545   iwrc rc = 0;
1546   IWRC(_fsm_ctrl_wlock(impl), rc);
1547   if (impl->fsm && (impl->omode & IWFS_OWRITE)) {
1548     if (!(impl->oflags & IWFSM_NO_TRIM_ON_CLOSE)) {
1549       IWRC(_fsm_trim_tail_lw(impl), rc);
1550     }
1551     IWRC(_fsm_write_meta_lw(impl), rc);
1552     if (!impl->dlsnr) {
1553       IWRC(impl->pool.sync(&impl->pool, IWFS_SYNCDEFAULT), rc);
1554     }
1555   }
1556   IWRC(impl->pool.close(&impl->pool), rc);
1557   if (impl->fsm) {
1558     // -V:__kb_destroy:701, 769
1559     __kb_destroy(impl->fsm);
1560   }
1561   IWRC(_fsm_ctrl_unlock(impl), rc);
1562   IWRC(_fsm_destroy_locks(impl), rc);
1563   impl->f->impl = 0;
1564   impl->f = 0;
1565   free(impl);
1566   return rc;
1567 }
1568 
1569 IW_INLINE iwrc _fsm_ensure_size_lw(FSM *impl, off_t size) {
1570   return impl->pool.ensure_size(&impl->pool, size);
1571 }
1572 
1573 static iwrc _fsm_ensure_size(struct IWFS_FSM *f, off_t size) {
1574   FSM_ENSURE_OPEN2(f);
1575   iwrc rc = _fsm_ctrl_rlock(f->impl);
1576   RCRET(rc);
1577   if (f->impl->bmoff + f->impl->bmlen > size) {
1578     rc = IWFS_ERROR_RESIZE_FAIL;
1579     goto finish;
1580   }
1581   rc = _fsm_ensure_size_lw(f->impl, size);
1582 
1583 finish:
1584   IWRC(_fsm_ctrl_unlock(f->impl), rc);
1585   return rc;
1586 }
1587 
1588 static iwrc _fsm_add_mmap(struct IWFS_FSM *f, off_t off, size_t maxlen, iwfs_ext_mmap_opts_t opts) {
1589   FSM_ENSURE_OPEN2(f);
1590   return f->impl->pool.add_mmap(&f->impl->pool, off, maxlen, opts);
1591 }
1592 
1593 static iwrc _fsm_remap_all(struct IWFS_FSM *f) {
1594   FSM_ENSURE_OPEN2(f);
1595   return f->impl->pool.remap_all(&f->impl->pool);
1596 }
1597 
1598 iwrc _fsm_acquire_mmap(struct IWFS_FSM *f, off_t off, uint8_t **mm, size_t *sp) {
1599   return f->impl->pool.acquire_mmap(&f->impl->pool, off, mm, sp);
1600 }
1601 
1602 iwrc _fsm_release_mmap(struct IWFS_FSM *f) {
1603   return f->impl->pool.release_mmap(&f->impl->pool);
1604 }
1605 
1606 static iwrc _fsm_probe_mmap(struct IWFS_FSM *f, off_t off, uint8_t **mm, size_t *sp) {
1607   FSM_ENSURE_OPEN2(f);
1608   return f->impl->pool.probe_mmap(&f->impl->pool, off, mm, sp);
1609 }
1610 
1611 static iwrc _fsm_remove_mmap(struct IWFS_FSM *f, off_t off) {
1612   FSM_ENSURE_OPEN2(f);
1613   return f->impl->pool.remove_mmap(&f->impl->pool, off);
1614 }
1615 
1616 static iwrc _fsm_sync_mmap(struct IWFS_FSM *f, off_t off, iwfs_sync_flags flags) {
1617   FSM_ENSURE_OPEN2(f);
1618   return f->impl->pool.sync_mmap(&f->impl->pool, off, flags);
1619 }
1620 
1621 static iwrc _fsm_allocate(struct IWFS_FSM *f, off_t len, off_t *oaddr, off_t *olen, iwfs_fsm_aflags opts) {
1622   FSM_ENSURE_OPEN2(f);
1623   iwrc rc;
1624   uint64_t sbnum, nlen;
1625   FSM *impl = f->impl;
1626 
1627   *olen = 0;
1628   if (!(impl->omode & IWFS_OWRITE)) {
1629     return IW_ERROR_READONLY;
1630   }
1631   if (len <= 0) {
1632     return IW_ERROR_INVALID_ARGS;
1633   }
1634   /* Required blocks number */
1635   sbnum = (uint64_t) *oaddr >> impl->bpow;
1636   len = IW_ROUNDUP(len, 1ULL << impl->bpow);
1637 
1638   rc = _fsm_ctrl_wlock(impl);
1639   RCRET(rc);
1640   rc = _fsm_blk_allocate_lw(f->impl, (uint64_t) len >> impl->bpow, &sbnum, &nlen, opts);
1641   if (!rc) {
1642     *olen = (nlen << impl->bpow);
1643     *oaddr = (sbnum << impl->bpow);
1644   }
1645   IWRC(_fsm_ctrl_unlock(impl), rc);
1646   return rc;
1647 }
1648 
1649 static iwrc _fsm_reallocate(struct IWFS_FSM *f, off_t nlen, off_t *oaddr, off_t *olen, iwfs_fsm_aflags opts) {
1650   FSM_ENSURE_OPEN2(f);
1651   iwrc rc;
1652   FSM *impl = f->impl;
1653 
1654   if (!(impl->omode & IWFS_OWRITE)) {
1655     return IW_ERROR_READONLY;
1656   }
1657   if ((*oaddr & ((1ULL << impl->bpow) - 1)) || (*olen & ((1ULL << impl->bpow) - 1))) {
1658     return IWFS_ERROR_RANGE_NOT_ALIGNED;
1659   }
1660   uint64_t sp;
1661   uint64_t nlen_blk = IW_ROUNDUP((uint64_t) nlen, 1ULL << impl->bpow) >> impl->bpow;
1662   uint64_t olen_blk = (uint64_t) *olen >> impl->bpow;
1663   uint64_t oaddr_blk = (uint64_t) *oaddr >> impl->bpow;
1664   uint64_t naddr_blk = oaddr_blk;
1665 
1666   if (nlen_blk == olen_blk) {
1667     return 0;
1668   }
1669   rc = _fsm_ctrl_wlock(impl);
1670   RCRET(rc);
1671   if (nlen_blk < olen_blk) {
1672     rc = _fsm_blk_deallocate_lw(impl, oaddr_blk + nlen_blk, olen_blk - nlen_blk);
1673     if (!rc) {
1674       *oaddr = oaddr_blk << impl->bpow;
1675       *olen = nlen_blk << impl->bpow;
1676     }
1677   } else {
1678     rc = _fsm_blk_allocate_lw(impl, nlen_blk, &naddr_blk, &sp, opts);
1679     RCGO(rc, finish);
1680     if (naddr_blk != oaddr_blk) {
1681       rc = impl->pool.copy(&impl->pool, *oaddr, (size_t) *olen, naddr_blk << impl->bpow);
1682       RCGO(rc, finish);
1683     }
1684     rc = _fsm_blk_deallocate_lw(impl, oaddr_blk, olen_blk);
1685     RCGO(rc, finish);
1686     *oaddr = naddr_blk << impl->bpow;
1687     *olen = sp << impl->bpow;
1688   }
1689 
1690 finish:
1691   IWRC(_fsm_ctrl_unlock(impl), rc);
1692   return rc;
1693 }
1694 
1695 static iwrc _fsm_deallocate(struct IWFS_FSM *f, off_t addr, off_t len) {
1696   FSM_ENSURE_OPEN2(f);
1697   iwrc rc;
1698   FSM *impl = f->impl;
1699   off_t offset_blk = (uint64_t) addr >> impl->bpow;
1700   off_t length_blk = (uint64_t) len >> impl->bpow;
1701 
1702   if (!(impl->omode & IWFS_OWRITE)) {
1703     return IW_ERROR_READONLY;
1704   }
1705   if (addr & ((1ULL << impl->bpow) - 1)) {
1706     return IWFS_ERROR_RANGE_NOT_ALIGNED;
1707   }
1708   rc = _fsm_ctrl_wlock(impl);
1709   RCRET(rc);
1710   if (  IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, 0, (impl->hdrlen >> impl->bpow))
1711      || IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, (impl->bmoff >> impl->bpow),
1712                           (impl->bmoff >> impl->bpow) + (impl->bmlen >> impl->bpow))) {
1713     // Deny deallocations in header or free-space bitmap itself
1714     IWRC(_fsm_ctrl_unlock(impl), rc);
1715     return IWFS_ERROR_FSM_SEGMENTATION;
1716   }
1717   rc = _fsm_blk_deallocate_lw(impl, (uint64_t) offset_blk, (uint64_t) length_blk);
1718   IWRC(_fsm_ctrl_unlock(impl), rc);
1719   return rc;
1720 }
1721 
1722 static iwrc _fsm_check_allocation_status(struct IWFS_FSM *f, off_t addr, off_t len, bool allocated) {
1723   FSM *impl = f->impl;
1724   if ((addr & ((1ULL << impl->bpow) - 1)) || (len & ((1ULL << impl->bpow) - 1))) {
1725     return IWFS_ERROR_RANGE_NOT_ALIGNED;
1726   }
1727   iwrc rc = _fsm_ctrl_rlock(impl);
1728   RCRET(rc);
1729   off_t offset_blk = (uint64_t) addr >> impl->bpow;
1730   off_t length_blk = (uint64_t) len >> impl->bpow;
1731   if (  IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, 0, (impl->hdrlen >> impl->bpow))
1732      || IW_RANGES_OVERLAP(offset_blk, offset_blk + length_blk, (impl->bmoff >> impl->bpow),
1733                           (impl->bmoff >> impl->bpow) + (impl->bmlen >> impl->bpow))) {
1734     IWRC(_fsm_ctrl_unlock(impl), rc);
1735     return IWFS_ERROR_FSM_SEGMENTATION;
1736   }
1737   rc = _fsm_set_bit_status_lw(impl, (uint64_t) offset_blk, (uint64_t) length_blk,
1738                               allocated ? 0 : 1, FSM_BM_DRY_RUN | FSM_BM_STRICT);
1739   IWRC(_fsm_ctrl_unlock(impl), rc);
1740   return rc;
1741 }
1742 
1743 static iwrc _fsm_writehdr(struct IWFS_FSM *f, off_t off, const void *buf, off_t siz) {
1744   FSM_ENSURE_OPEN2(f);
1745   iwrc rc;
1746   uint8_t *mm;
1747   if (siz < 1) {
1748     return 0;
1749   }
1750   FSM *impl = f->impl;
1751   if ((IWFSM_CUSTOM_HDR_DATA_OFFSET + off + siz) > impl->hdrlen) {
1752     return IW_ERROR_OUT_OF_BOUNDS;
1753   }
1754   rc = impl->pool.acquire_mmap(&impl->pool, 0, &mm, 0);
1755   if (!rc) {
1756     if (impl->dlsnr) {
1757       rc = impl->dlsnr->onwrite(impl->dlsnr, IWFSM_CUSTOM_HDR_DATA_OFFSET + off, buf, siz, 0);
1758     }
1759     memmove(mm + IWFSM_CUSTOM_HDR_DATA_OFFSET + off, buf, (size_t) siz);
1760     IWRC(impl->pool.release_mmap(&impl->pool), rc);
1761   }
1762   return rc;
1763 }
1764 
1765 static iwrc _fsm_readhdr(struct IWFS_FSM *f, off_t off, void *buf, off_t siz) {
1766   FSM_ENSURE_OPEN2(f);
1767   iwrc rc;
1768   uint8_t *mm;
1769   if (siz < 1) {
1770     return 0;
1771   }
1772   FSM *impl = f->impl;
1773   if ((IWFSM_CUSTOM_HDR_DATA_OFFSET + off + siz) > impl->hdrlen) {
1774     return IW_ERROR_OUT_OF_BOUNDS;
1775   }
1776   rc = impl->pool.acquire_mmap(&impl->pool, 0, &mm, 0);
1777   if (!rc) {
1778     memmove(buf, mm + IWFSM_CUSTOM_HDR_DATA_OFFSET + off, (size_t) siz);
1779     rc = impl->pool.release_mmap(&impl->pool);
1780   }
1781   return rc;
1782 }
1783 
1784 static iwrc _fsm_clear(struct IWFS_FSM *f, iwfs_fsm_clrfalgs clrflags) {
1785   FSM_ENSURE_OPEN2(f);
1786   FSM *impl = f->impl;
1787   uint64_t bmoff, bmlen;
1788   iwrc rc = _fsm_ctrl_wlock(impl);
1789   bmlen = impl->bmlen;
1790   if (!bmlen) {
1791     goto finish;
1792   }
1793   if (!impl->mmap_all && impl->bmoff) {
1794     IWRC(impl->pool.remove_mmap(&impl->pool, impl->bmoff), rc);
1795   }
1796   bmoff = IW_ROUNDUP(impl->hdrlen, impl->aunit);
1797   if (!impl->mmap_all) {
1798     IWRC(impl->pool.add_mmap(&impl->pool, bmoff, bmlen, impl->mmap_opts), rc);
1799   }
1800   RCGO(rc, finish);
1801   impl->bmlen = 0;
1802   impl->bmoff = 0;
1803   rc = _fsm_init_lw(impl, bmoff, bmlen);
1804   if (!rc && (clrflags & IWFSM_CLEAR_TRIM)) {
1805     rc = _fsm_trim_tail_lw(impl);
1806   }
1807 
1808 finish:
1809   IWRC(_fsm_ctrl_unlock(impl), rc);
1810   return rc;
1811 }
1812 
1813 static iwrc _fsm_extfile(struct IWFS_FSM *f, IWFS_EXT **ext) {
1814   FSM_ENSURE_OPEN2(f);
1815   *ext = &f->impl->pool;
1816   return 0;
1817 }
1818 
1819 static iwrc _fsm_state(struct IWFS_FSM *f, IWFS_FSM_STATE *state) {
1820   FSM_ENSURE_OPEN2(f);
1821   FSM *impl = f->impl;
1822   iwrc rc = _fsm_ctrl_rlock(impl);
1823   memset(state, 0, sizeof(*state));
1824   IWRC(impl->pool.state(&impl->pool, &state->exfile), rc);
1825   state->block_size = 1U << impl->bpow;
1826   state->oflags = impl->oflags;
1827   state->hdrlen = impl->hdrlen;
1828   state->blocks_num = impl->bmlen << 3;
1829   state->free_segments_num = (uint64_t) kb_size(impl->fsm);
1830   state->avg_alloc_size = impl->crznum > 0 ? (double_t) impl->crzsum / (double_t) impl->crznum : 0;
1831   state->alloc_dispersion = impl->crznum > 0 ? (double_t) impl->crzvar / (double_t) impl->crznum : 0;
1832   IWRC(_fsm_ctrl_unlock(impl), rc);
1833   return rc;
1834 }
1835 
1836 iwrc iwfs_fsmfile_open(IWFS_FSM *f, const IWFS_FSM_OPTS *opts) {
1837   assert(f && opts);
1838   iwrc rc = 0;
1839   IWFS_EXT_STATE fstate = { 0 };
1840   const char *path = opts->exfile.file.path;
1841 
1842   memset(f, 0, sizeof(*f));
1843   rc = iwfs_fsmfile_init();
1844   RCGO(rc, finish);
1845 
1846   f->write = _fsm_write;
1847   f->read = _fsm_read;
1848   f->close = _fsm_close;
1849   f->sync = _fsm_sync;
1850   f->state = _fsm_state;
1851 
1852   f->ensure_size = _fsm_ensure_size;
1853   f->add_mmap = _fsm_add_mmap;
1854   f->remap_all = _fsm_remap_all;
1855   f->acquire_mmap = _fsm_acquire_mmap;
1856   f->probe_mmap = _fsm_probe_mmap;
1857   f->release_mmap = _fsm_release_mmap;
1858   f->remove_mmap = _fsm_remove_mmap;
1859   f->sync_mmap = _fsm_sync_mmap;
1860 
1861   f->allocate = _fsm_allocate;
1862   f->reallocate = _fsm_reallocate;
1863   f->deallocate = _fsm_deallocate;
1864   f->check_allocation_status = _fsm_check_allocation_status;
1865   f->writehdr = _fsm_writehdr;
1866   f->readhdr = _fsm_readhdr;
1867   f->clear = _fsm_clear;
1868   f->extfile = _fsm_extfile;
1869 
1870   if (!path) {
1871     return IW_ERROR_INVALID_ARGS;
1872   }
1873   FSM *impl = f->impl = calloc(1, sizeof(*f->impl));
1874   if (!impl) {
1875     return iwrc_set_errno(IW_ERROR_ALLOC, errno);
1876   }
1877   impl->f = f;
1878   impl->dlsnr = opts->exfile.file.dlsnr; // Copy data changes listener address
1879   impl->mmap_opts = opts->mmap_opts;
1880 
1881   IWFS_EXT_OPTS rwl_opts = opts->exfile;
1882   rwl_opts.use_locks = !(opts->oflags & IWFSM_NOLOCKS);
1883 
1884   rc = _fsm_init_impl(impl, opts);
1885   RCGO(rc, finish);
1886 
1887   rc = _fsm_init_locks(impl, opts);
1888   RCGO(rc, finish);
1889 
1890   rc = iwfs_exfile_open(&impl->pool, &rwl_opts);
1891   RCGO(rc, finish);
1892 
1893   rc = impl->pool.state(&impl->pool, &fstate);
1894   RCGO(rc, finish);
1895 
1896   impl->omode = fstate.file.opts.omode;
1897 
1898   if (fstate.file.ostatus & IWFS_OPEN_NEW) {
1899     rc = _fsm_init_new_lw(impl, opts);
1900   } else {
1901     rc = _fsm_init_existing_lw(impl);
1902   }
1903 
1904 finish:
1905   if (rc) {
1906     if (f->impl) {
1907       IWRC(_fsm_destroy_locks(f->impl), rc);  // we are not locked
1908       IWRC(_fsm_close(f), rc);
1909     }
1910   }
1911   return rc;
1912 }
1913 
1914 static const char *_fsmfile_ecodefn(locale_t locale, uint32_t ecode) {
1915   if (!((ecode > _IWFS_FSM_ERROR_START) && (ecode < _IWFS_FSM_ERROR_END))) {
1916     return 0;
1917   }
1918   switch (ecode) {
1919     case IWFS_ERROR_NO_FREE_SPACE:
1920       return "No free space. (IWFS_ERROR_NO_FREE_SPACE)";
1921     case IWFS_ERROR_INVALID_BLOCK_SIZE:
1922       return "Invalid block size specified. (IWFS_ERROR_INVALID_BLOCK_SIZE)";
1923     case IWFS_ERROR_RANGE_NOT_ALIGNED:
1924       return "Specified range/offset is not aligned with page/block. "
1925              "(IWFS_ERROR_RANGE_NOT_ALIGNED)";
1926     case IWFS_ERROR_FSM_SEGMENTATION:
1927       return "Free-space map segmentation error. (IWFS_ERROR_FSM_SEGMENTATION)";
1928     case IWFS_ERROR_INVALID_FILEMETA:
1929       return "Invalid file metadata. (IWFS_ERROR_INVALID_FILEMETA)";
1930     case IWFS_ERROR_PLATFORM_PAGE:
1931       return "The block size incompatible with platform page size, data "
1932              "migration required. (IWFS_ERROR_PLATFORM_PAGE)";
1933     case IWFS_ERROR_RESIZE_FAIL:
1934       return "Failed to resize file, "
1935              "conflicting with free-space map location (IWFS_ERROR_RESIZE_FAIL)";
1936     default:
1937       break;
1938   }
1939   return 0;
1940 }
1941 
1942 iwrc iwfs_fsmfile_init(void) {
1943   static int _fsmfile_initialized = 0;
1944   iwrc rc = iw_init();
1945   RCRET(rc);
1946   if (!__sync_bool_compare_and_swap(&_fsmfile_initialized, 0, 1)) {
1947     return 0;  // initialized already
1948   }
1949   return iwlog_register_ecodefn(_fsmfile_ecodefn);
1950 }
1951 
1952 /*************************************************************************************************
1953 *                                      Debug API                                                *
1954 *************************************************************************************************/
1955 
1956 uint64_t iwfs_fsmdbg_number_of_free_areas(IWFS_FSM *f) {
1957   int ret;
1958   assert(f);
1959   FSM *impl = f->impl;
1960   _fsm_ctrl_rlock(impl);
1961   ret = kb_size(impl->fsm);
1962   _fsm_ctrl_unlock(impl);
1963   return (uint64_t) ret;
1964 }
1965 
1966 uint64_t iwfs_fsmdbg_find_next_set_bit(
1967   const uint64_t *addr, uint64_t offset_bit, uint64_t max_offset_bit,
1968   int *found) {
1969   return _fsm_find_next_set_bit(addr, offset_bit, max_offset_bit, found);
1970 }
1971 
1972 uint64_t iwfs_fsmdbg_find_prev_set_bit(
1973   const uint64_t *addr, uint64_t offset_bit, uint64_t min_offset_bit,
1974   int *found) {
1975   return _fsm_find_prev_set_bit(addr, offset_bit, min_offset_bit, found);
1976 }
1977 
1978 void iwfs_fsmdbg_dump_fsm_tree(IWFS_FSM *f, const char *hdr) {
1979   assert(f);
1980   FSM *impl = f->impl;
1981   fprintf(stderr, "FSM TREE: %s\n", hdr);
1982   if (!impl->fsm) {
1983     fprintf(stderr, "NONE\n");
1984     return;
1985   }
1986 #define _fsm_traverse(k)                                                                                    \
1987   {                                                                                                         \
1988     uint64_t koff = FSMBK_OFFSET(k);                                                                        \
1989     uint64_t klen = FSMBK_LENGTH(k);                                                                        \
1990     fprintf(stderr, "[%" PRIu64 " %" PRIu64 "]\n", koff, klen);                                             \
1991   }
1992   __kb_traverse(FSMBK, impl->fsm, _fsm_traverse);
1993 #undef _fsm_traverse
1994 }
1995 
1996 const char *byte_to_binary(int x) {
1997   static char b[9];
1998   b[0] = '\0';
1999   int z;
2000   for (z = 1; z <= 128; z <<= 1) {
2001     strcat(b, ((x & z) == z) ? "1" : "0");
2002   }
2003   return b;
2004 }
2005 
2006 iwrc iwfs_fsmdb_dump_fsm_bitmap(IWFS_FSM *f) {
2007   assert(f);
2008   size_t sp;
2009   uint8_t *mm;
2010   FSM *impl = f->impl;
2011   iwrc rc;
2012   if (impl->mmap_all) {
2013     rc = impl->pool.probe_mmap(&impl->pool, 0, &mm, &sp);
2014     if (!rc) {
2015       if (sp <= impl->bmoff) {
2016         rc = IWFS_ERROR_NOT_MMAPED;
2017       } else {
2018         mm += impl->bmoff;
2019         sp = sp - impl->bmoff;
2020       }
2021     }
2022   } else {
2023     rc = impl->pool.probe_mmap(&impl->pool, impl->bmoff, &mm, &sp);
2024   }
2025   if (rc) {
2026     iwlog_ecode_error3(rc);
2027     return rc;
2028   }
2029   int i = ((impl->hdrlen >> impl->bpow) >> 3);
2030   // if (impl->bmoff == impl->aunit) {
2031   //   i += ((impl->bmlen >> impl->bpow) >> 3);
2032   // }
2033   for ( ; i < sp && i < impl->bmlen; ++i) {
2034     uint8_t b = *(mm + i);
2035     fprintf(stderr, "%s", byte_to_binary(b));
2036   }
2037   printf("\n");
2038   return 0;
2039 }
2040 
2041 iwrc iwfs_fsmdbg_state(IWFS_FSM *f, IWFS_FSMDBG_STATE *d) {
2042   FSM_ENSURE_OPEN2(f);
2043   FSM *impl = f->impl;
2044   iwrc rc = _fsm_ctrl_rlock(impl);
2045   memset(d, 0, sizeof(*d));
2046   IWRC(impl->pool.state(&impl->pool, &d->state.exfile), rc);
2047   d->state.block_size = 1U << impl->bpow;
2048   d->state.oflags = impl->oflags;
2049   d->state.hdrlen = impl->hdrlen;
2050   d->state.blocks_num = impl->bmlen << 3;
2051   d->state.free_segments_num = (uint64_t) kb_size(impl->fsm);
2052   d->state.avg_alloc_size = impl->crznum > 0 ? (double_t) impl->crzsum / (double_t) impl->crznum : 0;
2053   d->state.alloc_dispersion = impl->crznum > 0 ? (double_t) impl->crzvar / (double_t) impl->crznum : 0;
2054   d->bmoff = impl->bmoff;
2055   d->bmlen = impl->bmlen;
2056   d->lfbkoff = impl->lfbkoff;
2057   d->lfbklen = impl->lfbklen;
2058   IWRC(_fsm_ctrl_unlock(impl), rc);
2059   return rc;
2060 }
2061