1// Package bitseq provides a structure and utilities for representing long bitmask
2// as sequence of run-length encoded blocks. It operates directly on the encoded
3// representation, it does not decode/encode.
4package bitseq
5
6import (
7	"encoding/binary"
8	"encoding/json"
9	"errors"
10	"fmt"
11	"sync"
12
13	"github.com/docker/libnetwork/datastore"
14	"github.com/docker/libnetwork/types"
15	"github.com/sirupsen/logrus"
16)
17
18// block sequence constants
19// If needed we can think of making these configurable
20const (
21	blockLen      = uint32(32)
22	blockBytes    = uint64(blockLen / 8)
23	blockMAX      = uint32(1<<blockLen - 1)
24	blockFirstBit = uint32(1) << (blockLen - 1)
25	invalidPos    = uint64(0xFFFFFFFFFFFFFFFF)
26)
27
28var (
29	// ErrNoBitAvailable is returned when no more bits are available to set
30	ErrNoBitAvailable = errors.New("no bit available")
31	// ErrBitAllocated is returned when the specific bit requested is already set
32	ErrBitAllocated = errors.New("requested bit is already allocated")
33)
34
35// Handle contains the sequece representing the bitmask and its identifier
36type Handle struct {
37	bits       uint64
38	unselected uint64
39	head       *sequence
40	app        string
41	id         string
42	dbIndex    uint64
43	dbExists   bool
44	curr       uint64
45	store      datastore.DataStore
46	sync.Mutex
47}
48
49// NewHandle returns a thread-safe instance of the bitmask handler
50func NewHandle(app string, ds datastore.DataStore, id string, numElements uint64) (*Handle, error) {
51	h := &Handle{
52		app:        app,
53		id:         id,
54		store:      ds,
55		bits:       numElements,
56		unselected: numElements,
57		head: &sequence{
58			block: 0x0,
59			count: getNumBlocks(numElements),
60		},
61	}
62
63	if h.store == nil {
64		return h, nil
65	}
66
67	// Get the initial status from the ds if present.
68	if err := h.store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound {
69		return nil, err
70	}
71
72	// If the handle is not in store, write it.
73	if !h.Exists() {
74		if err := h.writeToStore(); err != nil {
75			return nil, fmt.Errorf("failed to write bitsequence to store: %v", err)
76		}
77	}
78
79	return h, nil
80}
81
82// sequence represents a recurring sequence of 32 bits long bitmasks
83type sequence struct {
84	block uint32    // block is a symbol representing 4 byte long allocation bitmask
85	count uint64    // number of consecutive blocks (symbols)
86	next  *sequence // next sequence
87}
88
89// String returns a string representation of the block sequence starting from this block
90func (s *sequence) toString() string {
91	var nextBlock string
92	if s.next == nil {
93		nextBlock = "end"
94	} else {
95		nextBlock = s.next.toString()
96	}
97	return fmt.Sprintf("(0x%x, %d)->%s", s.block, s.count, nextBlock)
98}
99
100// GetAvailableBit returns the position of the first unset bit in the bitmask represented by this sequence
101func (s *sequence) getAvailableBit(from uint64) (uint64, uint64, error) {
102	if s.block == blockMAX || s.count == 0 {
103		return invalidPos, invalidPos, ErrNoBitAvailable
104	}
105	bits := from
106	bitSel := blockFirstBit >> from
107	for bitSel > 0 && s.block&bitSel != 0 {
108		bitSel >>= 1
109		bits++
110	}
111	// Check if the loop exited because it could not
112	// find any available bit int block  starting from
113	// "from". Return invalid pos in that case.
114	if bitSel == 0 {
115		return invalidPos, invalidPos, ErrNoBitAvailable
116	}
117	return bits / 8, bits % 8, nil
118}
119
120// GetCopy returns a copy of the linked list rooted at this node
121func (s *sequence) getCopy() *sequence {
122	n := &sequence{block: s.block, count: s.count}
123	pn := n
124	ps := s.next
125	for ps != nil {
126		pn.next = &sequence{block: ps.block, count: ps.count}
127		pn = pn.next
128		ps = ps.next
129	}
130	return n
131}
132
133// Equal checks if this sequence is equal to the passed one
134func (s *sequence) equal(o *sequence) bool {
135	this := s
136	other := o
137	for this != nil {
138		if other == nil {
139			return false
140		}
141		if this.block != other.block || this.count != other.count {
142			return false
143		}
144		this = this.next
145		other = other.next
146	}
147	// Check if other is longer than this
148	if other != nil {
149		return false
150	}
151	return true
152}
153
154// ToByteArray converts the sequence into a byte array
155func (s *sequence) toByteArray() ([]byte, error) {
156	var bb []byte
157
158	p := s
159	for p != nil {
160		b := make([]byte, 12)
161		binary.BigEndian.PutUint32(b[0:], p.block)
162		binary.BigEndian.PutUint64(b[4:], p.count)
163		bb = append(bb, b...)
164		p = p.next
165	}
166
167	return bb, nil
168}
169
170// fromByteArray construct the sequence from the byte array
171func (s *sequence) fromByteArray(data []byte) error {
172	l := len(data)
173	if l%12 != 0 {
174		return fmt.Errorf("cannot deserialize byte sequence of length %d (%v)", l, data)
175	}
176
177	p := s
178	i := 0
179	for {
180		p.block = binary.BigEndian.Uint32(data[i : i+4])
181		p.count = binary.BigEndian.Uint64(data[i+4 : i+12])
182		i += 12
183		if i == l {
184			break
185		}
186		p.next = &sequence{}
187		p = p.next
188	}
189
190	return nil
191}
192
193func (h *Handle) getCopy() *Handle {
194	return &Handle{
195		bits:       h.bits,
196		unselected: h.unselected,
197		head:       h.head.getCopy(),
198		app:        h.app,
199		id:         h.id,
200		dbIndex:    h.dbIndex,
201		dbExists:   h.dbExists,
202		store:      h.store,
203		curr:       h.curr,
204	}
205}
206
207// SetAnyInRange atomically sets the first unset bit in the specified range in the sequence and returns the corresponding ordinal
208func (h *Handle) SetAnyInRange(start, end uint64, serial bool) (uint64, error) {
209	if end < start || end >= h.bits {
210		return invalidPos, fmt.Errorf("invalid bit range [%d, %d]", start, end)
211	}
212	if h.Unselected() == 0 {
213		return invalidPos, ErrNoBitAvailable
214	}
215	return h.set(0, start, end, true, false, serial)
216}
217
218// SetAny atomically sets the first unset bit in the sequence and returns the corresponding ordinal
219func (h *Handle) SetAny(serial bool) (uint64, error) {
220	if h.Unselected() == 0 {
221		return invalidPos, ErrNoBitAvailable
222	}
223	return h.set(0, 0, h.bits-1, true, false, serial)
224}
225
226// Set atomically sets the corresponding bit in the sequence
227func (h *Handle) Set(ordinal uint64) error {
228	if err := h.validateOrdinal(ordinal); err != nil {
229		return err
230	}
231	_, err := h.set(ordinal, 0, 0, false, false, false)
232	return err
233}
234
235// Unset atomically unsets the corresponding bit in the sequence
236func (h *Handle) Unset(ordinal uint64) error {
237	if err := h.validateOrdinal(ordinal); err != nil {
238		return err
239	}
240	_, err := h.set(ordinal, 0, 0, false, true, false)
241	return err
242}
243
244// IsSet atomically checks if the ordinal bit is set. In case ordinal
245// is outside of the bit sequence limits, false is returned.
246func (h *Handle) IsSet(ordinal uint64) bool {
247	if err := h.validateOrdinal(ordinal); err != nil {
248		return false
249	}
250	h.Lock()
251	_, _, err := checkIfAvailable(h.head, ordinal)
252	h.Unlock()
253	return err != nil
254}
255
256func (h *Handle) runConsistencyCheck() bool {
257	corrupted := false
258	for p, c := h.head, h.head.next; c != nil; c = c.next {
259		if c.count == 0 {
260			corrupted = true
261			p.next = c.next
262			continue // keep same p
263		}
264		p = c
265	}
266	return corrupted
267}
268
269// CheckConsistency checks if the bit sequence is in an inconsistent state and attempts to fix it.
270// It looks for a corruption signature that may happen in docker 1.9.0 and 1.9.1.
271func (h *Handle) CheckConsistency() error {
272	for {
273		h.Lock()
274		store := h.store
275		h.Unlock()
276
277		if store != nil {
278			if err := store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound {
279				return err
280			}
281		}
282
283		h.Lock()
284		nh := h.getCopy()
285		h.Unlock()
286
287		if !nh.runConsistencyCheck() {
288			return nil
289		}
290
291		if err := nh.writeToStore(); err != nil {
292			if _, ok := err.(types.RetryError); !ok {
293				return fmt.Errorf("internal failure while fixing inconsistent bitsequence: %v", err)
294			}
295			continue
296		}
297
298		logrus.Infof("Fixed inconsistent bit sequence in datastore:\n%s\n%s", h, nh)
299
300		h.Lock()
301		h.head = nh.head
302		h.Unlock()
303
304		return nil
305	}
306}
307
308// set/reset the bit
309func (h *Handle) set(ordinal, start, end uint64, any bool, release bool, serial bool) (uint64, error) {
310	var (
311		bitPos  uint64
312		bytePos uint64
313		ret     uint64
314		err     error
315	)
316
317	for {
318		var store datastore.DataStore
319		curr := uint64(0)
320		h.Lock()
321		store = h.store
322		if store != nil {
323			h.Unlock() // The lock is acquired in the GetObject
324			if err := store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound {
325				return ret, err
326			}
327			h.Lock() // Acquire the lock back
328		}
329		if serial {
330			curr = h.curr
331		}
332		// Get position if available
333		if release {
334			bytePos, bitPos = ordinalToPos(ordinal)
335		} else {
336			if any {
337				bytePos, bitPos, err = getAvailableFromCurrent(h.head, start, curr, end)
338				ret = posToOrdinal(bytePos, bitPos)
339				if err == nil {
340					h.curr = ret + 1
341				}
342			} else {
343				bytePos, bitPos, err = checkIfAvailable(h.head, ordinal)
344				ret = ordinal
345			}
346		}
347		if err != nil {
348			h.Unlock()
349			return ret, err
350		}
351
352		// Create a private copy of h and work on it
353		nh := h.getCopy()
354
355		nh.head = pushReservation(bytePos, bitPos, nh.head, release)
356		if release {
357			nh.unselected++
358		} else {
359			nh.unselected--
360		}
361
362		if h.store != nil {
363			h.Unlock()
364			// Attempt to write private copy to store
365			if err := nh.writeToStore(); err != nil {
366				if _, ok := err.(types.RetryError); !ok {
367					return ret, fmt.Errorf("internal failure while setting the bit: %v", err)
368				}
369				// Retry
370				continue
371			}
372			h.Lock()
373		}
374
375		// Previous atomic push was successful. Save private copy to local copy
376		h.unselected = nh.unselected
377		h.head = nh.head
378		h.dbExists = nh.dbExists
379		h.dbIndex = nh.dbIndex
380		h.Unlock()
381		return ret, nil
382	}
383}
384
385// checks is needed because to cover the case where the number of bits is not a multiple of blockLen
386func (h *Handle) validateOrdinal(ordinal uint64) error {
387	h.Lock()
388	defer h.Unlock()
389	if ordinal >= h.bits {
390		return errors.New("bit does not belong to the sequence")
391	}
392	return nil
393}
394
395// Destroy removes from the datastore the data belonging to this handle
396func (h *Handle) Destroy() error {
397	for {
398		if err := h.deleteFromStore(); err != nil {
399			if _, ok := err.(types.RetryError); !ok {
400				return fmt.Errorf("internal failure while destroying the sequence: %v", err)
401			}
402			// Fetch latest
403			if err := h.store.GetObject(datastore.Key(h.Key()...), h); err != nil {
404				if err == datastore.ErrKeyNotFound { // already removed
405					return nil
406				}
407				return fmt.Errorf("failed to fetch from store when destroying the sequence: %v", err)
408			}
409			continue
410		}
411		return nil
412	}
413}
414
415// ToByteArray converts this handle's data into a byte array
416func (h *Handle) ToByteArray() ([]byte, error) {
417
418	h.Lock()
419	defer h.Unlock()
420	ba := make([]byte, 16)
421	binary.BigEndian.PutUint64(ba[0:], h.bits)
422	binary.BigEndian.PutUint64(ba[8:], h.unselected)
423	bm, err := h.head.toByteArray()
424	if err != nil {
425		return nil, fmt.Errorf("failed to serialize head: %s", err.Error())
426	}
427	ba = append(ba, bm...)
428
429	return ba, nil
430}
431
432// FromByteArray reads his handle's data from a byte array
433func (h *Handle) FromByteArray(ba []byte) error {
434	if ba == nil {
435		return errors.New("nil byte array")
436	}
437
438	nh := &sequence{}
439	err := nh.fromByteArray(ba[16:])
440	if err != nil {
441		return fmt.Errorf("failed to deserialize head: %s", err.Error())
442	}
443
444	h.Lock()
445	h.head = nh
446	h.bits = binary.BigEndian.Uint64(ba[0:8])
447	h.unselected = binary.BigEndian.Uint64(ba[8:16])
448	h.Unlock()
449
450	return nil
451}
452
453// Bits returns the length of the bit sequence
454func (h *Handle) Bits() uint64 {
455	return h.bits
456}
457
458// Unselected returns the number of bits which are not selected
459func (h *Handle) Unselected() uint64 {
460	h.Lock()
461	defer h.Unlock()
462	return h.unselected
463}
464
465func (h *Handle) String() string {
466	h.Lock()
467	defer h.Unlock()
468	return fmt.Sprintf("App: %s, ID: %s, DBIndex: 0x%x, Bits: %d, Unselected: %d, Sequence: %s Curr:%d",
469		h.app, h.id, h.dbIndex, h.bits, h.unselected, h.head.toString(), h.curr)
470}
471
472// MarshalJSON encodes Handle into json message
473func (h *Handle) MarshalJSON() ([]byte, error) {
474	m := map[string]interface{}{
475		"id": h.id,
476	}
477
478	b, err := h.ToByteArray()
479	if err != nil {
480		return nil, err
481	}
482	m["sequence"] = b
483	return json.Marshal(m)
484}
485
486// UnmarshalJSON decodes json message into Handle
487func (h *Handle) UnmarshalJSON(data []byte) error {
488	var (
489		m   map[string]interface{}
490		b   []byte
491		err error
492	)
493	if err = json.Unmarshal(data, &m); err != nil {
494		return err
495	}
496	h.id = m["id"].(string)
497	bi, _ := json.Marshal(m["sequence"])
498	if err := json.Unmarshal(bi, &b); err != nil {
499		return err
500	}
501	return h.FromByteArray(b)
502}
503
504// getFirstAvailable looks for the first unset bit in passed mask starting from start
505func getFirstAvailable(head *sequence, start uint64) (uint64, uint64, error) {
506	// Find sequence which contains the start bit
507	byteStart, bitStart := ordinalToPos(start)
508	current, _, precBlocks, inBlockBytePos := findSequence(head, byteStart)
509	// Derive the this sequence offsets
510	byteOffset := byteStart - inBlockBytePos
511	bitOffset := inBlockBytePos*8 + bitStart
512	for current != nil {
513		if current.block != blockMAX {
514			// If the current block is not full, check if there is any bit
515			// from the current bit in the current block. If not, before proceeding to the
516			// next block node, make sure we check for available bit in the next
517			// instance of the same block. Due to RLE same block signature will be
518			// compressed.
519		retry:
520			bytePos, bitPos, err := current.getAvailableBit(bitOffset)
521			if err != nil && precBlocks == current.count-1 {
522				// This is the last instance in the same block node,
523				// so move to the next block.
524				goto next
525			}
526			if err != nil {
527				// There are some more instances of the same block, so add the offset
528				// and be optimistic that you will find the available bit in the next
529				// instance of the same block.
530				bitOffset = 0
531				byteOffset += blockBytes
532				precBlocks++
533				goto retry
534			}
535			return byteOffset + bytePos, bitPos, err
536		}
537		// Moving to next block: Reset bit offset.
538	next:
539		bitOffset = 0
540		byteOffset += (current.count * blockBytes) - (precBlocks * blockBytes)
541		precBlocks = 0
542		current = current.next
543	}
544	return invalidPos, invalidPos, ErrNoBitAvailable
545}
546
547// getAvailableFromCurrent will look for available ordinal from the current ordinal.
548// If none found then it will loop back to the start to check of the available bit.
549// This can be further optimized to check from start till curr in case of a rollover
550func getAvailableFromCurrent(head *sequence, start, curr, end uint64) (uint64, uint64, error) {
551	var bytePos, bitPos uint64
552	var err error
553	if curr != 0 && curr > start {
554		bytePos, bitPos, err = getFirstAvailable(head, curr)
555		ret := posToOrdinal(bytePos, bitPos)
556		if end < ret || err != nil {
557			goto begin
558		}
559		return bytePos, bitPos, nil
560	}
561
562begin:
563	bytePos, bitPos, err = getFirstAvailable(head, start)
564	ret := posToOrdinal(bytePos, bitPos)
565	if end < ret || err != nil {
566		return invalidPos, invalidPos, ErrNoBitAvailable
567	}
568	return bytePos, bitPos, nil
569}
570
571// checkIfAvailable checks if the bit correspondent to the specified ordinal is unset
572// If the ordinal is beyond the sequence limits, a negative response is returned
573func checkIfAvailable(head *sequence, ordinal uint64) (uint64, uint64, error) {
574	bytePos, bitPos := ordinalToPos(ordinal)
575
576	// Find the sequence containing this byte
577	current, _, _, inBlockBytePos := findSequence(head, bytePos)
578	if current != nil {
579		// Check whether the bit corresponding to the ordinal address is unset
580		bitSel := blockFirstBit >> (inBlockBytePos*8 + bitPos)
581		if current.block&bitSel == 0 {
582			return bytePos, bitPos, nil
583		}
584	}
585
586	return invalidPos, invalidPos, ErrBitAllocated
587}
588
589// Given the byte position and the sequences list head, return the pointer to the
590// sequence containing the byte (current), the pointer to the previous sequence,
591// the number of blocks preceding the block containing the byte inside the current sequence.
592// If bytePos is outside of the list, function will return (nil, nil, 0, invalidPos)
593func findSequence(head *sequence, bytePos uint64) (*sequence, *sequence, uint64, uint64) {
594	// Find the sequence containing this byte
595	previous := head
596	current := head
597	n := bytePos
598	for current.next != nil && n >= (current.count*blockBytes) { // Nil check for less than 32 addresses masks
599		n -= (current.count * blockBytes)
600		previous = current
601		current = current.next
602	}
603
604	// If byte is outside of the list, let caller know
605	if n >= (current.count * blockBytes) {
606		return nil, nil, 0, invalidPos
607	}
608
609	// Find the byte position inside the block and the number of blocks
610	// preceding the block containing the byte inside this sequence
611	precBlocks := n / blockBytes
612	inBlockBytePos := bytePos % blockBytes
613
614	return current, previous, precBlocks, inBlockBytePos
615}
616
617// PushReservation pushes the bit reservation inside the bitmask.
618// Given byte and bit positions, identify the sequence (current) which holds the block containing the affected bit.
619// Create a new block with the modified bit according to the operation (allocate/release).
620// Create a new sequence containing the new block and insert it in the proper position.
621// Remove current sequence if empty.
622// Check if new sequence can be merged with neighbour (previous/next) sequences.
623//
624//
625// Identify "current" sequence containing block:
626//                                      [prev seq] [current seq] [next seq]
627//
628// Based on block position, resulting list of sequences can be any of three forms:
629//
630//        block position                        Resulting list of sequences
631// A) block is first in current:         [prev seq] [new] [modified current seq] [next seq]
632// B) block is last in current:          [prev seq] [modified current seq] [new] [next seq]
633// C) block is in the middle of current: [prev seq] [curr pre] [new] [curr post] [next seq]
634func pushReservation(bytePos, bitPos uint64, head *sequence, release bool) *sequence {
635	// Store list's head
636	newHead := head
637
638	// Find the sequence containing this byte
639	current, previous, precBlocks, inBlockBytePos := findSequence(head, bytePos)
640	if current == nil {
641		return newHead
642	}
643
644	// Construct updated block
645	bitSel := blockFirstBit >> (inBlockBytePos*8 + bitPos)
646	newBlock := current.block
647	if release {
648		newBlock &^= bitSel
649	} else {
650		newBlock |= bitSel
651	}
652
653	// Quit if it was a redundant request
654	if current.block == newBlock {
655		return newHead
656	}
657
658	// Current sequence inevitably looses one block, upadate count
659	current.count--
660
661	// Create new sequence
662	newSequence := &sequence{block: newBlock, count: 1}
663
664	// Insert the new sequence in the list based on block position
665	if precBlocks == 0 { // First in sequence (A)
666		newSequence.next = current
667		if current == head {
668			newHead = newSequence
669			previous = newHead
670		} else {
671			previous.next = newSequence
672		}
673		removeCurrentIfEmpty(&newHead, newSequence, current)
674		mergeSequences(previous)
675	} else if precBlocks == current.count { // Last in sequence (B)
676		newSequence.next = current.next
677		current.next = newSequence
678		mergeSequences(current)
679	} else { // In between the sequence (C)
680		currPre := &sequence{block: current.block, count: precBlocks, next: newSequence}
681		currPost := current
682		currPost.count -= precBlocks
683		newSequence.next = currPost
684		if currPost == head {
685			newHead = currPre
686		} else {
687			previous.next = currPre
688		}
689		// No merging or empty current possible here
690	}
691
692	return newHead
693}
694
695// Removes the current sequence from the list if empty, adjusting the head pointer if needed
696func removeCurrentIfEmpty(head **sequence, previous, current *sequence) {
697	if current.count == 0 {
698		if current == *head {
699			*head = current.next
700		} else {
701			previous.next = current.next
702			current = current.next
703		}
704	}
705}
706
707// Given a pointer to a sequence, it checks if it can be merged with any following sequences
708// It stops when no more merging is possible.
709// TODO: Optimization: only attempt merge from start to end sequence, no need to scan till the end of the list
710func mergeSequences(seq *sequence) {
711	if seq != nil {
712		// Merge all what possible from seq
713		for seq.next != nil && seq.block == seq.next.block {
714			seq.count += seq.next.count
715			seq.next = seq.next.next
716		}
717		// Move to next
718		mergeSequences(seq.next)
719	}
720}
721
722func getNumBlocks(numBits uint64) uint64 {
723	numBlocks := numBits / uint64(blockLen)
724	if numBits%uint64(blockLen) != 0 {
725		numBlocks++
726	}
727	return numBlocks
728}
729
730func ordinalToPos(ordinal uint64) (uint64, uint64) {
731	return ordinal / 8, ordinal % 8
732}
733
734func posToOrdinal(bytePos, bitPos uint64) uint64 {
735	return bytePos*8 + bitPos
736}
737