1package ksuid
2
3import (
4	"bytes"
5	"encoding/binary"
6)
7
8// CompressedSet is an immutable data type which stores a set of KSUIDs.
9type CompressedSet []byte
10
11// Iter returns an iterator that produces all KSUIDs in the set.
12func (set CompressedSet) Iter() CompressedSetIter {
13	return CompressedSetIter{
14		content: []byte(set),
15	}
16}
17
18// String satisfies the fmt.Stringer interface, returns a human-readable string
19// representation of the set.
20func (set CompressedSet) String() string {
21	b := bytes.Buffer{}
22	b.WriteByte('[')
23	set.writeTo(&b)
24	b.WriteByte(']')
25	return b.String()
26}
27
28// String satisfies the fmt.GoStringer interface, returns a Go representation of
29// the set.
30func (set CompressedSet) GoString() string {
31	b := bytes.Buffer{}
32	b.WriteString("ksuid.CompressedSet{")
33	set.writeTo(&b)
34	b.WriteByte('}')
35	return b.String()
36}
37
38func (set CompressedSet) writeTo(b *bytes.Buffer) {
39	a := [27]byte{}
40
41	for i, it := 0, set.Iter(); it.Next(); i++ {
42		if i != 0 {
43			b.WriteString(", ")
44		}
45		b.WriteByte('"')
46		it.KSUID.Append(a[:0])
47		b.Write(a[:])
48		b.WriteByte('"')
49	}
50}
51
52// Compress creates and returns a compressed set of KSUIDs from the list given
53// as arguments.
54func Compress(ids ...KSUID) CompressedSet {
55	c := 1 + byteLength + (len(ids) / 5)
56	b := make([]byte, 0, c)
57	return AppendCompressed(b, ids...)
58}
59
60// AppendCompressed uses the given byte slice as pre-allocated storage space to
61// build a KSUID set.
62//
63// Note that the set uses a compression technique to store the KSUIDs, so the
64// resuling length is not 20 x len(ids). The rule of thumb here is for the given
65// byte slice to reserve the amount of memory that the application would be OK
66// to waste.
67func AppendCompressed(set []byte, ids ...KSUID) CompressedSet {
68	if len(ids) != 0 {
69		if !IsSorted(ids) {
70			Sort(ids)
71		}
72		one := makeUint128(0, 1)
73
74		// The first KSUID is always written to the set, this is the starting
75		// point for all deltas.
76		set = append(set, byte(rawKSUID))
77		set = append(set, ids[0][:]...)
78
79		timestamp := ids[0].Timestamp()
80		lastKSUID := ids[0]
81		lastValue := uint128Payload(ids[0])
82
83		for i := 1; i != len(ids); i++ {
84			id := ids[i]
85
86			if id == lastKSUID {
87				continue
88			}
89
90			t := id.Timestamp()
91			v := uint128Payload(id)
92
93			if t != timestamp {
94				d := t - timestamp
95				n := varintLength32(d)
96
97				set = append(set, timeDelta|byte(n))
98				set = appendVarint32(set, d, n)
99				set = append(set, id[timestampLengthInBytes:]...)
100
101				timestamp = t
102			} else {
103				d := sub128(v, lastValue)
104
105				if d != one {
106					n := varintLength128(d)
107
108					set = append(set, payloadDelta|byte(n))
109					set = appendVarint128(set, d, n)
110				} else {
111					l, c := rangeLength(ids[i+1:], t, id, v)
112					m := uint64(l + 1)
113					n := varintLength64(m)
114
115					set = append(set, payloadRange|byte(n))
116					set = appendVarint64(set, m, n)
117
118					i += c
119					id = ids[i]
120					v = uint128Payload(id)
121				}
122			}
123
124			lastKSUID = id
125			lastValue = v
126		}
127	}
128	return CompressedSet(set)
129}
130
131func rangeLength(ids []KSUID, timestamp uint32, lastKSUID KSUID, lastValue uint128) (length int, count int) {
132	one := makeUint128(0, 1)
133
134	for i := range ids {
135		id := ids[i]
136
137		if id == lastKSUID {
138			continue
139		}
140
141		if id.Timestamp() != timestamp {
142			count = i
143			return
144		}
145
146		v := uint128Payload(id)
147
148		if sub128(v, lastValue) != one {
149			count = i
150			return
151		}
152
153		lastKSUID = id
154		lastValue = v
155		length++
156	}
157
158	count = len(ids)
159	return
160}
161
162func appendVarint128(b []byte, v uint128, n int) []byte {
163	c := v.bytes()
164	return append(b, c[len(c)-n:]...)
165}
166
167func appendVarint64(b []byte, v uint64, n int) []byte {
168	c := [8]byte{}
169	binary.BigEndian.PutUint64(c[:], v)
170	return append(b, c[len(c)-n:]...)
171}
172
173func appendVarint32(b []byte, v uint32, n int) []byte {
174	c := [4]byte{}
175	binary.BigEndian.PutUint32(c[:], v)
176	return append(b, c[len(c)-n:]...)
177}
178
179func varint128(b []byte) uint128 {
180	a := [16]byte{}
181	copy(a[16-len(b):], b)
182	return makeUint128FromPayload(a[:])
183}
184
185func varint64(b []byte) uint64 {
186	a := [8]byte{}
187	copy(a[8-len(b):], b)
188	return binary.BigEndian.Uint64(a[:])
189}
190
191func varint32(b []byte) uint32 {
192	a := [4]byte{}
193	copy(a[4-len(b):], b)
194	return binary.BigEndian.Uint32(a[:])
195}
196
197func varintLength128(v uint128) int {
198	if v[1] != 0 {
199		return 8 + varintLength64(v[1])
200	}
201	return varintLength64(v[0])
202}
203
204func varintLength64(v uint64) int {
205	switch {
206	case (v & 0xFFFFFFFFFFFFFF00) == 0:
207		return 1
208	case (v & 0xFFFFFFFFFFFF0000) == 0:
209		return 2
210	case (v & 0xFFFFFFFFFF000000) == 0:
211		return 3
212	case (v & 0xFFFFFFFF00000000) == 0:
213		return 4
214	case (v & 0xFFFFFF0000000000) == 0:
215		return 5
216	case (v & 0xFFFF000000000000) == 0:
217		return 6
218	case (v & 0xFF00000000000000) == 0:
219		return 7
220	default:
221		return 8
222	}
223}
224
225func varintLength32(v uint32) int {
226	switch {
227	case (v & 0xFFFFFF00) == 0:
228		return 1
229	case (v & 0xFFFF0000) == 0:
230		return 2
231	case (v & 0xFF000000) == 0:
232		return 3
233	default:
234		return 4
235	}
236}
237
238const (
239	rawKSUID     = 0
240	timeDelta    = (1 << 6)
241	payloadDelta = (1 << 7)
242	payloadRange = (1 << 6) | (1 << 7)
243)
244
245// CompressedSetIter is an iterator type returned by Set.Iter to produce the
246// list of KSUIDs stored in a set.
247//
248// Here's is how the iterator type is commonly used:
249//
250//	for it := set.Iter(); it.Next(); {
251//		id := it.KSUID
252//		// ...
253//	}
254//
255// CompressedSetIter values are not safe to use concurrently from multiple
256// goroutines.
257type CompressedSetIter struct {
258	// KSUID is modified by calls to the Next method to hold the KSUID loaded
259	// by the iterator.
260	KSUID KSUID
261
262	content []byte
263	offset  int
264
265	seqlength uint64
266	timestamp uint32
267	lastValue uint128
268}
269
270// Next moves the iterator forward, returning true if there a KSUID was found,
271// or false if the iterator as reached the end of the set it was created from.
272func (it *CompressedSetIter) Next() bool {
273	if it.seqlength != 0 {
274		value := incr128(it.lastValue)
275		it.KSUID = value.ksuid(it.timestamp)
276		it.seqlength--
277		it.lastValue = value
278		return true
279	}
280
281	if it.offset == len(it.content) {
282		return false
283	}
284
285	b := it.content[it.offset]
286	it.offset++
287
288	const mask = rawKSUID | timeDelta | payloadDelta | payloadRange
289	tag := int(b) & mask
290	cnt := int(b) & ^mask
291
292	switch tag {
293	case rawKSUID:
294		off0 := it.offset
295		off1 := off0 + byteLength
296
297		copy(it.KSUID[:], it.content[off0:off1])
298
299		it.offset = off1
300		it.timestamp = it.KSUID.Timestamp()
301		it.lastValue = uint128Payload(it.KSUID)
302
303	case timeDelta:
304		off0 := it.offset
305		off1 := off0 + cnt
306		off2 := off1 + payloadLengthInBytes
307
308		it.timestamp += varint32(it.content[off0:off1])
309
310		binary.BigEndian.PutUint32(it.KSUID[:timestampLengthInBytes], it.timestamp)
311		copy(it.KSUID[timestampLengthInBytes:], it.content[off1:off2])
312
313		it.offset = off2
314		it.lastValue = uint128Payload(it.KSUID)
315
316	case payloadDelta:
317		off0 := it.offset
318		off1 := off0 + cnt
319
320		delta := varint128(it.content[off0:off1])
321		value := add128(it.lastValue, delta)
322
323		it.KSUID = value.ksuid(it.timestamp)
324		it.offset = off1
325		it.lastValue = value
326
327	case payloadRange:
328		off0 := it.offset
329		off1 := off0 + cnt
330
331		value := incr128(it.lastValue)
332		it.KSUID = value.ksuid(it.timestamp)
333		it.seqlength = varint64(it.content[off0:off1])
334		it.offset = off1
335		it.seqlength--
336		it.lastValue = value
337
338	default:
339		panic("KSUID set iterator is reading malformed data")
340	}
341
342	return true
343}
344