1// Package xid is a globally unique id generator suited for web scale
2//
3// Xid is using Mongo Object ID algorithm to generate globally unique ids:
4// https://docs.mongodb.org/manual/reference/object-id/
5//
6//   - 4-byte value representing the seconds since the Unix epoch,
7//   - 3-byte machine identifier,
8//   - 2-byte process id, and
9//   - 3-byte counter, starting with a random value.
10//
11// The binary representation of the id is compatible with Mongo 12 bytes Object IDs.
12// The string representation is using base32 hex (w/o padding) for better space efficiency
13// when stored in that form (20 bytes). The hex variant of base32 is used to retain the
14// sortable property of the id.
15//
16// Xid doesn't use base64 because case sensitivity and the 2 non alphanum chars may be an
17// issue when transported as a string between various systems. Base36 wasn't retained either
18// because 1/ it's not standard 2/ the resulting size is not predictable (not bit aligned)
19// and 3/ it would not remain sortable. To validate a base32 `xid`, expect a 20 chars long,
20// all lowercase sequence of `a` to `v` letters and `0` to `9` numbers (`[0-9a-v]{20}`).
21//
22// UUID is 16 bytes (128 bits), snowflake is 8 bytes (64 bits), xid stands in between
23// with 12 bytes with a more compact string representation ready for the web and no
24// required configuration or central generation server.
25//
26// Features:
27//
28//   - Size: 12 bytes (96 bits), smaller than UUID, larger than snowflake
29//   - Base32 hex encoded by default (16 bytes storage when transported as printable string)
30//   - Non configured, you don't need set a unique machine and/or data center id
31//   - K-ordered
32//   - Embedded time with 1 second precision
33//   - Unicity guaranteed for 16,777,216 (24 bits) unique ids per second and per host/process
34//
35// Best used with xlog's RequestIDHandler (https://godoc.org/github.com/rs/xlog#RequestIDHandler).
36//
37// References:
38//
39//   - http://www.slideshare.net/davegardnerisme/unique-id-generation-in-distributed-systems
40//   - https://en.wikipedia.org/wiki/Universally_unique_identifier
41//   - https://blog.twitter.com/2010/announcing-snowflake
42package xid
43
44import (
45	"bytes"
46	"crypto/md5"
47	"crypto/rand"
48	"database/sql/driver"
49	"encoding/binary"
50	"errors"
51	"fmt"
52	"hash/crc32"
53	"io/ioutil"
54	"os"
55	"sort"
56	"sync/atomic"
57	"time"
58	"unsafe"
59)
60
61// Code inspired from mgo/bson ObjectId
62
63// ID represents a unique request id
64type ID [rawLen]byte
65
66const (
67	encodedLen = 20 // string encoded len
68	rawLen     = 12 // binary raw len
69
70	// encoding stores a custom version of the base32 encoding with lower case
71	// letters.
72	encoding = "0123456789abcdefghijklmnopqrstuv"
73)
74
75var (
76	// ErrInvalidID is returned when trying to unmarshal an invalid ID
77	ErrInvalidID = errors.New("xid: invalid ID")
78
79	// objectIDCounter is atomically incremented when generating a new ObjectId
80	// using NewObjectId() function. It's used as a counter part of an id.
81	// This id is initialized with a random value.
82	objectIDCounter = randInt()
83
84	// machineId stores machine id generated once and used in subsequent calls
85	// to NewObjectId function.
86	machineID = readMachineID()
87
88	// pid stores the current process id
89	pid = os.Getpid()
90
91	nilID ID
92
93	// dec is the decoding map for base32 encoding
94	dec [256]byte
95)
96
97func init() {
98	for i := 0; i < len(dec); i++ {
99		dec[i] = 0xFF
100	}
101	for i := 0; i < len(encoding); i++ {
102		dec[encoding[i]] = byte(i)
103	}
104
105	// If /proc/self/cpuset exists and is not /, we can assume that we are in a
106	// form of container and use the content of cpuset xor-ed with the PID in
107	// order get a reasonable machine global unique PID.
108	b, err := ioutil.ReadFile("/proc/self/cpuset")
109	if err == nil && len(b) > 1 {
110		pid ^= int(crc32.ChecksumIEEE(b))
111	}
112}
113
114// readMachineId generates machine id and puts it into the machineId global
115// variable. If this function fails to get the hostname, it will cause
116// a runtime error.
117func readMachineID() []byte {
118	id := make([]byte, 3)
119	hid, err := readPlatformMachineID()
120	if err != nil || len(hid) == 0 {
121		hid, err = os.Hostname()
122	}
123	if err == nil && len(hid) != 0 {
124		hw := md5.New()
125		hw.Write([]byte(hid))
126		copy(id, hw.Sum(nil))
127	} else {
128		// Fallback to rand number if machine id can't be gathered
129		if _, randErr := rand.Reader.Read(id); randErr != nil {
130			panic(fmt.Errorf("xid: cannot get hostname nor generate a random number: %v; %v", err, randErr))
131		}
132	}
133	return id
134}
135
136// randInt generates a random uint32
137func randInt() uint32 {
138	b := make([]byte, 3)
139	if _, err := rand.Reader.Read(b); err != nil {
140		panic(fmt.Errorf("xid: cannot generate random number: %v;", err))
141	}
142	return uint32(b[0])<<16 | uint32(b[1])<<8 | uint32(b[2])
143}
144
145// New generates a globally unique ID
146func New() ID {
147	return NewWithTime(time.Now())
148}
149
150// NewWithTime generates a globally unique ID with the passed in time
151func NewWithTime(t time.Time) ID {
152	var id ID
153	// Timestamp, 4 bytes, big endian
154	binary.BigEndian.PutUint32(id[:], uint32(t.Unix()))
155	// Machine, first 3 bytes of md5(hostname)
156	id[4] = machineID[0]
157	id[5] = machineID[1]
158	id[6] = machineID[2]
159	// Pid, 2 bytes, specs don't specify endianness, but we use big endian.
160	id[7] = byte(pid >> 8)
161	id[8] = byte(pid)
162	// Increment, 3 bytes, big endian
163	i := atomic.AddUint32(&objectIDCounter, 1)
164	id[9] = byte(i >> 16)
165	id[10] = byte(i >> 8)
166	id[11] = byte(i)
167	return id
168}
169
170// FromString reads an ID from its string representation
171func FromString(id string) (ID, error) {
172	i := &ID{}
173	err := i.UnmarshalText([]byte(id))
174	return *i, err
175}
176
177// String returns a base32 hex lowercased with no padding representation of the id (char set is 0-9, a-v).
178func (id ID) String() string {
179	text := make([]byte, encodedLen)
180	encode(text, id[:])
181	return *(*string)(unsafe.Pointer(&text))
182}
183
184// Encode encodes the id using base32 encoding, writing 20 bytes to dst and return it.
185func (id ID) Encode(dst []byte) []byte {
186	encode(dst, id[:])
187	return dst
188}
189
190// MarshalText implements encoding/text TextMarshaler interface
191func (id ID) MarshalText() ([]byte, error) {
192	text := make([]byte, encodedLen)
193	encode(text, id[:])
194	return text, nil
195}
196
197// MarshalJSON implements encoding/json Marshaler interface
198func (id ID) MarshalJSON() ([]byte, error) {
199	if id.IsNil() {
200		return []byte("null"), nil
201	}
202	text := make([]byte, encodedLen+2)
203	encode(text[1:encodedLen+1], id[:])
204	text[0], text[encodedLen+1] = '"', '"'
205	return text, nil
206}
207
208// encode by unrolling the stdlib base32 algorithm + removing all safe checks
209func encode(dst, id []byte) {
210	_ = dst[19]
211	_ = id[11]
212
213	dst[19] = encoding[(id[11]<<4)&0x1F]
214	dst[18] = encoding[(id[11]>>1)&0x1F]
215	dst[17] = encoding[(id[11]>>6)&0x1F|(id[10]<<2)&0x1F]
216	dst[16] = encoding[id[10]>>3]
217	dst[15] = encoding[id[9]&0x1F]
218	dst[14] = encoding[(id[9]>>5)|(id[8]<<3)&0x1F]
219	dst[13] = encoding[(id[8]>>2)&0x1F]
220	dst[12] = encoding[id[8]>>7|(id[7]<<1)&0x1F]
221	dst[11] = encoding[(id[7]>>4)&0x1F|(id[6]<<4)&0x1F]
222	dst[10] = encoding[(id[6]>>1)&0x1F]
223	dst[9] = encoding[(id[6]>>6)&0x1F|(id[5]<<2)&0x1F]
224	dst[8] = encoding[id[5]>>3]
225	dst[7] = encoding[id[4]&0x1F]
226	dst[6] = encoding[id[4]>>5|(id[3]<<3)&0x1F]
227	dst[5] = encoding[(id[3]>>2)&0x1F]
228	dst[4] = encoding[id[3]>>7|(id[2]<<1)&0x1F]
229	dst[3] = encoding[(id[2]>>4)&0x1F|(id[1]<<4)&0x1F]
230	dst[2] = encoding[(id[1]>>1)&0x1F]
231	dst[1] = encoding[(id[1]>>6)&0x1F|(id[0]<<2)&0x1F]
232	dst[0] = encoding[id[0]>>3]
233}
234
235// UnmarshalText implements encoding/text TextUnmarshaler interface
236func (id *ID) UnmarshalText(text []byte) error {
237	if len(text) != encodedLen {
238		return ErrInvalidID
239	}
240	for _, c := range text {
241		if dec[c] == 0xFF {
242			return ErrInvalidID
243		}
244	}
245	decode(id, text)
246	return nil
247}
248
249// UnmarshalJSON implements encoding/json Unmarshaler interface
250func (id *ID) UnmarshalJSON(b []byte) error {
251	s := string(b)
252	if s == "null" {
253		*id = nilID
254		return nil
255	}
256	return id.UnmarshalText(b[1 : len(b)-1])
257}
258
259// decode by unrolling the stdlib base32 algorithm + removing all safe checks
260func decode(id *ID, src []byte) {
261	_ = src[19]
262	_ = id[11]
263
264	id[11] = dec[src[17]]<<6 | dec[src[18]]<<1 | dec[src[19]]>>4
265	id[10] = dec[src[16]]<<3 | dec[src[17]]>>2
266	id[9] = dec[src[14]]<<5 | dec[src[15]]
267	id[8] = dec[src[12]]<<7 | dec[src[13]]<<2 | dec[src[14]]>>3
268	id[7] = dec[src[11]]<<4 | dec[src[12]]>>1
269	id[6] = dec[src[9]]<<6 | dec[src[10]]<<1 | dec[src[11]]>>4
270	id[5] = dec[src[8]]<<3 | dec[src[9]]>>2
271	id[4] = dec[src[6]]<<5 | dec[src[7]]
272	id[3] = dec[src[4]]<<7 | dec[src[5]]<<2 | dec[src[6]]>>3
273	id[2] = dec[src[3]]<<4 | dec[src[4]]>>1
274	id[1] = dec[src[1]]<<6 | dec[src[2]]<<1 | dec[src[3]]>>4
275	id[0] = dec[src[0]]<<3 | dec[src[1]]>>2
276}
277
278// Time returns the timestamp part of the id.
279// It's a runtime error to call this method with an invalid id.
280func (id ID) Time() time.Time {
281	// First 4 bytes of ObjectId is 32-bit big-endian seconds from epoch.
282	secs := int64(binary.BigEndian.Uint32(id[0:4]))
283	return time.Unix(secs, 0)
284}
285
286// Machine returns the 3-byte machine id part of the id.
287// It's a runtime error to call this method with an invalid id.
288func (id ID) Machine() []byte {
289	return id[4:7]
290}
291
292// Pid returns the process id part of the id.
293// It's a runtime error to call this method with an invalid id.
294func (id ID) Pid() uint16 {
295	return binary.BigEndian.Uint16(id[7:9])
296}
297
298// Counter returns the incrementing value part of the id.
299// It's a runtime error to call this method with an invalid id.
300func (id ID) Counter() int32 {
301	b := id[9:12]
302	// Counter is stored as big-endian 3-byte value
303	return int32(uint32(b[0])<<16 | uint32(b[1])<<8 | uint32(b[2]))
304}
305
306// Value implements the driver.Valuer interface.
307func (id ID) Value() (driver.Value, error) {
308	if id.IsNil() {
309		return nil, nil
310	}
311	b, err := id.MarshalText()
312	return string(b), err
313}
314
315// Scan implements the sql.Scanner interface.
316func (id *ID) Scan(value interface{}) (err error) {
317	switch val := value.(type) {
318	case string:
319		return id.UnmarshalText([]byte(val))
320	case []byte:
321		return id.UnmarshalText(val)
322	case nil:
323		*id = nilID
324		return nil
325	default:
326		return fmt.Errorf("xid: scanning unsupported type: %T", value)
327	}
328}
329
330// IsNil Returns true if this is a "nil" ID
331func (id ID) IsNil() bool {
332	return id == nilID
333}
334
335// NilID returns a zero value for `xid.ID`.
336func NilID() ID {
337	return nilID
338}
339
340// Bytes returns the byte array representation of `ID`
341func (id ID) Bytes() []byte {
342	return id[:]
343}
344
345// FromBytes convert the byte array representation of `ID` back to `ID`
346func FromBytes(b []byte) (ID, error) {
347	var id ID
348	if len(b) != rawLen {
349		return id, ErrInvalidID
350	}
351	copy(id[:], b)
352	return id, nil
353}
354
355// Compare returns an integer comparing two IDs. It behaves just like `bytes.Compare`.
356// The result will be 0 if two IDs are identical, -1 if current id is less than the other one,
357// and 1 if current id is greater than the other.
358func (id ID) Compare(other ID) int {
359	return bytes.Compare(id[:], other[:])
360}
361
362type sorter []ID
363
364func (s sorter) Len() int {
365	return len(s)
366}
367
368func (s sorter) Less(i, j int) bool {
369	return s[i].Compare(s[j]) < 0
370}
371
372func (s sorter) Swap(i, j int) {
373	s[i], s[j] = s[j], s[i]
374}
375
376// Sort sorts an array of IDs inplace.
377// It works by wrapping `[]ID` and use `sort.Sort`.
378func Sort(ids []ID) {
379	sort.Sort(sorter(ids))
380}
381