1// Package xid is a globally unique id generator suited for web scale 2// 3// Xid is using Mongo Object ID algorithm to generate globally unique ids: 4// https://docs.mongodb.org/manual/reference/object-id/ 5// 6// - 4-byte value representing the seconds since the Unix epoch, 7// - 3-byte machine identifier, 8// - 2-byte process id, and 9// - 3-byte counter, starting with a random value. 10// 11// The binary representation of the id is compatible with Mongo 12 bytes Object IDs. 12// The string representation is using base32 hex (w/o padding) for better space efficiency 13// when stored in that form (20 bytes). The hex variant of base32 is used to retain the 14// sortable property of the id. 15// 16// Xid doesn't use base64 because case sensitivity and the 2 non alphanum chars may be an 17// issue when transported as a string between various systems. Base36 wasn't retained either 18// because 1/ it's not standard 2/ the resulting size is not predictable (not bit aligned) 19// and 3/ it would not remain sortable. To validate a base32 `xid`, expect a 20 chars long, 20// all lowercase sequence of `a` to `v` letters and `0` to `9` numbers (`[0-9a-v]{20}`). 21// 22// UUID is 16 bytes (128 bits), snowflake is 8 bytes (64 bits), xid stands in between 23// with 12 bytes with a more compact string representation ready for the web and no 24// required configuration or central generation server. 25// 26// Features: 27// 28// - Size: 12 bytes (96 bits), smaller than UUID, larger than snowflake 29// - Base32 hex encoded by default (16 bytes storage when transported as printable string) 30// - Non configured, you don't need set a unique machine and/or data center id 31// - K-ordered 32// - Embedded time with 1 second precision 33// - Unicity guaranteed for 16,777,216 (24 bits) unique ids per second and per host/process 34// 35// Best used with xlog's RequestIDHandler (https://godoc.org/github.com/rs/xlog#RequestIDHandler). 36// 37// References: 38// 39// - http://www.slideshare.net/davegardnerisme/unique-id-generation-in-distributed-systems 40// - https://en.wikipedia.org/wiki/Universally_unique_identifier 41// - https://blog.twitter.com/2010/announcing-snowflake 42package xid 43 44import ( 45 "bytes" 46 "crypto/md5" 47 "crypto/rand" 48 "database/sql/driver" 49 "encoding/binary" 50 "errors" 51 "fmt" 52 "hash/crc32" 53 "io/ioutil" 54 "os" 55 "sort" 56 "sync/atomic" 57 "time" 58 "unsafe" 59) 60 61// Code inspired from mgo/bson ObjectId 62 63// ID represents a unique request id 64type ID [rawLen]byte 65 66const ( 67 encodedLen = 20 // string encoded len 68 rawLen = 12 // binary raw len 69 70 // encoding stores a custom version of the base32 encoding with lower case 71 // letters. 72 encoding = "0123456789abcdefghijklmnopqrstuv" 73) 74 75var ( 76 // ErrInvalidID is returned when trying to unmarshal an invalid ID 77 ErrInvalidID = errors.New("xid: invalid ID") 78 79 // objectIDCounter is atomically incremented when generating a new ObjectId 80 // using NewObjectId() function. It's used as a counter part of an id. 81 // This id is initialized with a random value. 82 objectIDCounter = randInt() 83 84 // machineId stores machine id generated once and used in subsequent calls 85 // to NewObjectId function. 86 machineID = readMachineID() 87 88 // pid stores the current process id 89 pid = os.Getpid() 90 91 nilID ID 92 93 // dec is the decoding map for base32 encoding 94 dec [256]byte 95) 96 97func init() { 98 for i := 0; i < len(dec); i++ { 99 dec[i] = 0xFF 100 } 101 for i := 0; i < len(encoding); i++ { 102 dec[encoding[i]] = byte(i) 103 } 104 105 // If /proc/self/cpuset exists and is not /, we can assume that we are in a 106 // form of container and use the content of cpuset xor-ed with the PID in 107 // order get a reasonable machine global unique PID. 108 b, err := ioutil.ReadFile("/proc/self/cpuset") 109 if err == nil && len(b) > 1 { 110 pid ^= int(crc32.ChecksumIEEE(b)) 111 } 112} 113 114// readMachineId generates machine id and puts it into the machineId global 115// variable. If this function fails to get the hostname, it will cause 116// a runtime error. 117func readMachineID() []byte { 118 id := make([]byte, 3) 119 hid, err := readPlatformMachineID() 120 if err != nil || len(hid) == 0 { 121 hid, err = os.Hostname() 122 } 123 if err == nil && len(hid) != 0 { 124 hw := md5.New() 125 hw.Write([]byte(hid)) 126 copy(id, hw.Sum(nil)) 127 } else { 128 // Fallback to rand number if machine id can't be gathered 129 if _, randErr := rand.Reader.Read(id); randErr != nil { 130 panic(fmt.Errorf("xid: cannot get hostname nor generate a random number: %v; %v", err, randErr)) 131 } 132 } 133 return id 134} 135 136// randInt generates a random uint32 137func randInt() uint32 { 138 b := make([]byte, 3) 139 if _, err := rand.Reader.Read(b); err != nil { 140 panic(fmt.Errorf("xid: cannot generate random number: %v;", err)) 141 } 142 return uint32(b[0])<<16 | uint32(b[1])<<8 | uint32(b[2]) 143} 144 145// New generates a globally unique ID 146func New() ID { 147 return NewWithTime(time.Now()) 148} 149 150// NewWithTime generates a globally unique ID with the passed in time 151func NewWithTime(t time.Time) ID { 152 var id ID 153 // Timestamp, 4 bytes, big endian 154 binary.BigEndian.PutUint32(id[:], uint32(t.Unix())) 155 // Machine, first 3 bytes of md5(hostname) 156 id[4] = machineID[0] 157 id[5] = machineID[1] 158 id[6] = machineID[2] 159 // Pid, 2 bytes, specs don't specify endianness, but we use big endian. 160 id[7] = byte(pid >> 8) 161 id[8] = byte(pid) 162 // Increment, 3 bytes, big endian 163 i := atomic.AddUint32(&objectIDCounter, 1) 164 id[9] = byte(i >> 16) 165 id[10] = byte(i >> 8) 166 id[11] = byte(i) 167 return id 168} 169 170// FromString reads an ID from its string representation 171func FromString(id string) (ID, error) { 172 i := &ID{} 173 err := i.UnmarshalText([]byte(id)) 174 return *i, err 175} 176 177// String returns a base32 hex lowercased with no padding representation of the id (char set is 0-9, a-v). 178func (id ID) String() string { 179 text := make([]byte, encodedLen) 180 encode(text, id[:]) 181 return *(*string)(unsafe.Pointer(&text)) 182} 183 184// Encode encodes the id using base32 encoding, writing 20 bytes to dst and return it. 185func (id ID) Encode(dst []byte) []byte { 186 encode(dst, id[:]) 187 return dst 188} 189 190// MarshalText implements encoding/text TextMarshaler interface 191func (id ID) MarshalText() ([]byte, error) { 192 text := make([]byte, encodedLen) 193 encode(text, id[:]) 194 return text, nil 195} 196 197// MarshalJSON implements encoding/json Marshaler interface 198func (id ID) MarshalJSON() ([]byte, error) { 199 if id.IsNil() { 200 return []byte("null"), nil 201 } 202 text := make([]byte, encodedLen+2) 203 encode(text[1:encodedLen+1], id[:]) 204 text[0], text[encodedLen+1] = '"', '"' 205 return text, nil 206} 207 208// encode by unrolling the stdlib base32 algorithm + removing all safe checks 209func encode(dst, id []byte) { 210 _ = dst[19] 211 _ = id[11] 212 213 dst[19] = encoding[(id[11]<<4)&0x1F] 214 dst[18] = encoding[(id[11]>>1)&0x1F] 215 dst[17] = encoding[(id[11]>>6)&0x1F|(id[10]<<2)&0x1F] 216 dst[16] = encoding[id[10]>>3] 217 dst[15] = encoding[id[9]&0x1F] 218 dst[14] = encoding[(id[9]>>5)|(id[8]<<3)&0x1F] 219 dst[13] = encoding[(id[8]>>2)&0x1F] 220 dst[12] = encoding[id[8]>>7|(id[7]<<1)&0x1F] 221 dst[11] = encoding[(id[7]>>4)&0x1F|(id[6]<<4)&0x1F] 222 dst[10] = encoding[(id[6]>>1)&0x1F] 223 dst[9] = encoding[(id[6]>>6)&0x1F|(id[5]<<2)&0x1F] 224 dst[8] = encoding[id[5]>>3] 225 dst[7] = encoding[id[4]&0x1F] 226 dst[6] = encoding[id[4]>>5|(id[3]<<3)&0x1F] 227 dst[5] = encoding[(id[3]>>2)&0x1F] 228 dst[4] = encoding[id[3]>>7|(id[2]<<1)&0x1F] 229 dst[3] = encoding[(id[2]>>4)&0x1F|(id[1]<<4)&0x1F] 230 dst[2] = encoding[(id[1]>>1)&0x1F] 231 dst[1] = encoding[(id[1]>>6)&0x1F|(id[0]<<2)&0x1F] 232 dst[0] = encoding[id[0]>>3] 233} 234 235// UnmarshalText implements encoding/text TextUnmarshaler interface 236func (id *ID) UnmarshalText(text []byte) error { 237 if len(text) != encodedLen { 238 return ErrInvalidID 239 } 240 for _, c := range text { 241 if dec[c] == 0xFF { 242 return ErrInvalidID 243 } 244 } 245 decode(id, text) 246 return nil 247} 248 249// UnmarshalJSON implements encoding/json Unmarshaler interface 250func (id *ID) UnmarshalJSON(b []byte) error { 251 s := string(b) 252 if s == "null" { 253 *id = nilID 254 return nil 255 } 256 return id.UnmarshalText(b[1 : len(b)-1]) 257} 258 259// decode by unrolling the stdlib base32 algorithm + removing all safe checks 260func decode(id *ID, src []byte) { 261 _ = src[19] 262 _ = id[11] 263 264 id[11] = dec[src[17]]<<6 | dec[src[18]]<<1 | dec[src[19]]>>4 265 id[10] = dec[src[16]]<<3 | dec[src[17]]>>2 266 id[9] = dec[src[14]]<<5 | dec[src[15]] 267 id[8] = dec[src[12]]<<7 | dec[src[13]]<<2 | dec[src[14]]>>3 268 id[7] = dec[src[11]]<<4 | dec[src[12]]>>1 269 id[6] = dec[src[9]]<<6 | dec[src[10]]<<1 | dec[src[11]]>>4 270 id[5] = dec[src[8]]<<3 | dec[src[9]]>>2 271 id[4] = dec[src[6]]<<5 | dec[src[7]] 272 id[3] = dec[src[4]]<<7 | dec[src[5]]<<2 | dec[src[6]]>>3 273 id[2] = dec[src[3]]<<4 | dec[src[4]]>>1 274 id[1] = dec[src[1]]<<6 | dec[src[2]]<<1 | dec[src[3]]>>4 275 id[0] = dec[src[0]]<<3 | dec[src[1]]>>2 276} 277 278// Time returns the timestamp part of the id. 279// It's a runtime error to call this method with an invalid id. 280func (id ID) Time() time.Time { 281 // First 4 bytes of ObjectId is 32-bit big-endian seconds from epoch. 282 secs := int64(binary.BigEndian.Uint32(id[0:4])) 283 return time.Unix(secs, 0) 284} 285 286// Machine returns the 3-byte machine id part of the id. 287// It's a runtime error to call this method with an invalid id. 288func (id ID) Machine() []byte { 289 return id[4:7] 290} 291 292// Pid returns the process id part of the id. 293// It's a runtime error to call this method with an invalid id. 294func (id ID) Pid() uint16 { 295 return binary.BigEndian.Uint16(id[7:9]) 296} 297 298// Counter returns the incrementing value part of the id. 299// It's a runtime error to call this method with an invalid id. 300func (id ID) Counter() int32 { 301 b := id[9:12] 302 // Counter is stored as big-endian 3-byte value 303 return int32(uint32(b[0])<<16 | uint32(b[1])<<8 | uint32(b[2])) 304} 305 306// Value implements the driver.Valuer interface. 307func (id ID) Value() (driver.Value, error) { 308 if id.IsNil() { 309 return nil, nil 310 } 311 b, err := id.MarshalText() 312 return string(b), err 313} 314 315// Scan implements the sql.Scanner interface. 316func (id *ID) Scan(value interface{}) (err error) { 317 switch val := value.(type) { 318 case string: 319 return id.UnmarshalText([]byte(val)) 320 case []byte: 321 return id.UnmarshalText(val) 322 case nil: 323 *id = nilID 324 return nil 325 default: 326 return fmt.Errorf("xid: scanning unsupported type: %T", value) 327 } 328} 329 330// IsNil Returns true if this is a "nil" ID 331func (id ID) IsNil() bool { 332 return id == nilID 333} 334 335// NilID returns a zero value for `xid.ID`. 336func NilID() ID { 337 return nilID 338} 339 340// Bytes returns the byte array representation of `ID` 341func (id ID) Bytes() []byte { 342 return id[:] 343} 344 345// FromBytes convert the byte array representation of `ID` back to `ID` 346func FromBytes(b []byte) (ID, error) { 347 var id ID 348 if len(b) != rawLen { 349 return id, ErrInvalidID 350 } 351 copy(id[:], b) 352 return id, nil 353} 354 355// Compare returns an integer comparing two IDs. It behaves just like `bytes.Compare`. 356// The result will be 0 if two IDs are identical, -1 if current id is less than the other one, 357// and 1 if current id is greater than the other. 358func (id ID) Compare(other ID) int { 359 return bytes.Compare(id[:], other[:]) 360} 361 362type sorter []ID 363 364func (s sorter) Len() int { 365 return len(s) 366} 367 368func (s sorter) Less(i, j int) bool { 369 return s[i].Compare(s[j]) < 0 370} 371 372func (s sorter) Swap(i, j int) { 373 s[i], s[j] = s[j], s[i] 374} 375 376// Sort sorts an array of IDs inplace. 377// It works by wrapping `[]ID` and use `sort.Sort`. 378func Sort(ids []ID) { 379 sort.Sort(sorter(ids)) 380} 381