1// Copyright 2017 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Package argon2 implements the key derivation function Argon2. 6// Argon2 was selected as the winner of the Password Hashing Competition and can 7// be used to derive cryptographic keys from passwords. 8// 9// For a detailed specification of Argon2 see [1]. 10// 11// If you aren't sure which function you need, use Argon2id (IDKey) and 12// the parameter recommendations for your scenario. 13// 14// 15// Argon2i 16// 17// Argon2i (implemented by Key) is the side-channel resistant version of Argon2. 18// It uses data-independent memory access, which is preferred for password 19// hashing and password-based key derivation. Argon2i requires more passes over 20// memory than Argon2id to protect from trade-off attacks. The recommended 21// parameters (taken from [2]) for non-interactive operations are time=3 and to 22// use the maximum available memory. 23// 24// 25// Argon2id 26// 27// Argon2id (implemented by IDKey) is a hybrid version of Argon2 combining 28// Argon2i and Argon2d. It uses data-independent memory access for the first 29// half of the first iteration over the memory and data-dependent memory access 30// for the rest. Argon2id is side-channel resistant and provides better brute- 31// force cost savings due to time-memory tradeoffs than Argon2i. The recommended 32// parameters for non-interactive operations (taken from [2]) are time=1 and to 33// use the maximum available memory. 34// 35// [1] https://github.com/P-H-C/phc-winner-argon2/blob/master/argon2-specs.pdf 36// [2] https://tools.ietf.org/html/draft-irtf-cfrg-argon2-03#section-9.3 37package argon2 38 39import ( 40 "encoding/binary" 41 "sync" 42 43 "golang.org/x/crypto/blake2b" 44) 45 46// The Argon2 version implemented by this package. 47const Version = 0x13 48 49const ( 50 argon2d = iota 51 argon2i 52 argon2id 53) 54 55// Key derives a key from the password, salt, and cost parameters using Argon2i 56// returning a byte slice of length keyLen that can be used as cryptographic 57// key. The CPU cost and parallelism degree must be greater than zero. 58// 59// For example, you can get a derived key for e.g. AES-256 (which needs a 60// 32-byte key) by doing: 61// 62// key := argon2.Key([]byte("some password"), salt, 3, 32*1024, 4, 32) 63// 64// The draft RFC recommends[2] time=3, and memory=32*1024 is a sensible number. 65// If using that amount of memory (32 MB) is not possible in some contexts then 66// the time parameter can be increased to compensate. 67// 68// The time parameter specifies the number of passes over the memory and the 69// memory parameter specifies the size of the memory in KiB. For example 70// memory=32*1024 sets the memory cost to ~32 MB. The number of threads can be 71// adjusted to the number of available CPUs. The cost parameters should be 72// increased as memory latency and CPU parallelism increases. Remember to get a 73// good random salt. 74func Key(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte { 75 return deriveKey(argon2i, password, salt, nil, nil, time, memory, threads, keyLen) 76} 77 78// IDKey derives a key from the password, salt, and cost parameters using 79// Argon2id returning a byte slice of length keyLen that can be used as 80// cryptographic key. The CPU cost and parallelism degree must be greater than 81// zero. 82// 83// For example, you can get a derived key for e.g. AES-256 (which needs a 84// 32-byte key) by doing: 85// 86// key := argon2.IDKey([]byte("some password"), salt, 1, 64*1024, 4, 32) 87// 88// The draft RFC recommends[2] time=1, and memory=64*1024 is a sensible number. 89// If using that amount of memory (64 MB) is not possible in some contexts then 90// the time parameter can be increased to compensate. 91// 92// The time parameter specifies the number of passes over the memory and the 93// memory parameter specifies the size of the memory in KiB. For example 94// memory=64*1024 sets the memory cost to ~64 MB. The number of threads can be 95// adjusted to the numbers of available CPUs. The cost parameters should be 96// increased as memory latency and CPU parallelism increases. Remember to get a 97// good random salt. 98func IDKey(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte { 99 return deriveKey(argon2id, password, salt, nil, nil, time, memory, threads, keyLen) 100} 101 102func deriveKey(mode int, password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []byte { 103 if time < 1 { 104 panic("argon2: number of rounds too small") 105 } 106 if threads < 1 { 107 panic("argon2: parallelism degree too low") 108 } 109 h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode) 110 111 memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads)) 112 if memory < 2*syncPoints*uint32(threads) { 113 memory = 2 * syncPoints * uint32(threads) 114 } 115 B := initBlocks(&h0, memory, uint32(threads)) 116 processBlocks(B, time, memory, uint32(threads), mode) 117 return extractKey(B, memory, uint32(threads), keyLen) 118} 119 120const ( 121 blockLength = 128 122 syncPoints = 4 123) 124 125type block [blockLength]uint64 126 127func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte { 128 var ( 129 h0 [blake2b.Size + 8]byte 130 params [24]byte 131 tmp [4]byte 132 ) 133 134 b2, _ := blake2b.New512(nil) 135 binary.LittleEndian.PutUint32(params[0:4], threads) 136 binary.LittleEndian.PutUint32(params[4:8], keyLen) 137 binary.LittleEndian.PutUint32(params[8:12], memory) 138 binary.LittleEndian.PutUint32(params[12:16], time) 139 binary.LittleEndian.PutUint32(params[16:20], uint32(Version)) 140 binary.LittleEndian.PutUint32(params[20:24], uint32(mode)) 141 b2.Write(params[:]) 142 binary.LittleEndian.PutUint32(tmp[:], uint32(len(password))) 143 b2.Write(tmp[:]) 144 b2.Write(password) 145 binary.LittleEndian.PutUint32(tmp[:], uint32(len(salt))) 146 b2.Write(tmp[:]) 147 b2.Write(salt) 148 binary.LittleEndian.PutUint32(tmp[:], uint32(len(key))) 149 b2.Write(tmp[:]) 150 b2.Write(key) 151 binary.LittleEndian.PutUint32(tmp[:], uint32(len(data))) 152 b2.Write(tmp[:]) 153 b2.Write(data) 154 b2.Sum(h0[:0]) 155 return h0 156} 157 158func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []block { 159 var block0 [1024]byte 160 B := make([]block, memory) 161 for lane := uint32(0); lane < threads; lane++ { 162 j := lane * (memory / threads) 163 binary.LittleEndian.PutUint32(h0[blake2b.Size+4:], lane) 164 165 binary.LittleEndian.PutUint32(h0[blake2b.Size:], 0) 166 blake2bHash(block0[:], h0[:]) 167 for i := range B[j+0] { 168 B[j+0][i] = binary.LittleEndian.Uint64(block0[i*8:]) 169 } 170 171 binary.LittleEndian.PutUint32(h0[blake2b.Size:], 1) 172 blake2bHash(block0[:], h0[:]) 173 for i := range B[j+1] { 174 B[j+1][i] = binary.LittleEndian.Uint64(block0[i*8:]) 175 } 176 } 177 return B 178} 179 180func processBlocks(B []block, time, memory, threads uint32, mode int) { 181 lanes := memory / threads 182 segments := lanes / syncPoints 183 184 processSegment := func(n, slice, lane uint32, wg *sync.WaitGroup) { 185 var addresses, in, zero block 186 if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) { 187 in[0] = uint64(n) 188 in[1] = uint64(lane) 189 in[2] = uint64(slice) 190 in[3] = uint64(memory) 191 in[4] = uint64(time) 192 in[5] = uint64(mode) 193 } 194 195 index := uint32(0) 196 if n == 0 && slice == 0 { 197 index = 2 // we have already generated the first two blocks 198 if mode == argon2i || mode == argon2id { 199 in[6]++ 200 processBlock(&addresses, &in, &zero) 201 processBlock(&addresses, &addresses, &zero) 202 } 203 } 204 205 offset := lane*lanes + slice*segments + index 206 var random uint64 207 for index < segments { 208 prev := offset - 1 209 if index == 0 && slice == 0 { 210 prev += lanes // last block in lane 211 } 212 if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) { 213 if index%blockLength == 0 { 214 in[6]++ 215 processBlock(&addresses, &in, &zero) 216 processBlock(&addresses, &addresses, &zero) 217 } 218 random = addresses[index%blockLength] 219 } else { 220 random = B[prev][0] 221 } 222 newOffset := indexAlpha(random, lanes, segments, threads, n, slice, lane, index) 223 processBlockXOR(&B[offset], &B[prev], &B[newOffset]) 224 index, offset = index+1, offset+1 225 } 226 wg.Done() 227 } 228 229 for n := uint32(0); n < time; n++ { 230 for slice := uint32(0); slice < syncPoints; slice++ { 231 var wg sync.WaitGroup 232 for lane := uint32(0); lane < threads; lane++ { 233 wg.Add(1) 234 go processSegment(n, slice, lane, &wg) 235 } 236 wg.Wait() 237 } 238 } 239 240} 241 242func extractKey(B []block, memory, threads, keyLen uint32) []byte { 243 lanes := memory / threads 244 for lane := uint32(0); lane < threads-1; lane++ { 245 for i, v := range B[(lane*lanes)+lanes-1] { 246 B[memory-1][i] ^= v 247 } 248 } 249 250 var block [1024]byte 251 for i, v := range B[memory-1] { 252 binary.LittleEndian.PutUint64(block[i*8:], v) 253 } 254 key := make([]byte, keyLen) 255 blake2bHash(key, block[:]) 256 return key 257} 258 259func indexAlpha(rand uint64, lanes, segments, threads, n, slice, lane, index uint32) uint32 { 260 refLane := uint32(rand>>32) % threads 261 if n == 0 && slice == 0 { 262 refLane = lane 263 } 264 m, s := 3*segments, ((slice+1)%syncPoints)*segments 265 if lane == refLane { 266 m += index 267 } 268 if n == 0 { 269 m, s = slice*segments, 0 270 if slice == 0 || lane == refLane { 271 m += index 272 } 273 } 274 if index == 0 || lane == refLane { 275 m-- 276 } 277 return phi(rand, uint64(m), uint64(s), refLane, lanes) 278} 279 280func phi(rand, m, s uint64, lane, lanes uint32) uint32 { 281 p := rand & 0xFFFFFFFF 282 p = (p * p) >> 32 283 p = (p * m) >> 32 284 return lane*lanes + uint32((s+m-(p+1))%uint64(lanes)) 285} 286