1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package argon2 implements the key derivation function Argon2.
6// Argon2 was selected as the winner of the Password Hashing Competition and can
7// be used to derive cryptographic keys from passwords.
8//
9// For a detailed specification of Argon2 see [1].
10//
11// If you aren't sure which function you need, use Argon2id (IDKey) and
12// the parameter recommendations for your scenario.
13//
14//
15// Argon2i
16//
17// Argon2i (implemented by Key) is the side-channel resistant version of Argon2.
18// It uses data-independent memory access, which is preferred for password
19// hashing and password-based key derivation. Argon2i requires more passes over
20// memory than Argon2id to protect from trade-off attacks. The recommended
21// parameters (taken from [2]) for non-interactive operations are time=3 and to
22// use the maximum available memory.
23//
24//
25// Argon2id
26//
27// Argon2id (implemented by IDKey) is a hybrid version of Argon2 combining
28// Argon2i and Argon2d. It uses data-independent memory access for the first
29// half of the first iteration over the memory and data-dependent memory access
30// for the rest. Argon2id is side-channel resistant and provides better brute-
31// force cost savings due to time-memory tradeoffs than Argon2i. The recommended
32// parameters for non-interactive operations (taken from [2]) are time=1 and to
33// use the maximum available memory.
34//
35// [1] https://github.com/P-H-C/phc-winner-argon2/blob/master/argon2-specs.pdf
36// [2] https://tools.ietf.org/html/draft-irtf-cfrg-argon2-03#section-9.3
37package argon2
38
39import (
40	"encoding/binary"
41	"sync"
42
43	"golang.org/x/crypto/blake2b"
44)
45
46// The Argon2 version implemented by this package.
47const Version = 0x13
48
49const (
50	argon2d = iota
51	argon2i
52	argon2id
53)
54
55// Key derives a key from the password, salt, and cost parameters using Argon2i
56// returning a byte slice of length keyLen that can be used as cryptographic
57// key. The CPU cost and parallelism degree must be greater than zero.
58//
59// For example, you can get a derived key for e.g. AES-256 (which needs a
60// 32-byte key) by doing:
61//
62//      key := argon2.Key([]byte("some password"), salt, 3, 32*1024, 4, 32)
63//
64// The draft RFC recommends[2] time=3, and memory=32*1024 is a sensible number.
65// If using that amount of memory (32 MB) is not possible in some contexts then
66// the time parameter can be increased to compensate.
67//
68// The time parameter specifies the number of passes over the memory and the
69// memory parameter specifies the size of the memory in KiB. For example
70// memory=32*1024 sets the memory cost to ~32 MB. The number of threads can be
71// adjusted to the number of available CPUs. The cost parameters should be
72// increased as memory latency and CPU parallelism increases. Remember to get a
73// good random salt.
74func Key(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
75	return deriveKey(argon2i, password, salt, nil, nil, time, memory, threads, keyLen)
76}
77
78// IDKey derives a key from the password, salt, and cost parameters using
79// Argon2id returning a byte slice of length keyLen that can be used as
80// cryptographic key. The CPU cost and parallelism degree must be greater than
81// zero.
82//
83// For example, you can get a derived key for e.g. AES-256 (which needs a
84// 32-byte key) by doing:
85//
86//      key := argon2.IDKey([]byte("some password"), salt, 1, 64*1024, 4, 32)
87//
88// The draft RFC recommends[2] time=1, and memory=64*1024 is a sensible number.
89// If using that amount of memory (64 MB) is not possible in some contexts then
90// the time parameter can be increased to compensate.
91//
92// The time parameter specifies the number of passes over the memory and the
93// memory parameter specifies the size of the memory in KiB. For example
94// memory=64*1024 sets the memory cost to ~64 MB. The number of threads can be
95// adjusted to the numbers of available CPUs. The cost parameters should be
96// increased as memory latency and CPU parallelism increases. Remember to get a
97// good random salt.
98func IDKey(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
99	return deriveKey(argon2id, password, salt, nil, nil, time, memory, threads, keyLen)
100}
101
102func deriveKey(mode int, password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
103	if time < 1 {
104		panic("argon2: number of rounds too small")
105	}
106	if threads < 1 {
107		panic("argon2: parallelism degree too low")
108	}
109	h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
110
111	memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
112	if memory < 2*syncPoints*uint32(threads) {
113		memory = 2 * syncPoints * uint32(threads)
114	}
115	B := initBlocks(&h0, memory, uint32(threads))
116	processBlocks(B, time, memory, uint32(threads), mode)
117	return extractKey(B, memory, uint32(threads), keyLen)
118}
119
120const (
121	blockLength = 128
122	syncPoints  = 4
123)
124
125type block [blockLength]uint64
126
127func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte {
128	var (
129		h0     [blake2b.Size + 8]byte
130		params [24]byte
131		tmp    [4]byte
132	)
133
134	b2, _ := blake2b.New512(nil)
135	binary.LittleEndian.PutUint32(params[0:4], threads)
136	binary.LittleEndian.PutUint32(params[4:8], keyLen)
137	binary.LittleEndian.PutUint32(params[8:12], memory)
138	binary.LittleEndian.PutUint32(params[12:16], time)
139	binary.LittleEndian.PutUint32(params[16:20], uint32(Version))
140	binary.LittleEndian.PutUint32(params[20:24], uint32(mode))
141	b2.Write(params[:])
142	binary.LittleEndian.PutUint32(tmp[:], uint32(len(password)))
143	b2.Write(tmp[:])
144	b2.Write(password)
145	binary.LittleEndian.PutUint32(tmp[:], uint32(len(salt)))
146	b2.Write(tmp[:])
147	b2.Write(salt)
148	binary.LittleEndian.PutUint32(tmp[:], uint32(len(key)))
149	b2.Write(tmp[:])
150	b2.Write(key)
151	binary.LittleEndian.PutUint32(tmp[:], uint32(len(data)))
152	b2.Write(tmp[:])
153	b2.Write(data)
154	b2.Sum(h0[:0])
155	return h0
156}
157
158func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []block {
159	var block0 [1024]byte
160	B := make([]block, memory)
161	for lane := uint32(0); lane < threads; lane++ {
162		j := lane * (memory / threads)
163		binary.LittleEndian.PutUint32(h0[blake2b.Size+4:], lane)
164
165		binary.LittleEndian.PutUint32(h0[blake2b.Size:], 0)
166		blake2bHash(block0[:], h0[:])
167		for i := range B[j+0] {
168			B[j+0][i] = binary.LittleEndian.Uint64(block0[i*8:])
169		}
170
171		binary.LittleEndian.PutUint32(h0[blake2b.Size:], 1)
172		blake2bHash(block0[:], h0[:])
173		for i := range B[j+1] {
174			B[j+1][i] = binary.LittleEndian.Uint64(block0[i*8:])
175		}
176	}
177	return B
178}
179
180func processBlocks(B []block, time, memory, threads uint32, mode int) {
181	lanes := memory / threads
182	segments := lanes / syncPoints
183
184	processSegment := func(n, slice, lane uint32, wg *sync.WaitGroup) {
185		var addresses, in, zero block
186		if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) {
187			in[0] = uint64(n)
188			in[1] = uint64(lane)
189			in[2] = uint64(slice)
190			in[3] = uint64(memory)
191			in[4] = uint64(time)
192			in[5] = uint64(mode)
193		}
194
195		index := uint32(0)
196		if n == 0 && slice == 0 {
197			index = 2 // we have already generated the first two blocks
198			if mode == argon2i || mode == argon2id {
199				in[6]++
200				processBlock(&addresses, &in, &zero)
201				processBlock(&addresses, &addresses, &zero)
202			}
203		}
204
205		offset := lane*lanes + slice*segments + index
206		var random uint64
207		for index < segments {
208			prev := offset - 1
209			if index == 0 && slice == 0 {
210				prev += lanes // last block in lane
211			}
212			if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) {
213				if index%blockLength == 0 {
214					in[6]++
215					processBlock(&addresses, &in, &zero)
216					processBlock(&addresses, &addresses, &zero)
217				}
218				random = addresses[index%blockLength]
219			} else {
220				random = B[prev][0]
221			}
222			newOffset := indexAlpha(random, lanes, segments, threads, n, slice, lane, index)
223			processBlockXOR(&B[offset], &B[prev], &B[newOffset])
224			index, offset = index+1, offset+1
225		}
226		wg.Done()
227	}
228
229	for n := uint32(0); n < time; n++ {
230		for slice := uint32(0); slice < syncPoints; slice++ {
231			var wg sync.WaitGroup
232			for lane := uint32(0); lane < threads; lane++ {
233				wg.Add(1)
234				go processSegment(n, slice, lane, &wg)
235			}
236			wg.Wait()
237		}
238	}
239
240}
241
242func extractKey(B []block, memory, threads, keyLen uint32) []byte {
243	lanes := memory / threads
244	for lane := uint32(0); lane < threads-1; lane++ {
245		for i, v := range B[(lane*lanes)+lanes-1] {
246			B[memory-1][i] ^= v
247		}
248	}
249
250	var block [1024]byte
251	for i, v := range B[memory-1] {
252		binary.LittleEndian.PutUint64(block[i*8:], v)
253	}
254	key := make([]byte, keyLen)
255	blake2bHash(key, block[:])
256	return key
257}
258
259func indexAlpha(rand uint64, lanes, segments, threads, n, slice, lane, index uint32) uint32 {
260	refLane := uint32(rand>>32) % threads
261	if n == 0 && slice == 0 {
262		refLane = lane
263	}
264	m, s := 3*segments, ((slice+1)%syncPoints)*segments
265	if lane == refLane {
266		m += index
267	}
268	if n == 0 {
269		m, s = slice*segments, 0
270		if slice == 0 || lane == refLane {
271			m += index
272		}
273	}
274	if index == 0 || lane == refLane {
275		m--
276	}
277	return phi(rand, uint64(m), uint64(s), refLane, lanes)
278}
279
280func phi(rand, m, s uint64, lane, lanes uint32) uint32 {
281	p := rand & 0xFFFFFFFF
282	p = (p * p) >> 32
283	p = (p * m) >> 32
284	return lane*lanes + uint32((s+m-(p+1))%uint64(lanes))
285}
286