1 // Copyright 2019 The CryptoCorrosion Contributors
2 // Copyright 2020 Developers of the Rand project.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9 
10 //! The ChaCha random number generator.
11 
12 use ppv_lite86::{dispatch, dispatch_light128};
13 
14 pub use ppv_lite86::Machine;
15 use ppv_lite86::{vec128_storage, ArithOps, BitOps32, LaneWords4, MultiLane, StoreBytes, Vec4};
16 
/// Size in bytes of one ChaCha output block (four 16-byte rows).
pub(crate) const BLOCK: usize = 64;
/// `BLOCK` as a `u64`.
pub(crate) const BLOCK64: u64 = BLOCK as u64;
/// Base-2 logarithm of `BUFBLOCKS`.
const LOG2_BUFBLOCKS: u64 = 2;
/// Number of blocks produced per buffer refill (`1 << 2 == 4`).
const BUFBLOCKS: u64 = 1 << LOG2_BUFBLOCKS;
/// Size in bytes of one refill buffer (`64 * 4 == 256`), as a `u64`.
pub(crate) const BUFSZ64: u64 = BLOCK64 * BUFBLOCKS;
/// Size in bytes of one refill buffer, as a `usize`.
pub(crate) const BUFSZ: usize = BUFSZ64 as usize;

/// Stream-parameter selector for the nonce: `set_stream_param` / `get_stream_param`
/// map it to word lanes 2 (low half) and 3 (high half) of `ChaCha::d`.
const STREAM_PARAM_NONCE: u32 = 1;
/// Stream-parameter selector for the block position: mapped to word lanes
/// 0 (low half) and 1 (high half) of `ChaCha::d`.
const STREAM_PARAM_BLOCK: u32 = 0;
26 
/// Persistent ChaCha generator state.
///
/// Only three of the four 128-bit rows are stored; the constant first row is
/// rebuilt from literals each time output is generated.
#[derive(Clone, PartialEq, Eq)]
pub struct ChaCha {
    /// First 16 key bytes, loaded little-endian by `init_chacha`.
    pub(crate) b: vec128_storage,
    /// Last 16 key bytes, loaded little-endian by `init_chacha`.
    pub(crate) c: vec128_storage,
    /// Counter and nonce words: lanes 0–1 are read as the 64-bit block
    /// position, lanes 2–3 hold the 64-bit nonce parameter.
    pub(crate) d: vec128_storage,
}
33 
/// The four rows of a ChaCha working state, generic over the vector type `V`.
///
/// `V` is a single 128-bit row when one block is processed and a four-lane
/// vector of rows when four blocks are processed together.
#[derive(Clone)]
pub struct State<V> {
    /// Row 0; the refill functions initialise it with the ChaCha constants.
    pub(crate) a: V,
    /// Row 1; initialised from `ChaCha::b`.
    pub(crate) b: V,
    /// Row 2; initialised from `ChaCha::c`.
    pub(crate) c: V,
    /// Row 3; initialised from `ChaCha::d` (counter and nonce words).
    pub(crate) d: V,
}
41 
42 #[inline(always)]
round<V: ArithOps + BitOps32>(mut x: State<V>) -> State<V>43 pub(crate) fn round<V: ArithOps + BitOps32>(mut x: State<V>) -> State<V> {
44     x.a += x.b;
45     x.d = (x.d ^ x.a).rotate_each_word_right16();
46     x.c += x.d;
47     x.b = (x.b ^ x.c).rotate_each_word_right20();
48     x.a += x.b;
49     x.d = (x.d ^ x.a).rotate_each_word_right24();
50     x.c += x.d;
51     x.b = (x.b ^ x.c).rotate_each_word_right25();
52     x
53 }
54 
55 #[inline(always)]
diagonalize<V: LaneWords4>(mut x: State<V>) -> State<V>56 pub(crate) fn diagonalize<V: LaneWords4>(mut x: State<V>) -> State<V> {
57     x.b = x.b.shuffle_lane_words3012();
58     x.c = x.c.shuffle_lane_words2301();
59     x.d = x.d.shuffle_lane_words1230();
60     x
61 }
62 #[inline(always)]
undiagonalize<V: LaneWords4>(mut x: State<V>) -> State<V>63 pub(crate) fn undiagonalize<V: LaneWords4>(mut x: State<V>) -> State<V> {
64     x.b = x.b.shuffle_lane_words1230();
65     x.c = x.c.shuffle_lane_words2301();
66     x.d = x.d.shuffle_lane_words3012();
67     x
68 }
69 
impl ChaCha {
    /// Creates a generator state from a 32-byte key and a nonce.
    ///
    /// The last 8 bytes of `nonce` fill word lanes 2–3 of `d`. If `nonce` is
    /// exactly 12 bytes long, its first 4 bytes fill lane 1; otherwise lane 1
    /// is zero. Lane 0 always starts at zero.
    ///
    /// # Panics
    ///
    /// Panics if `nonce` is shorter than 8 bytes.
    #[inline(always)]
    pub fn new(key: &[u8; 32], nonce: &[u8]) -> Self {
        init_chacha(key, nonce)
    }

    /// Reads word lanes 0 (low half) and 1 (high half) of `d` as one 64-bit
    /// block position.
    #[inline(always)]
    fn pos64<M: Machine>(&self, m: M) -> u64 {
        let d: M::u32x4 = m.unpack(self.d);
        ((d.extract(1) as u64) << 32) | d.extract(0) as u64
    }

    /// Produce 4 blocks of output, advancing the state
    ///
    /// `drounds` is the number of loop iterations, each of which applies two
    /// rounds. `out` receives the four 64-byte blocks back to back, and the
    /// block position stored in `d` is advanced by four (wrapping).
    #[inline(always)]
    pub fn refill4(&mut self, drounds: u32, out: &mut [u8; BUFSZ]) {
        refill_wide(self, drounds, out)
    }

    /// Sets the 64-bit block position stored in word lanes 0–1 of `d`.
    #[inline(always)]
    pub fn set_block_pos(&mut self, value: u64) {
        set_stream_param(self, STREAM_PARAM_BLOCK, value)
    }

    /// Returns the 64-bit block position stored in word lanes 0–1 of `d`.
    #[inline(always)]
    pub fn get_block_pos(&self) -> u64 {
        get_stream_param(self, STREAM_PARAM_BLOCK)
    }

    /// Sets the 64-bit nonce parameter stored in word lanes 2–3 of `d`.
    #[inline(always)]
    pub fn set_nonce(&mut self, value: u64) {
        set_stream_param(self, STREAM_PARAM_NONCE, value)
    }

    /// Returns the 64-bit nonce parameter stored in word lanes 2–3 of `d`.
    #[inline(always)]
    pub fn get_nonce(&self) -> u64 {
        get_stream_param(self, STREAM_PARAM_NONCE)
    }

    /// Returns the 32 key bytes held in rows `b` and `c`, serialised
    /// little-endian (`b` first, then `c`).
    #[inline(always)]
    pub fn get_seed(&self) -> [u8; 32] {
        get_seed(self)
    }
}
113 
/// Generates four consecutive ChaCha blocks into `out` and advances the
/// block position in `state.d` by four.
///
/// * `m` – machine token selecting the vector implementation.
/// * `state` – key rows `b`/`c` are only read; `d` is read and then replaced
///   with the counter advanced by four (wrapping at 64 bits).
/// * `drounds` – number of loop iterations; each applies two rounds.
/// * `out` – receives four 64-byte blocks; each block is written as its rows
///   `a`, `b`, `c`, `d`, 16 little-endian bytes apiece.
#[allow(clippy::many_single_char_names)]
#[inline(always)]
fn refill_wide_impl<Mach: Machine>(
    m: Mach, state: &mut ChaCha, drounds: u32, out: &mut [u8; BUFSZ],
) {
    // The ChaCha constant row: the ASCII string "expand 32-byte k" as four
    // little-endian words.
    let k = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]);
    // Build the `d` rows for blocks 1..=3 by bumping the 64-bit counter held
    // in lanes 0 (low) and 1 (high); block 0 uses `state.d` unchanged.
    let mut pos = state.pos64(m);
    let d0: Mach::u32x4 = m.unpack(state.d);
    pos = pos.wrapping_add(1);
    let d1 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0);
    pos = pos.wrapping_add(1);
    let d2 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0);
    pos = pos.wrapping_add(1);
    let d3 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0);

    let b = m.unpack(state.b);
    let c = m.unpack(state.c);
    // Four-lane working state: rows a, b, c are identical across the four
    // blocks, only the counter row differs.
    let mut x = State {
        a: Mach::u32x4x4::from_lanes([k, k, k, k]),
        b: Mach::u32x4x4::from_lanes([b, b, b, b]),
        c: Mach::u32x4x4::from_lanes([c, c, c, c]),
        d: m.unpack(Mach::u32x4x4::from_lanes([d0, d1, d2, d3]).into()),
    };
    // Each iteration: one round on the state as-is, then one round on the
    // diagonalized state, which is shuffled back afterwards.
    for _ in 0..drounds {
        x = round(x);
        x = undiagonalize(round(diagonalize(x)));
    }
    // Recompute the same counter rows (plus the one for the next refill)
    // from the still-unmodified `state.d`.
    // NOTE(review): recomputing instead of reusing d1..d3 from above is
    // presumably to avoid keeping them live across the rounds — confirm.
    let mut pos = state.pos64(m);
    let d0: Mach::u32x4 = m.unpack(state.d);
    pos = pos.wrapping_add(1);
    let d1 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0);
    pos = pos.wrapping_add(1);
    let d2 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0);
    pos = pos.wrapping_add(1);
    let d3 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0);
    pos = pos.wrapping_add(1);
    let d4 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0);

    // Split the four-lane rows back into per-block rows.
    let (a, b, c, d) = (
        x.a.to_lanes(),
        x.b.to_lanes(),
        x.c.to_lanes(),
        x.d.to_lanes(),
    );
    let sb = m.unpack(state.b);
    let sc = m.unpack(state.c);
    // Input `d` row of each block, needed for the final addition.
    let sd = [m.unpack(state.d), d1, d2, d3];
    // Persist the counter for the next call: four blocks further on.
    state.d = d4.into();
    // `out` is BUFSZ = 256 bytes, i.e. exactly 16 chunks of 16 bytes: four
    // rows for each of the four blocks, so the `unwrap`s below cannot fail.
    let mut words = out.chunks_exact_mut(16);
    for ((((&a, &b), &c), &d), &sd) in a.iter().zip(&b).zip(&c).zip(&d).zip(&sd) {
        // Add the block's input rows to the permuted rows and serialise.
        (a + k).write_le(words.next().unwrap());
        (b + sb).write_le(words.next().unwrap());
        (c + sc).write_le(words.next().unwrap());
        (d + sd).write_le(words.next().unwrap());
    }
}
170 
// Entry point used by `ChaCha::refill4`: `dispatch!` binds `m` to a value of
// the machine type `Mach`, which is forwarded to `refill_wide_impl` together
// with the caller's arguments.
dispatch!(m, Mach, {
    fn refill_wide(state: &mut ChaCha, drounds: u32, out: &mut [u8; BUFSZ]) {
        refill_wide_impl(m, state, drounds, out);
    }
});
176 
177 // Single-block, rounds-only; shared by try_apply_keystream for tails shorter than BUFSZ
178 // and XChaCha's setup step.
179 dispatch!(m, Mach, {
180     fn refill_narrow_rounds(state: &mut ChaCha, drounds: u32) -> State<vec128_storage> {
181         let k: Mach::u32x4 = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]);
182         let mut x = State {
183             a: k,
184             b: m.unpack(state.b),
185             c: m.unpack(state.c),
186             d: m.unpack(state.d),
187         };
188         for _ in 0..drounds {
189             x = round(x);
190             x = undiagonalize(round(diagonalize(x)));
191         }
192         State {
193             a: x.a.into(),
194             b: x.b.into(),
195             c: x.c.into(),
196             d: x.d.into(),
197         }
198     }
199 });
200 
201 dispatch_light128!(m, Mach, {
202     fn set_stream_param(state: &mut ChaCha, param: u32, value: u64) {
203         let d: Mach::u32x4 = m.unpack(state.d);
204         state.d = d
205             .insert((value >> 32) as u32, (param << 1) | 1)
206             .insert(value as u32, param << 1)
207             .into();
208     }
209 });
210 
211 dispatch_light128!(m, Mach, {
212     fn get_stream_param(state: &ChaCha, param: u32) -> u64 {
213         let d: Mach::u32x4 = m.unpack(state.d);
214         ((d.extract((param << 1) | 1) as u64) << 32) | d.extract(param << 1) as u64
215     }
216 });
217 
218 dispatch_light128!(m, Mach, {
219     fn get_seed(state: &ChaCha) -> [u8; 32] {
220         let b: Mach::u32x4 = m.unpack(state.b);
221         let c: Mach::u32x4 = m.unpack(state.c);
222         let mut key = [0u8; 32];
223         b.write_le(&mut key[..16]);
224         c.write_le(&mut key[16..]);
225         key
226     }
227 });
228 
/// Decodes a little-endian `u32` from a slice of exactly four bytes.
///
/// # Panics
///
/// Panics if `xs.len() != 4`.
fn read_u32le(xs: &[u8]) -> u32 {
    assert_eq!(xs.len(), 4);
    u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]])
}
233 
234 dispatch_light128!(m, Mach, {
235     fn init_chacha(key: &[u8; 32], nonce: &[u8]) -> ChaCha {
236         let ctr_nonce = [
237             0,
238             if nonce.len() == 12 {
239                 read_u32le(&nonce[0..4])
240             } else {
241                 0
242             },
243             read_u32le(&nonce[nonce.len() - 8..nonce.len() - 4]),
244             read_u32le(&nonce[nonce.len() - 4..]),
245         ];
246         let key0: Mach::u32x4 = m.read_le(&key[..16]);
247         let key1: Mach::u32x4 = m.read_le(&key[16..]);
248         ChaCha {
249             b: key0.into(),
250             c: key1.into(),
251             d: ctr_nonce.into(),
252         }
253     }
254 });
255 
256 dispatch_light128!(m, Mach, {
257     fn init_chacha_x(key: &[u8; 32], nonce: &[u8; 24], rounds: u32) -> ChaCha {
258         let key0: Mach::u32x4 = m.read_le(&key[..16]);
259         let key1: Mach::u32x4 = m.read_le(&key[16..]);
260         let nonce0: Mach::u32x4 = m.read_le(&nonce[..16]);
261         let mut state = ChaCha {
262             b: key0.into(),
263             c: key1.into(),
264             d: nonce0.into(),
265         };
266         let x = refill_narrow_rounds(&mut state, rounds);
267         let ctr_nonce1 = [0, 0, read_u32le(&nonce[16..20]), read_u32le(&nonce[20..24])];
268         state.b = x.a;
269         state.c = x.d;
270         state.d = ctr_nonce1.into();
271         state
272     }
273 });
274