1 use crate::{
2 encode::{add_padding, encode_to_slice},
3 Config,
4 };
5 #[cfg(any(feature = "alloc", feature = "std", test))]
6 use alloc::string::String;
7 use core::cmp;
8 #[cfg(any(feature = "alloc", feature = "std", test))]
9 use core::str;
10
/// Destination for the base64 output produced by [`ChunkedEncoder`].
///
/// [`ChunkedEncoder::encode`] calls [`Sink::write_encoded_bytes`] once per encoded chunk and
/// stops at the first error the sink returns.
pub trait Sink {
    /// Error reported when the sink cannot accept a chunk.
    type Error;

    /// Consume one chunk of encoded base64 data, supplied as UTF-8 bytes.
    fn write_encoded_bytes(&mut self, encoded: &[u8]) -> Result<(), Self::Error>;
}
18
/// Length in bytes (1 KiB) of the fixed-size buffer each chunk is encoded into.
const BUF_SIZE: usize = 1 << 10;
20
21 /// A base64 encoder that emits encoded bytes in chunks without heap allocation.
22 pub struct ChunkedEncoder {
23 config: Config,
24 max_input_chunk_len: usize,
25 }
26
27 impl ChunkedEncoder {
new(config: Config) -> ChunkedEncoder28 pub fn new(config: Config) -> ChunkedEncoder {
29 ChunkedEncoder {
30 config,
31 max_input_chunk_len: max_input_length(BUF_SIZE, config),
32 }
33 }
34
encode<S: Sink>(&self, bytes: &[u8], sink: &mut S) -> Result<(), S::Error>35 pub fn encode<S: Sink>(&self, bytes: &[u8], sink: &mut S) -> Result<(), S::Error> {
36 let mut encode_buf: [u8; BUF_SIZE] = [0; BUF_SIZE];
37 let encode_table = self.config.char_set.encode_table();
38
39 let mut input_index = 0;
40
41 while input_index < bytes.len() {
42 // either the full input chunk size, or it's the last iteration
43 let input_chunk_len = cmp::min(self.max_input_chunk_len, bytes.len() - input_index);
44
45 let chunk = &bytes[input_index..(input_index + input_chunk_len)];
46
47 let mut b64_bytes_written = encode_to_slice(chunk, &mut encode_buf, encode_table);
48
49 input_index += input_chunk_len;
50 let more_input_left = input_index < bytes.len();
51
52 if self.config.pad && !more_input_left {
53 // no more input, add padding if needed. Buffer will have room because
54 // max_input_length leaves room for it.
55 b64_bytes_written += add_padding(bytes.len(), &mut encode_buf[b64_bytes_written..]);
56 }
57
58 sink.write_encoded_bytes(&encode_buf[0..b64_bytes_written])?;
59 }
60
61 Ok(())
62 }
63 }
64
65 /// Calculate the longest input that can be encoded for the given output buffer size.
66 ///
67 /// If the config requires padding, two bytes of buffer space will be set aside so that the last
68 /// chunk of input can be encoded safely.
69 ///
70 /// The input length will always be a multiple of 3 so that no encoding state has to be carried over
71 /// between chunks.
max_input_length(encoded_buf_len: usize, config: Config) -> usize72 fn max_input_length(encoded_buf_len: usize, config: Config) -> usize {
73 let effective_buf_len = if config.pad {
74 // make room for padding
75 encoded_buf_len
76 .checked_sub(2)
77 .expect("Don't use a tiny buffer")
78 } else {
79 encoded_buf_len
80 };
81
82 // No padding, so just normal base64 expansion.
83 (effective_buf_len / 4) * 3
84 }
85
// Minimal sink: everything written to it is appended to the borrowed `String`.
#[cfg(any(feature = "alloc", feature = "std", test))]
pub(crate) struct StringSink<'a> {
    // Destination string; borrowed mutably for as long as the sink lives.
    string: &'a mut String,
}
91
#[cfg(any(feature = "alloc", feature = "std", test))]
impl<'a> StringSink<'a> {
    /// Wraps `s` so that encoded output written to the sink is appended to it.
    ///
    /// The returned sink borrows `s` mutably; the `'_` makes that borrow visible in the
    /// signature instead of hiding the lifetime on the return type.
    pub(crate) fn new(s: &mut String) -> StringSink<'_> {
        StringSink { string: s }
    }
}
98
#[cfg(any(feature = "alloc", feature = "std", test))]
impl<'a> Sink for StringSink<'a> {
    type Error = ();

    /// Appends `s` to the wrapped string.
    ///
    /// Panics if `s` is not valid UTF-8.
    fn write_encoded_bytes(&mut self, s: &[u8]) -> Result<(), Self::Error> {
        let encoded = str::from_utf8(s).unwrap();
        self.string.push_str(encoded);

        Ok(())
    }
}
109
#[cfg(test)]
pub mod tests {
    use super::*;
    use crate::{encode_config_buf, tests::random_config, CharacterSet, STANDARD};

    use rand::{
        distributions::{Distribution, Uniform},
        FromEntropy, Rng,
    };

    #[test]
    fn chunked_encode_empty() {
        assert_eq!("", chunked_encode_str(&[], STANDARD));
    }

    #[test]
    fn chunked_encode_intermediate_fast_loop() {
        // > 8 bytes input, will enter the pretty fast loop
        assert_eq!(
            "Zm9vYmFyYmF6cXV4",
            chunked_encode_str(b"foobarbazqux", STANDARD)
        );
    }

    #[test]
    fn chunked_encode_fast_loop() {
        // > 32 bytes input, will enter the uber fast loop
        assert_eq!(
            "Zm9vYmFyYmF6cXV4cXV1eGNvcmdlZ3JhdWx0Z2FycGx5eg==",
            chunked_encode_str(b"foobarbazquxquuxcorgegraultgarplyz", STANDARD)
        );
    }

    #[test]
    fn chunked_encode_slow_loop_only() {
        // < 8 bytes input, slow loop only
        assert_eq!("Zm9vYmFy", chunked_encode_str(b"foobar", STANDARD));
    }

    #[test]
    fn chunked_encode_matches_normal_encode_random_string_sink() {
        let helper = StringSinkTestHelper;
        chunked_encode_matches_normal_encode_random(&helper);
    }

    #[test]
    fn max_input_length_no_pad() {
        let config = config_with_pad(false);
        assert_eq!(768, max_input_length(1024, config));
    }

    #[test]
    fn max_input_length_with_pad_decrements_one_triple() {
        let config = config_with_pad(true);
        assert_eq!(765, max_input_length(1024, config));
    }

    #[test]
    fn max_input_length_with_pad_one_byte_short() {
        let config = config_with_pad(true);
        assert_eq!(765, max_input_length(1025, config));
    }

    #[test]
    fn max_input_length_with_pad_fits_exactly() {
        let config = config_with_pad(true);
        assert_eq!(768, max_input_length(1026, config));
    }

    #[test]
    fn max_input_length_cant_use_extra_single_encoded_byte() {
        // Use the shared helper rather than spelling out the config a second way.
        let config = config_with_pad(false);
        assert_eq!(300, max_input_length(401, config));
    }

    /// Checks that encoding through the sink supplied by `sink_test_helper` produces exactly
    /// the same output as `encode_config_buf`, for many random inputs and random configs.
    pub fn chunked_encode_matches_normal_encode_random<S: SinkTestHelper>(sink_test_helper: &S) {
        let mut input_buf: Vec<u8> = Vec::new();
        let mut output_buf = String::new();
        let mut rng = rand::rngs::SmallRng::from_entropy();
        let input_len_range = Uniform::new(1, 10_000);

        for _ in 0..5_000 {
            input_buf.clear();
            output_buf.clear();

            let buf_len = input_len_range.sample(&mut rng);
            for _ in 0..buf_len {
                input_buf.push(rng.gen());
            }

            let config = random_config(&mut rng);

            let chunk_encoded_string = sink_test_helper.encode_to_string(config, &input_buf);
            encode_config_buf(&input_buf, config, &mut output_buf);

            assert_eq!(
                output_buf, chunk_encoded_string,
                "input len={}, config: pad={}",
                buf_len, config.pad
            );
        }
    }

    /// Encodes `bytes` with a `ChunkedEncoder` writing into a `StringSink`.
    fn chunked_encode_str(bytes: &[u8], config: Config) -> String {
        // Same path as the string-sink helper; delegate instead of duplicating it.
        StringSinkTestHelper.encode_to_string(config, bytes)
    }

    /// Standard-alphabet config with padding switched on or off.
    fn config_with_pad(pad: bool) -> Config {
        Config::new(CharacterSet::Standard, pad)
    }

    // An abstraction around sinks so that the same tests can easily be applied to any sink
    // implementation
    pub trait SinkTestHelper {
        fn encode_to_string(&self, config: Config, bytes: &[u8]) -> String;
    }

    struct StringSinkTestHelper;

    impl SinkTestHelper for StringSinkTestHelper {
        fn encode_to_string(&self, config: Config, bytes: &[u8]) -> String {
            let encoder = ChunkedEncoder::new(config);
            let mut s = String::new();
            {
                // Scope the sink so its mutable borrow of `s` ends before `s` is returned.
                let mut sink = StringSink::new(&mut s);
                encoder.encode(bytes, &mut sink).unwrap();
            }

            s
        }
    }
}
248