use encode::encode_to_slice;
use std::io::{ErrorKind, Result, Write};
use std::{cmp, fmt};
use {encode_config_slice, Config};

pub(crate) const BUF_SIZE: usize = 1024;
/// The most bytes whose encoding will fit in `BUF_SIZE`
const MAX_INPUT_LEN: usize = BUF_SIZE / 4 * 3;
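// With BUF_SIZE = 1024: 1024 / 4 = 256 output quads, and 256 * 3 = 768 input bytes, so 768 bytes
// of input encode to exactly 1024 bytes of base64 with nothing left over (and no padding needed,
// since 768 is a multiple of 3).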
// 3 bytes of input = 4 bytes of base64, always (because we don't allow line wrapping)
const MIN_ENCODE_CHUNK_SIZE: usize = 3;

/// A `Write` implementation that base64 encodes data before delegating to the wrapped writer.
///
/// Because base64 has special handling for the end of the input data (padding, etc), there's a
/// `finish()` method on this type that encodes any leftover input bytes and adds padding if
/// appropriate. It's called automatically when the writer is dropped (see the `Drop`
/// implementation), but any error that occurs when invoking the underlying writer will be
/// suppressed. If you want to handle such errors, call `finish()` yourself.
///
/// # Examples
///
/// ```
/// use std::io::Write;
///
/// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc.
/// let mut wrapped_writer = Vec::new();
/// {
///     let mut enc = base64::write::EncoderWriter::new(
///         &mut wrapped_writer, base64::STANDARD);
///
///     // handle errors as you normally would
///     enc.write_all(b"asdf").unwrap();
///     // could leave this out to be called by Drop, if you don't care
///     // about handling errors
///     enc.finish().unwrap();
///
/// }
///
/// // base64 was written to the writer
/// assert_eq!(b"YXNkZg==", &wrapped_writer[..]);
///
/// ```
///
/// # Panics
///
/// Calling `write()` after `finish()` is invalid and will panic.
///
/// # Errors
///
/// Base64 encoding itself does not generate errors, but errors from the wrapped writer will be
/// returned as per the contract of `Write`.
///
/// # Performance
///
/// Encoding via this writer is a few percent slower than encoding a slice directly. It does not
/// do any heap allocation.
pub struct EncoderWriter<'a, W: 'a + Write> {
    config: Config,
    /// Where encoded data is written to
    w: &'a mut W,
    /// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk
    /// with the next `write()`, encode it, then proceed with the rest of the input normally.
    extra_input: [u8; MIN_ENCODE_CHUNK_SIZE],
    /// How much of `extra_input` is occupied, in `[0, MIN_ENCODE_CHUNK_SIZE]`.
    extra_input_occupied_len: usize,
    /// Buffer to encode into. May hold leftover encoded bytes from a previous write call that the
    /// underlying writer did not write last time.
    output: [u8; BUF_SIZE],
    /// How much of `output` is occupied with encoded data that couldn't be written last time
    output_occupied_len: usize,
    /// True iff padding / partial last chunk has been written.
    finished: bool,
    /// panic safety: don't write again in destructor if writer panicked while we were writing to it
    panicked: bool,
}

impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "extra_input: {:?} extra_input_occupied_len:{:?} output[..5]: {:?} output_occupied_len: {:?}",
            self.extra_input,
            self.extra_input_occupied_len,
            &self.output[0..5],
            self.output_occupied_len
        )
    }
}

impl<'a, W: Write> EncoderWriter<'a, W> {
    /// Create a new encoder that will write to the provided delegate writer `w`.
    pub fn new(w: &'a mut W, config: Config) -> EncoderWriter<'a, W> {
        EncoderWriter {
            config,
            w,
            extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE],
            extra_input_occupied_len: 0,
            output: [0u8; BUF_SIZE],
            output_occupied_len: 0,
            finished: false,
            panicked: false,
        }
    }

    /// Encode all remaining buffered data and write it, including any trailing incomplete input
    /// triples and associated padding.
    ///
    /// Once this succeeds, no further writes can be performed, as that would produce invalid
    /// base64.
    ///
    /// This may write to the delegate writer multiple times if the delegate writer does not accept
    /// all input provided to its `write` each invocation.
    ///
    /// # Errors
    ///
    /// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
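    ///
    /// # Example
    ///
    /// A minimal usage sketch of calling `finish()` explicitly; once it succeeds, calling it again
    /// is a no-op:
    ///
    /// ```
    /// use std::io::Write;
    ///
    /// let mut buf = Vec::new();
    /// {
    ///     let mut enc = base64::write::EncoderWriter::new(&mut buf, base64::STANDARD);
    ///     enc.write_all(b"hello").unwrap();
    ///     enc.finish().unwrap();
    ///     // a second finish() is harmless
    ///     enc.finish().unwrap();
    /// }
    ///
    /// assert_eq!(b"aGVsbG8=", &buf[..]);
    /// ```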
    pub fn finish(&mut self) -> Result<()> {
        if self.finished {
            return Ok(());
        };

        self.write_all_encoded_output()?;

        if self.extra_input_occupied_len > 0 {
            let encoded_len = encode_config_slice(
                &self.extra_input[..self.extra_input_occupied_len],
                self.config,
                &mut self.output[..],
            );

            self.output_occupied_len = encoded_len;

            self.write_all_encoded_output()?;

            // write succeeded, do not write the encoding of extra again if finish() is retried
            self.extra_input_occupied_len = 0;
        }

        self.finished = true;
        Ok(())
    }

    /// Write as much of the encoded output to the delegate writer as it will accept, and store the
    /// leftovers to be attempted at the next write() call. Updates `self.output_occupied_len`.
    ///
    /// # Errors
    ///
    /// Errors from the delegate writer are returned. In the case of an error,
    /// `self.output_occupied_len` will not be updated, as errors from `write` are specified to mean
    /// that no write took place.
    fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> {
        self.panicked = true;
        let res = self.w.write(&self.output[..current_output_len]);
        self.panicked = false;

        return res.map(|consumed| {
            debug_assert!(consumed <= current_output_len);

            if consumed < current_output_len {
                self.output_occupied_len = current_output_len.checked_sub(consumed).unwrap();
                // If we're blocking on I/O, the minor inefficiency of copying bytes to the
                // start of the buffer is the least of our concerns...
                // Rotate moves more than we need to, but copy_within isn't stabilized yet.
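                // Once `copy_within` stabilizes, this could presumably be written as
                // `self.output.copy_within(consumed..current_output_len, 0);` (an untested sketch).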
                self.output.rotate_left(consumed);
            } else {
                self.output_occupied_len = 0;
            }

            ()
        });
    }

    /// Write all buffered encoded output. If this returns `Ok`, `self.output_occupied_len` is `0`.
    ///
    /// This is basically write_all for the remaining buffered data but without the undesirable
    /// abort-on-`Ok(0)` behavior.
    ///
    /// # Errors
    ///
    /// Any error emitted by the delegate writer aborts the write loop and is returned, unless it's
    /// `Interrupted`, in which case the error is ignored and writes will continue.
    fn write_all_encoded_output(&mut self) -> Result<()> {
        while self.output_occupied_len > 0 {
            let remaining_len = self.output_occupied_len;
            match self.write_to_delegate(remaining_len) {
                // try again on interrupts ala write_all
                Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
                // other errors return
                Err(e) => return Err(e),
                // success no-ops because remaining length is already updated
                Ok(_) => {}
            };
        }

        debug_assert_eq!(0, self.output_occupied_len);
        Ok(())
    }
}

impl<'a, W: Write> Write for EncoderWriter<'a, W> {
    /// Encode input and then write to the delegate writer.
    ///
    /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
    /// of `input` consumed. The value may be `0`, which interacts poorly with `write_all`, which
    /// interprets `Ok(0)` as an error, despite it being allowed by the contract of `write`. See
    /// https://github.com/rust-lang/rust/issues/56889 for more on that.
    ///
    /// If the previous call to `write` provided more (encoded) data than the delegate writer could
    /// accept in a single call to its `write`, the remaining data is buffered. As long as buffered
    /// data is present, subsequent calls to `write` will try to write the remaining buffered data
    /// to the delegate and return either `Ok(0)` -- and therefore not consume any of `input` -- or
    /// an error.
    ///
    /// # Errors
    ///
    /// Any errors emitted by the delegate writer are returned.
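    ///
    /// # Example
    ///
    /// A minimal sketch of driving `write` directly; because `write` may consume fewer bytes than
    /// it was given, loop until all input has been consumed (or simply use `write_all`):
    ///
    /// ```
    /// use std::io::Write;
    ///
    /// let mut buf = Vec::new();
    /// {
    ///     let mut enc = base64::write::EncoderWriter::new(&mut buf, base64::STANDARD);
    ///     let mut remaining: &[u8] = b"some input";
    ///     while !remaining.is_empty() {
    ///         let consumed = enc.write(remaining).unwrap();
    ///         remaining = &remaining[consumed..];
    ///     }
    ///     enc.finish().unwrap();
    /// }
    ///
    /// assert_eq!(b"c29tZSBpbnB1dA==", &buf[..]);
    /// ```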
    fn write(&mut self, input: &[u8]) -> Result<usize> {
        if self.finished {
            panic!("Cannot write more after calling finish()");
        }

        if input.is_empty() {
            return Ok(0);
        }

        // The contract of `Write::write` places some constraints on this implementation:
        // - a call to `write()` represents at most one call to a wrapped `Write`, so we can't
        // iterate over the input and encode multiple chunks.
        // - Errors mean that "no bytes were written to this writer", so we need to reset the
        // internal state to what it was before the error occurred

        // before reading any input, write any leftover encoded output from last time
        if self.output_occupied_len > 0 {
            let current_len = self.output_occupied_len;
            return self.write_to_delegate(current_len)
                // did not read any input
                .map(|_| 0);
        }

        debug_assert_eq!(0, self.output_occupied_len);

        // how many bytes, if any, were read into `extra` to create a triple to encode
        let mut extra_input_read_len = 0;
        let mut input = input;

        let orig_extra_len = self.extra_input_occupied_len;

        let mut encoded_size = 0;
        // always a multiple of MIN_ENCODE_CHUNK_SIZE
        let mut max_input_len = MAX_INPUT_LEN;

        // process leftover un-encoded input from last write
        if self.extra_input_occupied_len > 0 {
            debug_assert!(self.extra_input_occupied_len < 3);
            if input.len() + self.extra_input_occupied_len >= MIN_ENCODE_CHUNK_SIZE {
                // Fill up `extra`, encode that into `output`, and consume as much of the rest of
                // `input` as possible.
                // We could write just the encoding of `extra` by itself but then we'd have to
                // return after writing only 4 bytes, which is inefficient if the underlying writer
                // would make a syscall.
                extra_input_read_len = MIN_ENCODE_CHUNK_SIZE - self.extra_input_occupied_len;
                debug_assert!(extra_input_read_len > 0);
                // overwrite only bytes that weren't already used. If we need to rollback extra_len
                // (when the subsequent write errors), the old leading bytes will still be there.
                self.extra_input[self.extra_input_occupied_len..MIN_ENCODE_CHUNK_SIZE]
                    .copy_from_slice(&input[0..extra_input_read_len]);

                let len = encode_to_slice(
                    &self.extra_input[0..MIN_ENCODE_CHUNK_SIZE],
                    &mut self.output[..],
                    self.config.char_set.encode_table(),
                );
                debug_assert_eq!(4, len);

                input = &input[extra_input_read_len..];

                // consider extra to be used up, since we encoded it
                self.extra_input_occupied_len = 0;
                // don't clobber where we just encoded to
                encoded_size = 4;
                // and don't read more than can be encoded
                max_input_len = MAX_INPUT_LEN - MIN_ENCODE_CHUNK_SIZE;

                // fall through to normal encoding
            } else {
                // `extra` and `input` are non-empty, but `|extra| + |input| < 3`, so there must be
                // 1 byte in each.
                debug_assert_eq!(1, input.len());
                debug_assert_eq!(1, self.extra_input_occupied_len);

                self.extra_input[self.extra_input_occupied_len] = input[0];
                self.extra_input_occupied_len += 1;
                return Ok(1);
            };
        } else if input.len() < MIN_ENCODE_CHUNK_SIZE {
            // `extra` is empty, and `input` fits inside it
            self.extra_input[0..input.len()].copy_from_slice(input);
            self.extra_input_occupied_len = input.len();
            return Ok(input.len());
        };

        // either 0 or 1 complete chunks encoded from extra
        debug_assert!(encoded_size == 0 || encoded_size == 4);
        debug_assert!(
            // didn't encode extra input
            MAX_INPUT_LEN == max_input_len
                // encoded one triple
                || MAX_INPUT_LEN == max_input_len + MIN_ENCODE_CHUNK_SIZE
        );

        // encode complete triples only
        let input_complete_chunks_len = input.len() - (input.len() % MIN_ENCODE_CHUNK_SIZE);
        let input_chunks_to_encode_len = cmp::min(input_complete_chunks_len, max_input_len);
        debug_assert_eq!(0, max_input_len % MIN_ENCODE_CHUNK_SIZE);
        debug_assert_eq!(0, input_chunks_to_encode_len % MIN_ENCODE_CHUNK_SIZE);

        encoded_size += encode_to_slice(
            &input[..(input_chunks_to_encode_len)],
            &mut self.output[encoded_size..],
            self.config.char_set.encode_table(),
        );

        // not updating `self.output_occupied_len` here because if the below write fails, it should
        // "never take place" -- the buffer contents we encoded are ignored and perhaps retried
        // later, if the consumer chooses.

        self.write_to_delegate(encoded_size)
            // no matter whether we wrote the full encoded buffer or not, we consumed the same
            // input
            .map(|_| extra_input_read_len + input_chunks_to_encode_len)
            .map_err(|e| {
                // in case we filled and encoded `extra`, reset extra_len
                self.extra_input_occupied_len = orig_extra_len;

                e
            })
    }

    /// Because this is usually treated as OK to call multiple times, it will *not* flush any
    /// incomplete chunks of input or write padding.
    fn flush(&mut self) -> Result<()> {
        self.write_all_encoded_output()?;
        self.w.flush()
    }
}

impl<'a, W: Write> Drop for EncoderWriter<'a, W> {
    fn drop(&mut self) {
        if !self.panicked {
            // like `BufWriter`, ignore errors during drop
            let _ = self.finish();
        }
    }
}