use encode::encode_to_slice;
use std::io::{ErrorKind, Result, Write};
use std::{cmp, fmt};
use {encode_config_slice, Config};

pub(crate) const BUF_SIZE: usize = 1024;
/// The most bytes whose encoding will fit in `BUF_SIZE`
const MAX_INPUT_LEN: usize = BUF_SIZE / 4 * 3;
// 3 bytes of input = 4 bytes of base64, always (because we don't allow line wrapping)
const MIN_ENCODE_CHUNK_SIZE: usize = 3;

/// A `Write` implementation that base64 encodes data before delegating to the wrapped writer.
///
/// Because base64 has special handling for the end of the input data (padding, etc), there's a
/// `finish()` method on this type that encodes any leftover input bytes and adds padding if
/// appropriate. It's called automatically when deallocated (see the `Drop` implementation), but
/// any error that occurs when invoking the underlying writer will be suppressed. If you want to
/// handle such errors, call `finish()` yourself.
///
/// # Examples
///
/// ```
/// use std::io::Write;
///
/// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc.
/// let mut wrapped_writer = Vec::new();
/// {
///     let mut enc = base64::write::EncoderWriter::new(
///         &mut wrapped_writer, base64::STANDARD);
///
///     // handle errors as you normally would
///     enc.write_all(b"asdf").unwrap();
///     // could leave this out to be called by Drop, if you don't care
///     // about handling errors
///     enc.finish().unwrap();
///
/// }
///
/// // base64 was written to the writer
/// assert_eq!(b"YXNkZg==", &wrapped_writer[..]);
///
/// ```
///
/// # Panics
///
/// Calling `write()` after `finish()` is invalid and will panic.
///
/// # Errors
///
/// Base64 encoding itself does not generate errors, but errors from the wrapped writer will be
/// returned as per the contract of `Write`.
///
/// # Performance
///
/// It has some minor performance loss compared to encoding slices (a couple percent).
/// It does not do any heap allocation.
pub struct EncoderWriter<'a, W: 'a + Write> {
    config: Config,
    /// Where encoded data is written to
    w: &'a mut W,
    /// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk
    /// with the next `write()`, encode it, then proceed with the rest of the input normally.
    extra_input: [u8; MIN_ENCODE_CHUNK_SIZE],
    /// How much of `extra` is occupied, in `[0, MIN_ENCODE_CHUNK_SIZE]`.
    extra_input_occupied_len: usize,
    /// Buffer to encode into. May hold leftover encoded bytes from a previous write call that the
    /// underlying writer did not write last time.
    output: [u8; BUF_SIZE],
    /// How much of `output` is occupied with encoded data that couldn't be written last time
    output_occupied_len: usize,
    /// True iff padding / partial last chunk has been written.
    finished: bool,
    /// panic safety: don't write again in destructor if writer panicked while we were writing to it
    panicked: bool,
}

impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "extra_input: {:?} extra_input_occupied_len:{:?} output[..5]: {:?} output_occupied_len: {:?}",
            self.extra_input,
            self.extra_input_occupied_len,
            &self.output[0..5],
            self.output_occupied_len
        )
    }
}

impl<'a, W: Write> EncoderWriter<'a, W> {
    /// Create a new encoder that will write to the provided delegate writer `w`.
    pub fn new(w: &'a mut W, config: Config) -> EncoderWriter<'a, W> {
        EncoderWriter {
            config,
            w,
            extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE],
            extra_input_occupied_len: 0,
            output: [0u8; BUF_SIZE],
            output_occupied_len: 0,
            finished: false,
            panicked: false,
        }
    }

    /// Encode all remaining buffered data and write it, including any trailing incomplete input
    /// triples and associated padding.
    ///
    /// Once this succeeds, no further writes can be performed, as that would produce invalid
    /// base64.
    ///
    /// This may write to the delegate writer multiple times if the delegate writer does not accept
    /// all input provided to its `write` each invocation.
    ///
    /// # Errors
    ///
    /// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
    pub fn finish(&mut self) -> Result<()> {
        // Idempotent: a retried or repeated finish() after success is a no-op.
        if self.finished {
            return Ok(());
        };

        // Flush whatever encoded output a previous write() couldn't push to the delegate.
        self.write_all_encoded_output()?;

        if self.extra_input_occupied_len > 0 {
            // Encode the final partial triple (with padding, per `config`) into `output`.
            let encoded_len = encode_config_slice(
                &self.extra_input[..self.extra_input_occupied_len],
                self.config,
                &mut self.output[..],
            );

            self.output_occupied_len = encoded_len;

            self.write_all_encoded_output()?;

            // write succeeded, do not write the encoding of extra again if finish() is retried
            self.extra_input_occupied_len = 0;
        }

        self.finished = true;
        Ok(())
    }

    /// Write as much of the encoded output to the delegate writer as it will accept, and store the
    /// leftovers to be attempted at the next write() call. Updates `self.output_occupied_len`.
    ///
    /// # Errors
    ///
    /// Errors from the delegate writer are returned. In the case of an error,
    /// `self.output_occupied_len` will not be updated, as errors from `write` are specified to mean
    /// that no write took place.
    fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> {
        // Set `panicked` around the delegate call so Drop won't try to write again if the
        // delegate panics mid-write.
        self.panicked = true;
        let res = self.w.write(&self.output[..current_output_len]);
        self.panicked = false;

        res.map(|consumed| {
            debug_assert!(consumed <= current_output_len);

            if consumed < current_output_len {
                self.output_occupied_len = current_output_len.checked_sub(consumed).unwrap();
                // If we're blocking on I/O, the minor inefficiency of copying bytes to the
                // start of the buffer is the least of our concerns...
                // Rotate only the occupied prefix: bytes past `current_output_len` are dead, so
                // there's no need to move the whole buffer. (copy_within isn't stabilized yet.)
                self.output[..current_output_len].rotate_left(consumed);
            } else {
                self.output_occupied_len = 0;
            }
        })
    }

    /// Write all buffered encoded output. If this returns `Ok`, `self.output_occupied_len` is `0`.
    ///
    /// This is basically write_all for the remaining buffered data but without the undesirable
    /// abort-on-`Ok(0)` behavior.
    ///
    /// # Errors
    ///
    /// Any error emitted by the delegate writer abort the write loop and is returned, unless it's
    /// `Interrupted`, in which case the error is ignored and writes will continue.
    fn write_all_encoded_output(&mut self) -> Result<()> {
        while self.output_occupied_len > 0 {
            let remaining_len = self.output_occupied_len;
            match self.write_to_delegate(remaining_len) {
                // try again on interrupts ala write_all
                Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
                // other errors return
                Err(e) => return Err(e),
                // success no-ops because remaining length is already updated
                Ok(_) => {}
            };
        }

        debug_assert_eq!(0, self.output_occupied_len);
        Ok(())
    }
}

impl<'a, W: Write> Write for EncoderWriter<'a, W> {
    /// Encode input and then write to the delegate writer.
    ///
    /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
    /// of `input` consumed. The value may be `0`, which interacts poorly with `write_all`, which
    /// interprets `Ok(0)` as an error, despite it being allowed by the contract of `write`. See
    /// https://github.com/rust-lang/rust/issues/56889 for more on that.
    ///
    /// If the previous call to `write` provided more (encoded) data than the delegate writer could
    /// accept in a single call to its `write`, the remaining data is buffered. As long as buffered
    /// data is present, subsequent calls to `write` will try to write the remaining buffered data
    /// to the delegate and return either `Ok(0)` -- and therefore not consume any of `input` -- or
    /// an error.
    ///
    /// # Errors
    ///
    /// Any errors emitted by the delegate writer are returned.
    fn write(&mut self, input: &[u8]) -> Result<usize> {
        if self.finished {
            panic!("Cannot write more after calling finish()");
        }

        if input.is_empty() {
            return Ok(0);
        }

        // The contract of `Write::write` places some constraints on this implementation:
        // - a call to `write()` represents at most one call to a wrapped `Write`, so we can't
        // iterate over the input and encode multiple chunks.
        // - Errors mean that "no bytes were written to this writer", so we need to reset the
        // internal state to what it was before the error occurred

        // before reading any input, write any leftover encoded output from last time
        if self.output_occupied_len > 0 {
            let current_len = self.output_occupied_len;
            return self.write_to_delegate(current_len)
                // did not read any input
                .map(|_| 0)
        }

        debug_assert_eq!(0, self.output_occupied_len);

        // how many bytes, if any, were read into `extra` to create a triple to encode
        let mut extra_input_read_len = 0;
        // shadow `input` so we can advance it past bytes consumed into `extra` below
        let mut input = input;

        // remember the pre-call length so we can roll back if the delegate write errors
        let orig_extra_len = self.extra_input_occupied_len;

        let mut encoded_size = 0;
        // always a multiple of MIN_ENCODE_CHUNK_SIZE
        let mut max_input_len = MAX_INPUT_LEN;

        // process leftover un-encoded input from last write
        if self.extra_input_occupied_len > 0 {
            debug_assert!(self.extra_input_occupied_len < 3);
            if input.len() + self.extra_input_occupied_len >= MIN_ENCODE_CHUNK_SIZE {
                // Fill up `extra`, encode that into `output`, and consume as much of the rest of
                // `input` as possible.
                // We could write just the encoding of `extra` by itself but then we'd have to
                // return after writing only 4 bytes, which is inefficient if the underlying writer
                // would make a syscall.
                extra_input_read_len = MIN_ENCODE_CHUNK_SIZE - self.extra_input_occupied_len;
                debug_assert!(extra_input_read_len > 0);
                // overwrite only bytes that weren't already used. If we need to rollback extra_len
                // (when the subsequent write errors), the old leading bytes will still be there.
                self.extra_input[self.extra_input_occupied_len..MIN_ENCODE_CHUNK_SIZE]
                    .copy_from_slice(&input[0..extra_input_read_len]);

                // a complete triple always encodes to exactly 4 base64 bytes
                let len = encode_to_slice(
                    &self.extra_input[0..MIN_ENCODE_CHUNK_SIZE],
                    &mut self.output[..],
                    self.config.char_set.encode_table(),
                );
                debug_assert_eq!(4, len);

                input = &input[extra_input_read_len..];

                // consider extra to be used up, since we encoded it
                self.extra_input_occupied_len = 0;
                // don't clobber where we just encoded to
                encoded_size = 4;
                // and don't read more than can be encoded
                max_input_len = MAX_INPUT_LEN - MIN_ENCODE_CHUNK_SIZE;

                // fall through to normal encoding
            } else {
                // `extra` and `input` are non empty, but `|extra| + |input| < 3`, so there must be
                // 1 byte in each.
                debug_assert_eq!(1, input.len());
                debug_assert_eq!(1, self.extra_input_occupied_len);

                // still no complete triple: stash the byte and report it consumed
                self.extra_input[self.extra_input_occupied_len] = input[0];
                self.extra_input_occupied_len += 1;
                return Ok(1);
            };
        } else if input.len() < MIN_ENCODE_CHUNK_SIZE {
            // `extra` is empty, and `input` fits inside it
            self.extra_input[0..input.len()].copy_from_slice(input);
            self.extra_input_occupied_len = input.len();
            return Ok(input.len());
        };

        // either 0 or 1 complete chunks encoded from extra
        debug_assert!(encoded_size == 0 || encoded_size == 4);
        debug_assert!(
            // didn't encode extra input
            MAX_INPUT_LEN == max_input_len
                // encoded one triple
                || MAX_INPUT_LEN == max_input_len + MIN_ENCODE_CHUNK_SIZE
        );

        // encode complete triples only
        let input_complete_chunks_len = input.len() - (input.len() % MIN_ENCODE_CHUNK_SIZE);
        let input_chunks_to_encode_len = cmp::min(input_complete_chunks_len, max_input_len);
        debug_assert_eq!(0, max_input_len % MIN_ENCODE_CHUNK_SIZE);
        debug_assert_eq!(0, input_chunks_to_encode_len % MIN_ENCODE_CHUNK_SIZE);

        encoded_size += encode_to_slice(
            &input[..(input_chunks_to_encode_len)],
            &mut self.output[encoded_size..],
            self.config.char_set.encode_table(),
        );

        // not updating `self.output_occupied_len` here because if the below write fails, it should
        // "never take place" -- the buffer contents we encoded are ignored and perhaps retried
        // later, if the consumer chooses.

        self.write_to_delegate(encoded_size)
            // no matter whether we wrote the full encoded buffer or not, we consumed the same
            // input
            .map(|_| extra_input_read_len + input_chunks_to_encode_len)
            .map_err(|e| {
                // in case we filled and encoded `extra`, reset extra_len
                self.extra_input_occupied_len = orig_extra_len;

                e
            })
    }

    /// Because this is usually treated as OK to call multiple times, it will *not* flush any
    /// incomplete chunks of input or write padding.
    fn flush(&mut self) -> Result<()> {
        // push any buffered encoded bytes to the delegate before flushing it
        self.write_all_encoded_output()?;
        self.w.flush()
    }
}

impl<'a, W: Write> Drop for EncoderWriter<'a, W> {
    fn drop(&mut self) {
        // Skip finish() if the delegate panicked during a write; calling into it again while
        // unwinding could double-panic. (finish() is a no-op if already finished.)
        if !self.panicked {
            // like `BufWriter`, ignore errors during drop
            let _ = self.finish();
        }
    }
}