1 // Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
2 // file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9 
10 //! This module provides structs that use lifetimes to couple bounds checking
11 //! and space availability checking and detaching those from actual slice
12 //! reading/writing.
13 //!
14 //! At present, the internals of the implementation are safe code, so the
15 //! bound checks currently also happen on read/write. Once this code works,
16 //! the plan is to replace the internals with unsafe code that omits the
17 //! bound check at the read/write time.
18 
19 #[cfg(all(
20     feature = "simd-accel",
21     any(
22         target_feature = "sse2",
23         all(target_endian = "little", target_arch = "aarch64"),
24         all(target_endian = "little", target_feature = "neon")
25     )
26 ))]
27 use simd_funcs::*;
28 
29 #[cfg(all(
30     feature = "simd-accel",
31     any(
32         target_feature = "sse2",
33         all(target_endian = "little", target_arch = "aarch64"),
34         all(target_endian = "little", target_feature = "neon")
35     )
36 ))]
37 use packed_simd::u16x8;
38 
39 use super::DecoderResult;
40 use super::EncoderResult;
41 use ascii::*;
42 use utf_8::convert_utf8_to_utf16_up_to_invalid;
43 use utf_8::utf8_valid_up_to;
44 
/// Outcome of asking a source or destination whether another unit can be
/// read or written.
pub enum Space<T> {
    /// There is room; carries the handle through which the read or write is
    /// then performed.
    Available(T),
    /// There is no room; carries the position reached so far (the visible
    /// callers pass the number of bytes consumed or code units written).
    Full(usize),
}
49 
/// Outcome of an ASCII fast-path copy.
pub enum CopyAsciiResult<T, U> {
    /// The fast path ended without handing a non-ASCII unit to the caller
    /// (input exhausted or output full); the payload describes the final
    /// state.
    Stop(T),
    /// A non-ASCII unit was encountered and is handed to the caller in the
    /// payload, together with a position or a write handle.
    GoOn(U),
}
54 
/// A character outside the ASCII range.
///
/// NOTE(review): this type is not used in the visible part of the file; the
/// variant descriptions below are inferred from the names and payload types
/// and should be confirmed against the callers.
pub enum NonAscii {
    /// Presumably a Basic Multilingual Plane character that is not ASCII,
    /// carried as a single 16-bit value.
    BmpExclAscii(u16),
    /// Presumably a character above U+FFFF.
    Astral(char),
}
59 
/// A character classified as either ASCII or non-ASCII.
///
/// NOTE(review): this type is not used in the visible part of the file; the
/// variant descriptions are inferred from the names and payload types.
pub enum Unicode {
    /// Presumably an ASCII character, carried as its byte value.
    Ascii(u8),
    /// A non-ASCII character; see `NonAscii` for the sub-classification.
    NonAscii(NonAscii),
}
64 
65 // Start UTF-16LE/BE fast path
66 
/// Compile-time marker for the byte order of UTF-16 data relative to the
/// byte order of the compilation target.
pub trait Endian {
    /// `true` when code units loaded from memory have to be byte-swapped
    /// before use, i.e. the data's byte order is the opposite of the
    /// target's.
    const OPPOSITE_ENDIAN: bool;
}
70 
71 pub struct BigEndian;
72 
73 impl Endian for BigEndian {
74     #[cfg(target_endian = "little")]
75     const OPPOSITE_ENDIAN: bool = true;
76 
77     #[cfg(target_endian = "big")]
78     const OPPOSITE_ENDIAN: bool = false;
79 }
80 
81 pub struct LittleEndian;
82 
83 impl Endian for LittleEndian {
84     #[cfg(target_endian = "little")]
85     const OPPOSITE_ENDIAN: bool = false;
86 
87     #[cfg(target_endian = "big")]
88     const OPPOSITE_ENDIAN: bool = true;
89 }
90 
91 #[derive(Debug, Copy, Clone)]
92 struct UnalignedU16Slice {
93     ptr: *const u8,
94     len: usize,
95 }
96 
97 impl UnalignedU16Slice {
98     #[inline(always)]
new(ptr: *const u8, len: usize) -> UnalignedU16Slice99     pub unsafe fn new(ptr: *const u8, len: usize) -> UnalignedU16Slice {
100         UnalignedU16Slice { ptr, len }
101     }
102 
103     #[inline(always)]
trim_last(&mut self)104     pub fn trim_last(&mut self) {
105         assert!(self.len > 0);
106         self.len -= 1;
107     }
108 
109     #[inline(always)]
at(&self, i: usize) -> u16110     pub fn at(&self, i: usize) -> u16 {
111         assert!(i < self.len);
112         unsafe {
113             let mut u: u16 = ::std::mem::uninitialized();
114             ::std::ptr::copy_nonoverlapping(self.ptr.add(i * 2), &mut u as *mut u16 as *mut u8, 2);
115             u
116         }
117     }
118 
119     #[cfg(feature = "simd-accel")]
120     #[inline(always)]
simd_at(&self, i: usize) -> u16x8121     pub fn simd_at(&self, i: usize) -> u16x8 {
122         assert!(i + SIMD_STRIDE_SIZE / 2 <= self.len);
123         let byte_index = i * 2;
124         unsafe { to_u16_lanes(load16_unaligned(self.ptr.add(byte_index))) }
125     }
126 
127     #[inline(always)]
len(&self) -> usize128     pub fn len(&self) -> usize {
129         self.len
130     }
131 
132     #[inline(always)]
tail(&self, from: usize) -> UnalignedU16Slice133     pub fn tail(&self, from: usize) -> UnalignedU16Slice {
134         // XXX the return value should be restricted not to
135         // outlive self.
136         assert!(from <= self.len);
137         unsafe { UnalignedU16Slice::new(self.ptr.add(from * 2), self.len - from) }
138     }
139 
140     #[cfg(feature = "simd-accel")]
141     #[inline(always)]
copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)>142     pub fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> {
143         assert!(self.len <= other.len());
144         let mut offset = 0;
145         if SIMD_STRIDE_SIZE / 2 <= self.len {
146             let len_minus_stride = self.len - SIMD_STRIDE_SIZE / 2;
147             loop {
148                 let mut simd = self.simd_at(offset);
149                 if E::OPPOSITE_ENDIAN {
150                     simd = simd_byte_swap(simd);
151                 }
152                 unsafe {
153                     store8_unaligned(other.as_mut_ptr().add(offset), simd);
154                 }
155                 if contains_surrogates(simd) {
156                     break;
157                 }
158                 offset += SIMD_STRIDE_SIZE / 2;
159                 if offset > len_minus_stride {
160                     break;
161                 }
162             }
163         }
164         while offset < self.len {
165             let unit = swap_if_opposite_endian::<E>(self.at(offset));
166             other[offset] = unit;
167             if super::in_range16(unit, 0xD800, 0xE000) {
168                 return Some((unit, offset));
169             }
170             offset += 1;
171         }
172         None
173     }
174 
175     #[cfg(not(feature = "simd-accel"))]
176     #[inline(always)]
copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)>177     fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> {
178         assert!(self.len <= other.len());
179         for (i, target) in other.iter_mut().enumerate().take(self.len) {
180             let unit = swap_if_opposite_endian::<E>(self.at(i));
181             *target = unit;
182             if super::in_range16(unit, 0xD800, 0xE000) {
183                 return Some((unit, i));
184             }
185         }
186         None
187     }
188 }
189 
190 #[inline(always)]
copy_unaligned_basic_latin_to_ascii_alu<E: Endian>( src: UnalignedU16Slice, dst: &mut [u8], offset: usize, ) -> CopyAsciiResult<usize, (u16, usize)>191 fn copy_unaligned_basic_latin_to_ascii_alu<E: Endian>(
192     src: UnalignedU16Slice,
193     dst: &mut [u8],
194     offset: usize,
195 ) -> CopyAsciiResult<usize, (u16, usize)> {
196     let len = ::std::cmp::min(src.len(), dst.len());
197     let mut i = 0usize;
198     loop {
199         if i == len {
200             return CopyAsciiResult::Stop(i + offset);
201         }
202         let unit = swap_if_opposite_endian::<E>(src.at(i));
203         if unit > 0x7F {
204             return CopyAsciiResult::GoOn((unit, i + offset));
205         }
206         dst[i] = unit as u8;
207         i += 1;
208     }
209 }
210 
211 #[inline(always)]
swap_if_opposite_endian<E: Endian>(unit: u16) -> u16212 fn swap_if_opposite_endian<E: Endian>(unit: u16) -> u16 {
213     if E::OPPOSITE_ENDIAN {
214         unit.swap_bytes()
215     } else {
216         unit
217     }
218 }
219 
220 #[cfg(not(feature = "simd-accel"))]
221 #[inline(always)]
copy_unaligned_basic_latin_to_ascii<E: Endian>( src: UnalignedU16Slice, dst: &mut [u8], ) -> CopyAsciiResult<usize, (u16, usize)>222 fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
223     src: UnalignedU16Slice,
224     dst: &mut [u8],
225 ) -> CopyAsciiResult<usize, (u16, usize)> {
226     copy_unaligned_basic_latin_to_ascii_alu::<E>(src, dst, 0)
227 }
228 
/// ASCII fast path used when the `simd-accel` feature is enabled.
///
/// Handles `SIMD_STRIDE_SIZE` code units per iteration while at least that
/// many remain in both buffers, then hands the rest to the scalar
/// implementation, passing the number of units already handled so that the
/// reported positions stay relative to the start of `src` / `dst`.
#[cfg(feature = "simd-accel")]
#[inline(always)]
fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
    src: UnalignedU16Slice,
    dst: &mut [u8],
) -> CopyAsciiResult<usize, (u16, usize)> {
    let len = ::std::cmp::min(src.len(), dst.len());
    let mut offset = 0;
    if SIMD_STRIDE_SIZE <= len {
        // Largest offset at which a full stride still fits.
        let len_minus_stride = len - SIMD_STRIDE_SIZE;
        loop {
            // Two vector loads of SIMD_STRIDE_SIZE / 2 code units each.
            let mut first = src.simd_at(offset);
            let mut second = src.simd_at(offset + (SIMD_STRIDE_SIZE / 2));
            if E::OPPOSITE_ENDIAN {
                first = simd_byte_swap(first);
                second = simd_byte_swap(second);
            }
            // OR-ing the vectors lets one check cover both of them.
            if !simd_is_basic_latin(first | second) {
                // Nothing from this stride has been stored; the scalar code
                // below re-reads it and locates the offending unit.
                break;
            }
            // Presumably narrows the two 16-bit vectors into one vector of
            // bytes -- see `simd_pack` in `simd_funcs`.
            let packed = simd_pack(first, second);
            unsafe {
                store16_unaligned(dst.as_mut_ptr().add(offset), packed);
            }
            offset += SIMD_STRIDE_SIZE;
            if offset > len_minus_stride {
                break;
            }
        }
    }
    copy_unaligned_basic_latin_to_ascii_alu::<E>(src.tail(offset), &mut dst[offset..], offset)
}
261 
/// Converts UTF-16 code units from `src` (byte order selected by `E`) into
/// UTF-8 bytes in `dst`.
///
/// Returns `(code units read, bytes written, saw_unpaired_surrogate)`.
///
/// * When `dst` is shorter than 4 bytes nothing is converted and
///   `(0, 0, false)` is returned.
/// * When the flag is `true`, conversion stopped at an unpaired surrogate.
///   The read count includes that surrogate, and nothing has been written
///   for it.
/// * When the flag is `false`, conversion stopped because `src` was
///   exhausted or because fewer than 4 bytes of `dst` remained.
#[inline(always)]
fn convert_unaligned_utf16_to_utf8<E: Endian>(
    src: UnalignedU16Slice,
    dst: &mut [u8],
) -> (usize, usize, bool) {
    if dst.len() < 4 {
        return (0, 0, false);
    }
    let mut src_pos = 0usize;
    let mut dst_pos = 0usize;
    let src_len = src.len();
    // While `dst_pos` is below this bound at least 4 bytes of `dst` remain,
    // which is enough for the longest UTF-8 sequence written below.
    let dst_len_minus_three = dst.len() - 3;
    'outer: loop {
        // Copy a run of ASCII; on return `non_ascii` holds the first unit
        // above 0x7F, which has been read but not yet counted in `src_pos`.
        let mut non_ascii = match copy_unaligned_basic_latin_to_ascii::<E>(
            src.tail(src_pos),
            &mut dst[dst_pos..],
        ) {
            CopyAsciiResult::GoOn((unit, read_written)) => {
                src_pos += read_written;
                dst_pos += read_written;
                unit
            }
            CopyAsciiResult::Stop(read_written) => {
                return (src_pos + read_written, dst_pos + read_written, false);
            }
        };
        if dst_pos >= dst_len_minus_three {
            break 'outer;
        }
        // We have enough destination space to commit to
        // having read `non_ascii`.
        src_pos += 1;
        'inner: loop {
            // A single wrapping subtraction maps the surrogate range onto
            // 0..=0x7FF so each range test below is one comparison.
            let non_ascii_minus_surrogate_start = non_ascii.wrapping_sub(0xD800);
            if non_ascii_minus_surrogate_start > (0xDFFF - 0xD800) {
                // Not a surrogate: two-byte or three-byte sequence.
                if non_ascii < 0x800 {
                    dst[dst_pos] = ((non_ascii >> 6) | 0xC0) as u8;
                    dst_pos += 1;
                    dst[dst_pos] = ((non_ascii & 0x3F) | 0x80) as u8;
                    dst_pos += 1;
                } else {
                    dst[dst_pos] = ((non_ascii >> 12) | 0xE0) as u8;
                    dst_pos += 1;
                    dst[dst_pos] = (((non_ascii & 0xFC0) >> 6) | 0x80) as u8;
                    dst_pos += 1;
                    dst[dst_pos] = ((non_ascii & 0x3F) | 0x80) as u8;
                    dst_pos += 1;
                }
            } else if non_ascii_minus_surrogate_start <= (0xDBFF - 0xD800) {
                // high surrogate
                if src_pos < src_len {
                    let second = swap_if_opposite_endian::<E>(src.at(src_pos));
                    let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
                    if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
                        // The next code unit is a low surrogate. Advance position.
                        src_pos += 1;
                        // Combine the pair into a scalar value; the constant
                        // removes both surrogate bases and adds 0x10000.
                        let point = (u32::from(non_ascii) << 10) + u32::from(second)
                            - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32);

                        dst[dst_pos] = ((point >> 18) | 0xF0u32) as u8;
                        dst_pos += 1;
                        dst[dst_pos] = (((point & 0x3F000u32) >> 12) | 0x80u32) as u8;
                        dst_pos += 1;
                        dst[dst_pos] = (((point & 0xFC0u32) >> 6) | 0x80u32) as u8;
                        dst_pos += 1;
                        dst[dst_pos] = ((point & 0x3Fu32) | 0x80u32) as u8;
                        dst_pos += 1;
                    } else {
                        // The next code unit is not a low surrogate. Don't advance
                        // position and treat the high surrogate as unpaired.
                        return (src_pos, dst_pos, true);
                    }
                } else {
                    // Unpaired surrogate at the end of buffer
                    return (src_pos, dst_pos, true);
                }
            } else {
                // Unpaired low surrogate
                return (src_pos, dst_pos, true);
            }
            if dst_pos >= dst_len_minus_three || src_pos == src_len {
                break 'outer;
            }
            // Peek at the next unit: stay in the inner loop while the input
            // remains non-ASCII, otherwise write the ASCII byte and return to
            // the bulk ASCII copy at the top of the outer loop.
            let unit = swap_if_opposite_endian::<E>(src.at(src_pos));
            src_pos += 1;
            if unit > 0x7F {
                non_ascii = unit;
                continue 'inner;
            }
            dst[dst_pos] = unit as u8;
            dst_pos += 1;
            continue 'outer;
        }
    }
    (src_pos, dst_pos, false)
}
358 
359 // Byte source
360 
361 pub struct ByteSource<'a> {
362     slice: &'a [u8],
363     pos: usize,
364 }
365 
366 impl<'a> ByteSource<'a> {
367     #[inline(always)]
new(src: &[u8]) -> ByteSource368     pub fn new(src: &[u8]) -> ByteSource {
369         ByteSource { slice: src, pos: 0 }
370     }
371     #[inline(always)]
check_available<'b>(&'b mut self) -> Space<ByteReadHandle<'b, 'a>>372     pub fn check_available<'b>(&'b mut self) -> Space<ByteReadHandle<'b, 'a>> {
373         if self.pos < self.slice.len() {
374             Space::Available(ByteReadHandle::new(self))
375         } else {
376             Space::Full(self.consumed())
377         }
378     }
379     #[inline(always)]
read(&mut self) -> u8380     fn read(&mut self) -> u8 {
381         let ret = self.slice[self.pos];
382         self.pos += 1;
383         ret
384     }
385     #[inline(always)]
unread(&mut self) -> usize386     fn unread(&mut self) -> usize {
387         self.pos -= 1;
388         self.pos
389     }
390     #[inline(always)]
consumed(&self) -> usize391     pub fn consumed(&self) -> usize {
392         self.pos
393     }
394 }
395 
396 pub struct ByteReadHandle<'a, 'b>
397 where
398     'b: 'a,
399 {
400     source: &'a mut ByteSource<'b>,
401 }
402 
403 impl<'a, 'b> ByteReadHandle<'a, 'b>
404 where
405     'b: 'a,
406 {
407     #[inline(always)]
new(src: &'a mut ByteSource<'b>) -> ByteReadHandle<'a, 'b>408     fn new(src: &'a mut ByteSource<'b>) -> ByteReadHandle<'a, 'b> {
409         ByteReadHandle { source: src }
410     }
411     #[inline(always)]
read(self) -> (u8, ByteUnreadHandle<'a, 'b>)412     pub fn read(self) -> (u8, ByteUnreadHandle<'a, 'b>) {
413         let byte = self.source.read();
414         let handle = ByteUnreadHandle::new(self.source);
415         (byte, handle)
416     }
417     #[inline(always)]
consumed(&self) -> usize418     pub fn consumed(&self) -> usize {
419         self.source.consumed()
420     }
421 }
422 
423 pub struct ByteUnreadHandle<'a, 'b>
424 where
425     'b: 'a,
426 {
427     source: &'a mut ByteSource<'b>,
428 }
429 
430 impl<'a, 'b> ByteUnreadHandle<'a, 'b>
431 where
432     'b: 'a,
433 {
434     #[inline(always)]
new(src: &'a mut ByteSource<'b>) -> ByteUnreadHandle<'a, 'b>435     fn new(src: &'a mut ByteSource<'b>) -> ByteUnreadHandle<'a, 'b> {
436         ByteUnreadHandle { source: src }
437     }
438     #[inline(always)]
unread(self) -> usize439     pub fn unread(self) -> usize {
440         self.source.unread()
441     }
442     #[inline(always)]
consumed(&self) -> usize443     pub fn consumed(&self) -> usize {
444         self.source.consumed()
445     }
446     #[inline(always)]
commit(self) -> &'a mut ByteSource<'b>447     pub fn commit(self) -> &'a mut ByteSource<'b> {
448         self.source
449     }
450 }
451 
452 // UTF-16 destination
453 
454 pub struct Utf16BmpHandle<'a, 'b>
455 where
456     'b: 'a,
457 {
458     dest: &'a mut Utf16Destination<'b>,
459 }
460 
461 impl<'a, 'b> Utf16BmpHandle<'a, 'b>
462 where
463     'b: 'a,
464 {
465     #[inline(always)]
new(dst: &'a mut Utf16Destination<'b>) -> Utf16BmpHandle<'a, 'b>466     fn new(dst: &'a mut Utf16Destination<'b>) -> Utf16BmpHandle<'a, 'b> {
467         Utf16BmpHandle { dest: dst }
468     }
469     #[inline(always)]
written(&self) -> usize470     pub fn written(&self) -> usize {
471         self.dest.written()
472     }
473     #[inline(always)]
write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b>474     pub fn write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b> {
475         self.dest.write_ascii(ascii);
476         self.dest
477     }
478     #[inline(always)]
write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b>479     pub fn write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
480         self.dest.write_bmp(bmp);
481         self.dest
482     }
483     #[inline(always)]
write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b>484     pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
485         self.dest.write_bmp_excl_ascii(bmp);
486         self.dest
487     }
488     #[inline(always)]
write_mid_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b>489     pub fn write_mid_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
490         self.dest.write_mid_bmp(bmp);
491         self.dest
492     }
493     #[inline(always)]
write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b>494     pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
495         self.dest.write_upper_bmp(bmp);
496         self.dest
497     }
498     #[inline(always)]
commit(self) -> &'a mut Utf16Destination<'b>499     pub fn commit(self) -> &'a mut Utf16Destination<'b> {
500         self.dest
501     }
502 }
503 
504 pub struct Utf16AstralHandle<'a, 'b>
505 where
506     'b: 'a,
507 {
508     dest: &'a mut Utf16Destination<'b>,
509 }
510 
511 impl<'a, 'b> Utf16AstralHandle<'a, 'b>
512 where
513     'b: 'a,
514 {
515     #[inline(always)]
new(dst: &'a mut Utf16Destination<'b>) -> Utf16AstralHandle<'a, 'b>516     fn new(dst: &'a mut Utf16Destination<'b>) -> Utf16AstralHandle<'a, 'b> {
517         Utf16AstralHandle { dest: dst }
518     }
519     #[inline(always)]
written(&self) -> usize520     pub fn written(&self) -> usize {
521         self.dest.written()
522     }
523     #[inline(always)]
write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b>524     pub fn write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b> {
525         self.dest.write_ascii(ascii);
526         self.dest
527     }
528     #[inline(always)]
write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b>529     pub fn write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
530         self.dest.write_bmp(bmp);
531         self.dest
532     }
533     #[inline(always)]
write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b>534     pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
535         self.dest.write_bmp_excl_ascii(bmp);
536         self.dest
537     }
538     #[inline(always)]
write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b>539     pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
540         self.dest.write_upper_bmp(bmp);
541         self.dest
542     }
543     #[inline(always)]
write_astral(self, astral: u32) -> &'a mut Utf16Destination<'b>544     pub fn write_astral(self, astral: u32) -> &'a mut Utf16Destination<'b> {
545         self.dest.write_astral(astral);
546         self.dest
547     }
548     #[inline(always)]
write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf16Destination<'b>549     pub fn write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf16Destination<'b> {
550         self.dest.write_surrogate_pair(high, low);
551         self.dest
552     }
553     #[inline(always)]
write_big5_combination( self, combined: u16, combining: u16, ) -> &'a mut Utf16Destination<'b>554     pub fn write_big5_combination(
555         self,
556         combined: u16,
557         combining: u16,
558     ) -> &'a mut Utf16Destination<'b> {
559         self.dest.write_big5_combination(combined, combining);
560         self.dest
561     }
562     #[inline(always)]
commit(self) -> &'a mut Utf16Destination<'b>563     pub fn commit(self) -> &'a mut Utf16Destination<'b> {
564         self.dest
565     }
566 }
567 
568 pub struct Utf16Destination<'a> {
569     slice: &'a mut [u16],
570     pos: usize,
571 }
572 
573 impl<'a> Utf16Destination<'a> {
574     #[inline(always)]
new(dst: &mut [u16]) -> Utf16Destination575     pub fn new(dst: &mut [u16]) -> Utf16Destination {
576         Utf16Destination { slice: dst, pos: 0 }
577     }
578     #[inline(always)]
check_space_bmp<'b>(&'b mut self) -> Space<Utf16BmpHandle<'b, 'a>>579     pub fn check_space_bmp<'b>(&'b mut self) -> Space<Utf16BmpHandle<'b, 'a>> {
580         if self.pos < self.slice.len() {
581             Space::Available(Utf16BmpHandle::new(self))
582         } else {
583             Space::Full(self.written())
584         }
585     }
586     #[inline(always)]
check_space_astral<'b>(&'b mut self) -> Space<Utf16AstralHandle<'b, 'a>>587     pub fn check_space_astral<'b>(&'b mut self) -> Space<Utf16AstralHandle<'b, 'a>> {
588         if self.pos + 1 < self.slice.len() {
589             Space::Available(Utf16AstralHandle::new(self))
590         } else {
591             Space::Full(self.written())
592         }
593     }
594     #[inline(always)]
written(&self) -> usize595     pub fn written(&self) -> usize {
596         self.pos
597     }
598     #[inline(always)]
write_code_unit(&mut self, u: u16)599     fn write_code_unit(&mut self, u: u16) {
600         unsafe {
601             // OK, because we checked before handing out a handle.
602             *(self.slice.get_unchecked_mut(self.pos)) = u;
603         }
604         self.pos += 1;
605     }
606     #[inline(always)]
write_ascii(&mut self, ascii: u8)607     fn write_ascii(&mut self, ascii: u8) {
608         debug_assert!(ascii < 0x80);
609         self.write_code_unit(u16::from(ascii));
610     }
611     #[inline(always)]
write_bmp(&mut self, bmp: u16)612     fn write_bmp(&mut self, bmp: u16) {
613         self.write_code_unit(bmp);
614     }
615     #[inline(always)]
write_bmp_excl_ascii(&mut self, bmp: u16)616     fn write_bmp_excl_ascii(&mut self, bmp: u16) {
617         debug_assert!(bmp >= 0x80);
618         self.write_code_unit(bmp);
619     }
620     #[inline(always)]
write_mid_bmp(&mut self, bmp: u16)621     fn write_mid_bmp(&mut self, bmp: u16) {
622         debug_assert!(bmp >= 0x80); // XXX
623         self.write_code_unit(bmp);
624     }
625     #[inline(always)]
write_upper_bmp(&mut self, bmp: u16)626     fn write_upper_bmp(&mut self, bmp: u16) {
627         debug_assert!(bmp >= 0x80);
628         self.write_code_unit(bmp);
629     }
630     #[inline(always)]
write_astral(&mut self, astral: u32)631     fn write_astral(&mut self, astral: u32) {
632         debug_assert!(astral > 0xFFFF);
633         debug_assert!(astral <= 0x10_FFFF);
634         self.write_code_unit((0xD7C0 + (astral >> 10)) as u16);
635         self.write_code_unit((0xDC00 + (astral & 0x3FF)) as u16);
636     }
637     #[inline(always)]
write_surrogate_pair(&mut self, high: u16, low: u16)638     pub fn write_surrogate_pair(&mut self, high: u16, low: u16) {
639         self.write_code_unit(high);
640         self.write_code_unit(low);
641     }
642     #[inline(always)]
write_big5_combination(&mut self, combined: u16, combining: u16)643     fn write_big5_combination(&mut self, combined: u16, combining: u16) {
644         self.write_bmp_excl_ascii(combined);
645         self.write_bmp_excl_ascii(combining);
646     }
647     #[inline(always)]
copy_ascii_from_check_space_bmp<'b>( &'b mut self, source: &mut ByteSource, ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16BmpHandle<'b, 'a>)>648     pub fn copy_ascii_from_check_space_bmp<'b>(
649         &'b mut self,
650         source: &mut ByteSource,
651     ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16BmpHandle<'b, 'a>)> {
652         let non_ascii_ret = {
653             let src_remaining = &source.slice[source.pos..];
654             let dst_remaining = &mut self.slice[self.pos..];
655             let (pending, length) = if dst_remaining.len() < src_remaining.len() {
656                 (DecoderResult::OutputFull, dst_remaining.len())
657             } else {
658                 (DecoderResult::InputEmpty, src_remaining.len())
659             };
660             match unsafe {
661                 ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
662             } {
663                 None => {
664                     source.pos += length;
665                     self.pos += length;
666                     return CopyAsciiResult::Stop((pending, source.pos, self.pos));
667                 }
668                 Some((non_ascii, consumed)) => {
669                     source.pos += consumed;
670                     self.pos += consumed;
671                     source.pos += 1; // +1 for non_ascii
672                     non_ascii
673                 }
674             }
675         };
676         CopyAsciiResult::GoOn((non_ascii_ret, Utf16BmpHandle::new(self)))
677     }
678     #[inline(always)]
copy_ascii_from_check_space_astral<'b>( &'b mut self, source: &mut ByteSource, ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16AstralHandle<'b, 'a>)>679     pub fn copy_ascii_from_check_space_astral<'b>(
680         &'b mut self,
681         source: &mut ByteSource,
682     ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16AstralHandle<'b, 'a>)> {
683         let non_ascii_ret = {
684             let dst_len = self.slice.len();
685             let src_remaining = &source.slice[source.pos..];
686             let dst_remaining = &mut self.slice[self.pos..];
687             let (pending, length) = if dst_remaining.len() < src_remaining.len() {
688                 (DecoderResult::OutputFull, dst_remaining.len())
689             } else {
690                 (DecoderResult::InputEmpty, src_remaining.len())
691             };
692             match unsafe {
693                 ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
694             } {
695                 None => {
696                     source.pos += length;
697                     self.pos += length;
698                     return CopyAsciiResult::Stop((pending, source.pos, self.pos));
699                 }
700                 Some((non_ascii, consumed)) => {
701                     source.pos += consumed;
702                     self.pos += consumed;
703                     if self.pos + 1 < dst_len {
704                         source.pos += 1; // +1 for non_ascii
705                         non_ascii
706                     } else {
707                         return CopyAsciiResult::Stop((
708                             DecoderResult::OutputFull,
709                             source.pos,
710                             self.pos,
711                         ));
712                     }
713                 }
714             }
715         };
716         CopyAsciiResult::GoOn((non_ascii_ret, Utf16AstralHandle::new(self)))
717     }
718     #[inline(always)]
copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource)719     pub fn copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource) {
720         let src_remaining = &source.slice[source.pos..];
721         let dst_remaining = &mut self.slice[self.pos..];
722         let (read, written) = convert_utf8_to_utf16_up_to_invalid(src_remaining, dst_remaining);
723         source.pos += read;
724         self.pos += written;
725     }
726     #[inline(always)]
copy_utf16_from<E: Endian>( &mut self, source: &mut ByteSource, ) -> Option<(usize, usize)>727     pub fn copy_utf16_from<E: Endian>(
728         &mut self,
729         source: &mut ByteSource,
730     ) -> Option<(usize, usize)> {
731         let src_remaining = &source.slice[source.pos..];
732         let dst_remaining = &mut self.slice[self.pos..];
733 
734         let mut src_unaligned = unsafe {
735             UnalignedU16Slice::new(
736                 src_remaining.as_ptr(),
737                 ::std::cmp::min(src_remaining.len() / 2, dst_remaining.len()),
738             )
739         };
740         if src_unaligned.len() == 0 {
741             return None;
742         }
743         let last_unit = swap_if_opposite_endian::<E>(src_unaligned.at(src_unaligned.len() - 1));
744         if super::in_range16(last_unit, 0xD800, 0xDC00) {
745             // Last code unit is a high surrogate. It might
746             // legitimately form a pair later, so let's not
747             // include it.
748             src_unaligned.trim_last();
749         }
750         let mut offset = 0usize;
751         loop {
752             if let Some((surrogate, bmp_len)) = {
753                 let src_left = src_unaligned.tail(offset);
754                 let dst_left = &mut dst_remaining[offset..src_unaligned.len()];
755                 src_left.copy_bmp_to::<E>(dst_left)
756             } {
757                 offset += bmp_len; // surrogate has not been consumed yet
758                 let second_pos = offset + 1;
759                 if surrogate > 0xDBFF || second_pos == src_unaligned.len() {
760                     // Unpaired surrogate
761                     source.pos += second_pos * 2;
762                     self.pos += offset;
763                     return Some((source.pos, self.pos));
764                 }
765                 let second = swap_if_opposite_endian::<E>(src_unaligned.at(second_pos));
766                 if !super::in_range16(second, 0xDC00, 0xE000) {
767                     // Unpaired surrogate
768                     source.pos += second_pos * 2;
769                     self.pos += offset;
770                     return Some((source.pos, self.pos));
771                 }
772                 // `surrogate` was already speculatively written
773                 dst_remaining[second_pos] = second;
774                 offset += 2;
775                 continue;
776             } else {
777                 source.pos += src_unaligned.len() * 2;
778                 self.pos += src_unaligned.len();
779                 return None;
780             }
781         }
782     }
783 }
784 
785 // UTF-8 destination
786 
787 pub struct Utf8BmpHandle<'a, 'b>
788 where
789     'b: 'a,
790 {
791     dest: &'a mut Utf8Destination<'b>,
792 }
793 
794 impl<'a, 'b> Utf8BmpHandle<'a, 'b>
795 where
796     'b: 'a,
797 {
798     #[inline(always)]
new(dst: &'a mut Utf8Destination<'b>) -> Utf8BmpHandle<'a, 'b>799     fn new(dst: &'a mut Utf8Destination<'b>) -> Utf8BmpHandle<'a, 'b> {
800         Utf8BmpHandle { dest: dst }
801     }
802     #[inline(always)]
written(&self) -> usize803     pub fn written(&self) -> usize {
804         self.dest.written()
805     }
806     #[inline(always)]
write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b>807     pub fn write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b> {
808         self.dest.write_ascii(ascii);
809         self.dest
810     }
811     #[inline(always)]
write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b>812     pub fn write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
813         self.dest.write_bmp(bmp);
814         self.dest
815     }
816     #[inline(always)]
write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b>817     pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
818         self.dest.write_bmp_excl_ascii(bmp);
819         self.dest
820     }
821     #[inline(always)]
write_mid_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b>822     pub fn write_mid_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
823         self.dest.write_mid_bmp(bmp);
824         self.dest
825     }
826     #[inline(always)]
write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b>827     pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
828         self.dest.write_upper_bmp(bmp);
829         self.dest
830     }
831     #[inline(always)]
commit(self) -> &'a mut Utf8Destination<'b>832     pub fn commit(self) -> &'a mut Utf8Destination<'b> {
833         self.dest
834     }
835 }
836 
837 pub struct Utf8AstralHandle<'a, 'b>
838 where
839     'b: 'a,
840 {
841     dest: &'a mut Utf8Destination<'b>,
842 }
843 
844 impl<'a, 'b> Utf8AstralHandle<'a, 'b>
845 where
846     'b: 'a,
847 {
848     #[inline(always)]
new(dst: &'a mut Utf8Destination<'b>) -> Utf8AstralHandle<'a, 'b>849     fn new(dst: &'a mut Utf8Destination<'b>) -> Utf8AstralHandle<'a, 'b> {
850         Utf8AstralHandle { dest: dst }
851     }
852     #[inline(always)]
written(&self) -> usize853     pub fn written(&self) -> usize {
854         self.dest.written()
855     }
856     #[inline(always)]
write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b>857     pub fn write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b> {
858         self.dest.write_ascii(ascii);
859         self.dest
860     }
861     #[inline(always)]
write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b>862     pub fn write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
863         self.dest.write_bmp(bmp);
864         self.dest
865     }
866     #[inline(always)]
write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b>867     pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
868         self.dest.write_bmp_excl_ascii(bmp);
869         self.dest
870     }
871     #[inline(always)]
write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b>872     pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
873         self.dest.write_upper_bmp(bmp);
874         self.dest
875     }
876     #[inline(always)]
write_astral(self, astral: u32) -> &'a mut Utf8Destination<'b>877     pub fn write_astral(self, astral: u32) -> &'a mut Utf8Destination<'b> {
878         self.dest.write_astral(astral);
879         self.dest
880     }
881     #[inline(always)]
write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf8Destination<'b>882     pub fn write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf8Destination<'b> {
883         self.dest.write_surrogate_pair(high, low);
884         self.dest
885     }
886     #[inline(always)]
write_big5_combination( self, combined: u16, combining: u16, ) -> &'a mut Utf8Destination<'b>887     pub fn write_big5_combination(
888         self,
889         combined: u16,
890         combining: u16,
891     ) -> &'a mut Utf8Destination<'b> {
892         self.dest.write_big5_combination(combined, combining);
893         self.dest
894     }
895     #[inline(always)]
commit(self) -> &'a mut Utf8Destination<'b>896     pub fn commit(self) -> &'a mut Utf8Destination<'b> {
897         self.dest
898     }
899 }
900 
/// Write cursor over a mutable byte slice that receives UTF-8 output.
pub struct Utf8Destination<'a> {
    /// The output buffer being filled.
    slice: &'a mut [u8],
    /// Index into `slice` at which the next byte is written; starts at zero,
    /// so it is also the number of bytes written so far.
    pos: usize,
}
905 
906 impl<'a> Utf8Destination<'a> {
907     #[inline(always)]
new(dst: &mut [u8]) -> Utf8Destination908     pub fn new(dst: &mut [u8]) -> Utf8Destination {
909         Utf8Destination { slice: dst, pos: 0 }
910     }
911     #[inline(always)]
check_space_bmp<'b>(&'b mut self) -> Space<Utf8BmpHandle<'b, 'a>>912     pub fn check_space_bmp<'b>(&'b mut self) -> Space<Utf8BmpHandle<'b, 'a>> {
913         if self.pos + 2 < self.slice.len() {
914             Space::Available(Utf8BmpHandle::new(self))
915         } else {
916             Space::Full(self.written())
917         }
918     }
919     #[inline(always)]
check_space_astral<'b>(&'b mut self) -> Space<Utf8AstralHandle<'b, 'a>>920     pub fn check_space_astral<'b>(&'b mut self) -> Space<Utf8AstralHandle<'b, 'a>> {
921         if self.pos + 3 < self.slice.len() {
922             Space::Available(Utf8AstralHandle::new(self))
923         } else {
924             Space::Full(self.written())
925         }
926     }
927     #[inline(always)]
written(&self) -> usize928     pub fn written(&self) -> usize {
929         self.pos
930     }
931     #[inline(always)]
write_code_unit(&mut self, u: u8)932     fn write_code_unit(&mut self, u: u8) {
933         unsafe {
934             // OK, because we checked before handing out a handle.
935             *(self.slice.get_unchecked_mut(self.pos)) = u;
936         }
937         self.pos += 1;
938     }
939     #[inline(always)]
write_ascii(&mut self, ascii: u8)940     fn write_ascii(&mut self, ascii: u8) {
941         debug_assert!(ascii < 0x80);
942         self.write_code_unit(ascii);
943     }
944     #[inline(always)]
write_bmp(&mut self, bmp: u16)945     fn write_bmp(&mut self, bmp: u16) {
946         if bmp < 0x80u16 {
947             self.write_ascii(bmp as u8);
948         } else if bmp < 0x800u16 {
949             self.write_mid_bmp(bmp);
950         } else {
951             self.write_upper_bmp(bmp);
952         }
953     }
954     #[inline(always)]
write_mid_bmp(&mut self, mid_bmp: u16)955     fn write_mid_bmp(&mut self, mid_bmp: u16) {
956         debug_assert!(mid_bmp >= 0x80);
957         debug_assert!(mid_bmp < 0x800);
958         self.write_code_unit(((mid_bmp >> 6) | 0xC0) as u8);
959         self.write_code_unit(((mid_bmp & 0x3F) | 0x80) as u8);
960     }
961     #[inline(always)]
write_upper_bmp(&mut self, upper_bmp: u16)962     fn write_upper_bmp(&mut self, upper_bmp: u16) {
963         debug_assert!(upper_bmp >= 0x800);
964         self.write_code_unit(((upper_bmp >> 12) | 0xE0) as u8);
965         self.write_code_unit((((upper_bmp & 0xFC0) >> 6) | 0x80) as u8);
966         self.write_code_unit(((upper_bmp & 0x3F) | 0x80) as u8);
967     }
968     #[inline(always)]
write_bmp_excl_ascii(&mut self, bmp: u16)969     fn write_bmp_excl_ascii(&mut self, bmp: u16) {
970         if bmp < 0x800u16 {
971             self.write_mid_bmp(bmp);
972         } else {
973             self.write_upper_bmp(bmp);
974         }
975     }
976     #[inline(always)]
write_astral(&mut self, astral: u32)977     fn write_astral(&mut self, astral: u32) {
978         debug_assert!(astral > 0xFFFF);
979         debug_assert!(astral <= 0x10_FFFF);
980         self.write_code_unit(((astral >> 18) | 0xF0) as u8);
981         self.write_code_unit((((astral & 0x3F000) >> 12) | 0x80) as u8);
982         self.write_code_unit((((astral & 0xFC0) >> 6) | 0x80) as u8);
983         self.write_code_unit(((astral & 0x3F) | 0x80) as u8);
984     }
985     #[inline(always)]
write_surrogate_pair(&mut self, high: u16, low: u16)986     pub fn write_surrogate_pair(&mut self, high: u16, low: u16) {
987         self.write_astral(
988             (u32::from(high) << 10) + u32::from(low)
989                 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
990         );
991     }
992     #[inline(always)]
write_big5_combination(&mut self, combined: u16, combining: u16)993     fn write_big5_combination(&mut self, combined: u16, combining: u16) {
994         self.write_mid_bmp(combined);
995         self.write_mid_bmp(combining);
996     }
997     #[inline(always)]
copy_ascii_from_check_space_bmp<'b>( &'b mut self, source: &mut ByteSource, ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf8BmpHandle<'b, 'a>)>998     pub fn copy_ascii_from_check_space_bmp<'b>(
999         &'b mut self,
1000         source: &mut ByteSource,
1001     ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf8BmpHandle<'b, 'a>)> {
1002         let non_ascii_ret = {
1003             let dst_len = self.slice.len();
1004             let src_remaining = &source.slice[source.pos..];
1005             let dst_remaining = &mut self.slice[self.pos..];
1006             let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1007                 (DecoderResult::OutputFull, dst_remaining.len())
1008             } else {
1009                 (DecoderResult::InputEmpty, src_remaining.len())
1010             };
1011             match unsafe {
1012                 ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1013             } {
1014                 None => {
1015                     source.pos += length;
1016                     self.pos += length;
1017                     return CopyAsciiResult::Stop((pending, source.pos, self.pos));
1018                 }
1019                 Some((non_ascii, consumed)) => {
1020                     source.pos += consumed;
1021                     self.pos += consumed;
1022                     if self.pos + 2 < dst_len {
1023                         source.pos += 1; // +1 for non_ascii
1024                         non_ascii
1025                     } else {
1026                         return CopyAsciiResult::Stop((
1027                             DecoderResult::OutputFull,
1028                             source.pos,
1029                             self.pos,
1030                         ));
1031                     }
1032                 }
1033             }
1034         };
1035         CopyAsciiResult::GoOn((non_ascii_ret, Utf8BmpHandle::new(self)))
1036     }
1037     #[inline(always)]
copy_ascii_from_check_space_astral<'b>( &'b mut self, source: &mut ByteSource, ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf8AstralHandle<'b, 'a>)>1038     pub fn copy_ascii_from_check_space_astral<'b>(
1039         &'b mut self,
1040         source: &mut ByteSource,
1041     ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf8AstralHandle<'b, 'a>)> {
1042         let non_ascii_ret = {
1043             let dst_len = self.slice.len();
1044             let src_remaining = &source.slice[source.pos..];
1045             let dst_remaining = &mut self.slice[self.pos..];
1046             let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1047                 (DecoderResult::OutputFull, dst_remaining.len())
1048             } else {
1049                 (DecoderResult::InputEmpty, src_remaining.len())
1050             };
1051             match unsafe {
1052                 ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1053             } {
1054                 None => {
1055                     source.pos += length;
1056                     self.pos += length;
1057                     return CopyAsciiResult::Stop((pending, source.pos, self.pos));
1058                 }
1059                 Some((non_ascii, consumed)) => {
1060                     source.pos += consumed;
1061                     self.pos += consumed;
1062                     if self.pos + 3 < dst_len {
1063                         source.pos += 1; // +1 for non_ascii
1064                         non_ascii
1065                     } else {
1066                         return CopyAsciiResult::Stop((
1067                             DecoderResult::OutputFull,
1068                             source.pos,
1069                             self.pos,
1070                         ));
1071                     }
1072                 }
1073             }
1074         };
1075         CopyAsciiResult::GoOn((non_ascii_ret, Utf8AstralHandle::new(self)))
1076     }
1077     #[inline(always)]
copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource)1078     pub fn copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource) {
1079         let src_remaining = &source.slice[source.pos..];
1080         let dst_remaining = &mut self.slice[self.pos..];
1081         let min_len = ::std::cmp::min(src_remaining.len(), dst_remaining.len());
1082         // Validate first, then memcpy to let memcpy do its thing even for
1083         // non-ASCII. (And potentially do something better than SSE2 for ASCII.)
1084         let valid_len = utf8_valid_up_to(&src_remaining[..min_len]);
1085         (&mut dst_remaining[..valid_len]).copy_from_slice(&src_remaining[..valid_len]);
1086         source.pos += valid_len;
1087         self.pos += valid_len;
1088     }
1089     #[inline(always)]
copy_utf16_from<E: Endian>( &mut self, source: &mut ByteSource, ) -> Option<(usize, usize)>1090     pub fn copy_utf16_from<E: Endian>(
1091         &mut self,
1092         source: &mut ByteSource,
1093     ) -> Option<(usize, usize)> {
1094         let src_remaining = &source.slice[source.pos..];
1095         let dst_remaining = &mut self.slice[self.pos..];
1096 
1097         let mut src_unaligned =
1098             unsafe { UnalignedU16Slice::new(src_remaining.as_ptr(), src_remaining.len() / 2) };
1099         if src_unaligned.len() == 0 {
1100             return None;
1101         }
1102         let mut last_unit = src_unaligned.at(src_unaligned.len() - 1);
1103         if E::OPPOSITE_ENDIAN {
1104             last_unit = last_unit.swap_bytes();
1105         }
1106         if super::in_range16(last_unit, 0xD800, 0xDC00) {
1107             // Last code unit is a high surrogate. It might
1108             // legitimately form a pair later, so let's not
1109             // include it.
1110             src_unaligned.trim_last();
1111         }
1112         let (read, written, had_error) =
1113             convert_unaligned_utf16_to_utf8::<E>(src_unaligned, dst_remaining);
1114         source.pos += read * 2;
1115         self.pos += written;
1116         if had_error {
1117             Some((source.pos, self.pos))
1118         } else {
1119             None
1120         }
1121     }
1122 }
1123 
1124 // UTF-16 source
1125 
/// Read cursor over a slice of UTF-16 code units.
pub struct Utf16Source<'a> {
    /// The input being read.
    slice: &'a [u16],
    /// Index of the next code unit to read; also the number of code units
    /// consumed so far.
    pos: usize,
    /// Value of `pos` before the most recent `read`/`read_enum`, kept so
    /// that `unread` can rewind to it.
    old_pos: usize,
}
1131 
1132 impl<'a> Utf16Source<'a> {
1133     #[inline(always)]
new(src: &[u16]) -> Utf16Source1134     pub fn new(src: &[u16]) -> Utf16Source {
1135         Utf16Source {
1136             slice: src,
1137             pos: 0,
1138             old_pos: 0,
1139         }
1140     }
1141     #[inline(always)]
check_available<'b>(&'b mut self) -> Space<Utf16ReadHandle<'b, 'a>>1142     pub fn check_available<'b>(&'b mut self) -> Space<Utf16ReadHandle<'b, 'a>> {
1143         if self.pos < self.slice.len() {
1144             Space::Available(Utf16ReadHandle::new(self))
1145         } else {
1146             Space::Full(self.consumed())
1147         }
1148     }
1149     #[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
1150     #[inline(always)]
read(&mut self) -> char1151     fn read(&mut self) -> char {
1152         self.old_pos = self.pos;
1153         let unit = self.slice[self.pos];
1154         self.pos += 1;
1155         let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
1156         if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
1157             return unsafe { ::std::char::from_u32_unchecked(u32::from(unit)) };
1158         }
1159         if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
1160             // high surrogate
1161             if self.pos < self.slice.len() {
1162                 let second = self.slice[self.pos];
1163                 let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
1164                 if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
1165                     // The next code unit is a low surrogate. Advance position.
1166                     self.pos += 1;
1167                     return unsafe {
1168                         ::std::char::from_u32_unchecked(
1169                             (u32::from(unit) << 10) + u32::from(second)
1170                                 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
1171                         )
1172                     };
1173                 }
1174                 // The next code unit is not a low surrogate. Don't advance
1175                 // position and treat the high surrogate as unpaired.
1176                 // fall through
1177             }
1178             // Unpaired surrogate at the end of buffer, fall through
1179         }
1180         // Unpaired low surrogate
1181         '\u{FFFD}'
1182     }
1183     #[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
1184     #[inline(always)]
read_enum(&mut self) -> Unicode1185     fn read_enum(&mut self) -> Unicode {
1186         self.old_pos = self.pos;
1187         let unit = self.slice[self.pos];
1188         self.pos += 1;
1189         if unit < 0x80 {
1190             return Unicode::Ascii(unit as u8);
1191         }
1192         let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
1193         if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
1194             return Unicode::NonAscii(NonAscii::BmpExclAscii(unit));
1195         }
1196         if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
1197             // high surrogate
1198             if self.pos < self.slice.len() {
1199                 let second = self.slice[self.pos];
1200                 let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
1201                 if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
1202                     // The next code unit is a low surrogate. Advance position.
1203                     self.pos += 1;
1204                     return Unicode::NonAscii(NonAscii::Astral(unsafe {
1205                         ::std::char::from_u32_unchecked(
1206                             (u32::from(unit) << 10) + u32::from(second)
1207                                 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
1208                         )
1209                     }));
1210                 }
1211                 // The next code unit is not a low surrogate. Don't advance
1212                 // position and treat the high surrogate as unpaired.
1213                 // fall through
1214             }
1215             // Unpaired surrogate at the end of buffer, fall through
1216         }
1217         // Unpaired low surrogate
1218         Unicode::NonAscii(NonAscii::BmpExclAscii(0xFFFDu16))
1219     }
1220     #[inline(always)]
unread(&mut self) -> usize1221     fn unread(&mut self) -> usize {
1222         self.pos = self.old_pos;
1223         self.pos
1224     }
1225     #[inline(always)]
consumed(&self) -> usize1226     pub fn consumed(&self) -> usize {
1227         self.pos
1228     }
1229     #[inline(always)]
copy_ascii_to_check_space_two<'b>( &mut self, dest: &'b mut ByteDestination<'a>, ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteTwoHandle<'b, 'a>)>1230     pub fn copy_ascii_to_check_space_two<'b>(
1231         &mut self,
1232         dest: &'b mut ByteDestination<'a>,
1233     ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteTwoHandle<'b, 'a>)> {
1234         let non_ascii_ret = {
1235             let dst_len = dest.slice.len();
1236             let src_remaining = &self.slice[self.pos..];
1237             let dst_remaining = &mut dest.slice[dest.pos..];
1238             let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1239                 (EncoderResult::OutputFull, dst_remaining.len())
1240             } else {
1241                 (EncoderResult::InputEmpty, src_remaining.len())
1242             };
1243             match unsafe {
1244                 basic_latin_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1245             } {
1246                 None => {
1247                     self.pos += length;
1248                     dest.pos += length;
1249                     return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1250                 }
1251                 Some((non_ascii, consumed)) => {
1252                     self.pos += consumed;
1253                     dest.pos += consumed;
1254                     if dest.pos + 1 < dst_len {
1255                         self.pos += 1; // commit to reading `non_ascii`
1256                         let unit = non_ascii;
1257                         let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
1258                         if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
1259                             NonAscii::BmpExclAscii(unit)
1260                         } else if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
1261                             // high surrogate
1262                             if self.pos < self.slice.len() {
1263                                 let second = self.slice[self.pos];
1264                                 let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
1265                                 if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
1266                                     // The next code unit is a low surrogate. Advance position.
1267                                     self.pos += 1;
1268                                     NonAscii::Astral(unsafe {
1269                                         ::std::char::from_u32_unchecked(
1270                                             (u32::from(unit) << 10) + u32::from(second)
1271                                                 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
1272                                         )
1273                                     })
1274                                 } else {
1275                                     // The next code unit is not a low surrogate. Don't advance
1276                                     // position and treat the high surrogate as unpaired.
1277                                     NonAscii::BmpExclAscii(0xFFFDu16)
1278                                 }
1279                             } else {
1280                                 // Unpaired surrogate at the end of the buffer.
1281                                 NonAscii::BmpExclAscii(0xFFFDu16)
1282                             }
1283                         } else {
1284                             // Unpaired low surrogate
1285                             NonAscii::BmpExclAscii(0xFFFDu16)
1286                         }
1287                     } else {
1288                         return CopyAsciiResult::Stop((
1289                             EncoderResult::OutputFull,
1290                             self.pos,
1291                             dest.pos,
1292                         ));
1293                     }
1294                 }
1295             }
1296         };
1297         CopyAsciiResult::GoOn((non_ascii_ret, ByteTwoHandle::new(dest)))
1298     }
1299     #[inline(always)]
copy_ascii_to_check_space_four<'b>( &mut self, dest: &'b mut ByteDestination<'a>, ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteFourHandle<'b, 'a>)>1300     pub fn copy_ascii_to_check_space_four<'b>(
1301         &mut self,
1302         dest: &'b mut ByteDestination<'a>,
1303     ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteFourHandle<'b, 'a>)> {
1304         let non_ascii_ret = {
1305             let dst_len = dest.slice.len();
1306             let src_remaining = &self.slice[self.pos..];
1307             let dst_remaining = &mut dest.slice[dest.pos..];
1308             let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1309                 (EncoderResult::OutputFull, dst_remaining.len())
1310             } else {
1311                 (EncoderResult::InputEmpty, src_remaining.len())
1312             };
1313             match unsafe {
1314                 basic_latin_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1315             } {
1316                 None => {
1317                     self.pos += length;
1318                     dest.pos += length;
1319                     return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1320                 }
1321                 Some((non_ascii, consumed)) => {
1322                     self.pos += consumed;
1323                     dest.pos += consumed;
1324                     if dest.pos + 3 < dst_len {
1325                         self.pos += 1; // commit to reading `non_ascii`
1326                         let unit = non_ascii;
1327                         let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
1328                         if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
1329                             NonAscii::BmpExclAscii(unit)
1330                         } else if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
1331                             // high surrogate
1332                             if self.pos == self.slice.len() {
1333                                 // Unpaired surrogate at the end of the buffer.
1334                                 NonAscii::BmpExclAscii(0xFFFDu16)
1335                             } else {
1336                                 let second = self.slice[self.pos];
1337                                 let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
1338                                 if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
1339                                     // The next code unit is a low surrogate. Advance position.
1340                                     self.pos += 1;
1341                                     NonAscii::Astral(unsafe {
1342                                         ::std::char::from_u32_unchecked(
1343                                             (u32::from(unit) << 10) + u32::from(second)
1344                                                 - (((0xD800u32 << 10) - 0x1_0000u32) + 0xDC00u32),
1345                                         )
1346                                     })
1347                                 } else {
1348                                     // The next code unit is not a low surrogate. Don't advance
1349                                     // position and treat the high surrogate as unpaired.
1350                                     NonAscii::BmpExclAscii(0xFFFDu16)
1351                                 }
1352                             }
1353                         } else {
1354                             // Unpaired low surrogate
1355                             NonAscii::BmpExclAscii(0xFFFDu16)
1356                         }
1357                     } else {
1358                         return CopyAsciiResult::Stop((
1359                             EncoderResult::OutputFull,
1360                             self.pos,
1361                             dest.pos,
1362                         ));
1363                     }
1364                 }
1365             }
1366         };
1367         CopyAsciiResult::GoOn((non_ascii_ret, ByteFourHandle::new(dest)))
1368     }
1369 }
1370 
1371 pub struct Utf16ReadHandle<'a, 'b>
1372 where
1373     'b: 'a,
1374 {
1375     source: &'a mut Utf16Source<'b>,
1376 }
1377 
1378 impl<'a, 'b> Utf16ReadHandle<'a, 'b>
1379 where
1380     'b: 'a,
1381 {
1382     #[inline(always)]
new(src: &'a mut Utf16Source<'b>) -> Utf16ReadHandle<'a, 'b>1383     fn new(src: &'a mut Utf16Source<'b>) -> Utf16ReadHandle<'a, 'b> {
1384         Utf16ReadHandle { source: src }
1385     }
1386     #[inline(always)]
read(self) -> (char, Utf16UnreadHandle<'a, 'b>)1387     pub fn read(self) -> (char, Utf16UnreadHandle<'a, 'b>) {
1388         let character = self.source.read();
1389         let handle = Utf16UnreadHandle::new(self.source);
1390         (character, handle)
1391     }
1392     #[inline(always)]
read_enum(self) -> (Unicode, Utf16UnreadHandle<'a, 'b>)1393     pub fn read_enum(self) -> (Unicode, Utf16UnreadHandle<'a, 'b>) {
1394         let character = self.source.read_enum();
1395         let handle = Utf16UnreadHandle::new(self.source);
1396         (character, handle)
1397     }
1398     #[inline(always)]
consumed(&self) -> usize1399     pub fn consumed(&self) -> usize {
1400         self.source.consumed()
1401     }
1402 }
1403 
1404 pub struct Utf16UnreadHandle<'a, 'b>
1405 where
1406     'b: 'a,
1407 {
1408     source: &'a mut Utf16Source<'b>,
1409 }
1410 
1411 impl<'a, 'b> Utf16UnreadHandle<'a, 'b>
1412 where
1413     'b: 'a,
1414 {
1415     #[inline(always)]
new(src: &'a mut Utf16Source<'b>) -> Utf16UnreadHandle<'a, 'b>1416     fn new(src: &'a mut Utf16Source<'b>) -> Utf16UnreadHandle<'a, 'b> {
1417         Utf16UnreadHandle { source: src }
1418     }
1419     #[inline(always)]
unread(self) -> usize1420     pub fn unread(self) -> usize {
1421         self.source.unread()
1422     }
1423     #[inline(always)]
consumed(&self) -> usize1424     pub fn consumed(&self) -> usize {
1425         self.source.consumed()
1426     }
1427     #[inline(always)]
commit(self) -> &'a mut Utf16Source<'b>1428     pub fn commit(self) -> &'a mut Utf16Source<'b> {
1429         self.source
1430     }
1431 }
1432 
1433 // UTF-8 source
1434 
/// Read cursor over the bytes of a `&str`.
pub struct Utf8Source<'a> {
    /// The input bytes; obtained from a `&str`, so they are valid UTF-8.
    slice: &'a [u8],
    /// Index of the next byte to read; also the number of bytes consumed so
    /// far.
    pos: usize,
    /// Value of `pos` before the most recent `read`/`read_enum`, kept so
    /// that `unread` can rewind to it.
    old_pos: usize,
}
1440 
1441 impl<'a> Utf8Source<'a> {
1442     #[inline(always)]
new(src: &str) -> Utf8Source1443     pub fn new(src: &str) -> Utf8Source {
1444         Utf8Source {
1445             slice: src.as_bytes(),
1446             pos: 0,
1447             old_pos: 0,
1448         }
1449     }
1450     #[inline(always)]
check_available<'b>(&'b mut self) -> Space<Utf8ReadHandle<'b, 'a>>1451     pub fn check_available<'b>(&'b mut self) -> Space<Utf8ReadHandle<'b, 'a>> {
1452         if self.pos < self.slice.len() {
1453             Space::Available(Utf8ReadHandle::new(self))
1454         } else {
1455             Space::Full(self.consumed())
1456         }
1457     }
1458     #[inline(always)]
read(&mut self) -> char1459     fn read(&mut self) -> char {
1460         self.old_pos = self.pos;
1461         let unit = self.slice[self.pos];
1462         if unit < 0x80 {
1463             self.pos += 1;
1464             return char::from(unit);
1465         }
1466         if unit < 0xE0 {
1467             let point =
1468                 ((u32::from(unit) & 0x1F) << 6) | (u32::from(self.slice[self.pos + 1]) & 0x3F);
1469             self.pos += 2;
1470             return unsafe { ::std::char::from_u32_unchecked(point) };
1471         }
1472         if unit < 0xF0 {
1473             let point = ((u32::from(unit) & 0xF) << 12)
1474                 | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1475                 | (u32::from(self.slice[self.pos + 2]) & 0x3F);
1476             self.pos += 3;
1477             return unsafe { ::std::char::from_u32_unchecked(point) };
1478         }
1479         let point = ((u32::from(unit) & 0x7) << 18)
1480             | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1481             | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1482             | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1483         self.pos += 4;
1484         unsafe { ::std::char::from_u32_unchecked(point) }
1485     }
1486     #[inline(always)]
read_enum(&mut self) -> Unicode1487     fn read_enum(&mut self) -> Unicode {
1488         self.old_pos = self.pos;
1489         let unit = self.slice[self.pos];
1490         if unit < 0x80 {
1491             self.pos += 1;
1492             return Unicode::Ascii(unit);
1493         }
1494         if unit < 0xE0 {
1495             let point =
1496                 ((u16::from(unit) & 0x1F) << 6) | (u16::from(self.slice[self.pos + 1]) & 0x3F);
1497             self.pos += 2;
1498             return Unicode::NonAscii(NonAscii::BmpExclAscii(point));
1499         }
1500         if unit < 0xF0 {
1501             let point = ((u16::from(unit) & 0xF) << 12)
1502                 | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1503                 | (u16::from(self.slice[self.pos + 2]) & 0x3F);
1504             self.pos += 3;
1505             return Unicode::NonAscii(NonAscii::BmpExclAscii(point));
1506         }
1507         let point = ((u32::from(unit) & 0x7) << 18)
1508             | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1509             | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1510             | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1511         self.pos += 4;
1512         Unicode::NonAscii(NonAscii::Astral(unsafe {
1513             ::std::char::from_u32_unchecked(point)
1514         }))
1515     }
1516     #[inline(always)]
unread(&mut self) -> usize1517     fn unread(&mut self) -> usize {
1518         self.pos = self.old_pos;
1519         self.pos
1520     }
1521     #[inline(always)]
consumed(&self) -> usize1522     pub fn consumed(&self) -> usize {
1523         self.pos
1524     }
1525     #[inline(always)]
copy_ascii_to_check_space_one<'b>( &mut self, dest: &'b mut ByteDestination<'a>, ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteOneHandle<'b, 'a>)>1526     pub fn copy_ascii_to_check_space_one<'b>(
1527         &mut self,
1528         dest: &'b mut ByteDestination<'a>,
1529     ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteOneHandle<'b, 'a>)> {
1530         let non_ascii_ret = {
1531             let src_remaining = &self.slice[self.pos..];
1532             let dst_remaining = &mut dest.slice[dest.pos..];
1533             let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1534                 (EncoderResult::OutputFull, dst_remaining.len())
1535             } else {
1536                 (EncoderResult::InputEmpty, src_remaining.len())
1537             };
1538             match unsafe {
1539                 ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1540             } {
1541                 None => {
1542                     self.pos += length;
1543                     dest.pos += length;
1544                     return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1545                 }
1546                 Some((non_ascii, consumed)) => {
1547                     self.pos += consumed;
1548                     dest.pos += consumed;
1549                     // We don't need to check space in destination, because
1550                     // `ascii_to_ascii()` already did.
1551                     if non_ascii < 0xE0 {
1552                         let point = ((u16::from(non_ascii) & 0x1F) << 6)
1553                             | (u16::from(self.slice[self.pos + 1]) & 0x3F);
1554                         self.pos += 2;
1555                         NonAscii::BmpExclAscii(point)
1556                     } else if non_ascii < 0xF0 {
1557                         let point = ((u16::from(non_ascii) & 0xF) << 12)
1558                             | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1559                             | (u16::from(self.slice[self.pos + 2]) & 0x3F);
1560                         self.pos += 3;
1561                         NonAscii::BmpExclAscii(point)
1562                     } else {
1563                         let point = ((u32::from(non_ascii) & 0x7) << 18)
1564                             | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1565                             | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1566                             | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1567                         self.pos += 4;
1568                         NonAscii::Astral(unsafe { ::std::char::from_u32_unchecked(point) })
1569                     }
1570                 }
1571             }
1572         };
1573         CopyAsciiResult::GoOn((non_ascii_ret, ByteOneHandle::new(dest)))
1574     }
1575     #[inline(always)]
copy_ascii_to_check_space_two<'b>( &mut self, dest: &'b mut ByteDestination<'a>, ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteTwoHandle<'b, 'a>)>1576     pub fn copy_ascii_to_check_space_two<'b>(
1577         &mut self,
1578         dest: &'b mut ByteDestination<'a>,
1579     ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteTwoHandle<'b, 'a>)> {
1580         let non_ascii_ret = {
1581             let dst_len = dest.slice.len();
1582             let src_remaining = &self.slice[self.pos..];
1583             let dst_remaining = &mut dest.slice[dest.pos..];
1584             let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1585                 (EncoderResult::OutputFull, dst_remaining.len())
1586             } else {
1587                 (EncoderResult::InputEmpty, src_remaining.len())
1588             };
1589             match unsafe {
1590                 ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1591             } {
1592                 None => {
1593                     self.pos += length;
1594                     dest.pos += length;
1595                     return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1596                 }
1597                 Some((non_ascii, consumed)) => {
1598                     self.pos += consumed;
1599                     dest.pos += consumed;
1600                     if dest.pos + 1 < dst_len {
1601                         if non_ascii < 0xE0 {
1602                             let point = ((u16::from(non_ascii) & 0x1F) << 6)
1603                                 | (u16::from(self.slice[self.pos + 1]) & 0x3F);
1604                             self.pos += 2;
1605                             NonAscii::BmpExclAscii(point)
1606                         } else if non_ascii < 0xF0 {
1607                             let point = ((u16::from(non_ascii) & 0xF) << 12)
1608                                 | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1609                                 | (u16::from(self.slice[self.pos + 2]) & 0x3F);
1610                             self.pos += 3;
1611                             NonAscii::BmpExclAscii(point)
1612                         } else {
1613                             let point = ((u32::from(non_ascii) & 0x7) << 18)
1614                                 | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1615                                 | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1616                                 | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1617                             self.pos += 4;
1618                             NonAscii::Astral(unsafe { ::std::char::from_u32_unchecked(point) })
1619                         }
1620                     } else {
1621                         return CopyAsciiResult::Stop((
1622                             EncoderResult::OutputFull,
1623                             self.pos,
1624                             dest.pos,
1625                         ));
1626                     }
1627                 }
1628             }
1629         };
1630         CopyAsciiResult::GoOn((non_ascii_ret, ByteTwoHandle::new(dest)))
1631     }
1632     #[inline(always)]
copy_ascii_to_check_space_four<'b>( &mut self, dest: &'b mut ByteDestination<'a>, ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteFourHandle<'b, 'a>)>1633     pub fn copy_ascii_to_check_space_four<'b>(
1634         &mut self,
1635         dest: &'b mut ByteDestination<'a>,
1636     ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteFourHandle<'b, 'a>)> {
1637         let non_ascii_ret = {
1638             let dst_len = dest.slice.len();
1639             let src_remaining = &self.slice[self.pos..];
1640             let dst_remaining = &mut dest.slice[dest.pos..];
1641             let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1642                 (EncoderResult::OutputFull, dst_remaining.len())
1643             } else {
1644                 (EncoderResult::InputEmpty, src_remaining.len())
1645             };
1646             match unsafe {
1647                 ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1648             } {
1649                 None => {
1650                     self.pos += length;
1651                     dest.pos += length;
1652                     return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1653                 }
1654                 Some((non_ascii, consumed)) => {
1655                     self.pos += consumed;
1656                     dest.pos += consumed;
1657                     if dest.pos + 3 < dst_len {
1658                         if non_ascii < 0xE0 {
1659                             let point = ((u16::from(non_ascii) & 0x1F) << 6)
1660                                 | (u16::from(self.slice[self.pos + 1]) & 0x3F);
1661                             self.pos += 2;
1662                             NonAscii::BmpExclAscii(point)
1663                         } else if non_ascii < 0xF0 {
1664                             let point = ((u16::from(non_ascii) & 0xF) << 12)
1665                                 | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1666                                 | (u16::from(self.slice[self.pos + 2]) & 0x3F);
1667                             self.pos += 3;
1668                             NonAscii::BmpExclAscii(point)
1669                         } else {
1670                             let point = ((u32::from(non_ascii) & 0x7) << 18)
1671                                 | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1672                                 | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1673                                 | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1674                             self.pos += 4;
1675                             NonAscii::Astral(unsafe { ::std::char::from_u32_unchecked(point) })
1676                         }
1677                     } else {
1678                         return CopyAsciiResult::Stop((
1679                             EncoderResult::OutputFull,
1680                             self.pos,
1681                             dest.pos,
1682                         ));
1683                     }
1684                 }
1685             }
1686         };
1687         CopyAsciiResult::GoOn((non_ascii_ret, ByteFourHandle::new(dest)))
1688     }
1689 }
1690 
1691 pub struct Utf8ReadHandle<'a, 'b>
1692 where
1693     'b: 'a,
1694 {
1695     source: &'a mut Utf8Source<'b>,
1696 }
1697 
1698 impl<'a, 'b> Utf8ReadHandle<'a, 'b>
1699 where
1700     'b: 'a,
1701 {
1702     #[inline(always)]
new(src: &'a mut Utf8Source<'b>) -> Utf8ReadHandle<'a, 'b>1703     fn new(src: &'a mut Utf8Source<'b>) -> Utf8ReadHandle<'a, 'b> {
1704         Utf8ReadHandle { source: src }
1705     }
1706     #[inline(always)]
read(self) -> (char, Utf8UnreadHandle<'a, 'b>)1707     pub fn read(self) -> (char, Utf8UnreadHandle<'a, 'b>) {
1708         let character = self.source.read();
1709         let handle = Utf8UnreadHandle::new(self.source);
1710         (character, handle)
1711     }
1712     #[inline(always)]
read_enum(self) -> (Unicode, Utf8UnreadHandle<'a, 'b>)1713     pub fn read_enum(self) -> (Unicode, Utf8UnreadHandle<'a, 'b>) {
1714         let character = self.source.read_enum();
1715         let handle = Utf8UnreadHandle::new(self.source);
1716         (character, handle)
1717     }
1718     #[inline(always)]
consumed(&self) -> usize1719     pub fn consumed(&self) -> usize {
1720         self.source.consumed()
1721     }
1722 }
1723 
1724 pub struct Utf8UnreadHandle<'a, 'b>
1725 where
1726     'b: 'a,
1727 {
1728     source: &'a mut Utf8Source<'b>,
1729 }
1730 
1731 impl<'a, 'b> Utf8UnreadHandle<'a, 'b>
1732 where
1733     'b: 'a,
1734 {
1735     #[inline(always)]
new(src: &'a mut Utf8Source<'b>) -> Utf8UnreadHandle<'a, 'b>1736     fn new(src: &'a mut Utf8Source<'b>) -> Utf8UnreadHandle<'a, 'b> {
1737         Utf8UnreadHandle { source: src }
1738     }
1739     #[inline(always)]
unread(self) -> usize1740     pub fn unread(self) -> usize {
1741         self.source.unread()
1742     }
1743     #[inline(always)]
consumed(&self) -> usize1744     pub fn consumed(&self) -> usize {
1745         self.source.consumed()
1746     }
1747     #[inline(always)]
commit(self) -> &'a mut Utf8Source<'b>1748     pub fn commit(self) -> &'a mut Utf8Source<'b> {
1749         self.source
1750     }
1751 }
1752 
1753 // Byte destination
1754 
1755 pub struct ByteOneHandle<'a, 'b>
1756 where
1757     'b: 'a,
1758 {
1759     dest: &'a mut ByteDestination<'b>,
1760 }
1761 
1762 impl<'a, 'b> ByteOneHandle<'a, 'b>
1763 where
1764     'b: 'a,
1765 {
1766     #[inline(always)]
new(dst: &'a mut ByteDestination<'b>) -> ByteOneHandle<'a, 'b>1767     fn new(dst: &'a mut ByteDestination<'b>) -> ByteOneHandle<'a, 'b> {
1768         ByteOneHandle { dest: dst }
1769     }
1770     #[inline(always)]
written(&self) -> usize1771     pub fn written(&self) -> usize {
1772         self.dest.written()
1773     }
1774     #[inline(always)]
write_one(self, first: u8) -> &'a mut ByteDestination<'b>1775     pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1776         self.dest.write_one(first);
1777         self.dest
1778     }
1779 }
1780 
1781 pub struct ByteTwoHandle<'a, 'b>
1782 where
1783     'b: 'a,
1784 {
1785     dest: &'a mut ByteDestination<'b>,
1786 }
1787 
1788 impl<'a, 'b> ByteTwoHandle<'a, 'b>
1789 where
1790     'b: 'a,
1791 {
1792     #[inline(always)]
new(dst: &'a mut ByteDestination<'b>) -> ByteTwoHandle<'a, 'b>1793     fn new(dst: &'a mut ByteDestination<'b>) -> ByteTwoHandle<'a, 'b> {
1794         ByteTwoHandle { dest: dst }
1795     }
1796     #[inline(always)]
written(&self) -> usize1797     pub fn written(&self) -> usize {
1798         self.dest.written()
1799     }
1800     #[inline(always)]
write_one(self, first: u8) -> &'a mut ByteDestination<'b>1801     pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1802         self.dest.write_one(first);
1803         self.dest
1804     }
1805     #[inline(always)]
write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b>1806     pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
1807         self.dest.write_two(first, second);
1808         self.dest
1809     }
1810 }
1811 
1812 pub struct ByteThreeHandle<'a, 'b>
1813 where
1814     'b: 'a,
1815 {
1816     dest: &'a mut ByteDestination<'b>,
1817 }
1818 
1819 impl<'a, 'b> ByteThreeHandle<'a, 'b>
1820 where
1821     'b: 'a,
1822 {
1823     #[inline(always)]
new(dst: &'a mut ByteDestination<'b>) -> ByteThreeHandle<'a, 'b>1824     fn new(dst: &'a mut ByteDestination<'b>) -> ByteThreeHandle<'a, 'b> {
1825         ByteThreeHandle { dest: dst }
1826     }
1827     #[inline(always)]
written(&self) -> usize1828     pub fn written(&self) -> usize {
1829         self.dest.written()
1830     }
1831     #[inline(always)]
write_one(self, first: u8) -> &'a mut ByteDestination<'b>1832     pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1833         self.dest.write_one(first);
1834         self.dest
1835     }
1836     #[inline(always)]
write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b>1837     pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
1838         self.dest.write_two(first, second);
1839         self.dest
1840     }
1841     #[inline(always)]
write_three(self, first: u8, second: u8, third: u8) -> &'a mut ByteDestination<'b>1842     pub fn write_three(self, first: u8, second: u8, third: u8) -> &'a mut ByteDestination<'b> {
1843         self.dest.write_three(first, second, third);
1844         self.dest
1845     }
1846     #[inline(always)]
write_three_return_written(self, first: u8, second: u8, third: u8) -> usize1847     pub fn write_three_return_written(self, first: u8, second: u8, third: u8) -> usize {
1848         self.dest.write_three(first, second, third);
1849         self.dest.written()
1850     }
1851 }
1852 
1853 pub struct ByteFourHandle<'a, 'b>
1854 where
1855     'b: 'a,
1856 {
1857     dest: &'a mut ByteDestination<'b>,
1858 }
1859 
1860 impl<'a, 'b> ByteFourHandle<'a, 'b>
1861 where
1862     'b: 'a,
1863 {
1864     #[inline(always)]
new(dst: &'a mut ByteDestination<'b>) -> ByteFourHandle<'a, 'b>1865     fn new(dst: &'a mut ByteDestination<'b>) -> ByteFourHandle<'a, 'b> {
1866         ByteFourHandle { dest: dst }
1867     }
1868     #[inline(always)]
written(&self) -> usize1869     pub fn written(&self) -> usize {
1870         self.dest.written()
1871     }
1872     #[inline(always)]
write_one(self, first: u8) -> &'a mut ByteDestination<'b>1873     pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1874         self.dest.write_one(first);
1875         self.dest
1876     }
1877     #[inline(always)]
write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b>1878     pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
1879         self.dest.write_two(first, second);
1880         self.dest
1881     }
1882     #[inline(always)]
write_four( self, first: u8, second: u8, third: u8, fourth: u8, ) -> &'a mut ByteDestination<'b>1883     pub fn write_four(
1884         self,
1885         first: u8,
1886         second: u8,
1887         third: u8,
1888         fourth: u8,
1889     ) -> &'a mut ByteDestination<'b> {
1890         self.dest.write_four(first, second, third, fourth);
1891         self.dest
1892     }
1893 }
1894 
1895 pub struct ByteDestination<'a> {
1896     slice: &'a mut [u8],
1897     pos: usize,
1898 }
1899 
1900 impl<'a> ByteDestination<'a> {
1901     #[inline(always)]
new(dst: &mut [u8]) -> ByteDestination1902     pub fn new(dst: &mut [u8]) -> ByteDestination {
1903         ByteDestination { slice: dst, pos: 0 }
1904     }
1905     #[inline(always)]
check_space_one<'b>(&'b mut self) -> Space<ByteOneHandle<'b, 'a>>1906     pub fn check_space_one<'b>(&'b mut self) -> Space<ByteOneHandle<'b, 'a>> {
1907         if self.pos < self.slice.len() {
1908             Space::Available(ByteOneHandle::new(self))
1909         } else {
1910             Space::Full(self.written())
1911         }
1912     }
1913     #[inline(always)]
check_space_two<'b>(&'b mut self) -> Space<ByteTwoHandle<'b, 'a>>1914     pub fn check_space_two<'b>(&'b mut self) -> Space<ByteTwoHandle<'b, 'a>> {
1915         if self.pos + 1 < self.slice.len() {
1916             Space::Available(ByteTwoHandle::new(self))
1917         } else {
1918             Space::Full(self.written())
1919         }
1920     }
1921     #[inline(always)]
check_space_three<'b>(&'b mut self) -> Space<ByteThreeHandle<'b, 'a>>1922     pub fn check_space_three<'b>(&'b mut self) -> Space<ByteThreeHandle<'b, 'a>> {
1923         if self.pos + 2 < self.slice.len() {
1924             Space::Available(ByteThreeHandle::new(self))
1925         } else {
1926             Space::Full(self.written())
1927         }
1928     }
1929     #[inline(always)]
check_space_four<'b>(&'b mut self) -> Space<ByteFourHandle<'b, 'a>>1930     pub fn check_space_four<'b>(&'b mut self) -> Space<ByteFourHandle<'b, 'a>> {
1931         if self.pos + 3 < self.slice.len() {
1932             Space::Available(ByteFourHandle::new(self))
1933         } else {
1934             Space::Full(self.written())
1935         }
1936     }
1937     #[inline(always)]
written(&self) -> usize1938     pub fn written(&self) -> usize {
1939         self.pos
1940     }
1941     #[inline(always)]
write_one(&mut self, first: u8)1942     fn write_one(&mut self, first: u8) {
1943         self.slice[self.pos] = first;
1944         self.pos += 1;
1945     }
1946     #[inline(always)]
write_two(&mut self, first: u8, second: u8)1947     fn write_two(&mut self, first: u8, second: u8) {
1948         self.slice[self.pos] = first;
1949         self.slice[self.pos + 1] = second;
1950         self.pos += 2;
1951     }
1952     #[inline(always)]
write_three(&mut self, first: u8, second: u8, third: u8)1953     fn write_three(&mut self, first: u8, second: u8, third: u8) {
1954         self.slice[self.pos] = first;
1955         self.slice[self.pos + 1] = second;
1956         self.slice[self.pos + 2] = third;
1957         self.pos += 3;
1958     }
1959     #[inline(always)]
write_four(&mut self, first: u8, second: u8, third: u8, fourth: u8)1960     fn write_four(&mut self, first: u8, second: u8, third: u8, fourth: u8) {
1961         self.slice[self.pos] = first;
1962         self.slice[self.pos + 1] = second;
1963         self.slice[self.pos + 2] = third;
1964         self.slice[self.pos + 3] = fourth;
1965         self.pos += 4;
1966     }
1967 }
1968