1 // Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
2 // file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9
//! This module provides structs that use lifetimes to couple bounds checking
//! with space availability checking and to detach both from the actual slice
//! reading/writing.
//!
//! The plan was to start with safe internals, so that the bound checks also
//! happen on read/write, and, once the code works, to replace the internals
//! with unsafe code that omits the bound check at read/write time. The
//! UTF-16 and UTF-8 destinations' `write_code_unit` methods already write
//! without a bound check, while `ByteSource::read` still uses checked
//! indexing.
18
19 #[cfg(all(
20 feature = "simd-accel",
21 any(
22 target_feature = "sse2",
23 all(target_endian = "little", target_arch = "aarch64"),
24 all(target_endian = "little", target_feature = "neon")
25 )
26 ))]
27 use simd_funcs::*;
28
29 #[cfg(all(
30 feature = "simd-accel",
31 any(
32 target_feature = "sse2",
33 all(target_endian = "little", target_arch = "aarch64"),
34 all(target_endian = "little", target_feature = "neon")
35 )
36 ))]
37 use packed_simd::u16x8;
38
39 use super::DecoderResult;
40 use super::EncoderResult;
41 use ascii::*;
42 use utf_8::convert_utf8_to_utf16_up_to_invalid;
43 use utf_8::utf8_valid_up_to;
44
/// Answer to "is there room for one more read or write?".
///
/// The checks in this file (`check_available`, `check_space_bmp`,
/// `check_space_astral`) return this type.
pub enum Space<T> {
    /// There is room. The payload is whatever the check hands out for
    /// performing the operation (the checks in this file hand out handles).
    Available(T),
    /// There is no room. The payload is the position reported by the
    /// checked object's `consumed()` or `written()` method.
    Full(usize),
}
49
/// Two-way outcome of an ASCII-copying fast path.
pub enum CopyAsciiResult<T, U> {
    /// The fast path ran to the end of what it could copy; the payload
    /// describes where it stopped.
    Stop(T),
    /// The fast path met a non-ASCII unit and the caller should go on
    /// converting; the payload carries that unit plus whatever the caller
    /// needs to continue.
    GoOn(U),
}
54
/// A character outside the ASCII range.
pub enum NonAscii {
    /// A value carried as a single 16-bit unit.
    /// NOTE(review): presumably always in 0x80..=0xFFFF, as the variant name
    /// suggests; nothing in this part of the file enforces it.
    BmpExclAscii(u16),
    /// A character carried as a `char`.
    /// NOTE(review): presumably always above U+FFFF, as the variant name
    /// suggests; nothing in this part of the file enforces it.
    Astral(char),
}
59
60 pub enum Unicode {
61 Ascii(u8),
62 NonAscii(NonAscii),
63 }
64
65 // Start UTF-16LE/BE fast path
66
/// Compile-time description of a UTF-16 byte order relative to the target.
pub trait Endian {
    /// `true` when code units in this byte order have to be byte-swapped
    /// before they can be used as native `u16` values on the compilation
    /// target; `false` when they can be used as they are.
    const OPPOSITE_ENDIAN: bool;
}
70
71 pub struct BigEndian;
72
73 impl Endian for BigEndian {
74 #[cfg(target_endian = "little")]
75 const OPPOSITE_ENDIAN: bool = true;
76
77 #[cfg(target_endian = "big")]
78 const OPPOSITE_ENDIAN: bool = false;
79 }
80
81 pub struct LittleEndian;
82
83 impl Endian for LittleEndian {
84 #[cfg(target_endian = "little")]
85 const OPPOSITE_ENDIAN: bool = false;
86
87 #[cfg(target_endian = "big")]
88 const OPPOSITE_ENDIAN: bool = true;
89 }
90
/// A view of 16-bit code units stored at a byte address that need not be
/// 2-byte aligned.
///
/// The type carries no lifetime, so nothing ties a value to the buffer it
/// points into, and it is `Copy`, so it can be duplicated freely.
#[derive(Debug, Copy, Clone)]
struct UnalignedU16Slice {
    /// Address of the first byte of the first code unit.
    ptr: *const u8,
    /// Length of the view in 16-bit code units (not bytes).
    len: usize,
}
96
97 impl UnalignedU16Slice {
98 #[inline(always)]
new(ptr: *const u8, len: usize) -> UnalignedU16Slice99 pub unsafe fn new(ptr: *const u8, len: usize) -> UnalignedU16Slice {
100 UnalignedU16Slice { ptr, len }
101 }
102
103 #[inline(always)]
trim_last(&mut self)104 pub fn trim_last(&mut self) {
105 assert!(self.len > 0);
106 self.len -= 1;
107 }
108
109 #[inline(always)]
at(&self, i: usize) -> u16110 pub fn at(&self, i: usize) -> u16 {
111 assert!(i < self.len);
112 unsafe {
113 let mut u: u16 = ::std::mem::uninitialized();
114 ::std::ptr::copy_nonoverlapping(self.ptr.add(i * 2), &mut u as *mut u16 as *mut u8, 2);
115 u
116 }
117 }
118
119 #[cfg(feature = "simd-accel")]
120 #[inline(always)]
simd_at(&self, i: usize) -> u16x8121 pub fn simd_at(&self, i: usize) -> u16x8 {
122 assert!(i + SIMD_STRIDE_SIZE / 2 <= self.len);
123 let byte_index = i * 2;
124 unsafe { to_u16_lanes(load16_unaligned(self.ptr.add(byte_index))) }
125 }
126
127 #[inline(always)]
len(&self) -> usize128 pub fn len(&self) -> usize {
129 self.len
130 }
131
132 #[inline(always)]
tail(&self, from: usize) -> UnalignedU16Slice133 pub fn tail(&self, from: usize) -> UnalignedU16Slice {
134 // XXX the return value should be restricted not to
135 // outlive self.
136 assert!(from <= self.len);
137 unsafe { UnalignedU16Slice::new(self.ptr.add(from * 2), self.len - from) }
138 }
139
140 #[cfg(feature = "simd-accel")]
141 #[inline(always)]
copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)>142 pub fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> {
143 assert!(self.len <= other.len());
144 let mut offset = 0;
145 if SIMD_STRIDE_SIZE / 2 <= self.len {
146 let len_minus_stride = self.len - SIMD_STRIDE_SIZE / 2;
147 loop {
148 let mut simd = self.simd_at(offset);
149 if E::OPPOSITE_ENDIAN {
150 simd = simd_byte_swap(simd);
151 }
152 unsafe {
153 store8_unaligned(other.as_mut_ptr().add(offset), simd);
154 }
155 if contains_surrogates(simd) {
156 break;
157 }
158 offset += SIMD_STRIDE_SIZE / 2;
159 if offset > len_minus_stride {
160 break;
161 }
162 }
163 }
164 while offset < self.len {
165 let unit = swap_if_opposite_endian::<E>(self.at(offset));
166 other[offset] = unit;
167 if super::in_range16(unit, 0xD800, 0xE000) {
168 return Some((unit, offset));
169 }
170 offset += 1;
171 }
172 None
173 }
174
175 #[cfg(not(feature = "simd-accel"))]
176 #[inline(always)]
copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)>177 fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> {
178 assert!(self.len <= other.len());
179 for (i, target) in other.iter_mut().enumerate().take(self.len) {
180 let unit = swap_if_opposite_endian::<E>(self.at(i));
181 *target = unit;
182 if super::in_range16(unit, 0xD800, 0xE000) {
183 return Some((unit, i));
184 }
185 }
186 None
187 }
188 }
189
190 #[inline(always)]
copy_unaligned_basic_latin_to_ascii_alu<E: Endian>( src: UnalignedU16Slice, dst: &mut [u8], offset: usize, ) -> CopyAsciiResult<usize, (u16, usize)>191 fn copy_unaligned_basic_latin_to_ascii_alu<E: Endian>(
192 src: UnalignedU16Slice,
193 dst: &mut [u8],
194 offset: usize,
195 ) -> CopyAsciiResult<usize, (u16, usize)> {
196 let len = ::std::cmp::min(src.len(), dst.len());
197 let mut i = 0usize;
198 loop {
199 if i == len {
200 return CopyAsciiResult::Stop(i + offset);
201 }
202 let unit = swap_if_opposite_endian::<E>(src.at(i));
203 if unit > 0x7F {
204 return CopyAsciiResult::GoOn((unit, i + offset));
205 }
206 dst[i] = unit as u8;
207 i += 1;
208 }
209 }
210
211 #[inline(always)]
swap_if_opposite_endian<E: Endian>(unit: u16) -> u16212 fn swap_if_opposite_endian<E: Endian>(unit: u16) -> u16 {
213 if E::OPPOSITE_ENDIAN {
214 unit.swap_bytes()
215 } else {
216 unit
217 }
218 }
219
220 #[cfg(not(feature = "simd-accel"))]
221 #[inline(always)]
copy_unaligned_basic_latin_to_ascii<E: Endian>( src: UnalignedU16Slice, dst: &mut [u8], ) -> CopyAsciiResult<usize, (u16, usize)>222 fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
223 src: UnalignedU16Slice,
224 dst: &mut [u8],
225 ) -> CopyAsciiResult<usize, (u16, usize)> {
226 copy_unaligned_basic_latin_to_ascii_alu::<E>(src, dst, 0)
227 }
228
/// Narrows leading ASCII code units of `src` into `dst` (build with the
/// `simd-accel` feature).
///
/// Works in steps of `SIMD_STRIDE_SIZE` code units: two vectors are loaded,
/// byte-swapped if `E` is the opposite byte order, and, if every lane is
/// Basic Latin, packed into one vector of bytes and stored. The first step
/// that contains anything else, or the lack of a full step, hands the rest
/// over to the scalar loop, which reports positions relative to the start
/// of `src` and `dst`.
#[cfg(feature = "simd-accel")]
#[inline(always)]
fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
    src: UnalignedU16Slice,
    dst: &mut [u8],
) -> CopyAsciiResult<usize, (u16, usize)> {
    let limit = ::std::cmp::min(src.len(), dst.len());
    let mut done = 0;
    while done + SIMD_STRIDE_SIZE <= limit {
        let lower = src.simd_at(done);
        let upper = src.simd_at(done + (SIMD_STRIDE_SIZE / 2));
        let (lower, upper) = if E::OPPOSITE_ENDIAN {
            (simd_byte_swap(lower), simd_byte_swap(upper))
        } else {
            (lower, upper)
        };
        if !simd_is_basic_latin(lower | upper) {
            break;
        }
        let packed = simd_pack(lower, upper);
        // SAFETY: `done + SIMD_STRIDE_SIZE <= limit <= dst.len()`, so the
        // whole stride of bytes fits in `dst`.
        unsafe {
            store16_unaligned(dst.as_mut_ptr().add(done), packed);
        }
        done += SIMD_STRIDE_SIZE;
    }
    copy_unaligned_basic_latin_to_ascii_alu::<E>(src.tail(done), &mut dst[done..], done)
}
261
262 #[inline(always)]
convert_unaligned_utf16_to_utf8<E: Endian>( src: UnalignedU16Slice, dst: &mut [u8], ) -> (usize, usize, bool)263 fn convert_unaligned_utf16_to_utf8<E: Endian>(
264 src: UnalignedU16Slice,
265 dst: &mut [u8],
266 ) -> (usize, usize, bool) {
267 if dst.len() < 4 {
268 return (0, 0, false);
269 }
270 let mut src_pos = 0usize;
271 let mut dst_pos = 0usize;
272 let src_len = src.len();
273 let dst_len_minus_three = dst.len() - 3;
274 'outer: loop {
275 let mut non_ascii = match copy_unaligned_basic_latin_to_ascii::<E>(
276 src.tail(src_pos),
277 &mut dst[dst_pos..],
278 ) {
279 CopyAsciiResult::GoOn((unit, read_written)) => {
280 src_pos += read_written;
281 dst_pos += read_written;
282 unit
283 }
284 CopyAsciiResult::Stop(read_written) => {
285 return (src_pos + read_written, dst_pos + read_written, false);
286 }
287 };
288 if dst_pos >= dst_len_minus_three {
289 break 'outer;
290 }
291 // We have enough destination space to commit to
292 // having read `non_ascii`.
293 src_pos += 1;
294 'inner: loop {
295 let non_ascii_minus_surrogate_start = non_ascii.wrapping_sub(0xD800);
296 if non_ascii_minus_surrogate_start > (0xDFFF - 0xD800) {
297 if non_ascii < 0x800 {
298 dst[dst_pos] = ((non_ascii >> 6) | 0xC0) as u8;
299 dst_pos += 1;
300 dst[dst_pos] = ((non_ascii & 0x3F) | 0x80) as u8;
301 dst_pos += 1;
302 } else {
303 dst[dst_pos] = ((non_ascii >> 12) | 0xE0) as u8;
304 dst_pos += 1;
305 dst[dst_pos] = (((non_ascii & 0xFC0) >> 6) | 0x80) as u8;
306 dst_pos += 1;
307 dst[dst_pos] = ((non_ascii & 0x3F) | 0x80) as u8;
308 dst_pos += 1;
309 }
310 } else if non_ascii_minus_surrogate_start <= (0xDBFF - 0xD800) {
311 // high surrogate
312 if src_pos < src_len {
313 let second = swap_if_opposite_endian::<E>(src.at(src_pos));
314 let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
315 if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
316 // The next code unit is a low surrogate. Advance position.
317 src_pos += 1;
318 let point = (u32::from(non_ascii) << 10) + u32::from(second)
319 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32);
320
321 dst[dst_pos] = ((point >> 18) | 0xF0u32) as u8;
322 dst_pos += 1;
323 dst[dst_pos] = (((point & 0x3F000u32) >> 12) | 0x80u32) as u8;
324 dst_pos += 1;
325 dst[dst_pos] = (((point & 0xFC0u32) >> 6) | 0x80u32) as u8;
326 dst_pos += 1;
327 dst[dst_pos] = ((point & 0x3Fu32) | 0x80u32) as u8;
328 dst_pos += 1;
329 } else {
330 // The next code unit is not a low surrogate. Don't advance
331 // position and treat the high surrogate as unpaired.
332 return (src_pos, dst_pos, true);
333 }
334 } else {
335 // Unpaired surrogate at the end of buffer
336 return (src_pos, dst_pos, true);
337 }
338 } else {
339 // Unpaired low surrogate
340 return (src_pos, dst_pos, true);
341 }
342 if dst_pos >= dst_len_minus_three || src_pos == src_len {
343 break 'outer;
344 }
345 let unit = swap_if_opposite_endian::<E>(src.at(src_pos));
346 src_pos += 1;
347 if unit > 0x7F {
348 non_ascii = unit;
349 continue 'inner;
350 }
351 dst[dst_pos] = unit as u8;
352 dst_pos += 1;
353 continue 'outer;
354 }
355 }
356 (src_pos, dst_pos, false)
357 }
358
359 // Byte source
360
/// Read cursor over a borrowed byte slice.
pub struct ByteSource<'a> {
    /// The complete input.
    slice: &'a [u8],
    /// Index of the next byte to read; also the number of bytes consumed.
    pos: usize,
}
365
366 impl<'a> ByteSource<'a> {
367 #[inline(always)]
new(src: &[u8]) -> ByteSource368 pub fn new(src: &[u8]) -> ByteSource {
369 ByteSource { slice: src, pos: 0 }
370 }
371 #[inline(always)]
check_available<'b>(&'b mut self) -> Space<ByteReadHandle<'b, 'a>>372 pub fn check_available<'b>(&'b mut self) -> Space<ByteReadHandle<'b, 'a>> {
373 if self.pos < self.slice.len() {
374 Space::Available(ByteReadHandle::new(self))
375 } else {
376 Space::Full(self.consumed())
377 }
378 }
379 #[inline(always)]
read(&mut self) -> u8380 fn read(&mut self) -> u8 {
381 let ret = self.slice[self.pos];
382 self.pos += 1;
383 ret
384 }
385 #[inline(always)]
unread(&mut self) -> usize386 fn unread(&mut self) -> usize {
387 self.pos -= 1;
388 self.pos
389 }
390 #[inline(always)]
consumed(&self) -> usize391 pub fn consumed(&self) -> usize {
392 self.pos
393 }
394 }
395
396 pub struct ByteReadHandle<'a, 'b>
397 where
398 'b: 'a,
399 {
400 source: &'a mut ByteSource<'b>,
401 }
402
403 impl<'a, 'b> ByteReadHandle<'a, 'b>
404 where
405 'b: 'a,
406 {
407 #[inline(always)]
new(src: &'a mut ByteSource<'b>) -> ByteReadHandle<'a, 'b>408 fn new(src: &'a mut ByteSource<'b>) -> ByteReadHandle<'a, 'b> {
409 ByteReadHandle { source: src }
410 }
411 #[inline(always)]
read(self) -> (u8, ByteUnreadHandle<'a, 'b>)412 pub fn read(self) -> (u8, ByteUnreadHandle<'a, 'b>) {
413 let byte = self.source.read();
414 let handle = ByteUnreadHandle::new(self.source);
415 (byte, handle)
416 }
417 #[inline(always)]
consumed(&self) -> usize418 pub fn consumed(&self) -> usize {
419 self.source.consumed()
420 }
421 }
422
423 pub struct ByteUnreadHandle<'a, 'b>
424 where
425 'b: 'a,
426 {
427 source: &'a mut ByteSource<'b>,
428 }
429
430 impl<'a, 'b> ByteUnreadHandle<'a, 'b>
431 where
432 'b: 'a,
433 {
434 #[inline(always)]
new(src: &'a mut ByteSource<'b>) -> ByteUnreadHandle<'a, 'b>435 fn new(src: &'a mut ByteSource<'b>) -> ByteUnreadHandle<'a, 'b> {
436 ByteUnreadHandle { source: src }
437 }
438 #[inline(always)]
unread(self) -> usize439 pub fn unread(self) -> usize {
440 self.source.unread()
441 }
442 #[inline(always)]
consumed(&self) -> usize443 pub fn consumed(&self) -> usize {
444 self.source.consumed()
445 }
446 #[inline(always)]
commit(self) -> &'a mut ByteSource<'b>447 pub fn commit(self) -> &'a mut ByteSource<'b> {
448 self.source
449 }
450 }
451
452 // UTF-16 destination
453
454 pub struct Utf16BmpHandle<'a, 'b>
455 where
456 'b: 'a,
457 {
458 dest: &'a mut Utf16Destination<'b>,
459 }
460
461 impl<'a, 'b> Utf16BmpHandle<'a, 'b>
462 where
463 'b: 'a,
464 {
465 #[inline(always)]
new(dst: &'a mut Utf16Destination<'b>) -> Utf16BmpHandle<'a, 'b>466 fn new(dst: &'a mut Utf16Destination<'b>) -> Utf16BmpHandle<'a, 'b> {
467 Utf16BmpHandle { dest: dst }
468 }
469 #[inline(always)]
written(&self) -> usize470 pub fn written(&self) -> usize {
471 self.dest.written()
472 }
473 #[inline(always)]
write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b>474 pub fn write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b> {
475 self.dest.write_ascii(ascii);
476 self.dest
477 }
478 #[inline(always)]
write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b>479 pub fn write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
480 self.dest.write_bmp(bmp);
481 self.dest
482 }
483 #[inline(always)]
write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b>484 pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
485 self.dest.write_bmp_excl_ascii(bmp);
486 self.dest
487 }
488 #[inline(always)]
write_mid_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b>489 pub fn write_mid_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
490 self.dest.write_mid_bmp(bmp);
491 self.dest
492 }
493 #[inline(always)]
write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b>494 pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
495 self.dest.write_upper_bmp(bmp);
496 self.dest
497 }
498 #[inline(always)]
commit(self) -> &'a mut Utf16Destination<'b>499 pub fn commit(self) -> &'a mut Utf16Destination<'b> {
500 self.dest
501 }
502 }
503
504 pub struct Utf16AstralHandle<'a, 'b>
505 where
506 'b: 'a,
507 {
508 dest: &'a mut Utf16Destination<'b>,
509 }
510
511 impl<'a, 'b> Utf16AstralHandle<'a, 'b>
512 where
513 'b: 'a,
514 {
515 #[inline(always)]
new(dst: &'a mut Utf16Destination<'b>) -> Utf16AstralHandle<'a, 'b>516 fn new(dst: &'a mut Utf16Destination<'b>) -> Utf16AstralHandle<'a, 'b> {
517 Utf16AstralHandle { dest: dst }
518 }
519 #[inline(always)]
written(&self) -> usize520 pub fn written(&self) -> usize {
521 self.dest.written()
522 }
523 #[inline(always)]
write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b>524 pub fn write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b> {
525 self.dest.write_ascii(ascii);
526 self.dest
527 }
528 #[inline(always)]
write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b>529 pub fn write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
530 self.dest.write_bmp(bmp);
531 self.dest
532 }
533 #[inline(always)]
write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b>534 pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
535 self.dest.write_bmp_excl_ascii(bmp);
536 self.dest
537 }
538 #[inline(always)]
write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b>539 pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
540 self.dest.write_upper_bmp(bmp);
541 self.dest
542 }
543 #[inline(always)]
write_astral(self, astral: u32) -> &'a mut Utf16Destination<'b>544 pub fn write_astral(self, astral: u32) -> &'a mut Utf16Destination<'b> {
545 self.dest.write_astral(astral);
546 self.dest
547 }
548 #[inline(always)]
write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf16Destination<'b>549 pub fn write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf16Destination<'b> {
550 self.dest.write_surrogate_pair(high, low);
551 self.dest
552 }
553 #[inline(always)]
write_big5_combination( self, combined: u16, combining: u16, ) -> &'a mut Utf16Destination<'b>554 pub fn write_big5_combination(
555 self,
556 combined: u16,
557 combining: u16,
558 ) -> &'a mut Utf16Destination<'b> {
559 self.dest.write_big5_combination(combined, combining);
560 self.dest
561 }
562 #[inline(always)]
commit(self) -> &'a mut Utf16Destination<'b>563 pub fn commit(self) -> &'a mut Utf16Destination<'b> {
564 self.dest
565 }
566 }
567
/// Write cursor over a borrowed buffer of UTF-16 code units.
pub struct Utf16Destination<'a> {
    /// The complete output buffer.
    slice: &'a mut [u16],
    /// Index of the next code unit to write; also the number written so far.
    pos: usize,
}
572
573 impl<'a> Utf16Destination<'a> {
574 #[inline(always)]
new(dst: &mut [u16]) -> Utf16Destination575 pub fn new(dst: &mut [u16]) -> Utf16Destination {
576 Utf16Destination { slice: dst, pos: 0 }
577 }
578 #[inline(always)]
check_space_bmp<'b>(&'b mut self) -> Space<Utf16BmpHandle<'b, 'a>>579 pub fn check_space_bmp<'b>(&'b mut self) -> Space<Utf16BmpHandle<'b, 'a>> {
580 if self.pos < self.slice.len() {
581 Space::Available(Utf16BmpHandle::new(self))
582 } else {
583 Space::Full(self.written())
584 }
585 }
586 #[inline(always)]
check_space_astral<'b>(&'b mut self) -> Space<Utf16AstralHandle<'b, 'a>>587 pub fn check_space_astral<'b>(&'b mut self) -> Space<Utf16AstralHandle<'b, 'a>> {
588 if self.pos + 1 < self.slice.len() {
589 Space::Available(Utf16AstralHandle::new(self))
590 } else {
591 Space::Full(self.written())
592 }
593 }
594 #[inline(always)]
written(&self) -> usize595 pub fn written(&self) -> usize {
596 self.pos
597 }
598 #[inline(always)]
write_code_unit(&mut self, u: u16)599 fn write_code_unit(&mut self, u: u16) {
600 unsafe {
601 // OK, because we checked before handing out a handle.
602 *(self.slice.get_unchecked_mut(self.pos)) = u;
603 }
604 self.pos += 1;
605 }
606 #[inline(always)]
write_ascii(&mut self, ascii: u8)607 fn write_ascii(&mut self, ascii: u8) {
608 debug_assert!(ascii < 0x80);
609 self.write_code_unit(u16::from(ascii));
610 }
611 #[inline(always)]
write_bmp(&mut self, bmp: u16)612 fn write_bmp(&mut self, bmp: u16) {
613 self.write_code_unit(bmp);
614 }
615 #[inline(always)]
write_bmp_excl_ascii(&mut self, bmp: u16)616 fn write_bmp_excl_ascii(&mut self, bmp: u16) {
617 debug_assert!(bmp >= 0x80);
618 self.write_code_unit(bmp);
619 }
620 #[inline(always)]
write_mid_bmp(&mut self, bmp: u16)621 fn write_mid_bmp(&mut self, bmp: u16) {
622 debug_assert!(bmp >= 0x80); // XXX
623 self.write_code_unit(bmp);
624 }
625 #[inline(always)]
write_upper_bmp(&mut self, bmp: u16)626 fn write_upper_bmp(&mut self, bmp: u16) {
627 debug_assert!(bmp >= 0x80);
628 self.write_code_unit(bmp);
629 }
630 #[inline(always)]
write_astral(&mut self, astral: u32)631 fn write_astral(&mut self, astral: u32) {
632 debug_assert!(astral > 0xFFFF);
633 debug_assert!(astral <= 0x10_FFFF);
634 self.write_code_unit((0xD7C0 + (astral >> 10)) as u16);
635 self.write_code_unit((0xDC00 + (astral & 0x3FF)) as u16);
636 }
637 #[inline(always)]
write_surrogate_pair(&mut self, high: u16, low: u16)638 pub fn write_surrogate_pair(&mut self, high: u16, low: u16) {
639 self.write_code_unit(high);
640 self.write_code_unit(low);
641 }
642 #[inline(always)]
write_big5_combination(&mut self, combined: u16, combining: u16)643 fn write_big5_combination(&mut self, combined: u16, combining: u16) {
644 self.write_bmp_excl_ascii(combined);
645 self.write_bmp_excl_ascii(combining);
646 }
647 #[inline(always)]
copy_ascii_from_check_space_bmp<'b>( &'b mut self, source: &mut ByteSource, ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16BmpHandle<'b, 'a>)>648 pub fn copy_ascii_from_check_space_bmp<'b>(
649 &'b mut self,
650 source: &mut ByteSource,
651 ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16BmpHandle<'b, 'a>)> {
652 let non_ascii_ret = {
653 let src_remaining = &source.slice[source.pos..];
654 let dst_remaining = &mut self.slice[self.pos..];
655 let (pending, length) = if dst_remaining.len() < src_remaining.len() {
656 (DecoderResult::OutputFull, dst_remaining.len())
657 } else {
658 (DecoderResult::InputEmpty, src_remaining.len())
659 };
660 match unsafe {
661 ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
662 } {
663 None => {
664 source.pos += length;
665 self.pos += length;
666 return CopyAsciiResult::Stop((pending, source.pos, self.pos));
667 }
668 Some((non_ascii, consumed)) => {
669 source.pos += consumed;
670 self.pos += consumed;
671 source.pos += 1; // +1 for non_ascii
672 non_ascii
673 }
674 }
675 };
676 CopyAsciiResult::GoOn((non_ascii_ret, Utf16BmpHandle::new(self)))
677 }
678 #[inline(always)]
copy_ascii_from_check_space_astral<'b>( &'b mut self, source: &mut ByteSource, ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16AstralHandle<'b, 'a>)>679 pub fn copy_ascii_from_check_space_astral<'b>(
680 &'b mut self,
681 source: &mut ByteSource,
682 ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16AstralHandle<'b, 'a>)> {
683 let non_ascii_ret = {
684 let dst_len = self.slice.len();
685 let src_remaining = &source.slice[source.pos..];
686 let dst_remaining = &mut self.slice[self.pos..];
687 let (pending, length) = if dst_remaining.len() < src_remaining.len() {
688 (DecoderResult::OutputFull, dst_remaining.len())
689 } else {
690 (DecoderResult::InputEmpty, src_remaining.len())
691 };
692 match unsafe {
693 ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
694 } {
695 None => {
696 source.pos += length;
697 self.pos += length;
698 return CopyAsciiResult::Stop((pending, source.pos, self.pos));
699 }
700 Some((non_ascii, consumed)) => {
701 source.pos += consumed;
702 self.pos += consumed;
703 if self.pos + 1 < dst_len {
704 source.pos += 1; // +1 for non_ascii
705 non_ascii
706 } else {
707 return CopyAsciiResult::Stop((
708 DecoderResult::OutputFull,
709 source.pos,
710 self.pos,
711 ));
712 }
713 }
714 }
715 };
716 CopyAsciiResult::GoOn((non_ascii_ret, Utf16AstralHandle::new(self)))
717 }
718 #[inline(always)]
copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource)719 pub fn copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource) {
720 let src_remaining = &source.slice[source.pos..];
721 let dst_remaining = &mut self.slice[self.pos..];
722 let (read, written) = convert_utf8_to_utf16_up_to_invalid(src_remaining, dst_remaining);
723 source.pos += read;
724 self.pos += written;
725 }
726 #[inline(always)]
copy_utf16_from<E: Endian>( &mut self, source: &mut ByteSource, ) -> Option<(usize, usize)>727 pub fn copy_utf16_from<E: Endian>(
728 &mut self,
729 source: &mut ByteSource,
730 ) -> Option<(usize, usize)> {
731 let src_remaining = &source.slice[source.pos..];
732 let dst_remaining = &mut self.slice[self.pos..];
733
734 let mut src_unaligned = unsafe {
735 UnalignedU16Slice::new(
736 src_remaining.as_ptr(),
737 ::std::cmp::min(src_remaining.len() / 2, dst_remaining.len()),
738 )
739 };
740 if src_unaligned.len() == 0 {
741 return None;
742 }
743 let last_unit = swap_if_opposite_endian::<E>(src_unaligned.at(src_unaligned.len() - 1));
744 if super::in_range16(last_unit, 0xD800, 0xDC00) {
745 // Last code unit is a high surrogate. It might
746 // legitimately form a pair later, so let's not
747 // include it.
748 src_unaligned.trim_last();
749 }
750 let mut offset = 0usize;
751 loop {
752 if let Some((surrogate, bmp_len)) = {
753 let src_left = src_unaligned.tail(offset);
754 let dst_left = &mut dst_remaining[offset..src_unaligned.len()];
755 src_left.copy_bmp_to::<E>(dst_left)
756 } {
757 offset += bmp_len; // surrogate has not been consumed yet
758 let second_pos = offset + 1;
759 if surrogate > 0xDBFF || second_pos == src_unaligned.len() {
760 // Unpaired surrogate
761 source.pos += second_pos * 2;
762 self.pos += offset;
763 return Some((source.pos, self.pos));
764 }
765 let second = swap_if_opposite_endian::<E>(src_unaligned.at(second_pos));
766 if !super::in_range16(second, 0xDC00, 0xE000) {
767 // Unpaired surrogate
768 source.pos += second_pos * 2;
769 self.pos += offset;
770 return Some((source.pos, self.pos));
771 }
772 // `surrogate` was already speculatively written
773 dst_remaining[second_pos] = second;
774 offset += 2;
775 continue;
776 } else {
777 source.pos += src_unaligned.len() * 2;
778 self.pos += src_unaligned.len();
779 return None;
780 }
781 }
782 }
783 }
784
785 // UTF-8 destination
786
787 pub struct Utf8BmpHandle<'a, 'b>
788 where
789 'b: 'a,
790 {
791 dest: &'a mut Utf8Destination<'b>,
792 }
793
794 impl<'a, 'b> Utf8BmpHandle<'a, 'b>
795 where
796 'b: 'a,
797 {
798 #[inline(always)]
new(dst: &'a mut Utf8Destination<'b>) -> Utf8BmpHandle<'a, 'b>799 fn new(dst: &'a mut Utf8Destination<'b>) -> Utf8BmpHandle<'a, 'b> {
800 Utf8BmpHandle { dest: dst }
801 }
802 #[inline(always)]
written(&self) -> usize803 pub fn written(&self) -> usize {
804 self.dest.written()
805 }
806 #[inline(always)]
write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b>807 pub fn write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b> {
808 self.dest.write_ascii(ascii);
809 self.dest
810 }
811 #[inline(always)]
write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b>812 pub fn write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
813 self.dest.write_bmp(bmp);
814 self.dest
815 }
816 #[inline(always)]
write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b>817 pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
818 self.dest.write_bmp_excl_ascii(bmp);
819 self.dest
820 }
821 #[inline(always)]
write_mid_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b>822 pub fn write_mid_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
823 self.dest.write_mid_bmp(bmp);
824 self.dest
825 }
826 #[inline(always)]
write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b>827 pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
828 self.dest.write_upper_bmp(bmp);
829 self.dest
830 }
831 #[inline(always)]
commit(self) -> &'a mut Utf8Destination<'b>832 pub fn commit(self) -> &'a mut Utf8Destination<'b> {
833 self.dest
834 }
835 }
836
837 pub struct Utf8AstralHandle<'a, 'b>
838 where
839 'b: 'a,
840 {
841 dest: &'a mut Utf8Destination<'b>,
842 }
843
844 impl<'a, 'b> Utf8AstralHandle<'a, 'b>
845 where
846 'b: 'a,
847 {
848 #[inline(always)]
new(dst: &'a mut Utf8Destination<'b>) -> Utf8AstralHandle<'a, 'b>849 fn new(dst: &'a mut Utf8Destination<'b>) -> Utf8AstralHandle<'a, 'b> {
850 Utf8AstralHandle { dest: dst }
851 }
852 #[inline(always)]
written(&self) -> usize853 pub fn written(&self) -> usize {
854 self.dest.written()
855 }
856 #[inline(always)]
write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b>857 pub fn write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b> {
858 self.dest.write_ascii(ascii);
859 self.dest
860 }
861 #[inline(always)]
write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b>862 pub fn write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
863 self.dest.write_bmp(bmp);
864 self.dest
865 }
866 #[inline(always)]
write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b>867 pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
868 self.dest.write_bmp_excl_ascii(bmp);
869 self.dest
870 }
871 #[inline(always)]
write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b>872 pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
873 self.dest.write_upper_bmp(bmp);
874 self.dest
875 }
876 #[inline(always)]
write_astral(self, astral: u32) -> &'a mut Utf8Destination<'b>877 pub fn write_astral(self, astral: u32) -> &'a mut Utf8Destination<'b> {
878 self.dest.write_astral(astral);
879 self.dest
880 }
881 #[inline(always)]
write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf8Destination<'b>882 pub fn write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf8Destination<'b> {
883 self.dest.write_surrogate_pair(high, low);
884 self.dest
885 }
886 #[inline(always)]
write_big5_combination( self, combined: u16, combining: u16, ) -> &'a mut Utf8Destination<'b>887 pub fn write_big5_combination(
888 self,
889 combined: u16,
890 combining: u16,
891 ) -> &'a mut Utf8Destination<'b> {
892 self.dest.write_big5_combination(combined, combining);
893 self.dest
894 }
895 #[inline(always)]
commit(self) -> &'a mut Utf8Destination<'b>896 pub fn commit(self) -> &'a mut Utf8Destination<'b> {
897 self.dest
898 }
899 }
900
/// Write cursor over a borrowed buffer of UTF-8 bytes.
pub struct Utf8Destination<'a> {
    /// The complete output buffer.
    slice: &'a mut [u8],
    /// Index of the next byte to write; also the number written so far.
    pos: usize,
}
905
906 impl<'a> Utf8Destination<'a> {
907 #[inline(always)]
new(dst: &mut [u8]) -> Utf8Destination908 pub fn new(dst: &mut [u8]) -> Utf8Destination {
909 Utf8Destination { slice: dst, pos: 0 }
910 }
911 #[inline(always)]
check_space_bmp<'b>(&'b mut self) -> Space<Utf8BmpHandle<'b, 'a>>912 pub fn check_space_bmp<'b>(&'b mut self) -> Space<Utf8BmpHandle<'b, 'a>> {
913 if self.pos + 2 < self.slice.len() {
914 Space::Available(Utf8BmpHandle::new(self))
915 } else {
916 Space::Full(self.written())
917 }
918 }
919 #[inline(always)]
check_space_astral<'b>(&'b mut self) -> Space<Utf8AstralHandle<'b, 'a>>920 pub fn check_space_astral<'b>(&'b mut self) -> Space<Utf8AstralHandle<'b, 'a>> {
921 if self.pos + 3 < self.slice.len() {
922 Space::Available(Utf8AstralHandle::new(self))
923 } else {
924 Space::Full(self.written())
925 }
926 }
927 #[inline(always)]
written(&self) -> usize928 pub fn written(&self) -> usize {
929 self.pos
930 }
931 #[inline(always)]
write_code_unit(&mut self, u: u8)932 fn write_code_unit(&mut self, u: u8) {
933 unsafe {
934 // OK, because we checked before handing out a handle.
935 *(self.slice.get_unchecked_mut(self.pos)) = u;
936 }
937 self.pos += 1;
938 }
939 #[inline(always)]
write_ascii(&mut self, ascii: u8)940 fn write_ascii(&mut self, ascii: u8) {
941 debug_assert!(ascii < 0x80);
942 self.write_code_unit(ascii);
943 }
944 #[inline(always)]
write_bmp(&mut self, bmp: u16)945 fn write_bmp(&mut self, bmp: u16) {
946 if bmp < 0x80u16 {
947 self.write_ascii(bmp as u8);
948 } else if bmp < 0x800u16 {
949 self.write_mid_bmp(bmp);
950 } else {
951 self.write_upper_bmp(bmp);
952 }
953 }
954 #[inline(always)]
write_mid_bmp(&mut self, mid_bmp: u16)955 fn write_mid_bmp(&mut self, mid_bmp: u16) {
956 debug_assert!(mid_bmp >= 0x80);
957 debug_assert!(mid_bmp < 0x800);
958 self.write_code_unit(((mid_bmp >> 6) | 0xC0) as u8);
959 self.write_code_unit(((mid_bmp & 0x3F) | 0x80) as u8);
960 }
961 #[inline(always)]
write_upper_bmp(&mut self, upper_bmp: u16)962 fn write_upper_bmp(&mut self, upper_bmp: u16) {
963 debug_assert!(upper_bmp >= 0x800);
964 self.write_code_unit(((upper_bmp >> 12) | 0xE0) as u8);
965 self.write_code_unit((((upper_bmp & 0xFC0) >> 6) | 0x80) as u8);
966 self.write_code_unit(((upper_bmp & 0x3F) | 0x80) as u8);
967 }
968 #[inline(always)]
write_bmp_excl_ascii(&mut self, bmp: u16)969 fn write_bmp_excl_ascii(&mut self, bmp: u16) {
970 if bmp < 0x800u16 {
971 self.write_mid_bmp(bmp);
972 } else {
973 self.write_upper_bmp(bmp);
974 }
975 }
976 #[inline(always)]
write_astral(&mut self, astral: u32)977 fn write_astral(&mut self, astral: u32) {
978 debug_assert!(astral > 0xFFFF);
979 debug_assert!(astral <= 0x10_FFFF);
980 self.write_code_unit(((astral >> 18) | 0xF0) as u8);
981 self.write_code_unit((((astral & 0x3F000) >> 12) | 0x80) as u8);
982 self.write_code_unit((((astral & 0xFC0) >> 6) | 0x80) as u8);
983 self.write_code_unit(((astral & 0x3F) | 0x80) as u8);
984 }
985 #[inline(always)]
write_surrogate_pair(&mut self, high: u16, low: u16)986 pub fn write_surrogate_pair(&mut self, high: u16, low: u16) {
987 self.write_astral(
988 (u32::from(high) << 10) + u32::from(low)
989 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
990 );
991 }
/// Writes two code units back to back, each as a two-byte UTF-8 sequence.
///
/// Both `combined` and `combining` go through `write_mid_bmp`, so each must
/// lie in 0x80..0x800 (checked only in debug builds).
#[inline(always)]
fn write_big5_combination(&mut self, combined: u16, combining: u16) {
    self.write_mid_bmp(combined);
    self.write_mid_bmp(combining);
}
/// Copies a leading run of ASCII from `source` into this destination and
/// then checks for room for a three-byte (BMP) UTF-8 sequence.
///
/// Returns `CopyAsciiResult::Stop((result, source_pos, dest_pos))` when
/// * `ascii_to_ascii` reports `None` for the whole `length` (the shorter of
///   the two remainders): `result` is `OutputFull` if the destination
///   remainder was the shorter one, otherwise `InputEmpty`; or
/// * a non-ASCII byte was found but `self.pos + 2 < dst_len` does not hold:
///   `result` is `OutputFull` and the non-ASCII byte is left unconsumed.
///
/// Otherwise returns `CopyAsciiResult::GoOn((non_ascii, handle))` with the
/// source advanced past the non-ASCII byte.
#[inline(always)]
pub fn copy_ascii_from_check_space_bmp<'b>(
    &'b mut self,
    source: &mut ByteSource,
) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf8BmpHandle<'b, 'a>)> {
    // The inner block keeps the slice borrows scoped so that `self` can be
    // handed to the handle constructor afterwards.
    let non_ascii_ret = {
        // Read the total length up front, before `self.slice` is mutably
        // borrowed below.
        let dst_len = self.slice.len();
        let src_remaining = &source.slice[source.pos..];
        let dst_remaining = &mut self.slice[self.pos..];
        // Process at most the shorter remainder and remember which side
        // would run out if everything turns out to be ASCII.
        let (pending, length) = if dst_remaining.len() < src_remaining.len() {
            (DecoderResult::OutputFull, dst_remaining.len())
        } else {
            (DecoderResult::InputEmpty, src_remaining.len())
        };
        // NOTE(review): `ascii_to_ascii` is defined in the `ascii` module;
        // presumably it copies ASCII bytes and yields the first non-ASCII
        // byte plus the count copied before it -- confirm there.
        match unsafe {
            ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
        } {
            None => {
                source.pos += length;
                self.pos += length;
                return CopyAsciiResult::Stop((pending, source.pos, self.pos));
            }
            Some((non_ascii, consumed)) => {
                source.pos += consumed;
                self.pos += consumed;
                // Need three free bytes for the handle handed out below.
                if self.pos + 2 < dst_len {
                    source.pos += 1; // +1 for non_ascii
                    non_ascii
                } else {
                    return CopyAsciiResult::Stop((
                        DecoderResult::OutputFull,
                        source.pos,
                        self.pos,
                    ));
                }
            }
        }
    };
    CopyAsciiResult::GoOn((non_ascii_ret, Utf8BmpHandle::new(self)))
}
/// Copies a leading run of ASCII from `source` into this destination and
/// then checks for room for a four-byte (astral) UTF-8 sequence.
///
/// Returns `CopyAsciiResult::Stop((result, source_pos, dest_pos))` when
/// * `ascii_to_ascii` reports `None` for the whole `length` (the shorter of
///   the two remainders): `result` is `OutputFull` if the destination
///   remainder was the shorter one, otherwise `InputEmpty`; or
/// * a non-ASCII byte was found but `self.pos + 3 < dst_len` does not hold:
///   `result` is `OutputFull` and the non-ASCII byte is left unconsumed.
///
/// Otherwise returns `CopyAsciiResult::GoOn((non_ascii, handle))` with the
/// source advanced past the non-ASCII byte.
#[inline(always)]
pub fn copy_ascii_from_check_space_astral<'b>(
    &'b mut self,
    source: &mut ByteSource,
) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf8AstralHandle<'b, 'a>)> {
    // The inner block keeps the slice borrows scoped so that `self` can be
    // handed to the handle constructor afterwards.
    let non_ascii_ret = {
        // Read the total length up front, before `self.slice` is mutably
        // borrowed below.
        let dst_len = self.slice.len();
        let src_remaining = &source.slice[source.pos..];
        let dst_remaining = &mut self.slice[self.pos..];
        // Process at most the shorter remainder and remember which side
        // would run out if everything turns out to be ASCII.
        let (pending, length) = if dst_remaining.len() < src_remaining.len() {
            (DecoderResult::OutputFull, dst_remaining.len())
        } else {
            (DecoderResult::InputEmpty, src_remaining.len())
        };
        // NOTE(review): `ascii_to_ascii` is defined in the `ascii` module;
        // presumably it copies ASCII bytes and yields the first non-ASCII
        // byte plus the count copied before it -- confirm there.
        match unsafe {
            ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
        } {
            None => {
                source.pos += length;
                self.pos += length;
                return CopyAsciiResult::Stop((pending, source.pos, self.pos));
            }
            Some((non_ascii, consumed)) => {
                source.pos += consumed;
                self.pos += consumed;
                // Need four free bytes for the handle handed out below.
                if self.pos + 3 < dst_len {
                    source.pos += 1; // +1 for non_ascii
                    non_ascii
                } else {
                    return CopyAsciiResult::Stop((
                        DecoderResult::OutputFull,
                        source.pos,
                        self.pos,
                    ));
                }
            }
        }
    };
    CopyAsciiResult::GoOn((non_ascii_ret, Utf8AstralHandle::new(self)))
}
1077 #[inline(always)]
copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource)1078 pub fn copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource) {
1079 let src_remaining = &source.slice[source.pos..];
1080 let dst_remaining = &mut self.slice[self.pos..];
1081 let min_len = ::std::cmp::min(src_remaining.len(), dst_remaining.len());
1082 // Validate first, then memcpy to let memcpy do its thing even for
1083 // non-ASCII. (And potentially do something better than SSE2 for ASCII.)
1084 let valid_len = utf8_valid_up_to(&src_remaining[..min_len]);
1085 (&mut dst_remaining[..valid_len]).copy_from_slice(&src_remaining[..valid_len]);
1086 source.pos += valid_len;
1087 self.pos += valid_len;
1088 }
1089 #[inline(always)]
copy_utf16_from<E: Endian>( &mut self, source: &mut ByteSource, ) -> Option<(usize, usize)>1090 pub fn copy_utf16_from<E: Endian>(
1091 &mut self,
1092 source: &mut ByteSource,
1093 ) -> Option<(usize, usize)> {
1094 let src_remaining = &source.slice[source.pos..];
1095 let dst_remaining = &mut self.slice[self.pos..];
1096
1097 let mut src_unaligned =
1098 unsafe { UnalignedU16Slice::new(src_remaining.as_ptr(), src_remaining.len() / 2) };
1099 if src_unaligned.len() == 0 {
1100 return None;
1101 }
1102 let mut last_unit = src_unaligned.at(src_unaligned.len() - 1);
1103 if E::OPPOSITE_ENDIAN {
1104 last_unit = last_unit.swap_bytes();
1105 }
1106 if super::in_range16(last_unit, 0xD800, 0xDC00) {
1107 // Last code unit is a high surrogate. It might
1108 // legitimately form a pair later, so let's not
1109 // include it.
1110 src_unaligned.trim_last();
1111 }
1112 let (read, written, had_error) =
1113 convert_unaligned_utf16_to_utf8::<E>(src_unaligned, dst_remaining);
1114 source.pos += read * 2;
1115 self.pos += written;
1116 if had_error {
1117 Some((source.pos, self.pos))
1118 } else {
1119 None
1120 }
1121 }
1122 }
1123
1124 // UTF-16 source
1125
/// Read cursor over a slice of UTF-16 code units.
pub struct Utf16Source<'a> {
    /// The code units being read.
    slice: &'a [u16],
    /// Index of the next code unit to read.
    pos: usize,
    /// Value of `pos` before the most recent read; restored by `unread()`.
    old_pos: usize,
}
1131
1132 impl<'a> Utf16Source<'a> {
/// Creates a source positioned at the start of `src`.
#[inline(always)]
pub fn new(src: &[u16]) -> Utf16Source {
    Utf16Source {
        slice: src,
        pos: 0,
        old_pos: 0,
    }
}
1141 #[inline(always)]
check_available<'b>(&'b mut self) -> Space<Utf16ReadHandle<'b, 'a>>1142 pub fn check_available<'b>(&'b mut self) -> Space<Utf16ReadHandle<'b, 'a>> {
1143 if self.pos < self.slice.len() {
1144 Space::Available(Utf16ReadHandle::new(self))
1145 } else {
1146 Space::Full(self.consumed())
1147 }
1148 }
1149 #[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
1150 #[inline(always)]
read(&mut self) -> char1151 fn read(&mut self) -> char {
1152 self.old_pos = self.pos;
1153 let unit = self.slice[self.pos];
1154 self.pos += 1;
1155 let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
1156 if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
1157 return unsafe { ::std::char::from_u32_unchecked(u32::from(unit)) };
1158 }
1159 if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
1160 // high surrogate
1161 if self.pos < self.slice.len() {
1162 let second = self.slice[self.pos];
1163 let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
1164 if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
1165 // The next code unit is a low surrogate. Advance position.
1166 self.pos += 1;
1167 return unsafe {
1168 ::std::char::from_u32_unchecked(
1169 (u32::from(unit) << 10) + u32::from(second)
1170 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
1171 )
1172 };
1173 }
1174 // The next code unit is not a low surrogate. Don't advance
1175 // position and treat the high surrogate as unpaired.
1176 // fall through
1177 }
1178 // Unpaired surrogate at the end of buffer, fall through
1179 }
1180 // Unpaired low surrogate
1181 '\u{FFFD}'
1182 }
1183 #[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
1184 #[inline(always)]
read_enum(&mut self) -> Unicode1185 fn read_enum(&mut self) -> Unicode {
1186 self.old_pos = self.pos;
1187 let unit = self.slice[self.pos];
1188 self.pos += 1;
1189 if unit < 0x80 {
1190 return Unicode::Ascii(unit as u8);
1191 }
1192 let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
1193 if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
1194 return Unicode::NonAscii(NonAscii::BmpExclAscii(unit));
1195 }
1196 if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
1197 // high surrogate
1198 if self.pos < self.slice.len() {
1199 let second = self.slice[self.pos];
1200 let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
1201 if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
1202 // The next code unit is a low surrogate. Advance position.
1203 self.pos += 1;
1204 return Unicode::NonAscii(NonAscii::Astral(unsafe {
1205 ::std::char::from_u32_unchecked(
1206 (u32::from(unit) << 10) + u32::from(second)
1207 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
1208 )
1209 }));
1210 }
1211 // The next code unit is not a low surrogate. Don't advance
1212 // position and treat the high surrogate as unpaired.
1213 // fall through
1214 }
1215 // Unpaired surrogate at the end of buffer, fall through
1216 }
1217 // Unpaired low surrogate
1218 Unicode::NonAscii(NonAscii::BmpExclAscii(0xFFFDu16))
1219 }
1220 #[inline(always)]
unread(&mut self) -> usize1221 fn unread(&mut self) -> usize {
1222 self.pos = self.old_pos;
1223 self.pos
1224 }
/// Returns the current read position, i.e. the number of code units
/// consumed so far.
#[inline(always)]
pub fn consumed(&self) -> usize {
    self.pos
}
/// Copies a leading run of Basic Latin code units into `dest` as bytes and
/// then checks that `dest` has room for two more bytes.
///
/// Returns `CopyAsciiResult::Stop((result, source_pos, dest_pos))` when
/// * `basic_latin_to_ascii` reports `None` for the whole `length` (the
///   shorter of the two remainders): `result` is `OutputFull` if the
///   destination remainder was the shorter one, otherwise `InputEmpty`; or
/// * a non-ASCII code unit was found but `dest.pos + 1 < dst_len` does not
///   hold: `result` is `OutputFull` and that code unit is left unconsumed.
///
/// Otherwise the non-ASCII code unit is consumed and decoded (a valid
/// surrogate pair becomes `NonAscii::Astral`, an unpaired surrogate becomes
/// `BmpExclAscii(0xFFFD)`) and `CopyAsciiResult::GoOn((non_ascii, handle))`
/// is returned.
#[inline(always)]
pub fn copy_ascii_to_check_space_two<'b>(
    &mut self,
    dest: &'b mut ByteDestination<'a>,
) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteTwoHandle<'b, 'a>)> {
    // The inner block keeps the slice borrows scoped so that `dest` can be
    // handed to the handle constructor afterwards.
    let non_ascii_ret = {
        // Read the total length up front, before `dest.slice` is mutably
        // borrowed below.
        let dst_len = dest.slice.len();
        let src_remaining = &self.slice[self.pos..];
        let dst_remaining = &mut dest.slice[dest.pos..];
        // Process at most the shorter remainder and remember which side
        // would run out if everything turns out to be ASCII.
        let (pending, length) = if dst_remaining.len() < src_remaining.len() {
            (EncoderResult::OutputFull, dst_remaining.len())
        } else {
            (EncoderResult::InputEmpty, src_remaining.len())
        };
        // NOTE(review): `basic_latin_to_ascii` is defined in the `ascii`
        // module; presumably it narrows ASCII-range code units to bytes and
        // yields the first non-ASCII code unit plus the count converted
        // before it -- confirm there.
        match unsafe {
            basic_latin_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
        } {
            None => {
                self.pos += length;
                dest.pos += length;
                return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
            }
            Some((non_ascii, consumed)) => {
                self.pos += consumed;
                dest.pos += consumed;
                // Need two free bytes for the handle handed out below.
                if dest.pos + 1 < dst_len {
                    self.pos += 1; // commit to reading `non_ascii`
                    let unit = non_ascii;
                    let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
                    if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
                        // Not a surrogate.
                        NonAscii::BmpExclAscii(unit)
                    } else if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
                        // high surrogate
                        if self.pos < self.slice.len() {
                            let second = self.slice[self.pos];
                            let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
                            if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
                                // The next code unit is a low surrogate. Advance position.
                                self.pos += 1;
                                NonAscii::Astral(unsafe {
                                    ::std::char::from_u32_unchecked(
                                        (u32::from(unit) << 10) + u32::from(second)
                                            - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
                                    )
                                })
                            } else {
                                // The next code unit is not a low surrogate. Don't advance
                                // position and treat the high surrogate as unpaired.
                                NonAscii::BmpExclAscii(0xFFFDu16)
                            }
                        } else {
                            // Unpaired surrogate at the end of the buffer.
                            NonAscii::BmpExclAscii(0xFFFDu16)
                        }
                    } else {
                        // Unpaired low surrogate
                        NonAscii::BmpExclAscii(0xFFFDu16)
                    }
                } else {
                    return CopyAsciiResult::Stop((
                        EncoderResult::OutputFull,
                        self.pos,
                        dest.pos,
                    ));
                }
            }
        }
    };
    CopyAsciiResult::GoOn((non_ascii_ret, ByteTwoHandle::new(dest)))
}
/// Copies a leading run of Basic Latin code units into `dest` as bytes and
/// then checks that `dest` has room for four more bytes.
///
/// Returns `CopyAsciiResult::Stop((result, source_pos, dest_pos))` when
/// * `basic_latin_to_ascii` reports `None` for the whole `length` (the
///   shorter of the two remainders): `result` is `OutputFull` if the
///   destination remainder was the shorter one, otherwise `InputEmpty`; or
/// * a non-ASCII code unit was found but `dest.pos + 3 < dst_len` does not
///   hold: `result` is `OutputFull` and that code unit is left unconsumed.
///
/// Otherwise the non-ASCII code unit is consumed and decoded (a valid
/// surrogate pair becomes `NonAscii::Astral`, an unpaired surrogate becomes
/// `BmpExclAscii(0xFFFD)`) and `CopyAsciiResult::GoOn((non_ascii, handle))`
/// is returned.
#[inline(always)]
pub fn copy_ascii_to_check_space_four<'b>(
    &mut self,
    dest: &'b mut ByteDestination<'a>,
) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteFourHandle<'b, 'a>)> {
    // The inner block keeps the slice borrows scoped so that `dest` can be
    // handed to the handle constructor afterwards.
    let non_ascii_ret = {
        // Read the total length up front, before `dest.slice` is mutably
        // borrowed below.
        let dst_len = dest.slice.len();
        let src_remaining = &self.slice[self.pos..];
        let dst_remaining = &mut dest.slice[dest.pos..];
        // Process at most the shorter remainder and remember which side
        // would run out if everything turns out to be ASCII.
        let (pending, length) = if dst_remaining.len() < src_remaining.len() {
            (EncoderResult::OutputFull, dst_remaining.len())
        } else {
            (EncoderResult::InputEmpty, src_remaining.len())
        };
        // NOTE(review): `basic_latin_to_ascii` is defined in the `ascii`
        // module; presumably it narrows ASCII-range code units to bytes and
        // yields the first non-ASCII code unit plus the count converted
        // before it -- confirm there.
        match unsafe {
            basic_latin_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
        } {
            None => {
                self.pos += length;
                dest.pos += length;
                return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
            }
            Some((non_ascii, consumed)) => {
                self.pos += consumed;
                dest.pos += consumed;
                // Need four free bytes for the handle handed out below.
                if dest.pos + 3 < dst_len {
                    self.pos += 1; // commit to reading `non_ascii`
                    let unit = non_ascii;
                    let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
                    if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
                        // Not a surrogate.
                        NonAscii::BmpExclAscii(unit)
                    } else if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
                        // high surrogate
                        if self.pos == self.slice.len() {
                            // Unpaired surrogate at the end of the buffer.
                            NonAscii::BmpExclAscii(0xFFFDu16)
                        } else {
                            let second = self.slice[self.pos];
                            let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
                            if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
                                // The next code unit is a low surrogate. Advance position.
                                self.pos += 1;
                                NonAscii::Astral(unsafe {
                                    ::std::char::from_u32_unchecked(
                                        (u32::from(unit) << 10) + u32::from(second)
                                            - (((0xD800u32 << 10) - 0x1_0000u32) + 0xDC00u32),
                                    )
                                })
                            } else {
                                // The next code unit is not a low surrogate. Don't advance
                                // position and treat the high surrogate as unpaired.
                                NonAscii::BmpExclAscii(0xFFFDu16)
                            }
                        }
                    } else {
                        // Unpaired low surrogate
                        NonAscii::BmpExclAscii(0xFFFDu16)
                    }
                } else {
                    return CopyAsciiResult::Stop((
                        EncoderResult::OutputFull,
                        self.pos,
                        dest.pos,
                    ));
                }
            }
        }
    };
    CopyAsciiResult::GoOn((non_ascii_ret, ByteFourHandle::new(dest)))
}
1369 }
1370
1371 pub struct Utf16ReadHandle<'a, 'b>
1372 where
1373 'b: 'a,
1374 {
1375 source: &'a mut Utf16Source<'b>,
1376 }
1377
1378 impl<'a, 'b> Utf16ReadHandle<'a, 'b>
1379 where
1380 'b: 'a,
1381 {
1382 #[inline(always)]
new(src: &'a mut Utf16Source<'b>) -> Utf16ReadHandle<'a, 'b>1383 fn new(src: &'a mut Utf16Source<'b>) -> Utf16ReadHandle<'a, 'b> {
1384 Utf16ReadHandle { source: src }
1385 }
1386 #[inline(always)]
read(self) -> (char, Utf16UnreadHandle<'a, 'b>)1387 pub fn read(self) -> (char, Utf16UnreadHandle<'a, 'b>) {
1388 let character = self.source.read();
1389 let handle = Utf16UnreadHandle::new(self.source);
1390 (character, handle)
1391 }
1392 #[inline(always)]
read_enum(self) -> (Unicode, Utf16UnreadHandle<'a, 'b>)1393 pub fn read_enum(self) -> (Unicode, Utf16UnreadHandle<'a, 'b>) {
1394 let character = self.source.read_enum();
1395 let handle = Utf16UnreadHandle::new(self.source);
1396 (character, handle)
1397 }
1398 #[inline(always)]
consumed(&self) -> usize1399 pub fn consumed(&self) -> usize {
1400 self.source.consumed()
1401 }
1402 }
1403
1404 pub struct Utf16UnreadHandle<'a, 'b>
1405 where
1406 'b: 'a,
1407 {
1408 source: &'a mut Utf16Source<'b>,
1409 }
1410
1411 impl<'a, 'b> Utf16UnreadHandle<'a, 'b>
1412 where
1413 'b: 'a,
1414 {
1415 #[inline(always)]
new(src: &'a mut Utf16Source<'b>) -> Utf16UnreadHandle<'a, 'b>1416 fn new(src: &'a mut Utf16Source<'b>) -> Utf16UnreadHandle<'a, 'b> {
1417 Utf16UnreadHandle { source: src }
1418 }
1419 #[inline(always)]
unread(self) -> usize1420 pub fn unread(self) -> usize {
1421 self.source.unread()
1422 }
1423 #[inline(always)]
consumed(&self) -> usize1424 pub fn consumed(&self) -> usize {
1425 self.source.consumed()
1426 }
1427 #[inline(always)]
commit(self) -> &'a mut Utf16Source<'b>1428 pub fn commit(self) -> &'a mut Utf16Source<'b> {
1429 self.source
1430 }
1431 }
1432
1433 // UTF-8 source
1434
/// Read cursor over the bytes of a `&str`.
pub struct Utf8Source<'a> {
    /// The UTF-8 bytes being read (obtained from a `&str` in `new()`).
    slice: &'a [u8],
    /// Index of the next byte to read.
    pos: usize,
    /// Value of `pos` before the most recent read; restored by `unread()`.
    old_pos: usize,
}
1440
1441 impl<'a> Utf8Source<'a> {
/// Creates a source positioned at the start of the bytes of `src`.
#[inline(always)]
pub fn new(src: &str) -> Utf8Source {
    Utf8Source {
        slice: src.as_bytes(),
        pos: 0,
        old_pos: 0,
    }
}
1450 #[inline(always)]
check_available<'b>(&'b mut self) -> Space<Utf8ReadHandle<'b, 'a>>1451 pub fn check_available<'b>(&'b mut self) -> Space<Utf8ReadHandle<'b, 'a>> {
1452 if self.pos < self.slice.len() {
1453 Space::Available(Utf8ReadHandle::new(self))
1454 } else {
1455 Space::Full(self.consumed())
1456 }
1457 }
1458 #[inline(always)]
read(&mut self) -> char1459 fn read(&mut self) -> char {
1460 self.old_pos = self.pos;
1461 let unit = self.slice[self.pos];
1462 if unit < 0x80 {
1463 self.pos += 1;
1464 return char::from(unit);
1465 }
1466 if unit < 0xE0 {
1467 let point =
1468 ((u32::from(unit) & 0x1F) << 6) | (u32::from(self.slice[self.pos + 1]) & 0x3F);
1469 self.pos += 2;
1470 return unsafe { ::std::char::from_u32_unchecked(point) };
1471 }
1472 if unit < 0xF0 {
1473 let point = ((u32::from(unit) & 0xF) << 12)
1474 | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1475 | (u32::from(self.slice[self.pos + 2]) & 0x3F);
1476 self.pos += 3;
1477 return unsafe { ::std::char::from_u32_unchecked(point) };
1478 }
1479 let point = ((u32::from(unit) & 0x7) << 18)
1480 | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1481 | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1482 | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1483 self.pos += 4;
1484 unsafe { ::std::char::from_u32_unchecked(point) }
1485 }
1486 #[inline(always)]
read_enum(&mut self) -> Unicode1487 fn read_enum(&mut self) -> Unicode {
1488 self.old_pos = self.pos;
1489 let unit = self.slice[self.pos];
1490 if unit < 0x80 {
1491 self.pos += 1;
1492 return Unicode::Ascii(unit);
1493 }
1494 if unit < 0xE0 {
1495 let point =
1496 ((u16::from(unit) & 0x1F) << 6) | (u16::from(self.slice[self.pos + 1]) & 0x3F);
1497 self.pos += 2;
1498 return Unicode::NonAscii(NonAscii::BmpExclAscii(point));
1499 }
1500 if unit < 0xF0 {
1501 let point = ((u16::from(unit) & 0xF) << 12)
1502 | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1503 | (u16::from(self.slice[self.pos + 2]) & 0x3F);
1504 self.pos += 3;
1505 return Unicode::NonAscii(NonAscii::BmpExclAscii(point));
1506 }
1507 let point = ((u32::from(unit) & 0x7) << 18)
1508 | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1509 | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1510 | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1511 self.pos += 4;
1512 Unicode::NonAscii(NonAscii::Astral(unsafe {
1513 ::std::char::from_u32_unchecked(point)
1514 }))
1515 }
1516 #[inline(always)]
unread(&mut self) -> usize1517 fn unread(&mut self) -> usize {
1518 self.pos = self.old_pos;
1519 self.pos
1520 }
/// Returns the current read position, i.e. the number of bytes consumed so
/// far.
#[inline(always)]
pub fn consumed(&self) -> usize {
    self.pos
}
/// Copies a leading run of ASCII bytes into `dest` and, when a non-ASCII
/// lead byte stops the run, decodes that whole UTF-8 sequence.
///
/// Returns `CopyAsciiResult::Stop((result, source_pos, dest_pos))` when
/// `ascii_to_ascii` reports `None` for the whole `length` (the shorter of
/// the two remainders): `result` is `OutputFull` if the destination
/// remainder was the shorter one, otherwise `InputEmpty`.
///
/// Otherwise returns `CopyAsciiResult::GoOn((non_ascii, handle))` with the
/// source advanced past the decoded sequence and a handle for one byte.
#[inline(always)]
pub fn copy_ascii_to_check_space_one<'b>(
    &mut self,
    dest: &'b mut ByteDestination<'a>,
) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteOneHandle<'b, 'a>)> {
    // The inner block keeps the slice borrows scoped so that `dest` can be
    // handed to the handle constructor afterwards.
    let non_ascii_ret = {
        let src_remaining = &self.slice[self.pos..];
        let dst_remaining = &mut dest.slice[dest.pos..];
        // Process at most the shorter remainder and remember which side
        // would run out if everything turns out to be ASCII.
        let (pending, length) = if dst_remaining.len() < src_remaining.len() {
            (EncoderResult::OutputFull, dst_remaining.len())
        } else {
            (EncoderResult::InputEmpty, src_remaining.len())
        };
        // NOTE(review): `ascii_to_ascii` is defined in the `ascii` module;
        // presumably it copies ASCII bytes and yields the first non-ASCII
        // byte plus the count copied before it -- confirm there.
        match unsafe {
            ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
        } {
            None => {
                self.pos += length;
                dest.pos += length;
                return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
            }
            Some((non_ascii, consumed)) => {
                self.pos += consumed;
                dest.pos += consumed;
                // We don't need to check space in destination, because
                // `ascii_to_ascii()` already did.
                if non_ascii < 0xE0 {
                    // Two-byte sequence.
                    let point = ((u16::from(non_ascii) & 0x1F) << 6)
                        | (u16::from(self.slice[self.pos + 1]) & 0x3F);
                    self.pos += 2;
                    NonAscii::BmpExclAscii(point)
                } else if non_ascii < 0xF0 {
                    // Three-byte sequence.
                    let point = ((u16::from(non_ascii) & 0xF) << 12)
                        | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
                        | (u16::from(self.slice[self.pos + 2]) & 0x3F);
                    self.pos += 3;
                    NonAscii::BmpExclAscii(point)
                } else {
                    // Four-byte sequence.
                    let point = ((u32::from(non_ascii) & 0x7) << 18)
                        | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
                        | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
                        | (u32::from(self.slice[self.pos + 3]) & 0x3F);
                    self.pos += 4;
                    NonAscii::Astral(unsafe { ::std::char::from_u32_unchecked(point) })
                }
            }
        }
    };
    CopyAsciiResult::GoOn((non_ascii_ret, ByteOneHandle::new(dest)))
}
/// Copies a leading run of ASCII bytes into `dest` and then checks that
/// `dest` has room for two more bytes before decoding the non-ASCII
/// sequence that stopped the run.
///
/// Returns `CopyAsciiResult::Stop((result, source_pos, dest_pos))` when
/// * `ascii_to_ascii` reports `None` for the whole `length` (the shorter of
///   the two remainders): `result` is `OutputFull` if the destination
///   remainder was the shorter one, otherwise `InputEmpty`; or
/// * a non-ASCII byte was found but `dest.pos + 1 < dst_len` does not hold:
///   `result` is `OutputFull` and the sequence is left unconsumed.
///
/// Otherwise returns `CopyAsciiResult::GoOn((non_ascii, handle))` with the
/// source advanced past the decoded sequence.
#[inline(always)]
pub fn copy_ascii_to_check_space_two<'b>(
    &mut self,
    dest: &'b mut ByteDestination<'a>,
) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteTwoHandle<'b, 'a>)> {
    // The inner block keeps the slice borrows scoped so that `dest` can be
    // handed to the handle constructor afterwards.
    let non_ascii_ret = {
        // Read the total length up front, before `dest.slice` is mutably
        // borrowed below.
        let dst_len = dest.slice.len();
        let src_remaining = &self.slice[self.pos..];
        let dst_remaining = &mut dest.slice[dest.pos..];
        // Process at most the shorter remainder and remember which side
        // would run out if everything turns out to be ASCII.
        let (pending, length) = if dst_remaining.len() < src_remaining.len() {
            (EncoderResult::OutputFull, dst_remaining.len())
        } else {
            (EncoderResult::InputEmpty, src_remaining.len())
        };
        // NOTE(review): `ascii_to_ascii` is defined in the `ascii` module;
        // presumably it copies ASCII bytes and yields the first non-ASCII
        // byte plus the count copied before it -- confirm there.
        match unsafe {
            ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
        } {
            None => {
                self.pos += length;
                dest.pos += length;
                return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
            }
            Some((non_ascii, consumed)) => {
                self.pos += consumed;
                dest.pos += consumed;
                // Need two free bytes for the handle handed out below.
                if dest.pos + 1 < dst_len {
                    if non_ascii < 0xE0 {
                        // Two-byte sequence.
                        let point = ((u16::from(non_ascii) & 0x1F) << 6)
                            | (u16::from(self.slice[self.pos + 1]) & 0x3F);
                        self.pos += 2;
                        NonAscii::BmpExclAscii(point)
                    } else if non_ascii < 0xF0 {
                        // Three-byte sequence.
                        let point = ((u16::from(non_ascii) & 0xF) << 12)
                            | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
                            | (u16::from(self.slice[self.pos + 2]) & 0x3F);
                        self.pos += 3;
                        NonAscii::BmpExclAscii(point)
                    } else {
                        // Four-byte sequence.
                        let point = ((u32::from(non_ascii) & 0x7) << 18)
                            | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
                            | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
                            | (u32::from(self.slice[self.pos + 3]) & 0x3F);
                        self.pos += 4;
                        NonAscii::Astral(unsafe { ::std::char::from_u32_unchecked(point) })
                    }
                } else {
                    return CopyAsciiResult::Stop((
                        EncoderResult::OutputFull,
                        self.pos,
                        dest.pos,
                    ));
                }
            }
        }
    };
    CopyAsciiResult::GoOn((non_ascii_ret, ByteTwoHandle::new(dest)))
}
/// Copies a leading run of ASCII bytes into `dest` and then checks that
/// `dest` has room for four more bytes before decoding the non-ASCII
/// sequence that stopped the run.
///
/// Returns `CopyAsciiResult::Stop((result, source_pos, dest_pos))` when
/// * `ascii_to_ascii` reports `None` for the whole `length` (the shorter of
///   the two remainders): `result` is `OutputFull` if the destination
///   remainder was the shorter one, otherwise `InputEmpty`; or
/// * a non-ASCII byte was found but `dest.pos + 3 < dst_len` does not hold:
///   `result` is `OutputFull` and the sequence is left unconsumed.
///
/// Otherwise returns `CopyAsciiResult::GoOn((non_ascii, handle))` with the
/// source advanced past the decoded sequence.
#[inline(always)]
pub fn copy_ascii_to_check_space_four<'b>(
    &mut self,
    dest: &'b mut ByteDestination<'a>,
) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteFourHandle<'b, 'a>)> {
    // The inner block keeps the slice borrows scoped so that `dest` can be
    // handed to the handle constructor afterwards.
    let non_ascii_ret = {
        // Read the total length up front, before `dest.slice` is mutably
        // borrowed below.
        let dst_len = dest.slice.len();
        let src_remaining = &self.slice[self.pos..];
        let dst_remaining = &mut dest.slice[dest.pos..];
        // Process at most the shorter remainder and remember which side
        // would run out if everything turns out to be ASCII.
        let (pending, length) = if dst_remaining.len() < src_remaining.len() {
            (EncoderResult::OutputFull, dst_remaining.len())
        } else {
            (EncoderResult::InputEmpty, src_remaining.len())
        };
        // NOTE(review): `ascii_to_ascii` is defined in the `ascii` module;
        // presumably it copies ASCII bytes and yields the first non-ASCII
        // byte plus the count copied before it -- confirm there.
        match unsafe {
            ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
        } {
            None => {
                self.pos += length;
                dest.pos += length;
                return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
            }
            Some((non_ascii, consumed)) => {
                self.pos += consumed;
                dest.pos += consumed;
                // Need four free bytes for the handle handed out below.
                if dest.pos + 3 < dst_len {
                    if non_ascii < 0xE0 {
                        // Two-byte sequence.
                        let point = ((u16::from(non_ascii) & 0x1F) << 6)
                            | (u16::from(self.slice[self.pos + 1]) & 0x3F);
                        self.pos += 2;
                        NonAscii::BmpExclAscii(point)
                    } else if non_ascii < 0xF0 {
                        // Three-byte sequence.
                        let point = ((u16::from(non_ascii) & 0xF) << 12)
                            | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
                            | (u16::from(self.slice[self.pos + 2]) & 0x3F);
                        self.pos += 3;
                        NonAscii::BmpExclAscii(point)
                    } else {
                        // Four-byte sequence.
                        let point = ((u32::from(non_ascii) & 0x7) << 18)
                            | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
                            | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
                            | (u32::from(self.slice[self.pos + 3]) & 0x3F);
                        self.pos += 4;
                        NonAscii::Astral(unsafe { ::std::char::from_u32_unchecked(point) })
                    }
                } else {
                    return CopyAsciiResult::Stop((
                        EncoderResult::OutputFull,
                        self.pos,
                        dest.pos,
                    ));
                }
            }
        }
    };
    CopyAsciiResult::GoOn((non_ascii_ret, ByteFourHandle::new(dest)))
}
1689 }
1690
1691 pub struct Utf8ReadHandle<'a, 'b>
1692 where
1693 'b: 'a,
1694 {
1695 source: &'a mut Utf8Source<'b>,
1696 }
1697
1698 impl<'a, 'b> Utf8ReadHandle<'a, 'b>
1699 where
1700 'b: 'a,
1701 {
1702 #[inline(always)]
new(src: &'a mut Utf8Source<'b>) -> Utf8ReadHandle<'a, 'b>1703 fn new(src: &'a mut Utf8Source<'b>) -> Utf8ReadHandle<'a, 'b> {
1704 Utf8ReadHandle { source: src }
1705 }
1706 #[inline(always)]
read(self) -> (char, Utf8UnreadHandle<'a, 'b>)1707 pub fn read(self) -> (char, Utf8UnreadHandle<'a, 'b>) {
1708 let character = self.source.read();
1709 let handle = Utf8UnreadHandle::new(self.source);
1710 (character, handle)
1711 }
1712 #[inline(always)]
read_enum(self) -> (Unicode, Utf8UnreadHandle<'a, 'b>)1713 pub fn read_enum(self) -> (Unicode, Utf8UnreadHandle<'a, 'b>) {
1714 let character = self.source.read_enum();
1715 let handle = Utf8UnreadHandle::new(self.source);
1716 (character, handle)
1717 }
1718 #[inline(always)]
consumed(&self) -> usize1719 pub fn consumed(&self) -> usize {
1720 self.source.consumed()
1721 }
1722 }
1723
1724 pub struct Utf8UnreadHandle<'a, 'b>
1725 where
1726 'b: 'a,
1727 {
1728 source: &'a mut Utf8Source<'b>,
1729 }
1730
1731 impl<'a, 'b> Utf8UnreadHandle<'a, 'b>
1732 where
1733 'b: 'a,
1734 {
1735 #[inline(always)]
new(src: &'a mut Utf8Source<'b>) -> Utf8UnreadHandle<'a, 'b>1736 fn new(src: &'a mut Utf8Source<'b>) -> Utf8UnreadHandle<'a, 'b> {
1737 Utf8UnreadHandle { source: src }
1738 }
1739 #[inline(always)]
unread(self) -> usize1740 pub fn unread(self) -> usize {
1741 self.source.unread()
1742 }
1743 #[inline(always)]
consumed(&self) -> usize1744 pub fn consumed(&self) -> usize {
1745 self.source.consumed()
1746 }
1747 #[inline(always)]
commit(self) -> &'a mut Utf8Source<'b>1748 pub fn commit(self) -> &'a mut Utf8Source<'b> {
1749 self.source
1750 }
1751 }
1752
1753 // Byte destination
1754
1755 pub struct ByteOneHandle<'a, 'b>
1756 where
1757 'b: 'a,
1758 {
1759 dest: &'a mut ByteDestination<'b>,
1760 }
1761
1762 impl<'a, 'b> ByteOneHandle<'a, 'b>
1763 where
1764 'b: 'a,
1765 {
1766 #[inline(always)]
new(dst: &'a mut ByteDestination<'b>) -> ByteOneHandle<'a, 'b>1767 fn new(dst: &'a mut ByteDestination<'b>) -> ByteOneHandle<'a, 'b> {
1768 ByteOneHandle { dest: dst }
1769 }
1770 #[inline(always)]
written(&self) -> usize1771 pub fn written(&self) -> usize {
1772 self.dest.written()
1773 }
1774 #[inline(always)]
write_one(self, first: u8) -> &'a mut ByteDestination<'b>1775 pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1776 self.dest.write_one(first);
1777 self.dest
1778 }
1779 }
1780
1781 pub struct ByteTwoHandle<'a, 'b>
1782 where
1783 'b: 'a,
1784 {
1785 dest: &'a mut ByteDestination<'b>,
1786 }
1787
1788 impl<'a, 'b> ByteTwoHandle<'a, 'b>
1789 where
1790 'b: 'a,
1791 {
1792 #[inline(always)]
new(dst: &'a mut ByteDestination<'b>) -> ByteTwoHandle<'a, 'b>1793 fn new(dst: &'a mut ByteDestination<'b>) -> ByteTwoHandle<'a, 'b> {
1794 ByteTwoHandle { dest: dst }
1795 }
1796 #[inline(always)]
written(&self) -> usize1797 pub fn written(&self) -> usize {
1798 self.dest.written()
1799 }
1800 #[inline(always)]
write_one(self, first: u8) -> &'a mut ByteDestination<'b>1801 pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1802 self.dest.write_one(first);
1803 self.dest
1804 }
1805 #[inline(always)]
write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b>1806 pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
1807 self.dest.write_two(first, second);
1808 self.dest
1809 }
1810 }
1811
1812 pub struct ByteThreeHandle<'a, 'b>
1813 where
1814 'b: 'a,
1815 {
1816 dest: &'a mut ByteDestination<'b>,
1817 }
1818
1819 impl<'a, 'b> ByteThreeHandle<'a, 'b>
1820 where
1821 'b: 'a,
1822 {
1823 #[inline(always)]
new(dst: &'a mut ByteDestination<'b>) -> ByteThreeHandle<'a, 'b>1824 fn new(dst: &'a mut ByteDestination<'b>) -> ByteThreeHandle<'a, 'b> {
1825 ByteThreeHandle { dest: dst }
1826 }
1827 #[inline(always)]
written(&self) -> usize1828 pub fn written(&self) -> usize {
1829 self.dest.written()
1830 }
1831 #[inline(always)]
write_one(self, first: u8) -> &'a mut ByteDestination<'b>1832 pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1833 self.dest.write_one(first);
1834 self.dest
1835 }
1836 #[inline(always)]
write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b>1837 pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
1838 self.dest.write_two(first, second);
1839 self.dest
1840 }
1841 #[inline(always)]
write_three(self, first: u8, second: u8, third: u8) -> &'a mut ByteDestination<'b>1842 pub fn write_three(self, first: u8, second: u8, third: u8) -> &'a mut ByteDestination<'b> {
1843 self.dest.write_three(first, second, third);
1844 self.dest
1845 }
1846 #[inline(always)]
write_three_return_written(self, first: u8, second: u8, third: u8) -> usize1847 pub fn write_three_return_written(self, first: u8, second: u8, third: u8) -> usize {
1848 self.dest.write_three(first, second, third);
1849 self.dest.written()
1850 }
1851 }
1852
1853 pub struct ByteFourHandle<'a, 'b>
1854 where
1855 'b: 'a,
1856 {
1857 dest: &'a mut ByteDestination<'b>,
1858 }
1859
1860 impl<'a, 'b> ByteFourHandle<'a, 'b>
1861 where
1862 'b: 'a,
1863 {
1864 #[inline(always)]
new(dst: &'a mut ByteDestination<'b>) -> ByteFourHandle<'a, 'b>1865 fn new(dst: &'a mut ByteDestination<'b>) -> ByteFourHandle<'a, 'b> {
1866 ByteFourHandle { dest: dst }
1867 }
1868 #[inline(always)]
written(&self) -> usize1869 pub fn written(&self) -> usize {
1870 self.dest.written()
1871 }
1872 #[inline(always)]
write_one(self, first: u8) -> &'a mut ByteDestination<'b>1873 pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1874 self.dest.write_one(first);
1875 self.dest
1876 }
1877 #[inline(always)]
write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b>1878 pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
1879 self.dest.write_two(first, second);
1880 self.dest
1881 }
1882 #[inline(always)]
write_four( self, first: u8, second: u8, third: u8, fourth: u8, ) -> &'a mut ByteDestination<'b>1883 pub fn write_four(
1884 self,
1885 first: u8,
1886 second: u8,
1887 third: u8,
1888 fourth: u8,
1889 ) -> &'a mut ByteDestination<'b> {
1890 self.dest.write_four(first, second, third, fourth);
1891 self.dest
1892 }
1893 }
1894
/// Write cursor over a mutable byte slice.
pub struct ByteDestination<'a> {
    /// The output buffer.
    slice: &'a mut [u8],
    /// Index at which the next byte will be written.
    pos: usize,
}
1899
1900 impl<'a> ByteDestination<'a> {
/// Creates a destination that starts writing at the beginning of `dst`.
#[inline(always)]
pub fn new(dst: &mut [u8]) -> ByteDestination {
    ByteDestination { slice: dst, pos: 0 }
}
1905 #[inline(always)]
check_space_one<'b>(&'b mut self) -> Space<ByteOneHandle<'b, 'a>>1906 pub fn check_space_one<'b>(&'b mut self) -> Space<ByteOneHandle<'b, 'a>> {
1907 if self.pos < self.slice.len() {
1908 Space::Available(ByteOneHandle::new(self))
1909 } else {
1910 Space::Full(self.written())
1911 }
1912 }
1913 #[inline(always)]
check_space_two<'b>(&'b mut self) -> Space<ByteTwoHandle<'b, 'a>>1914 pub fn check_space_two<'b>(&'b mut self) -> Space<ByteTwoHandle<'b, 'a>> {
1915 if self.pos + 1 < self.slice.len() {
1916 Space::Available(ByteTwoHandle::new(self))
1917 } else {
1918 Space::Full(self.written())
1919 }
1920 }
1921 #[inline(always)]
check_space_three<'b>(&'b mut self) -> Space<ByteThreeHandle<'b, 'a>>1922 pub fn check_space_three<'b>(&'b mut self) -> Space<ByteThreeHandle<'b, 'a>> {
1923 if self.pos + 2 < self.slice.len() {
1924 Space::Available(ByteThreeHandle::new(self))
1925 } else {
1926 Space::Full(self.written())
1927 }
1928 }
1929 #[inline(always)]
check_space_four<'b>(&'b mut self) -> Space<ByteFourHandle<'b, 'a>>1930 pub fn check_space_four<'b>(&'b mut self) -> Space<ByteFourHandle<'b, 'a>> {
1931 if self.pos + 3 < self.slice.len() {
1932 Space::Available(ByteFourHandle::new(self))
1933 } else {
1934 Space::Full(self.written())
1935 }
1936 }
1937 #[inline(always)]
written(&self) -> usize1938 pub fn written(&self) -> usize {
1939 self.pos
1940 }
1941 #[inline(always)]
write_one(&mut self, first: u8)1942 fn write_one(&mut self, first: u8) {
1943 self.slice[self.pos] = first;
1944 self.pos += 1;
1945 }
1946 #[inline(always)]
write_two(&mut self, first: u8, second: u8)1947 fn write_two(&mut self, first: u8, second: u8) {
1948 self.slice[self.pos] = first;
1949 self.slice[self.pos + 1] = second;
1950 self.pos += 2;
1951 }
1952 #[inline(always)]
write_three(&mut self, first: u8, second: u8, third: u8)1953 fn write_three(&mut self, first: u8, second: u8, third: u8) {
1954 self.slice[self.pos] = first;
1955 self.slice[self.pos + 1] = second;
1956 self.slice[self.pos + 2] = third;
1957 self.pos += 3;
1958 }
1959 #[inline(always)]
write_four(&mut self, first: u8, second: u8, third: u8, fourth: u8)1960 fn write_four(&mut self, first: u8, second: u8, third: u8, fourth: u8) {
1961 self.slice[self.pos] = first;
1962 self.slice[self.pos + 1] = second;
1963 self.slice[self.pos + 2] = third;
1964 self.slice[self.pos + 3] = fourth;
1965 self.pos += 4;
1966 }
1967 }
1968