1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4 
5 use crate::{
6     nsACString, nsAString, nsCStringLike, BulkWriteOk, Gecko_FallibleAssignCString,
7     Latin1StringLike,
8 };
9 use encoding_rs::mem::*;
10 use encoding_rs::Encoding;
11 use std::slice;
12 
/// Worst-case output length math for a conversion that may need up to
/// three output code units per input code unit, as stated in the docs of
/// `convert_utf16_to_utf8()`.
///
/// Returns `None` if the multiplication overflows `usize`.
#[inline(always)]
fn times_three(a: usize) -> Option<usize> {
    usize::checked_mul(a, 3)
}
19 
/// Length math for a conversion whose worst-case output length equals the
/// input length. Never fails; the `Option` only exists so that this has the
/// same shape as the other length math functions.
#[inline(always)]
fn identity(a: usize) -> Option<usize> {
    Option::from(a)
}
24 
/// Length math for a conversion that wants one output code unit more than
/// there are input code units.
///
/// Returns `None` if the addition overflows `usize`.
#[inline(always)]
fn plus_one(a: usize) -> Option<usize> {
    usize::checked_add(a, 1)
}
29 
/// Typical cache line size per
/// https://stackoverflow.com/questions/14707803/line-size-of-l1-and-l2-caches
///
/// For consistent behavior, not trying to use 128 on aarch64
/// or other fanciness like that.
const CACHE_LINE: usize = 64;

/// Mask selecting the low bits of an address, i.e. the byte offset of that
/// address within its cache line. This works because `CACHE_LINE` is a
/// power of two.
const CACHE_LINE_MASK: usize = CACHE_LINE - 1;
38 
39 /// Returns true if the string is both longer than a cache line
40 /// and the first cache line is ASCII.
41 #[inline(always)]
long_string_starts_with_ascii(buffer: &[u8]) -> bool42 fn long_string_starts_with_ascii(buffer: &[u8]) -> bool {
43     // We examine data only up to the end of the cache line
44     // to make this check minimally disruptive.
45     if buffer.len() <= CACHE_LINE {
46         return false;
47     }
48     let bound = CACHE_LINE - ((buffer.as_ptr() as usize) & CACHE_LINE_MASK);
49     is_ascii(&buffer[..bound])
50 }
51 
52 /// Returns true if the string is both longer than two cache lines
53 /// and the first two cache lines are Basic Latin.
54 #[inline(always)]
long_string_stars_with_basic_latin(buffer: &[u16]) -> bool55 fn long_string_stars_with_basic_latin(buffer: &[u16]) -> bool {
56     // We look at two cache lines with code unit size of two. There is need
57     // to look at more than one cache line in the UTF-16 case, because looking
58     // at just one cache line wouldn't catch non-ASCII Latin with high enough
59     // probability with Latin-script languages that have relatively infrequent
60     // non-ASCII characters.
61     if buffer.len() <= CACHE_LINE {
62         return false;
63     }
64     let bound = (CACHE_LINE * 2 - ((buffer.as_ptr() as usize) & CACHE_LINE_MASK)) / 2;
65     is_basic_latin(&buffer[..bound])
66 }
67 
// Ignoring the copy avoidance complications of conversions between Latin1 and
// UTF-8, a conversion function has the outward form of
// `fn F(&mut self, other: &[T], old_len: usize) -> Result<BulkWriteOk, ()>`,
// where `T` is either `u8` or `u16`. `other` is the slice whose converted
// content is to be appended to `self` and `old_len` indicates how many
// code units of `self` are to be preserved (0 for the assignment case and
// `self.len()` for the appending case).
75 //
// As implementation parameters a conversion function needs to know the
// math for computing the worst case conversion length in code units given
// the input length in code units. For a _constant conversion_ the number
// of code units the conversion produces equals the number of code units
// in the input. For a _shrinking conversion_ the maximum number of code
// units the conversion can produce equals the number of code units in
// the input, but the conversion can produce fewer code units. Still, due
// to implementation details, the function might want _one_ unit more of
// output space. For an _expanding conversion_ (no need for macro), the
// minimum number of code units produced by the conversion is the number
// of code units in the input, but the conversion can produce more.
87 //
88 // Copy avoidance conversions avoid copying a refcounted buffer when it's
89 // ASCII-only.
90 //
91 // Internally, a conversion function needs to know the underlying
92 // encoding_rs conversion function, the math for computing the required
93 // output buffer size and, depending on the case, the underlying
94 // encoding_rs ASCII prefix handling function.
95 
/// Generates a method for a conversion whose output may have fewer code
/// units than its input.
///
/// The generated method has the form
/// `fn $name(&mut self, other: $other_ty, old_len: usize) -> Result<BulkWriteOk, ()>`.
/// It returns `Err(())` if the length math overflows or the buffer cannot
/// be obtained.
///
/// `$name` is the name of the function to generate
/// `$convert` is the underlying `encoding_rs::mem` function to use
/// `$other_ty` is the type of the input slice
/// `$math` is the worst-case length math that `$convert` expects
macro_rules! shrinking_conversion {
    (name = $name:ident,
     convert = $convert:ident,
     other_ty = $other_ty:ty,
     math = $math:ident) => {
        fn $name(&mut self, other: $other_ty, old_len: usize) -> Result<BulkWriteOk, ()> {
            // Size the buffer for the worst case on top of what is preserved.
            let worst_case = $math(other.len()).ok_or(())?;
            let capacity = old_len.checked_add(worst_case).ok_or(())?;
            let mut handle = unsafe { self.bulk_write(capacity, old_len, false)? };
            let produced = $convert(other, &mut handle.as_mut_slice()[old_len..]);
            // The conversion may have produced less than the worst case.
            let total = old_len + produced;
            Ok(handle.finish(total, total > CACHE_LINE))
        }
    };
}
121 
/// Generates a method for a conversion whose output always has exactly as
/// many code units as its input.
///
/// The generated method additionally takes `allow_shrinking`, which is
/// passed on to `bulk_write`. It returns `Err(())` if the resulting length
/// overflows or the buffer cannot be obtained.
///
/// `$name` is the name of the function to generate
/// `$convert` is the underlying `encoding_rs::mem` function to use
/// `$other_ty` is the type of the input slice
macro_rules! constant_conversion {
    (name = $name:ident,
     convert = $convert:ident,
     other_ty = $other_ty:ty) => {
        fn $name(
            &mut self,
            other: $other_ty,
            old_len: usize,
            allow_shrinking: bool,
        ) -> Result<BulkWriteOk, ()> {
            // The final length is known up front: one output unit per input unit.
            let total = match old_len.checked_add(other.len()) {
                Some(total) => total,
                None => return Err(()),
            };
            let mut handle = unsafe { self.bulk_write(total, old_len, allow_shrinking)? };
            let destination = &mut handle.as_mut_slice()[old_len..];
            $convert(other, destination);
            Ok(handle.finish(total, false))
        }
    };
}
148 
/// Generates a wrapper that tries to avoid copying: when `other` is an
/// abstract string (per `adapter.is_abstract()`), is entirely ASCII and no
/// old data of `self` is kept (`old_len == 0`), the incoming string is
/// assigned via `Gecko_FallibleAssignCString` instead of being converted.
///
/// In every other case the wrapper forwards to `$implementation`, passing
/// the length of the ASCII prefix if it was computed and `None` otherwise.
///
/// `$name` is the name of the function to generate
/// `$implementation` is the underlying conversion that takes a slice and
///                   that is used when we can't just adopt the incoming
///                   buffer as-is
/// `$string_like` is the kind of input taken
macro_rules! ascii_copy_avoidance {
    (name = $name:ident,
     implementation = $implementation:ident,
     string_like = $string_like:ident) => {
        fn $name<T: $string_like + ?Sized>(
            &mut self,
            other: &T,
            old_len: usize,
        ) -> Result<BulkWriteOk, ()> {
            let adapter = other.adapt();
            let other_slice = adapter.as_ref();
            if !(adapter.is_abstract() && old_len == 0) {
                // Not eligible for adoption; the ASCII prefix is not measured.
                return self.$implementation(other_slice, old_len, None);
            }
            let up_to = Encoding::ascii_valid_up_to(other_slice);
            if up_to != other_slice.len() {
                // Only a prefix is ASCII; tell the implementation how long it is.
                return self.$implementation(other_slice, old_len, Some(up_to));
            }
            // Calling something whose argument can be obtained from
            // the adapter rather than an nsStringLike avoids a huge
            // lifetime mess by keeping nsStringLike and
            // Latin1StringLike free of lifetime interdependencies.
            if unsafe { Gecko_FallibleAssignCString(self, other.adapt().as_ptr()) } {
                Ok(BulkWriteOk {})
            } else {
                Err(())
            }
        }
    };
}
189 
190 impl nsAString {
    // Valid UTF-8 to UTF-16

    // Generates
    // `fallible_append_str_impl(&mut self, other: &str, old_len: usize)`.
    //
    // Documentation says the destination buffer needs to have
    // as many code units as the input, hence `identity` as the math.
    shrinking_conversion!(
        name = fallible_append_str_impl,
        convert = convert_str_to_utf16,
        other_ty = &str,
        math = identity
    );
201 
202     /// Convert a valid UTF-8 string into valid UTF-16 and replace the content
203     /// of this string with the conversion result.
assign_str(&mut self, other: &str)204     pub fn assign_str(&mut self, other: &str) {
205         self.fallible_append_str_impl(other, 0)
206             .expect("Out of memory");
207     }
208 
209     /// Convert a valid UTF-8 string into valid UTF-16 and fallibly replace the
210     /// content of this string with the conversion result.
fallible_assign_str(&mut self, other: &str) -> Result<(), ()>211     pub fn fallible_assign_str(&mut self, other: &str) -> Result<(), ()> {
212         self.fallible_append_str_impl(other, 0).map(|_| ())
213     }
214 
215     /// Convert a valid UTF-8 string into valid UTF-16 and append the conversion
216     /// to this string.
append_str(&mut self, other: &str)217     pub fn append_str(&mut self, other: &str) {
218         let len = self.len();
219         self.fallible_append_str_impl(other, len)
220             .expect("Out of memory");
221     }
222 
223     /// Convert a valid UTF-8 string into valid UTF-16 and fallibly append the
224     /// conversion to this string.
fallible_append_str(&mut self, other: &str) -> Result<(), ()>225     pub fn fallible_append_str(&mut self, other: &str) -> Result<(), ()> {
226         let len = self.len();
227         self.fallible_append_str_impl(other, len).map(|_| ())
228     }
229 
    // Potentially-invalid UTF-8 to UTF-16

    // Generates
    // `fallible_append_utf8_impl(&mut self, other: &[u8], old_len: usize)`.
    //
    // Documentation says the destination buffer needs to have
    // one more code unit than the input, hence `plus_one` as the math.
    shrinking_conversion!(
        name = fallible_append_utf8_impl,
        convert = convert_utf8_to_utf16,
        other_ty = &[u8],
        math = plus_one
    );
240 
241     /// Convert a potentially-invalid UTF-8 string into valid UTF-16
242     /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
243     /// replace the content of this string with the conversion result.
assign_utf8(&mut self, other: &[u8])244     pub fn assign_utf8(&mut self, other: &[u8]) {
245         self.fallible_append_utf8_impl(other, 0)
246             .expect("Out of memory");
247     }
248 
249     /// Convert a potentially-invalid UTF-8 string into valid UTF-16
250     /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
251     /// fallibly replace the content of this string with the conversion result.
fallible_assign_utf8(&mut self, other: &[u8]) -> Result<(), ()>252     pub fn fallible_assign_utf8(&mut self, other: &[u8]) -> Result<(), ()> {
253         self.fallible_append_utf8_impl(other, 0).map(|_| ())
254     }
255 
256     /// Convert a potentially-invalid UTF-8 string into valid UTF-16
257     /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
258     /// append the conversion result to this string.
append_utf8(&mut self, other: &[u8])259     pub fn append_utf8(&mut self, other: &[u8]) {
260         let len = self.len();
261         self.fallible_append_utf8_impl(other, len)
262             .expect("Out of memory");
263     }
264 
265     /// Convert a potentially-invalid UTF-8 string into valid UTF-16
266     /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
267     /// fallibly append the conversion result to this string.
fallible_append_utf8(&mut self, other: &[u8]) -> Result<(), ()>268     pub fn fallible_append_utf8(&mut self, other: &[u8]) -> Result<(), ()> {
269         let len = self.len();
270         self.fallible_append_utf8_impl(other, len).map(|_| ())
271     }
272 
    // Latin1 to UTF-16

    // Generates `fallible_append_latin1_impl(&mut self, other: &[u8],
    // old_len: usize, allow_shrinking: bool)`. This is a constant
    // conversion: the output has exactly as many code units as the input,
    // so no worst-case length math is needed.
    constant_conversion!(
        name = fallible_append_latin1_impl,
        convert = convert_latin1_to_utf16,
        other_ty = &[u8]
    );
280 
281     /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
282     /// into UTF-16 and replace the content of this string with the conversion result.
assign_latin1(&mut self, other: &[u8])283     pub fn assign_latin1(&mut self, other: &[u8]) {
284         self.fallible_append_latin1_impl(other, 0, true)
285             .expect("Out of memory");
286     }
287 
288     /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
289     /// into UTF-16 and fallibly replace the content of this string with the
290     /// conversion result.
fallible_assign_latin1(&mut self, other: &[u8]) -> Result<(), ()>291     pub fn fallible_assign_latin1(&mut self, other: &[u8]) -> Result<(), ()> {
292         self.fallible_append_latin1_impl(other, 0, true).map(|_| ())
293     }
294 
295     /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
296     /// into UTF-16 and append the conversion result to this string.
append_latin1(&mut self, other: &[u8])297     pub fn append_latin1(&mut self, other: &[u8]) {
298         let len = self.len();
299         self.fallible_append_latin1_impl(other, len, false)
300             .expect("Out of memory");
301     }
302 
303     /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
304     /// into UTF-16 and fallibly append the conversion result to this string.
fallible_append_latin1(&mut self, other: &[u8]) -> Result<(), ()>305     pub fn fallible_append_latin1(&mut self, other: &[u8]) -> Result<(), ()> {
306         let len = self.len();
307         self.fallible_append_latin1_impl(other, len, false)
308             .map(|_| ())
309     }
310 }
311 
312 impl nsACString {
313     // UTF-16 to UTF-8
314 
fallible_append_utf16_to_utf8_impl( &mut self, other: &[u16], old_len: usize, ) -> Result<BulkWriteOk, ()>315     fn fallible_append_utf16_to_utf8_impl(
316         &mut self,
317         other: &[u16],
318         old_len: usize,
319     ) -> Result<BulkWriteOk, ()> {
320         // We first size the buffer for ASCII if the first two cache lines are ASCII. If that turns out
321         // not to be enough, we size for the worst case given the length of the remaining input at that
322         // point. BUT if the worst case fits inside the inline capacity of an autostring, we skip
323         // the ASCII stuff.
324         let worst_case_needed = if let Some(inline_capacity) = self.inline_capacity() {
325             let worst_case = times_three(other.len()).ok_or(())?;
326             if worst_case <= inline_capacity {
327                 Some(worst_case)
328             } else {
329                 None
330             }
331         } else {
332             None
333         };
334         let (filled, read, mut handle) =
335             if worst_case_needed.is_none() && long_string_stars_with_basic_latin(other) {
336                 let new_len_with_ascii = old_len.checked_add(other.len()).ok_or(())?;
337                 let mut handle = unsafe { self.bulk_write(new_len_with_ascii, old_len, false)? };
338                 let (read, written) =
339                     convert_utf16_to_utf8_partial(other, &mut handle.as_mut_slice()[old_len..]);
340                 let left = other.len() - read;
341                 if left == 0 {
342                     return Ok(handle.finish(old_len + written, true));
343                 }
344                 let filled = old_len + written;
345                 let needed = times_three(left).ok_or(())?;
346                 let new_len = filled.checked_add(needed).ok_or(())?;
347                 unsafe {
348                     handle.restart_bulk_write(new_len, filled, false)?;
349                 }
350                 (filled, read, handle)
351             } else {
352                 // Started with non-ASCII. Compute worst case
353                 let needed = if let Some(n) = worst_case_needed {
354                     n
355                 } else {
356                     times_three(other.len()).ok_or(())?
357                 };
358                 let new_len = old_len.checked_add(needed).ok_or(())?;
359                 let handle = unsafe { self.bulk_write(new_len, old_len, false)? };
360                 (old_len, 0, handle)
361             };
362         let written = convert_utf16_to_utf8(&other[read..], &mut handle.as_mut_slice()[filled..]);
363         Ok(handle.finish(filled + written, true))
364     }
365 
366     /// Convert a potentially-invalid UTF-16 string into valid UTF-8
367     /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
368     /// replace the content of this string with the conversion result.
assign_utf16_to_utf8(&mut self, other: &[u16])369     pub fn assign_utf16_to_utf8(&mut self, other: &[u16]) {
370         self.fallible_append_utf16_to_utf8_impl(other, 0)
371             .expect("Out of memory");
372     }
373 
374     /// Convert a potentially-invalid UTF-16 string into valid UTF-8
375     /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
376     /// fallibly replace the content of this string with the conversion result.
fallible_assign_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()>377     pub fn fallible_assign_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()> {
378         self.fallible_append_utf16_to_utf8_impl(other, 0)
379             .map(|_| ())
380     }
381 
382     /// Convert a potentially-invalid UTF-16 string into valid UTF-8
383     /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
384     /// append the conversion result to this string.
append_utf16_to_utf8(&mut self, other: &[u16])385     pub fn append_utf16_to_utf8(&mut self, other: &[u16]) {
386         let len = self.len();
387         self.fallible_append_utf16_to_utf8_impl(other, len)
388             .expect("Out of memory");
389     }
390 
391     /// Convert a potentially-invalid UTF-16 string into valid UTF-8
392     /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
393     /// fallibly append the conversion result to this string.
fallible_append_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()>394     pub fn fallible_append_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()> {
395         let len = self.len();
396         self.fallible_append_utf16_to_utf8_impl(other, len)
397             .map(|_| ())
398     }
399 
    // UTF-16 to Latin1

    // Generates `fallible_append_utf16_to_latin1_lossy_impl(&mut self,
    // other: &[u16], old_len: usize, allow_shrinking: bool)`. This is a
    // constant conversion: the output has exactly as many code units as the
    // input.
    constant_conversion!(
        name = fallible_append_utf16_to_latin1_lossy_impl,
        convert = convert_utf16_to_latin1_lossy,
        other_ty = &[u16]
    );
407 
408     /// Convert a UTF-16 string whose all code points are below U+0100 into
409     /// a Latin1 (scalar value is byte value; not windows-1252!) string and
410     /// replace the content of this string with the conversion result.
411     ///
412     /// # Panics
413     ///
414     /// If the input contains code points above U+00FF or is not valid UTF-16,
415     /// panics in debug mode and produces garbage in a memory-safe way in
416     /// release builds. The nature of the garbage may differ based on CPU
417     /// architecture and must not be relied upon.
assign_utf16_to_latin1_lossy(&mut self, other: &[u16])418     pub fn assign_utf16_to_latin1_lossy(&mut self, other: &[u16]) {
419         self.fallible_append_utf16_to_latin1_lossy_impl(other, 0, true)
420             .expect("Out of memory");
421     }
422 
423     /// Convert a UTF-16 string whose all code points are below U+0100 into
424     /// a Latin1 (scalar value is byte value; not windows-1252!) string and
425     /// fallibly replace the content of this string with the conversion result.
426     ///
427     /// # Panics
428     ///
429     /// If the input contains code points above U+00FF or is not valid UTF-16,
430     /// panics in debug mode and produces garbage in a memory-safe way in
431     /// release builds. The nature of the garbage may differ based on CPU
432     /// architecture and must not be relied upon.
fallible_assign_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()>433     pub fn fallible_assign_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()> {
434         self.fallible_append_utf16_to_latin1_lossy_impl(other, 0, true)
435             .map(|_| ())
436     }
437 
438     /// Convert a UTF-16 string whose all code points are below U+0100 into
439     /// a Latin1 (scalar value is byte value; not windows-1252!) string and
440     /// append the conversion result to this string.
441     ///
442     /// # Panics
443     ///
444     /// If the input contains code points above U+00FF or is not valid UTF-16,
445     /// panics in debug mode and produces garbage in a memory-safe way in
446     /// release builds. The nature of the garbage may differ based on CPU
447     /// architecture and must not be relied upon.
append_utf16_to_latin1_lossy(&mut self, other: &[u16])448     pub fn append_utf16_to_latin1_lossy(&mut self, other: &[u16]) {
449         let len = self.len();
450         self.fallible_append_utf16_to_latin1_lossy_impl(other, len, false)
451             .expect("Out of memory");
452     }
453 
454     /// Convert a UTF-16 string whose all code points are below U+0100 into
455     /// a Latin1 (scalar value is byte value; not windows-1252!) string and
456     /// fallibly append the conversion result to this string.
457     ///
458     /// # Panics
459     ///
460     /// If the input contains code points above U+00FF or is not valid UTF-16,
461     /// panics in debug mode and produces garbage in a memory-safe way in
462     /// release builds. The nature of the garbage may differ based on CPU
463     /// architecture and must not be relied upon.
fallible_append_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()>464     pub fn fallible_append_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()> {
465         let len = self.len();
466         self.fallible_append_utf16_to_latin1_lossy_impl(other, len, false)
467             .map(|_| ())
468     }
469 
    // UTF-8 to Latin1

    // Generates `fallible_append_utf8_to_latin1_lossy_check`, which tries to
    // avoid a copy for all-ASCII input and otherwise forwards to
    // `fallible_append_utf8_to_latin1_lossy_impl`.
    ascii_copy_avoidance!(
        name = fallible_append_utf8_to_latin1_lossy_check,
        implementation = fallible_append_utf8_to_latin1_lossy_impl,
        string_like = nsCStringLike
    );
477 
fallible_append_utf8_to_latin1_lossy_impl( &mut self, other: &[u8], old_len: usize, maybe_num_ascii: Option<usize>, ) -> Result<BulkWriteOk, ()>478     fn fallible_append_utf8_to_latin1_lossy_impl(
479         &mut self,
480         other: &[u8],
481         old_len: usize,
482         maybe_num_ascii: Option<usize>,
483     ) -> Result<BulkWriteOk, ()> {
484         let new_len = old_len.checked_add(other.len()).ok_or(())?;
485         let num_ascii = maybe_num_ascii.unwrap_or(0);
486         // Already checked for overflow above, so this can't overflow.
487         let old_len_plus_num_ascii = old_len + num_ascii;
488         let mut handle = unsafe { self.bulk_write(new_len, old_len, false)? };
489         let written = {
490             let buffer = handle.as_mut_slice();
491             if num_ascii != 0 {
492                 (&mut buffer[old_len..old_len_plus_num_ascii]).copy_from_slice(&other[..num_ascii]);
493             }
494             convert_utf8_to_latin1_lossy(&other[num_ascii..], &mut buffer[old_len_plus_num_ascii..])
495         };
496         Ok(handle.finish(old_len_plus_num_ascii + written, true))
497     }
498 
499     /// Convert a UTF-8 string whose all code points are below U+0100 into
500     /// a Latin1 (scalar value is byte value; not windows-1252!) string and
501     /// replace the content of this string with the conversion result.
502     ///
503     /// # Panics
504     ///
505     /// If the input contains code points above U+00FF or is not valid UTF-8,
506     /// panics in debug mode and produces garbage in a memory-safe way in
507     /// release builds. The nature of the garbage may differ based on CPU
508     /// architecture and must not be relied upon.
assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T)509     pub fn assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
510         self.fallible_append_utf8_to_latin1_lossy_check(other, 0)
511             .expect("Out of memory");
512     }
513 
514     /// Convert a UTF-8 string whose all code points are below U+0100 into
515     /// a Latin1 (scalar value is byte value; not windows-1252!) string and
516     /// fallibly replace the content of this string with the conversion result.
517     ///
518     /// # Panics
519     ///
520     /// If the input contains code points above U+00FF or is not valid UTF-8,
521     /// panics in debug mode and produces garbage in a memory-safe way in
522     /// release builds. The nature of the garbage may differ based on CPU
523     /// architecture and must not be relied upon.
fallible_assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>( &mut self, other: &T, ) -> Result<(), ()>524     pub fn fallible_assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(
525         &mut self,
526         other: &T,
527     ) -> Result<(), ()> {
528         self.fallible_append_utf8_to_latin1_lossy_check(other, 0)
529             .map(|_| ())
530     }
531 
532     /// Convert a UTF-8 string whose all code points are below U+0100 into
533     /// a Latin1 (scalar value is byte value; not windows-1252!) string and
534     /// append the conversion result to this string.
535     ///
536     /// # Panics
537     ///
538     /// If the input contains code points above U+00FF or is not valid UTF-8,
539     /// panics in debug mode and produces garbage in a memory-safe way in
540     /// release builds. The nature of the garbage may differ based on CPU
541     /// architecture and must not be relied upon.
append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T)542     pub fn append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
543         let len = self.len();
544         self.fallible_append_utf8_to_latin1_lossy_check(other, len)
545             .expect("Out of memory");
546     }
547 
548     /// Convert a UTF-8 string whose all code points are below U+0100 into
549     /// a Latin1 (scalar value is byte value; not windows-1252!) string and
550     /// fallibly append the conversion result to this string.
551     ///
552     /// # Panics
553     ///
554     /// If the input contains code points above U+00FF or is not valid UTF-8,
555     /// panics in debug mode and produces garbage in a memory-safe way in
556     /// release builds. The nature of the garbage may differ based on CPU
557     /// architecture and must not be relied upon.
fallible_append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>( &mut self, other: &T, ) -> Result<(), ()>558     pub fn fallible_append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(
559         &mut self,
560         other: &T,
561     ) -> Result<(), ()> {
562         let len = self.len();
563         self.fallible_append_utf8_to_latin1_lossy_check(other, len)
564             .map(|_| ())
565     }
566 
    // Latin1 to UTF-8 CString

    // Generates `fallible_append_latin1_to_utf8_check`, which tries to avoid
    // a copy for all-ASCII input and otherwise forwards to
    // `fallible_append_latin1_to_utf8_impl`.
    ascii_copy_avoidance!(
        name = fallible_append_latin1_to_utf8_check,
        implementation = fallible_append_latin1_to_utf8_impl,
        string_like = Latin1StringLike
    );
574 
fallible_append_latin1_to_utf8_impl( &mut self, other: &[u8], old_len: usize, maybe_num_ascii: Option<usize>, ) -> Result<BulkWriteOk, ()>575     fn fallible_append_latin1_to_utf8_impl(
576         &mut self,
577         other: &[u8],
578         old_len: usize,
579         maybe_num_ascii: Option<usize>,
580     ) -> Result<BulkWriteOk, ()> {
581         let (filled, read, mut handle) = if let Some(num_ascii) = maybe_num_ascii {
582             // Wrapper checked for ASCII
583             let left = other.len() - num_ascii;
584             let filled = old_len + num_ascii;
585             let needed = left.checked_mul(2).ok_or(())?;
586             let new_len = filled.checked_add(needed).ok_or(())?;
587             let mut handle = unsafe { self.bulk_write(new_len, old_len, false)? };
588             if num_ascii != 0 {
589                 (&mut handle.as_mut_slice()[old_len..filled]).copy_from_slice(&other[..num_ascii]);
590             }
591             (filled, num_ascii, handle)
592         } else {
593             let worst_case_needed = if let Some(inline_capacity) = self.inline_capacity() {
594                 let worst_case = other.len().checked_mul(2).ok_or(())?;
595                 if worst_case <= inline_capacity {
596                     Some(worst_case)
597                 } else {
598                     None
599                 }
600             } else {
601                 None
602             };
603             if worst_case_needed.is_none() && long_string_starts_with_ascii(other) {
604                 // Wrapper didn't check for ASCII, so let's see if `other` starts with ASCII
605                 // `other` starts with ASCII, so let's first size the buffer
606                 // with optimism that it's ASCII-only.
607                 let new_len_with_ascii = old_len.checked_add(other.len()).ok_or(())?;
608                 let mut handle = unsafe { self.bulk_write(new_len_with_ascii, old_len, false)? };
609                 let (read, written) =
610                     convert_latin1_to_utf8_partial(other, &mut handle.as_mut_slice()[old_len..]);
611                 let left = other.len() - read;
612                 let filled = old_len + written;
613                 if left == 0 {
614                     // `other` fit in the initial allocation
615                     return Ok(handle.finish(filled, true));
616                 }
617                 let needed = left.checked_mul(2).ok_or(())?;
618                 let new_len = filled.checked_add(needed).ok_or(())?;
619                 unsafe {
620                     handle.restart_bulk_write(new_len, filled, false)?;
621                 }
622                 (filled, read, handle)
623             } else {
624                 // Started with non-ASCII. Assume worst case.
625                 let needed = if let Some(n) = worst_case_needed {
626                     n
627                 } else {
628                     other.len().checked_mul(2).ok_or(())?
629                 };
630                 let new_len = old_len.checked_add(needed).ok_or(())?;
631                 let handle = unsafe { self.bulk_write(new_len, old_len, false)? };
632                 (old_len, 0, handle)
633             }
634         };
635         let written = convert_latin1_to_utf8(&other[read..], &mut handle.as_mut_slice()[filled..]);
636         Ok(handle.finish(filled + written, true))
637     }
638 
639     /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
640     /// into UTF-8 and replace the content of this string with the conversion result.
assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T)641     pub fn assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T) {
642         self.fallible_append_latin1_to_utf8_check(other, 0)
643             .expect("Out of memory");
644     }
645 
646     /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
647     /// into UTF-8 and fallibly replace the content of this string with the
648     /// conversion result.
fallible_assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>( &mut self, other: &T, ) -> Result<(), ()>649     pub fn fallible_assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>(
650         &mut self,
651         other: &T,
652     ) -> Result<(), ()> {
653         self.fallible_append_latin1_to_utf8_check(other, 0)
654             .map(|_| ())
655     }
656 
657     /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
658     /// into UTF-8 and append the conversion result to this string.
append_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T)659     pub fn append_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T) {
660         let len = self.len();
661         self.fallible_append_latin1_to_utf8_check(other, len)
662             .expect("Out of memory");
663     }
664 
665     /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
666     /// into UTF-8 and fallibly append the conversion result to this string.
fallible_append_latin1_to_utf8<T: Latin1StringLike + ?Sized>( &mut self, other: &T, ) -> Result<(), ()>667     pub fn fallible_append_latin1_to_utf8<T: Latin1StringLike + ?Sized>(
668         &mut self,
669         other: &T,
670     ) -> Result<(), ()> {
671         let len = self.len();
672         self.fallible_append_latin1_to_utf8_check(other, len)
673             .map(|_| ())
674     }
675 }
676 
677 #[no_mangle]
nsstring_fallible_append_utf8_impl( this: *mut nsAString, other: *const u8, other_len: usize, old_len: usize, ) -> bool678 pub unsafe extern "C" fn nsstring_fallible_append_utf8_impl(
679     this: *mut nsAString,
680     other: *const u8,
681     other_len: usize,
682     old_len: usize,
683 ) -> bool {
684     let other_slice = slice::from_raw_parts(other, other_len);
685     (*this)
686         .fallible_append_utf8_impl(other_slice, old_len)
687         .is_ok()
688 }
689 
690 #[no_mangle]
nsstring_fallible_append_latin1_impl( this: *mut nsAString, other: *const u8, other_len: usize, old_len: usize, allow_shrinking: bool, ) -> bool691 pub unsafe extern "C" fn nsstring_fallible_append_latin1_impl(
692     this: *mut nsAString,
693     other: *const u8,
694     other_len: usize,
695     old_len: usize,
696     allow_shrinking: bool,
697 ) -> bool {
698     let other_slice = slice::from_raw_parts(other, other_len);
699     (*this)
700         .fallible_append_latin1_impl(other_slice, old_len, allow_shrinking)
701         .is_ok()
702 }
703 
704 #[no_mangle]
nscstring_fallible_append_utf16_to_utf8_impl( this: *mut nsACString, other: *const u16, other_len: usize, old_len: usize, ) -> bool705 pub unsafe extern "C" fn nscstring_fallible_append_utf16_to_utf8_impl(
706     this: *mut nsACString,
707     other: *const u16,
708     other_len: usize,
709     old_len: usize,
710 ) -> bool {
711     let other_slice = slice::from_raw_parts(other, other_len);
712     (*this)
713         .fallible_append_utf16_to_utf8_impl(other_slice, old_len)
714         .is_ok()
715 }
716 
717 #[no_mangle]
nscstring_fallible_append_utf16_to_latin1_lossy_impl( this: *mut nsACString, other: *const u16, other_len: usize, old_len: usize, allow_shrinking: bool, ) -> bool718 pub unsafe extern "C" fn nscstring_fallible_append_utf16_to_latin1_lossy_impl(
719     this: *mut nsACString,
720     other: *const u16,
721     other_len: usize,
722     old_len: usize,
723     allow_shrinking: bool,
724 ) -> bool {
725     let other_slice = slice::from_raw_parts(other, other_len);
726     (*this)
727         .fallible_append_utf16_to_latin1_lossy_impl(other_slice, old_len, allow_shrinking)
728         .is_ok()
729 }
730 
731 #[no_mangle]
nscstring_fallible_append_utf8_to_latin1_lossy_check( this: *mut nsACString, other: *const nsACString, old_len: usize, ) -> bool732 pub unsafe extern "C" fn nscstring_fallible_append_utf8_to_latin1_lossy_check(
733     this: *mut nsACString,
734     other: *const nsACString,
735     old_len: usize,
736 ) -> bool {
737     (*this)
738         .fallible_append_utf8_to_latin1_lossy_check(&*other, old_len)
739         .is_ok()
740 }
741 
742 #[no_mangle]
nscstring_fallible_append_latin1_to_utf8_check( this: *mut nsACString, other: *const nsACString, old_len: usize, ) -> bool743 pub unsafe extern "C" fn nscstring_fallible_append_latin1_to_utf8_check(
744     this: *mut nsACString,
745     other: *const nsACString,
746     old_len: usize,
747 ) -> bool {
748     (*this)
749         .fallible_append_latin1_to_utf8_check(&*other, old_len)
750         .is_ok()
751 }
752