1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5 use crate::{
6 nsACString, nsAString, nsCStringLike, BulkWriteOk, Gecko_FallibleAssignCString,
7 Latin1StringLike,
8 };
9 use encoding_rs::mem::*;
10 use encoding_rs::Encoding;
11 use std::slice;
12
/// Worst-case output length math for `convert_utf16_to_utf8()`: three
/// times the input length.
///
/// Returns `None` if the multiplication overflows `usize`.
#[inline(always)]
fn times_three(a: usize) -> Option<usize> {
    usize::checked_mul(a, 3)
}
19
/// Length math for conversions whose worst-case output length equals the
/// input length. Never fails, but returns an `Option` so that it has the
/// same signature as the other length-math helpers.
#[inline(always)]
fn identity(a: usize) -> Option<usize> {
    Option::Some(a)
}
24
/// Length math for conversions that want one code unit of output space
/// more than the input length.
///
/// Returns `None` if the addition overflows `usize`.
#[inline(always)]
fn plus_one(a: usize) -> Option<usize> {
    usize::checked_add(a, 1)
}
29
/// Typical cache line size per
/// https://stackoverflow.com/questions/14707803/line-size-of-l1-and-l2-caches
///
/// For consistent behavior, not trying to use 128 on aarch64
/// or other fanciness like that.
const CACHE_LINE: usize = 64;

/// Bit mask that extracts the offset of an address within a cache line
/// (`address & CACHE_LINE_MASK`). This works because `CACHE_LINE` is a
/// power of two.
const CACHE_LINE_MASK: usize = CACHE_LINE - 1;
38
39 /// Returns true if the string is both longer than a cache line
40 /// and the first cache line is ASCII.
41 #[inline(always)]
long_string_starts_with_ascii(buffer: &[u8]) -> bool42 fn long_string_starts_with_ascii(buffer: &[u8]) -> bool {
43 // We examine data only up to the end of the cache line
44 // to make this check minimally disruptive.
45 if buffer.len() <= CACHE_LINE {
46 return false;
47 }
48 let bound = CACHE_LINE - ((buffer.as_ptr() as usize) & CACHE_LINE_MASK);
49 is_ascii(&buffer[..bound])
50 }
51
52 /// Returns true if the string is both longer than two cache lines
53 /// and the first two cache lines are Basic Latin.
54 #[inline(always)]
long_string_stars_with_basic_latin(buffer: &[u16]) -> bool55 fn long_string_stars_with_basic_latin(buffer: &[u16]) -> bool {
56 // We look at two cache lines with code unit size of two. There is need
57 // to look at more than one cache line in the UTF-16 case, because looking
58 // at just one cache line wouldn't catch non-ASCII Latin with high enough
59 // probability with Latin-script languages that have relatively infrequent
60 // non-ASCII characters.
61 if buffer.len() <= CACHE_LINE {
62 return false;
63 }
64 let bound = (CACHE_LINE * 2 - ((buffer.as_ptr() as usize) & CACHE_LINE_MASK)) / 2;
65 is_basic_latin(&buffer[..bound])
66 }
67
// Ignoring the copy avoidance complications of conversions between Latin1 and
// UTF-8, a conversion function has the outward form of
// `fn F(&mut self, other: &[T], old_len: usize) -> Result<BulkWriteOk, ()>`,
// where `T` is either `u8` or `u16`. `other` is the slice whose converted
// content is to be appended to `self` and `old_len` indicates how many
// code units of `self` are to be preserved (0 for the assignment case and
// `self.len()` for the appending case).
75 //
// As implementation parameters a conversion function needs to know the
// math for computing the worst case conversion length in code units given
// the input length in code units. For a _constant conversion_ the number
// of code units the conversion produces equals the number of code units
// in the input. For a _shrinking conversion_ the maximum number of code
// units the conversion can produce equals the number of code units in
// the input, but the conversion can produce fewer code units. Still, due
// to implementation details, the function might want _one_ unit more of
// output space. For an _expanding conversion_ (no need for macro), the
// minimum number of code units produced by the conversion is the number
// of code units in the input, but the conversion can produce more.
87 //
88 // Copy avoidance conversions avoid copying a refcounted buffer when it's
89 // ASCII-only.
90 //
91 // Internally, a conversion function needs to know the underlying
92 // encoding_rs conversion function, the math for computing the required
93 // output buffer size and, depending on the case, the underlying
94 // encoding_rs ASCII prefix handling function.
95
/// Generates a method for a conversion whose output may contain fewer code
/// units than its input.
///
/// Arguments:
///
/// `$name` is the name of the method to generate
/// `$convert` is the underlying `encoding_rs::mem` function to call
/// `$other_ty` is the type of the input slice
/// `$math` is the worst-case length math that `$convert` expects
///
/// The generated method fails with `Err(())` when the length math
/// overflows or when `bulk_write` fails.
macro_rules! shrinking_conversion {
    (name = $name:ident,
     convert = $convert:ident,
     other_ty = $other_ty:ty,
     math = $math:ident) => {
        fn $name(&mut self, other: $other_ty, old_len: usize) -> Result<BulkWriteOk, ()> {
            // Worst-case number of code units the conversion may need.
            let worst_case = $math(other.len()).ok_or(())?;
            let capacity = old_len.checked_add(worst_case).ok_or(())?;
            let mut handle = unsafe { self.bulk_write(capacity, old_len, false)? };
            // Convert into the space after the preserved prefix.
            let written = $convert(other, &mut handle.as_mut_slice()[old_len..]);
            let new_len = old_len + written;
            let exceeds_cache_line = new_len > CACHE_LINE;
            Ok(handle.finish(new_len, exceeds_cache_line))
        }
    };
}
121
/// Generates a method for a conversion whose output always has exactly as
/// many code units as its input.
///
/// Arguments:
///
/// `$name` is the name of the method to generate
/// `$convert` is the underlying `encoding_rs::mem` function to call
/// `$other_ty` is the type of the input slice
///
/// The generated method fails with `Err(())` when the resulting length
/// overflows or when `bulk_write` fails.
macro_rules! constant_conversion {
    (name = $name:ident,
     convert = $convert:ident,
     other_ty = $other_ty:ty) => {
        fn $name(
            &mut self,
            other: $other_ty,
            old_len: usize,
            allow_shrinking: bool,
        ) -> Result<BulkWriteOk, ()> {
            // The output length is known up front: preserved prefix plus input.
            let total_len = old_len.checked_add(other.len()).ok_or(())?;
            let mut handle = unsafe { self.bulk_write(total_len, old_len, allow_shrinking)? };
            {
                let buffer = handle.as_mut_slice();
                $convert(other, &mut buffer[old_len..]);
            }
            Ok(handle.finish(total_len, false))
        }
    };
}
148
/// Generates an intermediate check that avoids a copy, using an
/// `nsStringBuffer` refcount increment instead, when both `self` and
/// `other` are `nsACString`s, `other` is entirely ASCII and all old data in
/// `self` is discarded.
///
/// Arguments:
///
/// `$name` is the name of the method to generate
/// `$implementation` is the underlying conversion that takes a slice and
///                   that is used when the incoming buffer can't simply be
///                   adopted as-is
/// `$string_like` is the kind of input taken
macro_rules! ascii_copy_avoidance {
    (name = $name:ident,
     implementation = $implementation:ident,
     string_like = $string_like:ident) => {
        fn $name<T: $string_like + ?Sized>(
            &mut self,
            other: &T,
            old_len: usize,
        ) -> Result<BulkWriteOk, ()> {
            let adapter = other.adapt();
            let other_slice: &[u8] = adapter.as_ref();

            // Adopting the buffer is only an option for an abstract string
            // input when none of the old contents of `self` are kept.
            let may_adopt_buffer = adapter.is_abstract() && old_len == 0;
            if !may_adopt_buffer {
                return self.$implementation(other_slice, old_len, None);
            }

            let up_to = Encoding::ascii_valid_up_to(other_slice);
            if up_to != other_slice.len() {
                // Not ASCII-only: convert, passing along the length of the
                // ASCII prefix that has already been established.
                return self.$implementation(other_slice, old_len, Some(up_to));
            }

            // Calling something whose argument can be obtained from
            // the adapter rather than an nsStringLike avoids a huge
            // lifetime mess by keeping nsStringLike and
            // Latin1StringLike free of lifetime interdependencies.
            let assigned =
                unsafe { Gecko_FallibleAssignCString(self, other.adapt().as_ptr()) };
            if assigned {
                Ok(BulkWriteOk {})
            } else {
                Err(())
            }
        }
    };
}
189
190 impl nsAString {
191 // Valid UTF-8 to UTF-16
192
193 // Documentation says the destination buffer needs to have
194 // as many code units as the input.
195 shrinking_conversion!(
196 name = fallible_append_str_impl,
197 convert = convert_str_to_utf16,
198 other_ty = &str,
199 math = identity
200 );
201
202 /// Convert a valid UTF-8 string into valid UTF-16 and replace the content
203 /// of this string with the conversion result.
assign_str(&mut self, other: &str)204 pub fn assign_str(&mut self, other: &str) {
205 self.fallible_append_str_impl(other, 0)
206 .expect("Out of memory");
207 }
208
209 /// Convert a valid UTF-8 string into valid UTF-16 and fallibly replace the
210 /// content of this string with the conversion result.
fallible_assign_str(&mut self, other: &str) -> Result<(), ()>211 pub fn fallible_assign_str(&mut self, other: &str) -> Result<(), ()> {
212 self.fallible_append_str_impl(other, 0).map(|_| ())
213 }
214
215 /// Convert a valid UTF-8 string into valid UTF-16 and append the conversion
216 /// to this string.
append_str(&mut self, other: &str)217 pub fn append_str(&mut self, other: &str) {
218 let len = self.len();
219 self.fallible_append_str_impl(other, len)
220 .expect("Out of memory");
221 }
222
223 /// Convert a valid UTF-8 string into valid UTF-16 and fallibly append the
224 /// conversion to this string.
fallible_append_str(&mut self, other: &str) -> Result<(), ()>225 pub fn fallible_append_str(&mut self, other: &str) -> Result<(), ()> {
226 let len = self.len();
227 self.fallible_append_str_impl(other, len).map(|_| ())
228 }
229
230 // Potentially-invalid UTF-8 to UTF-16
231
232 // Documentation says the destination buffer needs to have
233 // one more code unit than the input.
234 shrinking_conversion!(
235 name = fallible_append_utf8_impl,
236 convert = convert_utf8_to_utf16,
237 other_ty = &[u8],
238 math = plus_one
239 );
240
241 /// Convert a potentially-invalid UTF-8 string into valid UTF-16
242 /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
243 /// replace the content of this string with the conversion result.
assign_utf8(&mut self, other: &[u8])244 pub fn assign_utf8(&mut self, other: &[u8]) {
245 self.fallible_append_utf8_impl(other, 0)
246 .expect("Out of memory");
247 }
248
249 /// Convert a potentially-invalid UTF-8 string into valid UTF-16
250 /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
251 /// fallibly replace the content of this string with the conversion result.
fallible_assign_utf8(&mut self, other: &[u8]) -> Result<(), ()>252 pub fn fallible_assign_utf8(&mut self, other: &[u8]) -> Result<(), ()> {
253 self.fallible_append_utf8_impl(other, 0).map(|_| ())
254 }
255
256 /// Convert a potentially-invalid UTF-8 string into valid UTF-16
257 /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
258 /// append the conversion result to this string.
append_utf8(&mut self, other: &[u8])259 pub fn append_utf8(&mut self, other: &[u8]) {
260 let len = self.len();
261 self.fallible_append_utf8_impl(other, len)
262 .expect("Out of memory");
263 }
264
265 /// Convert a potentially-invalid UTF-8 string into valid UTF-16
266 /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
267 /// fallibly append the conversion result to this string.
fallible_append_utf8(&mut self, other: &[u8]) -> Result<(), ()>268 pub fn fallible_append_utf8(&mut self, other: &[u8]) -> Result<(), ()> {
269 let len = self.len();
270 self.fallible_append_utf8_impl(other, len).map(|_| ())
271 }
272
273 // Latin1 to UTF-16
274
275 constant_conversion!(
276 name = fallible_append_latin1_impl,
277 convert = convert_latin1_to_utf16,
278 other_ty = &[u8]
279 );
280
281 /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
282 /// into UTF-16 and replace the content of this string with the conversion result.
assign_latin1(&mut self, other: &[u8])283 pub fn assign_latin1(&mut self, other: &[u8]) {
284 self.fallible_append_latin1_impl(other, 0, true)
285 .expect("Out of memory");
286 }
287
288 /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
289 /// into UTF-16 and fallibly replace the content of this string with the
290 /// conversion result.
fallible_assign_latin1(&mut self, other: &[u8]) -> Result<(), ()>291 pub fn fallible_assign_latin1(&mut self, other: &[u8]) -> Result<(), ()> {
292 self.fallible_append_latin1_impl(other, 0, true).map(|_| ())
293 }
294
295 /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
296 /// into UTF-16 and append the conversion result to this string.
append_latin1(&mut self, other: &[u8])297 pub fn append_latin1(&mut self, other: &[u8]) {
298 let len = self.len();
299 self.fallible_append_latin1_impl(other, len, false)
300 .expect("Out of memory");
301 }
302
303 /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
304 /// into UTF-16 and fallibly append the conversion result to this string.
fallible_append_latin1(&mut self, other: &[u8]) -> Result<(), ()>305 pub fn fallible_append_latin1(&mut self, other: &[u8]) -> Result<(), ()> {
306 let len = self.len();
307 self.fallible_append_latin1_impl(other, len, false)
308 .map(|_| ())
309 }
310 }
311
312 impl nsACString {
313 // UTF-16 to UTF-8
314
fallible_append_utf16_to_utf8_impl( &mut self, other: &[u16], old_len: usize, ) -> Result<BulkWriteOk, ()>315 fn fallible_append_utf16_to_utf8_impl(
316 &mut self,
317 other: &[u16],
318 old_len: usize,
319 ) -> Result<BulkWriteOk, ()> {
320 // We first size the buffer for ASCII if the first two cache lines are ASCII. If that turns out
321 // not to be enough, we size for the worst case given the length of the remaining input at that
322 // point. BUT if the worst case fits inside the inline capacity of an autostring, we skip
323 // the ASCII stuff.
324 let worst_case_needed = if let Some(inline_capacity) = self.inline_capacity() {
325 let worst_case = times_three(other.len()).ok_or(())?;
326 if worst_case <= inline_capacity {
327 Some(worst_case)
328 } else {
329 None
330 }
331 } else {
332 None
333 };
334 let (filled, read, mut handle) =
335 if worst_case_needed.is_none() && long_string_stars_with_basic_latin(other) {
336 let new_len_with_ascii = old_len.checked_add(other.len()).ok_or(())?;
337 let mut handle = unsafe { self.bulk_write(new_len_with_ascii, old_len, false)? };
338 let (read, written) =
339 convert_utf16_to_utf8_partial(other, &mut handle.as_mut_slice()[old_len..]);
340 let left = other.len() - read;
341 if left == 0 {
342 return Ok(handle.finish(old_len + written, true));
343 }
344 let filled = old_len + written;
345 let needed = times_three(left).ok_or(())?;
346 let new_len = filled.checked_add(needed).ok_or(())?;
347 unsafe {
348 handle.restart_bulk_write(new_len, filled, false)?;
349 }
350 (filled, read, handle)
351 } else {
352 // Started with non-ASCII. Compute worst case
353 let needed = if let Some(n) = worst_case_needed {
354 n
355 } else {
356 times_three(other.len()).ok_or(())?
357 };
358 let new_len = old_len.checked_add(needed).ok_or(())?;
359 let handle = unsafe { self.bulk_write(new_len, old_len, false)? };
360 (old_len, 0, handle)
361 };
362 let written = convert_utf16_to_utf8(&other[read..], &mut handle.as_mut_slice()[filled..]);
363 Ok(handle.finish(filled + written, true))
364 }
365
366 /// Convert a potentially-invalid UTF-16 string into valid UTF-8
367 /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
368 /// replace the content of this string with the conversion result.
assign_utf16_to_utf8(&mut self, other: &[u16])369 pub fn assign_utf16_to_utf8(&mut self, other: &[u16]) {
370 self.fallible_append_utf16_to_utf8_impl(other, 0)
371 .expect("Out of memory");
372 }
373
374 /// Convert a potentially-invalid UTF-16 string into valid UTF-8
375 /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
376 /// fallibly replace the content of this string with the conversion result.
fallible_assign_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()>377 pub fn fallible_assign_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()> {
378 self.fallible_append_utf16_to_utf8_impl(other, 0)
379 .map(|_| ())
380 }
381
382 /// Convert a potentially-invalid UTF-16 string into valid UTF-8
383 /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
384 /// append the conversion result to this string.
append_utf16_to_utf8(&mut self, other: &[u16])385 pub fn append_utf16_to_utf8(&mut self, other: &[u16]) {
386 let len = self.len();
387 self.fallible_append_utf16_to_utf8_impl(other, len)
388 .expect("Out of memory");
389 }
390
391 /// Convert a potentially-invalid UTF-16 string into valid UTF-8
392 /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
393 /// fallibly append the conversion result to this string.
fallible_append_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()>394 pub fn fallible_append_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()> {
395 let len = self.len();
396 self.fallible_append_utf16_to_utf8_impl(other, len)
397 .map(|_| ())
398 }
399
400 // UTF-16 to Latin1
401
402 constant_conversion!(
403 name = fallible_append_utf16_to_latin1_lossy_impl,
404 convert = convert_utf16_to_latin1_lossy,
405 other_ty = &[u16]
406 );
407
408 /// Convert a UTF-16 string whose all code points are below U+0100 into
409 /// a Latin1 (scalar value is byte value; not windows-1252!) string and
410 /// replace the content of this string with the conversion result.
411 ///
412 /// # Panics
413 ///
414 /// If the input contains code points above U+00FF or is not valid UTF-16,
415 /// panics in debug mode and produces garbage in a memory-safe way in
416 /// release builds. The nature of the garbage may differ based on CPU
417 /// architecture and must not be relied upon.
assign_utf16_to_latin1_lossy(&mut self, other: &[u16])418 pub fn assign_utf16_to_latin1_lossy(&mut self, other: &[u16]) {
419 self.fallible_append_utf16_to_latin1_lossy_impl(other, 0, true)
420 .expect("Out of memory");
421 }
422
423 /// Convert a UTF-16 string whose all code points are below U+0100 into
424 /// a Latin1 (scalar value is byte value; not windows-1252!) string and
425 /// fallibly replace the content of this string with the conversion result.
426 ///
427 /// # Panics
428 ///
429 /// If the input contains code points above U+00FF or is not valid UTF-16,
430 /// panics in debug mode and produces garbage in a memory-safe way in
431 /// release builds. The nature of the garbage may differ based on CPU
432 /// architecture and must not be relied upon.
fallible_assign_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()>433 pub fn fallible_assign_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()> {
434 self.fallible_append_utf16_to_latin1_lossy_impl(other, 0, true)
435 .map(|_| ())
436 }
437
438 /// Convert a UTF-16 string whose all code points are below U+0100 into
439 /// a Latin1 (scalar value is byte value; not windows-1252!) string and
440 /// append the conversion result to this string.
441 ///
442 /// # Panics
443 ///
444 /// If the input contains code points above U+00FF or is not valid UTF-16,
445 /// panics in debug mode and produces garbage in a memory-safe way in
446 /// release builds. The nature of the garbage may differ based on CPU
447 /// architecture and must not be relied upon.
append_utf16_to_latin1_lossy(&mut self, other: &[u16])448 pub fn append_utf16_to_latin1_lossy(&mut self, other: &[u16]) {
449 let len = self.len();
450 self.fallible_append_utf16_to_latin1_lossy_impl(other, len, false)
451 .expect("Out of memory");
452 }
453
454 /// Convert a UTF-16 string whose all code points are below U+0100 into
455 /// a Latin1 (scalar value is byte value; not windows-1252!) string and
456 /// fallibly append the conversion result to this string.
457 ///
458 /// # Panics
459 ///
460 /// If the input contains code points above U+00FF or is not valid UTF-16,
461 /// panics in debug mode and produces garbage in a memory-safe way in
462 /// release builds. The nature of the garbage may differ based on CPU
463 /// architecture and must not be relied upon.
fallible_append_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()>464 pub fn fallible_append_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()> {
465 let len = self.len();
466 self.fallible_append_utf16_to_latin1_lossy_impl(other, len, false)
467 .map(|_| ())
468 }
469
470 // UTF-8 to Latin1
471
472 ascii_copy_avoidance!(
473 name = fallible_append_utf8_to_latin1_lossy_check,
474 implementation = fallible_append_utf8_to_latin1_lossy_impl,
475 string_like = nsCStringLike
476 );
477
fallible_append_utf8_to_latin1_lossy_impl( &mut self, other: &[u8], old_len: usize, maybe_num_ascii: Option<usize>, ) -> Result<BulkWriteOk, ()>478 fn fallible_append_utf8_to_latin1_lossy_impl(
479 &mut self,
480 other: &[u8],
481 old_len: usize,
482 maybe_num_ascii: Option<usize>,
483 ) -> Result<BulkWriteOk, ()> {
484 let new_len = old_len.checked_add(other.len()).ok_or(())?;
485 let num_ascii = maybe_num_ascii.unwrap_or(0);
486 // Already checked for overflow above, so this can't overflow.
487 let old_len_plus_num_ascii = old_len + num_ascii;
488 let mut handle = unsafe { self.bulk_write(new_len, old_len, false)? };
489 let written = {
490 let buffer = handle.as_mut_slice();
491 if num_ascii != 0 {
492 (&mut buffer[old_len..old_len_plus_num_ascii]).copy_from_slice(&other[..num_ascii]);
493 }
494 convert_utf8_to_latin1_lossy(&other[num_ascii..], &mut buffer[old_len_plus_num_ascii..])
495 };
496 Ok(handle.finish(old_len_plus_num_ascii + written, true))
497 }
498
499 /// Convert a UTF-8 string whose all code points are below U+0100 into
500 /// a Latin1 (scalar value is byte value; not windows-1252!) string and
501 /// replace the content of this string with the conversion result.
502 ///
503 /// # Panics
504 ///
505 /// If the input contains code points above U+00FF or is not valid UTF-8,
506 /// panics in debug mode and produces garbage in a memory-safe way in
507 /// release builds. The nature of the garbage may differ based on CPU
508 /// architecture and must not be relied upon.
assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T)509 pub fn assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
510 self.fallible_append_utf8_to_latin1_lossy_check(other, 0)
511 .expect("Out of memory");
512 }
513
514 /// Convert a UTF-8 string whose all code points are below U+0100 into
515 /// a Latin1 (scalar value is byte value; not windows-1252!) string and
516 /// fallibly replace the content of this string with the conversion result.
517 ///
518 /// # Panics
519 ///
520 /// If the input contains code points above U+00FF or is not valid UTF-8,
521 /// panics in debug mode and produces garbage in a memory-safe way in
522 /// release builds. The nature of the garbage may differ based on CPU
523 /// architecture and must not be relied upon.
fallible_assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>( &mut self, other: &T, ) -> Result<(), ()>524 pub fn fallible_assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(
525 &mut self,
526 other: &T,
527 ) -> Result<(), ()> {
528 self.fallible_append_utf8_to_latin1_lossy_check(other, 0)
529 .map(|_| ())
530 }
531
532 /// Convert a UTF-8 string whose all code points are below U+0100 into
533 /// a Latin1 (scalar value is byte value; not windows-1252!) string and
534 /// append the conversion result to this string.
535 ///
536 /// # Panics
537 ///
538 /// If the input contains code points above U+00FF or is not valid UTF-8,
539 /// panics in debug mode and produces garbage in a memory-safe way in
540 /// release builds. The nature of the garbage may differ based on CPU
541 /// architecture and must not be relied upon.
append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T)542 pub fn append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
543 let len = self.len();
544 self.fallible_append_utf8_to_latin1_lossy_check(other, len)
545 .expect("Out of memory");
546 }
547
548 /// Convert a UTF-8 string whose all code points are below U+0100 into
549 /// a Latin1 (scalar value is byte value; not windows-1252!) string and
550 /// fallibly append the conversion result to this string.
551 ///
552 /// # Panics
553 ///
554 /// If the input contains code points above U+00FF or is not valid UTF-8,
555 /// panics in debug mode and produces garbage in a memory-safe way in
556 /// release builds. The nature of the garbage may differ based on CPU
557 /// architecture and must not be relied upon.
fallible_append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>( &mut self, other: &T, ) -> Result<(), ()>558 pub fn fallible_append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(
559 &mut self,
560 other: &T,
561 ) -> Result<(), ()> {
562 let len = self.len();
563 self.fallible_append_utf8_to_latin1_lossy_check(other, len)
564 .map(|_| ())
565 }
566
567 // Latin1 to UTF-8 CString
568
569 ascii_copy_avoidance!(
570 name = fallible_append_latin1_to_utf8_check,
571 implementation = fallible_append_latin1_to_utf8_impl,
572 string_like = Latin1StringLike
573 );
574
fallible_append_latin1_to_utf8_impl( &mut self, other: &[u8], old_len: usize, maybe_num_ascii: Option<usize>, ) -> Result<BulkWriteOk, ()>575 fn fallible_append_latin1_to_utf8_impl(
576 &mut self,
577 other: &[u8],
578 old_len: usize,
579 maybe_num_ascii: Option<usize>,
580 ) -> Result<BulkWriteOk, ()> {
581 let (filled, read, mut handle) = if let Some(num_ascii) = maybe_num_ascii {
582 // Wrapper checked for ASCII
583 let left = other.len() - num_ascii;
584 let filled = old_len + num_ascii;
585 let needed = left.checked_mul(2).ok_or(())?;
586 let new_len = filled.checked_add(needed).ok_or(())?;
587 let mut handle = unsafe { self.bulk_write(new_len, old_len, false)? };
588 if num_ascii != 0 {
589 (&mut handle.as_mut_slice()[old_len..filled]).copy_from_slice(&other[..num_ascii]);
590 }
591 (filled, num_ascii, handle)
592 } else {
593 let worst_case_needed = if let Some(inline_capacity) = self.inline_capacity() {
594 let worst_case = other.len().checked_mul(2).ok_or(())?;
595 if worst_case <= inline_capacity {
596 Some(worst_case)
597 } else {
598 None
599 }
600 } else {
601 None
602 };
603 if worst_case_needed.is_none() && long_string_starts_with_ascii(other) {
604 // Wrapper didn't check for ASCII, so let's see if `other` starts with ASCII
605 // `other` starts with ASCII, so let's first size the buffer
606 // with optimism that it's ASCII-only.
607 let new_len_with_ascii = old_len.checked_add(other.len()).ok_or(())?;
608 let mut handle = unsafe { self.bulk_write(new_len_with_ascii, old_len, false)? };
609 let (read, written) =
610 convert_latin1_to_utf8_partial(other, &mut handle.as_mut_slice()[old_len..]);
611 let left = other.len() - read;
612 let filled = old_len + written;
613 if left == 0 {
614 // `other` fit in the initial allocation
615 return Ok(handle.finish(filled, true));
616 }
617 let needed = left.checked_mul(2).ok_or(())?;
618 let new_len = filled.checked_add(needed).ok_or(())?;
619 unsafe {
620 handle.restart_bulk_write(new_len, filled, false)?;
621 }
622 (filled, read, handle)
623 } else {
624 // Started with non-ASCII. Assume worst case.
625 let needed = if let Some(n) = worst_case_needed {
626 n
627 } else {
628 other.len().checked_mul(2).ok_or(())?
629 };
630 let new_len = old_len.checked_add(needed).ok_or(())?;
631 let handle = unsafe { self.bulk_write(new_len, old_len, false)? };
632 (old_len, 0, handle)
633 }
634 };
635 let written = convert_latin1_to_utf8(&other[read..], &mut handle.as_mut_slice()[filled..]);
636 Ok(handle.finish(filled + written, true))
637 }
638
639 /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
640 /// into UTF-8 and replace the content of this string with the conversion result.
assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T)641 pub fn assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T) {
642 self.fallible_append_latin1_to_utf8_check(other, 0)
643 .expect("Out of memory");
644 }
645
646 /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
647 /// into UTF-8 and fallibly replace the content of this string with the
648 /// conversion result.
fallible_assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>( &mut self, other: &T, ) -> Result<(), ()>649 pub fn fallible_assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>(
650 &mut self,
651 other: &T,
652 ) -> Result<(), ()> {
653 self.fallible_append_latin1_to_utf8_check(other, 0)
654 .map(|_| ())
655 }
656
657 /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
658 /// into UTF-8 and append the conversion result to this string.
append_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T)659 pub fn append_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T) {
660 let len = self.len();
661 self.fallible_append_latin1_to_utf8_check(other, len)
662 .expect("Out of memory");
663 }
664
665 /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
666 /// into UTF-8 and fallibly append the conversion result to this string.
fallible_append_latin1_to_utf8<T: Latin1StringLike + ?Sized>( &mut self, other: &T, ) -> Result<(), ()>667 pub fn fallible_append_latin1_to_utf8<T: Latin1StringLike + ?Sized>(
668 &mut self,
669 other: &T,
670 ) -> Result<(), ()> {
671 let len = self.len();
672 self.fallible_append_latin1_to_utf8_check(other, len)
673 .map(|_| ())
674 }
675 }
676
677 #[no_mangle]
nsstring_fallible_append_utf8_impl( this: *mut nsAString, other: *const u8, other_len: usize, old_len: usize, ) -> bool678 pub unsafe extern "C" fn nsstring_fallible_append_utf8_impl(
679 this: *mut nsAString,
680 other: *const u8,
681 other_len: usize,
682 old_len: usize,
683 ) -> bool {
684 let other_slice = slice::from_raw_parts(other, other_len);
685 (*this)
686 .fallible_append_utf8_impl(other_slice, old_len)
687 .is_ok()
688 }
689
690 #[no_mangle]
nsstring_fallible_append_latin1_impl( this: *mut nsAString, other: *const u8, other_len: usize, old_len: usize, allow_shrinking: bool, ) -> bool691 pub unsafe extern "C" fn nsstring_fallible_append_latin1_impl(
692 this: *mut nsAString,
693 other: *const u8,
694 other_len: usize,
695 old_len: usize,
696 allow_shrinking: bool,
697 ) -> bool {
698 let other_slice = slice::from_raw_parts(other, other_len);
699 (*this)
700 .fallible_append_latin1_impl(other_slice, old_len, allow_shrinking)
701 .is_ok()
702 }
703
704 #[no_mangle]
nscstring_fallible_append_utf16_to_utf8_impl( this: *mut nsACString, other: *const u16, other_len: usize, old_len: usize, ) -> bool705 pub unsafe extern "C" fn nscstring_fallible_append_utf16_to_utf8_impl(
706 this: *mut nsACString,
707 other: *const u16,
708 other_len: usize,
709 old_len: usize,
710 ) -> bool {
711 let other_slice = slice::from_raw_parts(other, other_len);
712 (*this)
713 .fallible_append_utf16_to_utf8_impl(other_slice, old_len)
714 .is_ok()
715 }
716
717 #[no_mangle]
nscstring_fallible_append_utf16_to_latin1_lossy_impl( this: *mut nsACString, other: *const u16, other_len: usize, old_len: usize, allow_shrinking: bool, ) -> bool718 pub unsafe extern "C" fn nscstring_fallible_append_utf16_to_latin1_lossy_impl(
719 this: *mut nsACString,
720 other: *const u16,
721 other_len: usize,
722 old_len: usize,
723 allow_shrinking: bool,
724 ) -> bool {
725 let other_slice = slice::from_raw_parts(other, other_len);
726 (*this)
727 .fallible_append_utf16_to_latin1_lossy_impl(other_slice, old_len, allow_shrinking)
728 .is_ok()
729 }
730
731 #[no_mangle]
nscstring_fallible_append_utf8_to_latin1_lossy_check( this: *mut nsACString, other: *const nsACString, old_len: usize, ) -> bool732 pub unsafe extern "C" fn nscstring_fallible_append_utf8_to_latin1_lossy_check(
733 this: *mut nsACString,
734 other: *const nsACString,
735 old_len: usize,
736 ) -> bool {
737 (*this)
738 .fallible_append_utf8_to_latin1_lossy_check(&*other, old_len)
739 .is_ok()
740 }
741
742 #[no_mangle]
nscstring_fallible_append_latin1_to_utf8_check( this: *mut nsACString, other: *const nsACString, old_len: usize, ) -> bool743 pub unsafe extern "C" fn nscstring_fallible_append_latin1_to_utf8_check(
744 this: *mut nsACString,
745 other: *const nsACString,
746 old_len: usize,
747 ) -> bool {
748 (*this)
749 .fallible_append_latin1_to_utf8_check(&*other, old_len)
750 .is_ok()
751 }
752