1 //! Constants 2 //! 3 //! The constant pool defined here allows Cranelift to avoid emitting the same constant multiple 4 //! times. As constants are inserted in the pool, a handle is returned; the handle is a Cranelift 5 //! Entity. Inserting the same data multiple times will always return the same handle. 6 //! 7 //! Future work could include: 8 //! - ensuring alignment of constants within the pool, 9 //! - bucketing constants by size. 10 11 use crate::ir::immediates::{IntoBytes, V128Imm}; 12 use crate::ir::Constant; 13 use crate::HashMap; 14 use alloc::collections::BTreeMap; 15 use alloc::vec::Vec; 16 use core::fmt; 17 use core::iter::FromIterator; 18 use core::slice::Iter; 19 use core::str::{from_utf8, FromStr}; 20 use cranelift_entity::EntityRef; 21 22 #[cfg(feature = "enable-serde")] 23 use serde::{Deserialize, Serialize}; 24 25 /// This type describes the actual constant data. Note that the bytes stored in this structure are 26 /// expected to be in little-endian order; this is due to ease-of-use when interacting with 27 /// WebAssembly values, which are [little-endian by design]. 28 /// 29 /// [little-endian by design]: https://github.com/WebAssembly/design/blob/master/Portability.md 30 #[derive(Clone, Hash, Eq, PartialEq, Debug, Default)] 31 #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] 32 pub struct ConstantData(Vec<u8>); 33 34 impl FromIterator<u8> for ConstantData { from_iter<T: IntoIterator<Item = u8>>(iter: T) -> Self35 fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> Self { 36 let v = iter.into_iter().collect(); 37 Self(v) 38 } 39 } 40 41 impl From<Vec<u8>> for ConstantData { from(v: Vec<u8>) -> Self42 fn from(v: Vec<u8>) -> Self { 43 Self(v) 44 } 45 } 46 47 impl From<&[u8]> for ConstantData { from(v: &[u8]) -> Self48 fn from(v: &[u8]) -> Self { 49 Self(v.to_vec()) 50 } 51 } 52 53 impl From<V128Imm> for ConstantData { from(v: V128Imm) -> Self54 fn from(v: V128Imm) -> Self { 55 Self(v.to_vec()) 56 } 57 } 58 59 impl ConstantData { 60 /// Return the number of bytes in the constant. len(&self) -> usize61 pub fn len(&self) -> usize { 62 self.0.len() 63 } 64 65 /// Check if the constant contains any bytes. is_empty(&self) -> bool66 pub fn is_empty(&self) -> bool { 67 self.0.is_empty() 68 } 69 70 /// Return the data as a slice. as_slice(&self) -> &[u8]71 pub fn as_slice(&self) -> &[u8] { 72 self.0.as_slice() 73 } 74 75 /// Convert the data to a vector. into_vec(self) -> Vec<u8>76 pub fn into_vec(self) -> Vec<u8> { 77 self.0 78 } 79 80 /// Iterate over the constant's bytes. iter(&self) -> Iter<u8>81 pub fn iter(&self) -> Iter<u8> { 82 self.0.iter() 83 } 84 85 /// Add new bytes to the constant data. append(mut self, bytes: impl IntoBytes) -> Self86 pub fn append(mut self, bytes: impl IntoBytes) -> Self { 87 let mut to_add = bytes.into_bytes(); 88 self.0.append(&mut to_add); 89 self 90 } 91 92 /// Expand the size of the constant data to `expected_size` number of bytes by adding zeroes 93 /// in the high-order byte slots. expand_to(mut self, expected_size: usize) -> Self94 pub fn expand_to(mut self, expected_size: usize) -> Self { 95 if self.len() > expected_size { 96 panic!( 97 "The constant data is already expanded beyond {} bytes", 98 expected_size 99 ) 100 } 101 self.0.resize(expected_size, 0); 102 self 103 } 104 } 105 106 impl fmt::Display for ConstantData { 107 /// Print the constant data in hexadecimal format, e.g. 0x000102030405060708090a0b0c0d0e0f. 108 /// This function will flip the stored order of bytes--little-endian--to the more readable 109 /// big-endian ordering. 110 /// 111 /// ``` 112 /// use cranelift_codegen::ir::ConstantData; 113 /// let data = ConstantData::from([3, 2, 1, 0, 0].as_ref()); // note the little-endian order 114 /// assert_eq!(data.to_string(), "0x0000010203"); 115 /// ``` fmt(&self, f: &mut fmt::Formatter) -> fmt::Result116 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 117 if !self.is_empty() { 118 write!(f, "0x")?; 119 for b in self.0.iter().rev() { 120 write!(f, "{:02x}", b)?; 121 } 122 } 123 Ok(()) 124 } 125 } 126 127 impl FromStr for ConstantData { 128 type Err = &'static str; 129 130 /// Parse a hexadecimal string to `ConstantData`. This is the inverse of `Display::fmt`. 131 /// 132 /// ``` 133 /// use cranelift_codegen::ir::ConstantData; 134 /// let c: ConstantData = "0x000102".parse().unwrap(); 135 /// assert_eq!(c.into_vec(), [2, 1, 0]); 136 /// ``` from_str(s: &str) -> Result<Self, &'static str>137 fn from_str(s: &str) -> Result<Self, &'static str> { 138 if s.len() <= 2 || &s[0..2] != "0x" { 139 return Err("Expected a hexadecimal string, e.g. 0x1234"); 140 } 141 142 // clean and check the string 143 let cleaned: Vec<u8> = s[2..] 144 .as_bytes() 145 .iter() 146 .filter(|&&b| b as char != '_') 147 .cloned() 148 .collect(); // remove 0x prefix and any intervening _ characters 149 150 if cleaned.is_empty() { 151 Err("Hexadecimal string must have some digits") 152 } else if cleaned.len() % 2 != 0 { 153 Err("Hexadecimal string must have an even number of digits") 154 } else if cleaned.len() > 32 { 155 Err("Hexadecimal string has too many digits to fit in a 128-bit vector") 156 } else { 157 let mut buffer = Vec::with_capacity((s.len() - 2) / 2); 158 for i in (0..cleaned.len()).step_by(2) { 159 let pair = from_utf8(&cleaned[i..i + 2]) 160 .or_else(|_| Err("Unable to parse hexadecimal pair as UTF-8"))?; 161 let byte = u8::from_str_radix(pair, 16) 162 .or_else(|_| Err("Unable to parse as hexadecimal"))?; 163 buffer.insert(0, byte); 164 } 165 Ok(Self(buffer)) 166 } 167 } 168 } 169 170 /// This type describes an offset in bytes within a constant pool. 171 pub type ConstantOffset = u32; 172 173 /// Inner type for storing data and offset together in the constant pool. The offset is optional 174 /// because it must be set relative to the function code size (i.e. constants are emitted after the 175 /// function body); because the function is not yet compiled when constants are inserted, 176 /// [`set_offset`](crate::ir::ConstantPool::set_offset) must be called once a constant's offset 177 /// from the beginning of the function is known (see 178 /// `relaxation` in `relaxation.rs`). 179 #[derive(Clone)] 180 #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] 181 pub struct ConstantPoolEntry { 182 data: ConstantData, 183 offset: Option<ConstantOffset>, 184 } 185 186 impl ConstantPoolEntry { new(data: ConstantData) -> Self187 fn new(data: ConstantData) -> Self { 188 Self { data, offset: None } 189 } 190 191 /// Return the size of the constant at this entry. len(&self) -> usize192 pub fn len(&self) -> usize { 193 self.data.len() 194 } 195 196 /// Assign a new offset to the constant at this entry. set_offset(&mut self, offset: ConstantOffset)197 pub fn set_offset(&mut self, offset: ConstantOffset) { 198 self.offset = Some(offset) 199 } 200 } 201 202 /// Maintains the mapping between a constant handle (i.e. [`Constant`](crate::ir::Constant)) and 203 /// its constant data (i.e. [`ConstantData`](crate::ir::ConstantData)). 204 #[derive(Clone)] 205 #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] 206 pub struct ConstantPool { 207 /// This mapping maintains the insertion order as long as Constants are created with 208 /// sequentially increasing integers. 209 handles_to_values: BTreeMap<Constant, ConstantPoolEntry>, 210 211 /// This mapping is unordered (no need for lexicographic ordering) but allows us to map 212 /// constant data back to handles. 213 values_to_handles: HashMap<ConstantData, Constant>, 214 } 215 216 impl ConstantPool { 217 /// Create a new constant pool instance. new() -> Self218 pub fn new() -> Self { 219 Self { 220 handles_to_values: BTreeMap::new(), 221 values_to_handles: HashMap::new(), 222 } 223 } 224 225 /// Empty the constant pool of all data. clear(&mut self)226 pub fn clear(&mut self) { 227 self.handles_to_values.clear(); 228 self.values_to_handles.clear(); 229 } 230 231 /// Insert constant data into the pool, returning a handle for later referencing; when constant 232 /// data is inserted that is a duplicate of previous constant data, the existing handle will be 233 /// returned. insert(&mut self, constant_value: ConstantData) -> Constant234 pub fn insert(&mut self, constant_value: ConstantData) -> Constant { 235 if self.values_to_handles.contains_key(&constant_value) { 236 *self.values_to_handles.get(&constant_value).unwrap() 237 } else { 238 let constant_handle = Constant::new(self.len()); 239 self.set(constant_handle, constant_value); 240 constant_handle 241 } 242 } 243 244 /// Retrieve the constant data given a handle. get(&self, constant_handle: Constant) -> &ConstantData245 pub fn get(&self, constant_handle: Constant) -> &ConstantData { 246 assert!(self.handles_to_values.contains_key(&constant_handle)); 247 &self.handles_to_values.get(&constant_handle).unwrap().data 248 } 249 250 /// Link a constant handle to its value. This does not de-duplicate data but does avoid 251 /// replacing any existing constant values. use `set` to tie a specific `const42` to its value; 252 /// use `insert` to add a value and return the next available `const` entity. set(&mut self, constant_handle: Constant, constant_value: ConstantData)253 pub fn set(&mut self, constant_handle: Constant, constant_value: ConstantData) { 254 let replaced = self.handles_to_values.insert( 255 constant_handle, 256 ConstantPoolEntry::new(constant_value.clone()), 257 ); 258 assert!( 259 replaced.is_none(), 260 "attempted to overwrite an existing constant {:?}: {:?} => {:?}", 261 constant_handle, 262 &constant_value, 263 replaced.unwrap().data 264 ); 265 self.values_to_handles 266 .insert(constant_value, constant_handle); 267 } 268 269 /// Assign an offset to a given constant, where the offset is the number of bytes from the 270 /// beginning of the function to the beginning of the constant data inside the pool. set_offset(&mut self, constant_handle: Constant, constant_offset: ConstantOffset)271 pub fn set_offset(&mut self, constant_handle: Constant, constant_offset: ConstantOffset) { 272 assert!( 273 self.handles_to_values.contains_key(&constant_handle), 274 "A constant handle must have already been inserted into the pool; perhaps a \ 275 constant pool was created outside of the pool?" 276 ); 277 self.handles_to_values 278 .entry(constant_handle) 279 .and_modify(|e| e.offset = Some(constant_offset)); 280 } 281 282 /// Retrieve the offset of a given constant, where the offset is the number of bytes from the 283 /// beginning of the function to the beginning of the constant data inside the pool. get_offset(&self, constant_handle: Constant) -> ConstantOffset284 pub fn get_offset(&self, constant_handle: Constant) -> ConstantOffset { 285 self.handles_to_values 286 .get(&constant_handle) 287 .expect( 288 "A constant handle must have a corresponding constant value; was a constant \ 289 handle created outside of the pool?", 290 ) 291 .offset 292 .expect( 293 "A constant offset has not yet been set; verify that `set_offset` has been \ 294 called before this point", 295 ) 296 } 297 298 /// Iterate over the constants in insertion order. iter(&self) -> impl Iterator<Item = (&Constant, &ConstantData)>299 pub fn iter(&self) -> impl Iterator<Item = (&Constant, &ConstantData)> { 300 self.handles_to_values.iter().map(|(h, e)| (h, &e.data)) 301 } 302 303 /// Iterate over mutable entries in the constant pool in insertion order. entries_mut(&mut self) -> impl Iterator<Item = &mut ConstantPoolEntry>304 pub fn entries_mut(&mut self) -> impl Iterator<Item = &mut ConstantPoolEntry> { 305 self.handles_to_values.values_mut() 306 } 307 308 /// Return the number of constants in the pool. len(&self) -> usize309 pub fn len(&self) -> usize { 310 self.handles_to_values.len() 311 } 312 313 /// Return the combined size of all of the constant values in the pool. byte_size(&self) -> usize314 pub fn byte_size(&self) -> usize { 315 self.values_to_handles.keys().map(|c| c.len()).sum() 316 } 317 } 318 319 #[cfg(test)] 320 mod tests { 321 use super::*; 322 use std::string::ToString; 323 324 #[test] empty()325 fn empty() { 326 let sut = ConstantPool::new(); 327 assert_eq!(sut.len(), 0); 328 } 329 330 #[test] insert()331 fn insert() { 332 let mut sut = ConstantPool::new(); 333 sut.insert(vec![1, 2, 3].into()); 334 sut.insert(vec![4, 5, 6].into()); 335 assert_eq!(sut.len(), 2); 336 } 337 338 #[test] insert_duplicate()339 fn insert_duplicate() { 340 let mut sut = ConstantPool::new(); 341 let a = sut.insert(vec![1, 2, 3].into()); 342 sut.insert(vec![4, 5, 6].into()); 343 let b = sut.insert(vec![1, 2, 3].into()); 344 assert_eq!(a, b); 345 } 346 347 #[test] clear()348 fn clear() { 349 let mut sut = ConstantPool::new(); 350 sut.insert(vec![1, 2, 3].into()); 351 assert_eq!(sut.len(), 1); 352 353 sut.clear(); 354 assert_eq!(sut.len(), 0); 355 } 356 357 #[test] iteration_order()358 fn iteration_order() { 359 let mut sut = ConstantPool::new(); 360 sut.insert(vec![1, 2, 3].into()); 361 sut.insert(vec![4, 5, 6].into()); 362 sut.insert(vec![1, 2, 3].into()); 363 let data = sut.iter().map(|(_, v)| v).collect::<Vec<&ConstantData>>(); 364 assert_eq!(data, vec![&vec![1, 2, 3].into(), &vec![4, 5, 6].into()]); 365 } 366 367 #[test] get()368 fn get() { 369 let mut sut = ConstantPool::new(); 370 let data = vec![1, 2, 3]; 371 let handle = sut.insert(data.clone().into()); 372 assert_eq!(sut.get(handle), &data.into()); 373 } 374 375 #[test] set()376 fn set() { 377 let mut sut = ConstantPool::new(); 378 let handle = Constant::with_number(42).unwrap(); 379 let data = vec![1, 2, 3]; 380 sut.set(handle, data.clone().into()); 381 assert_eq!(sut.get(handle), &data.into()); 382 } 383 384 #[test] 385 #[should_panic] disallow_overwriting_constant()386 fn disallow_overwriting_constant() { 387 let mut sut = ConstantPool::new(); 388 let handle = Constant::with_number(42).unwrap(); 389 sut.set(handle, vec![].into()); 390 sut.set(handle, vec![1].into()); 391 } 392 393 #[test] 394 #[should_panic] get_nonexistent_constant()395 fn get_nonexistent_constant() { 396 let sut = ConstantPool::new(); 397 let a = Constant::with_number(42).unwrap(); 398 sut.get(a); // panics, only use constants returned by ConstantPool 399 } 400 401 #[test] get_offset()402 fn get_offset() { 403 let mut sut = ConstantPool::new(); 404 let a = sut.insert(vec![1].into()); 405 sut.set_offset(a, 42); 406 assert_eq!(sut.get_offset(a), 42) 407 } 408 409 #[test] 410 #[should_panic] get_nonexistent_offset()411 fn get_nonexistent_offset() { 412 let mut sut = ConstantPool::new(); 413 let a = sut.insert(vec![1].into()); 414 sut.get_offset(a); // panics, set_offset should have been called 415 } 416 417 #[test] display_constant_data()418 fn display_constant_data() { 419 assert_eq!(ConstantData::from([0].as_ref()).to_string(), "0x00"); 420 assert_eq!(ConstantData::from([42].as_ref()).to_string(), "0x2a"); 421 assert_eq!( 422 ConstantData::from([3, 2, 1, 0].as_ref()).to_string(), 423 "0x00010203" 424 ); 425 assert_eq!( 426 ConstantData::from(3735928559u32.to_le_bytes().as_ref()).to_string(), 427 "0xdeadbeef" 428 ); 429 assert_eq!( 430 ConstantData::from(0x0102030405060708u64.to_le_bytes().as_ref()).to_string(), 431 "0x0102030405060708" 432 ); 433 } 434 435 #[test] iterate_over_constant_data()436 fn iterate_over_constant_data() { 437 let c = ConstantData::from([1, 2, 3].as_ref()); 438 let mut iter = c.iter(); 439 assert_eq!(iter.next(), Some(&1)); 440 assert_eq!(iter.next(), Some(&2)); 441 assert_eq!(iter.next(), Some(&3)); 442 assert_eq!(iter.next(), None); 443 } 444 445 #[test] add_to_constant_data()446 fn add_to_constant_data() { 447 let d = ConstantData::from([1, 2].as_ref()); 448 let e = d.append(i16::from(3u8)); 449 assert_eq!(e.into_vec(), vec![1, 2, 3, 0]) 450 } 451 452 #[test] extend_constant_data()453 fn extend_constant_data() { 454 let d = ConstantData::from([1, 2].as_ref()); 455 assert_eq!(d.expand_to(4).into_vec(), vec![1, 2, 0, 0]) 456 } 457 458 #[test] 459 #[should_panic] extend_constant_data_to_invalid_length()460 fn extend_constant_data_to_invalid_length() { 461 ConstantData::from([1, 2].as_ref()).expand_to(1); 462 } 463 464 #[test] parse_constant_data_and_restringify()465 fn parse_constant_data_and_restringify() { 466 // Verify that parsing of `from` succeeds and stringifies to `to`. 467 fn parse_ok(from: &str, to: &str) { 468 let parsed = from.parse::<ConstantData>().unwrap(); 469 assert_eq!(parsed.to_string(), to); 470 } 471 472 // Verify that parsing of `from` fails with `error_msg`. 473 fn parse_err(from: &str, error_msg: &str) { 474 let parsed = from.parse::<ConstantData>(); 475 assert!( 476 parsed.is_err(), 477 "Expected a parse error but parsing succeeded: {}", 478 from 479 ); 480 assert_eq!(parsed.err().unwrap(), error_msg); 481 } 482 483 parse_ok("0x00", "0x00"); 484 parse_ok("0x00000042", "0x00000042"); 485 parse_ok( 486 "0x0102030405060708090a0b0c0d0e0f00", 487 "0x0102030405060708090a0b0c0d0e0f00", 488 ); 489 parse_ok("0x_0000_0043_21", "0x0000004321"); 490 491 parse_err("", "Expected a hexadecimal string, e.g. 0x1234"); 492 parse_err("0x", "Expected a hexadecimal string, e.g. 0x1234"); 493 parse_err( 494 "0x042", 495 "Hexadecimal string must have an even number of digits", 496 ); 497 parse_err( 498 "0x00000000000000000000000000000000000000000000000000", 499 "Hexadecimal string has too many digits to fit in a 128-bit vector", 500 ); 501 parse_err("0xrstu", "Unable to parse as hexadecimal"); 502 parse_err("0x__", "Hexadecimal string must have some digits"); 503 } 504 505 #[test] verify_stored_bytes_in_constant_data()506 fn verify_stored_bytes_in_constant_data() { 507 assert_eq!("0x01".parse::<ConstantData>().unwrap().into_vec(), [1]); 508 assert_eq!(ConstantData::from([1, 0].as_ref()).0, [1, 0]); 509 assert_eq!(ConstantData::from(vec![1, 0, 0, 0]).0, [1, 0, 0, 0]); 510 } 511 512 #[test] check_constant_data_endianness_as_uimm128()513 fn check_constant_data_endianness_as_uimm128() { 514 fn parse_to_uimm128(from: &str) -> Vec<u8> { 515 from.parse::<ConstantData>() 516 .unwrap() 517 .expand_to(16) 518 .into_vec() 519 } 520 521 assert_eq!( 522 parse_to_uimm128("0x42"), 523 [0x42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 524 ); 525 assert_eq!( 526 parse_to_uimm128("0x00"), 527 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 528 ); 529 assert_eq!( 530 parse_to_uimm128("0x12345678"), 531 [0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 532 ); 533 assert_eq!( 534 parse_to_uimm128("0x1234_5678"), 535 [0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 536 ); 537 } 538 } 539