1 //! Exception handling and stack unwinding for x64.
2 //!
3 //! Exception information is exposed via the [`ExceptionData`] structure. If present in a PE file,
4 //! it contains a list of [`RuntimeFunction`] entries that can be used to get [`UnwindInfo`] for a
5 //! particular code location.
6 //!
7 //! Unwind information contains a list of unwind codes which specify the operations that are
8 //! necessary to restore registers (including the stack pointer RSP) when unwinding out of a
9 //! function.
10 //!
11 //! Depending on where the instruction pointer lies, there are three strategies to unwind:
12 //!
13 //!  1. If the RIP is within an epilog, then control is leaving the function, there can be no
14 //!     exception handler associated with this exception for this function, and the effects of the
15 //!     epilog must be continued to compute the context of the caller function. To determine if the
16 //!     RIP is within an epilog, the code stream from RIP on is examined. If that code stream can be
17 //!     matched to the trailing portion of a legitimate epilog, then it's in an epilog, and the
18 //!     remaining portion of the epilog is simulated, with the context record updated as each
19 //!     instruction is processed. After this, step 1 is repeated.
20 //!
//!  2. If the RIP lies within the prolog, then control has not entered the function,
22 //!     there can be no exception handler associated with this exception for this function, and the
23 //!     effects of the prolog must be undone to compute the context of the caller function. The RIP
24 //!     is within the prolog if the distance from the function start to the RIP is less than or
25 //!     equal to the prolog size encoded in the unwind info. The effects of the prolog are unwound
26 //!     by scanning forward through the unwind codes array for the first entry with an offset less
27 //!     than or equal to the offset of the RIP from the function start, then undoing the effect of
28 //!     all remaining items in the unwind code array. Step 1 is then repeated.
29 //!
30 //!  3. If the RIP is not within a prolog or epilog and the function has an exception handler, then
31 //!     the language-specific handler is called. The handler scans its data and calls filter
32 //!     functions as appropriate. The language-specific handler can return that the exception was
33 //!     handled or that the search is to be continued. It can also initiate an unwind directly.
34 //!
35 //! For more information, see [x64 exception handling].
36 //!
37 //! [`ExceptionData`]: struct.ExceptionData.html
38 //! [`RuntimeFunction`]: struct.RuntimeFunction.html
39 //! [`UnwindInfo`]: struct.UnwindInfo.html
40 //! [x64 exception handling]: https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64?view=vs-2017
41 
42 use core::cmp::Ordering;
43 use core::fmt;
44 use core::iter::FusedIterator;
45 
46 use scroll::ctx::TryFromCtx;
47 use scroll::{self, Pread, Pwrite};
48 
49 use crate::error;
50 
51 use crate::pe::data_directories;
52 use crate::pe::section_table;
53 use crate::pe::utils;
54 
/// The function has an exception handler that should be called when looking for functions that need
/// to examine exceptions.
///
/// Like the other `UNW_FLAG_*` values, this is a bit mask. `UnwindInfo::parse` tests it against
/// the flags it extracts from the upper five bits of the first unwind info byte.
const UNW_FLAG_EHANDLER: u8 = 0x01;
/// The function has a termination handler that should be called when unwinding an exception.
const UNW_FLAG_UHANDLER: u8 = 0x02;
/// This unwind info structure is not the primary one for the procedure. Instead, the chained unwind
/// info entry is the contents of a previous `RUNTIME_FUNCTION` entry. If this flag is set, then the
/// `UNW_FLAG_EHANDLER` and `UNW_FLAG_UHANDLER` flags must be cleared. Also, the frame register and
/// fixed-stack allocation fields must have the same values as in the primary unwind info.
///
/// `UnwindInfo::parse` checks this flag first; when it is set, the handler flags are not
/// inspected at all.
const UNW_FLAG_CHAININFO: u8 = 0x04;
65 
// Unwind operation codes. The op code is the low nibble of the second byte of an unwind code
// slot and the "info" value is the high nibble of that byte. The notes on scaling below
// describe what the `UnwindCode` parser in this file does with the additional slots.

/// info == register number
const UWOP_PUSH_NONVOL: u8 = 0;
/// info == 0: alloc size / 8 in next slot; info == 1: unscaled alloc size in next 2 slots
///
/// Any other info value is rejected as malformed.
const UWOP_ALLOC_LARGE: u8 = 1;
/// info == size of allocation / 8 - 1
const UWOP_ALLOC_SMALL: u8 = 2;
/// no info, FP = RSP + UNWIND_INFO.FPRegOffset*16
const UWOP_SET_FPREG: u8 = 3;
/// info == register number, offset / 8 in next slot
const UWOP_SAVE_NONVOL: u8 = 4;
/// info == register number, unscaled offset in next 2 slots
const UWOP_SAVE_NONVOL_FAR: u8 = 5;
/// changes the structure of unwind codes to `struct Epilogue`.
/// (was UWOP_SAVE_XMM in version 1, but deprecated and removed)
///
/// The parser always consumes one additional slot for this code. For version 1 unwind info it
/// is decoded as an XMM save with the slot value scaled by 16, otherwise as an epilog marker.
const UWOP_EPILOG: u8 = 6;
/// reserved
/// (was UWOP_SAVE_XMM_FAR in version 1, but deprecated and removed)
///
/// The parser always consumes two additional slots for this code. For version 1 unwind info it
/// is decoded as a 128-bit XMM save with an unscaled offset, otherwise as a no-op.
const UWOP_SPARE_CODE: u8 = 7;
/// info == XMM reg number, offset / 16 in next slot
const UWOP_SAVE_XMM128: u8 = 8;
/// info == XMM reg number, unscaled offset in next 2 slots
const UWOP_SAVE_XMM128_FAR: u8 = 9;
/// info == 0: no error-code, 1: error-code
///
/// Any other info value is rejected as malformed.
const UWOP_PUSH_MACHFRAME: u8 = 10;
90 
/// Size of `RuntimeFunction` entries in bytes.
///
/// This matches the three `u32` fields of `RuntimeFunction`.
const RUNTIME_FUNCTION_SIZE: usize = 12;
/// Size of unwind code slots in bytes. Codes take 1 - 3 slots.
const UNWIND_CODE_SIZE: usize = 2;
95 
96 /// An unwind entry for a range of a function.
97 ///
98 /// Unwind information for this function can be loaded with [`ExceptionData::get_unwind_info`].
99 ///
100 /// [`ExceptionData::get_unwind_info`]: struct.ExceptionData.html#method.get_unwind_info
// The `Pread`/`Pwrite` derives generate the (de)serialization used to read entries from the
// function table; the three `u32` fields add up to `RUNTIME_FUNCTION_SIZE` bytes.
#[repr(C)]
#[derive(Copy, Clone, PartialEq, Default, Pread, Pwrite)]
pub struct RuntimeFunction {
    /// Function start address.
    ///
    /// `ExceptionData::find_function` compares this value against the image-relative address
    /// (RVA) it is searching for.
    pub begin_address: u32,
    /// Function end address.
    pub end_address: u32,
    /// Unwind info address.
    pub unwind_info_address: u32,
}
111 
112 impl fmt::Debug for RuntimeFunction {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result113     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
114         f.debug_struct("RuntimeFunction")
115             .field("begin_address", &format_args!("{:#x}", self.begin_address))
116             .field("end_address", &format_args!("{:#x}", self.end_address))
117             .field(
118                 "unwind_info_address",
119                 &format_args!("{:#x}", self.unwind_info_address),
120             )
121             .finish()
122     }
123 }
124 
/// Iterator over runtime function entries in [`ExceptionData`](struct.ExceptionData.html).
///
/// Every item is a `Result`: an entry that cannot be read is reported as an error, after which
/// the iterator is exhausted.
#[derive(Debug)]
pub struct RuntimeFunctionIterator<'a> {
    /// The bytes of the function table that have not been consumed yet.
    data: &'a [u8],
}
130 
131 impl Iterator for RuntimeFunctionIterator<'_> {
132     type Item = error::Result<RuntimeFunction>;
133 
next(&mut self) -> Option<Self::Item>134     fn next(&mut self) -> Option<Self::Item> {
135         if self.data.is_empty() {
136             return None;
137         }
138 
139         Some(match self.data.pread_with(0, scroll::LE) {
140             Ok(func) => {
141                 self.data = &self.data[RUNTIME_FUNCTION_SIZE..];
142                 Ok(func)
143             }
144             Err(error) => {
145                 self.data = &[];
146                 Err(error.into())
147             }
148         })
149     }
150 
size_hint(&self) -> (usize, Option<usize>)151     fn size_hint(&self) -> (usize, Option<usize>) {
152         let len = self.data.len() / RUNTIME_FUNCTION_SIZE;
153         (len, Some(len))
154     }
155 }
156 
// `next` returns `None` exactly when `data` is empty, and it only ever shrinks or clears `data`,
// so the iterator keeps returning `None` once it is exhausted.
impl FusedIterator for RuntimeFunctionIterator<'_> {}
// `size_hint` reports identical lower and upper bounds.
impl ExactSizeIterator for RuntimeFunctionIterator<'_> {}
159 
/// An x64 register used during unwinding.
///
/// The wrapped number selects the register; `Register::name` maps it to a name as follows:
///
///  - `0` - `15`: General purpose registers
///  - `16`: The instruction pointer (`$rip`)
///  - `17` - `32`: XMM registers
///
/// Any other number has no name and is rendered as an empty string.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct Register(pub u8);
166 
167 impl Register {
xmm(number: u8) -> Self168     fn xmm(number: u8) -> Self {
169         Register(number + 17)
170     }
171 
172     /// Returns the x64 register name.
name(self) -> &'static str173     pub fn name(self) -> &'static str {
174         match self.0 {
175             0 => "$rax",
176             1 => "$rcx",
177             2 => "$rdx",
178             3 => "$rbx",
179             4 => "$rsp",
180             5 => "$rbp",
181             6 => "$rsi",
182             7 => "$rdi",
183             8 => "$r8",
184             9 => "$r9",
185             10 => "$r10",
186             11 => "$r11",
187             12 => "$r12",
188             13 => "$r13",
189             14 => "$r14",
190             15 => "$r15",
191             16 => "$rip",
192             17 => "$xmm0",
193             18 => "$xmm1",
194             19 => "$xmm2",
195             20 => "$xmm3",
196             21 => "$xmm4",
197             22 => "$xmm5",
198             23 => "$xmm6",
199             24 => "$xmm7",
200             25 => "$xmm8",
201             26 => "$xmm9",
202             27 => "$xmm10",
203             28 => "$xmm11",
204             29 => "$xmm12",
205             30 => "$xmm13",
206             31 => "$xmm14",
207             32 => "$xmm15",
208             _ => "",
209         }
210     }
211 }
212 
/// An unsigned offset to a value in the local stack frame.
///
/// Which variant applies is decided per function: offsets are relative to RSP when the frame
/// register of the unwind info is register `0`, and relative to the frame pointer otherwise.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StackFrameOffset {
    /// Offset from the current RSP, that is, the lowest address of the fixed stack allocation.
    ///
    /// To restore this register, read the value at the given offset from the RSP.
    RSP(u32),

    /// Offset from the value of the frame pointer register.
    ///
    /// To restore this register, read the value at the given offset from the FP register, reduced
    /// by the `frame_register_offset` value specified in the `UnwindInfo` structure. By definition,
    /// the frame pointer register is any register other than RAX (`0`).
    FP(u32),
}
228 
229 impl StackFrameOffset {
with_ctx(offset: u32, ctx: UnwindOpContext) -> Self230     fn with_ctx(offset: u32, ctx: UnwindOpContext) -> Self {
231         match ctx.frame_register {
232             Register(0) => StackFrameOffset::RSP(offset),
233             Register(_) => StackFrameOffset::FP(offset),
234         }
235     }
236 }
237 
238 impl fmt::Display for Register {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result239     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
240         f.write_str(self.name())
241     }
242 }
243 
/// An unwind operation corresponding to code in the function prolog.
///
/// Unwind operations can be used to reverse the effects of the function prolog and restore register
/// values of parent stack frames that have been saved to the stack.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum UnwindOperation {
    /// Push a nonvolatile integer register, decrementing `RSP` by 8.
    PushNonVolatile(Register),

    /// Allocate a fixed-size area on the stack.
    ///
    /// The value is the size of the allocation in bytes.
    Alloc(u32),

    /// Establish the frame pointer register by setting the register to some offset of the current
    /// RSP. The use of an offset permits establishing a frame pointer that points to the middle of
    /// the fixed stack allocation, helping code density by allowing more accesses to use short
    /// instruction forms.
    SetFPRegister,

    /// Save a nonvolatile integer register on the stack using a MOV instead of a PUSH. This code is
    /// primarily used for shrink-wrapping, where a nonvolatile register is saved to the stack in a
    /// position that was previously allocated.
    SaveNonVolatile(Register, StackFrameOffset),

    /// Save the lower 64 bits of a nonvolatile XMM register on the stack.
    ///
    /// This operation is only produced for unwind info of version 1.
    SaveXMM(Register, StackFrameOffset),

    /// Describes the function epilog.
    ///
    /// This operation has been introduced with unwind info version 2 and is not implemented yet.
    Epilog,

    /// Save all 128 bits of a nonvolatile XMM register on the stack.
    SaveXMM128(Register, StackFrameOffset),

    /// Push a machine frame. This is used to record the effect of a hardware interrupt or
    /// exception. Depending on the error flag, this frame has two different layouts.
    ///
    /// This unwind code always appears in a dummy prolog, which is never actually executed but
    /// instead appears before the real entry point of an interrupt routine, and exists only to
    /// provide a place to simulate the push of a machine frame. This operation records that
    /// simulation, which indicates the machine has conceptually done this:
    ///
    ///  1. Pop RIP return address from top of stack into `temp`
    ///  2. Push `$ss`, old `$rsp`, `$rflags`, `$cs`, `temp`
    ///  3. If error flag is `true`, push the error code
    ///
    /// Without an error code, RSP was incremented by `40` and the following frame was pushed:
    ///
    /// Offset   | Value
    /// ---------|--------
    /// RSP + 32 | `$ss`
    /// RSP + 24 | old `$rsp`
    /// RSP + 16 | `$rflags`
    /// RSP +  8 | `$cs`
    /// RSP +  0 | `$rip`
    ///
    /// With an error code, RSP was incremented by `48` and the following frame was pushed:
    ///
    /// Offset   | Value
    /// ---------|--------
    /// RSP + 40 | `$ss`
    /// RSP + 32 | old `$rsp`
    /// RSP + 24 | `$rflags`
    /// RSP + 16 | `$cs`
    /// RSP +  8 | `$rip`
    /// RSP +  0 | error code
    PushMachineFrame(bool),

    /// A reserved operation without effect.
    Noop,
}
315 
/// Context used to parse unwind operation.
///
/// Both values are copied from the enclosing `UnwindInfo` when an `UnwindCodeIterator` is
/// created.
#[derive(Clone, Copy, Debug, PartialEq)]
struct UnwindOpContext {
    /// Version of the unwind info.
    ///
    /// The parser uses it to decide how op codes `6` and `7` are decoded, since their meaning
    /// differs between version 1 and later versions.
    version: u8,

    /// The nonvolatile register used as the frame pointer of this function.
    ///
    /// If this register is non-zero, all stack frame offsets used in unwind operations are of type
    /// `StackFrameOffset::FP`. When loading these offsets, they have to be based off the value of
    /// this frame register instead of the conventional RSP. This allows the RSP to be modified.
    frame_register: Register,
}
329 
/// An unwind operation that is executed at a particular place in the function prolog.
///
/// In its encoded form, a code starts with one byte holding `code_offset` and one byte holding
/// the operation; depending on the operation, further slots with operands follow.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct UnwindCode {
    /// Offset of the corresponding instruction in the function prolog.
    ///
    /// To be precise, this is the offset from the beginning of the prolog of the end of the
    /// instruction that performs this operation, plus 1 (that is, the offset of the start of the
    /// next instruction).
    ///
    /// Unwind codes are ordered by this offset in reverse order, suitable for unwinding.
    pub code_offset: u8,

    /// The operation that was performed by the code in the prolog.
    pub operation: UnwindOperation,
}
345 
346 impl<'a> TryFromCtx<'a, UnwindOpContext> for UnwindCode {
347     type Error = error::Error;
348     #[inline]
try_from_ctx( bytes: &'a [u8], ctx: UnwindOpContext, ) -> Result<(Self, usize), Self::Error>349     fn try_from_ctx(
350         bytes: &'a [u8],
351         ctx: UnwindOpContext,
352     ) -> Result<(Self, usize), Self::Error> {
353         let mut read = 0;
354         let code_offset = bytes.gread_with::<u8>(&mut read, scroll::LE)?;
355         let operation = bytes.gread_with::<u8>(&mut read, scroll::LE)?;
356 
357         let operation_code = operation & 0xf;
358         let operation_info = operation >> 4;
359 
360         let operation = match operation_code {
361             self::UWOP_PUSH_NONVOL => {
362                 let register = Register(operation_info);
363                 UnwindOperation::PushNonVolatile(register)
364             }
365             self::UWOP_ALLOC_LARGE => {
366                 let offset = match operation_info {
367                     0 => u32::from(bytes.gread_with::<u16>(&mut read, scroll::LE)?) * 8,
368                     1 => bytes.gread_with::<u32>(&mut read, scroll::LE)?,
369                     i => {
370                         let msg = format!("invalid op info ({}) for UWOP_ALLOC_LARGE", i);
371                         return Err(error::Error::Malformed(msg));
372                     }
373                 };
374                 UnwindOperation::Alloc(offset)
375             }
376             self::UWOP_ALLOC_SMALL => {
377                 let offset = u32::from(operation_info) * 8 + 8;
378                 UnwindOperation::Alloc(offset)
379             }
380             self::UWOP_SET_FPREG => UnwindOperation::SetFPRegister,
381             self::UWOP_SAVE_NONVOL => {
382                 let register = Register(operation_info);
383                 let offset = u32::from(bytes.gread_with::<u16>(&mut read, scroll::LE)?) * 8;
384                 UnwindOperation::SaveNonVolatile(register, StackFrameOffset::with_ctx(offset, ctx))
385             }
386             self::UWOP_SAVE_NONVOL_FAR => {
387                 let register = Register(operation_info);
388                 let offset = bytes.gread_with::<u32>(&mut read, scroll::LE)?;
389                 UnwindOperation::SaveNonVolatile(register, StackFrameOffset::with_ctx(offset, ctx))
390             }
391             self::UWOP_EPILOG => {
392                 let data = u32::from(bytes.gread_with::<u16>(&mut read, scroll::LE)?) * 16;
393                 if ctx.version == 1 {
394                     let register = Register::xmm(operation_info);
395                     UnwindOperation::SaveXMM(register, StackFrameOffset::with_ctx(data, ctx))
396                 } else {
397                     // TODO: See https://weekly-geekly.github.io/articles/322956/index.html
398                     UnwindOperation::Epilog
399                 }
400             }
401             self::UWOP_SPARE_CODE => {
402                 let data = bytes.gread_with::<u32>(&mut read, scroll::LE)?;
403                 if ctx.version == 1 {
404                     let register = Register::xmm(operation_info);
405                     UnwindOperation::SaveXMM128(register, StackFrameOffset::with_ctx(data, ctx))
406                 } else {
407                     UnwindOperation::Noop
408                 }
409             }
410             self::UWOP_SAVE_XMM128 => {
411                 let register = Register::xmm(operation_info);
412                 let offset = u32::from(bytes.gread_with::<u16>(&mut read, scroll::LE)?) * 16;
413                 UnwindOperation::SaveXMM128(register, StackFrameOffset::with_ctx(offset, ctx))
414             }
415             self::UWOP_SAVE_XMM128_FAR => {
416                 let register = Register::xmm(operation_info);
417                 let offset = bytes.gread_with::<u32>(&mut read, scroll::LE)?;
418                 UnwindOperation::SaveXMM128(register, StackFrameOffset::with_ctx(offset, ctx))
419             }
420             self::UWOP_PUSH_MACHFRAME => {
421                 let is_error = match operation_info {
422                     0 => false,
423                     1 => true,
424                     i => {
425                         let msg = format!("invalid op info ({}) for UWOP_PUSH_MACHFRAME", i);
426                         return Err(error::Error::Malformed(msg));
427                     }
428                 };
429                 UnwindOperation::PushMachineFrame(is_error)
430             }
431             op => {
432                 let msg = format!("unknown unwind op code ({})", op);
433                 return Err(error::Error::Malformed(msg));
434             }
435         };
436 
437         let code = UnwindCode {
438             code_offset,
439             operation,
440         };
441 
442         Ok((code, read))
443     }
444 }
445 
/// An iterator over unwind codes for a function or part of a function, returned from
/// [`UnwindInfo`].
///
/// Codes are decoded on demand, so every item is a `Result`.
///
/// [`UnwindInfo`]: struct.UnwindInfo.html
#[derive(Clone, Debug)]
pub struct UnwindCodeIterator<'a> {
    /// The encoded unwind code array.
    bytes: &'a [u8],
    /// Position in `bytes` at which the next code is decoded.
    offset: usize,
    /// Unwind info version and frame register needed to decode the codes.
    context: UnwindOpContext,
}
456 
457 impl Iterator for UnwindCodeIterator<'_> {
458     type Item = error::Result<UnwindCode>;
459 
next(&mut self) -> Option<Self::Item>460     fn next(&mut self) -> Option<Self::Item> {
461         if self.offset >= self.bytes.len() {
462             return None;
463         }
464 
465         Some(self.bytes.gread_with(&mut self.offset, self.context))
466     }
467 
size_hint(&self) -> (usize, Option<usize>)468     fn size_hint(&self) -> (usize, Option<usize>) {
469         let upper = (self.bytes.len() - self.offset) / UNWIND_CODE_SIZE;
470         // the largest codes take up three slots
471         let lower = (upper + 3 - (upper % 3)) / 3;
472         (lower, Some(upper))
473     }
474 }
475 
// `next` returns `None` whenever the offset has reached the end of the code bytes.
impl FusedIterator for UnwindCodeIterator<'_> {}
477 
/// A language-specific handler that is called as part of the search for an exception handler or as
/// part of an unwind.
///
/// In both variants, the first value is the handler address and the second value is a slice of
/// the buffer the unwind info was parsed from.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum UnwindHandler<'a> {
    /// The image-relative address of an exception handler and its implementation-defined data.
    ExceptionHandler(u32, &'a [u8]),
    /// The image-relative address of a termination handler and its implementation-defined data.
    TerminationHandler(u32, &'a [u8]),
}
487 
/// Unwind information for a function or portion of a function.
///
/// The unwind info structure is used to record the effects a function has on the stack pointer and
/// where the nonvolatile registers are saved on the stack. The unwind codes can be enumerated with
/// [`unwind_codes`].
///
/// This unwind info might only be secondary information, and link to a [chained unwind handler].
/// For unwinding, this link shall be followed until the root unwind info record has been resolved.
///
/// [`unwind_codes`]: struct.UnwindInfo.html#method.unwind_codes
/// [chained unwind handler]: struct.UnwindInfo.html#structfield.chained_info
#[derive(Clone)]
pub struct UnwindInfo<'a> {
    /// Version of this unwind info.
    ///
    /// Parsing only accepts versions `1` and `2`.
    pub version: u8,

    /// Length of the function prolog in bytes.
    pub size_of_prolog: u8,

    /// The nonvolatile register used as the frame pointer of this function.
    ///
    /// If this register is non-zero, all stack frame offsets used in unwind operations are of type
    /// `StackFrameOffset::FP`. When loading these offsets, they have to be based off the value of
    /// this frame register instead of the conventional RSP. This allows the RSP to be modified.
    pub frame_register: Register,

    /// Offset from RSP that is applied to the FP register when it is established.
    ///
    /// When loading offsets of type `StackFrameOffset::FP` from the stack, this offset has to be
    /// subtracted before loading the value since the actual RSP was lower by that amount in the
    /// prolog.
    ///
    /// The value is in bytes: parsing multiplies the encoded 4-bit value by 16.
    pub frame_register_offset: u32,

    /// A record pointing to chained unwind information.
    ///
    /// If chained unwind info is present, then this unwind info is a secondary one and the linked
    /// unwind info contains primary information. Chained info is useful in two situations. First,
    /// it is used for noncontiguous code segments. Second, this mechanism is sometimes used to
    /// group volatile register saves.
    ///
    /// The referenced unwind info can itself specify chained unwind information, until it arrives
    /// at the root unwind info. Generally, the entire chain should be considered when unwinding.
    pub chained_info: Option<RuntimeFunction>,

    /// An exception or termination handler called as part of the unwind.
    ///
    /// Parsing never sets this together with `chained_info`.
    pub handler: Option<UnwindHandler<'a>>,

    /// A list of unwind codes, sorted descending by code offset.
    ///
    /// The codes are kept in their encoded form and are only decoded when iterating them.
    code_bytes: &'a [u8],
}
538 
539 impl<'a> UnwindInfo<'a> {
540     /// Parses unwind information from the image at the given offset.
parse(bytes: &'a [u8], mut offset: usize) -> error::Result<Self>541     pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<Self> {
542         // Read the version and flags fields, which are combined into a single byte.
543         let version_flags: u8 = bytes.gread_with(&mut offset, scroll::LE)?;
544         let version = version_flags & 0b111;
545         let flags = version_flags >> 3;
546 
547         if version < 1 || version > 2 {
548             let msg = format!("unsupported unwind code version ({})", version);
549             return Err(error::Error::Malformed(msg));
550         }
551 
552         let size_of_prolog = bytes.gread_with::<u8>(&mut offset, scroll::LE)?;
553         let count_of_codes = bytes.gread_with::<u8>(&mut offset, scroll::LE)?;
554 
555         // Parse the frame register and frame register offset values, that are combined into a
556         // single byte.
557         let frame_info = bytes.gread_with::<u8>(&mut offset, scroll::LE)?;
558         // If nonzero, then the function uses a frame pointer (FP), and this field is the number
559         // of the nonvolatile register used as the frame pointer. The zero register value does
560         // not need special casing since it will not be referenced by the unwind operations.
561         let frame_register = Register(frame_info & 0xf);
562         // The the scaled offset from RSP that is applied to the FP register when it's
563         // established. The actual FP register is set to RSP + 16 * this number, allowing
564         // offsets from 0 to 240.
565         let frame_register_offset = u32::from((frame_info >> 4) * 16);
566 
567         // An array of items that explains the effect of the prolog on the nonvolatile registers and
568         // RSP. Some unwind codes require more than one slot in the array.
569         let codes_size = count_of_codes as usize * UNWIND_CODE_SIZE;
570         let code_bytes = bytes.gread_with(&mut offset, codes_size)?;
571 
572         // For alignment purposes, the codes array always has an even number of entries, and the
573         // final entry is potentially unused. In that case, the array is one longer than indicated
574         // by the count of unwind codes field.
575         if count_of_codes % 2 != 0 {
576             offset += 2;
577         }
578         debug_assert!(offset % 4 == 0);
579 
580         let mut chained_info = None;
581         let mut handler = None;
582 
583         // If flag UNW_FLAG_CHAININFO is set then the UNWIND_INFO structure ends with three UWORDs.
584         // These UWORDs represent the RUNTIME_FUNCTION information for the function of the chained
585         // unwind.
586         if flags & UNW_FLAG_CHAININFO != 0 {
587             chained_info = Some(bytes.gread_with(&mut offset, scroll::LE)?);
588 
589         // The relative address of the language-specific handler is present in the UNWIND_INFO
590         // whenever flags UNW_FLAG_EHANDLER or UNW_FLAG_UHANDLER are set. The language-specific
591         // handler is called as part of the search for an exception handler or as part of an unwind.
592         } else if flags & (UNW_FLAG_EHANDLER | UNW_FLAG_UHANDLER) != 0 {
593             let offset = bytes.gread_with::<u32>(&mut offset, scroll::LE)? as usize;
594             let data = &bytes[offset..];
595 
596             handler = Some(if flags & UNW_FLAG_EHANDLER != 0 {
597                 UnwindHandler::ExceptionHandler(offset as u32, data)
598             } else {
599                 UnwindHandler::TerminationHandler(offset as u32, data)
600             });
601         }
602 
603         Ok(UnwindInfo {
604             version,
605             size_of_prolog,
606             frame_register,
607             frame_register_offset,
608             chained_info,
609             handler,
610             code_bytes,
611         })
612     }
613 
614     /// Returns an iterator over unwind codes in this unwind info.
615     ///
616     /// Unwind codes are iterated in descending `code_offset` order suitable for unwinding. If the
617     /// optional [`chained_info`] is present, codes of that unwind info should be interpreted
618     /// immediately afterwards.
unwind_codes(&self) -> UnwindCodeIterator<'a>619     pub fn unwind_codes(&self) -> UnwindCodeIterator<'a> {
620         UnwindCodeIterator {
621             bytes: self.code_bytes,
622             offset: 0,
623             context: UnwindOpContext {
624                 version: self.version,
625                 frame_register: self.frame_register,
626             },
627         }
628     }
629 }
630 
631 impl fmt::Debug for UnwindInfo<'_> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result632     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
633         let count_of_codes = self.code_bytes.len() / UNWIND_CODE_SIZE;
634 
635         f.debug_struct("UnwindInfo")
636             .field("version", &self.version)
637             .field("size_of_prolog", &self.size_of_prolog)
638             .field("frame_register", &self.frame_register)
639             .field("frame_register_offset", &self.frame_register_offset)
640             .field("count_of_codes", &count_of_codes)
641             .field("chained_info", &self.chained_info)
642             .field("handler", &self.handler)
643             .finish()
644     }
645 }
646 
647 impl<'a> IntoIterator for &'_ UnwindInfo<'a> {
648     type Item = error::Result<UnwindCode>;
649     type IntoIter = UnwindCodeIterator<'a>;
650 
651     #[inline]
into_iter(self) -> Self::IntoIter652     fn into_iter(self) -> Self::IntoIter {
653         self.unwind_codes()
654     }
655 }
656 
/// Exception handling and stack unwind information for functions in the image.
pub struct ExceptionData<'a> {
    /// The buffer that was passed to `parse`.
    bytes: &'a [u8],
    /// Offset of the function table within `bytes`, resolved from the virtual address of the
    /// exception data directory. `parse` only accepts offsets that are a multiple of 4.
    offset: usize,
    /// Size of the function table in bytes, taken from the exception data directory. `parse`
    /// only accepts sizes that are a multiple of `RUNTIME_FUNCTION_SIZE`.
    size: usize,
    /// The file alignment that was passed to `parse`.
    file_alignment: u32,
}
664 
665 impl<'a> ExceptionData<'a> {
666     /// Parses exception data from the image at the given offset.
parse( bytes: &'a [u8], directory: data_directories::DataDirectory, sections: &[section_table::SectionTable], file_alignment: u32, ) -> error::Result<Self>667     pub fn parse(
668         bytes: &'a [u8],
669         directory: data_directories::DataDirectory,
670         sections: &[section_table::SectionTable],
671         file_alignment: u32,
672     ) -> error::Result<Self> {
673         let size = directory.size as usize;
674 
675         if size % RUNTIME_FUNCTION_SIZE != 0 {
676             return Err(error::Error::from(scroll::Error::BadInput {
677                 size,
678                 msg: "invalid exception directory table size",
679             }));
680         }
681 
682         let rva = directory.virtual_address as usize;
683         let offset = utils::find_offset(rva, sections, file_alignment).ok_or_else(|| {
684             error::Error::Malformed(format!("cannot map exception_rva ({:#x}) into offset", rva))
685         })?;
686 
687         if offset % 4 != 0 {
688             return Err(error::Error::from(scroll::Error::BadOffset(offset)));
689         }
690 
691         Ok(ExceptionData {
692             bytes,
693             offset,
694             size,
695             file_alignment,
696         })
697     }
698 
699     /// The number of function entries described by this exception data.
len(&self) -> usize700     pub fn len(&self) -> usize {
701         self.size / RUNTIME_FUNCTION_SIZE
702     }
703 
704     /// Indicating whether there are functions in this entry.
is_empty(&self) -> bool705     pub fn is_empty(&self) -> bool {
706         self.len() == 0
707     }
708 
709     /// Iterates all function entries in order of their code offset.
710     ///
711     /// To search for a function by relative instruction address, use [`find_function`]. To resolve
712     /// unwind information, use [`get_unwind_info`].
713     ///
714     /// [`find_function`]: struct.ExceptionData.html#method.find_function
715     /// [`get_unwind_info`]: struct.ExceptionData.html#method.get_unwind_info
functions(&self) -> RuntimeFunctionIterator<'a>716     pub fn functions(&self) -> RuntimeFunctionIterator<'a> {
717         RuntimeFunctionIterator {
718             data: &self.bytes[self.offset..self.offset + self.size],
719         }
720     }
721 
722     /// Returns the function at the given index.
get_function(&self, index: usize) -> error::Result<RuntimeFunction>723     pub fn get_function(&self, index: usize) -> error::Result<RuntimeFunction> {
724         self.get_function_by_offset(self.offset + index * RUNTIME_FUNCTION_SIZE)
725     }
726 
727     /// Performs a binary search to find a function entry covering the given RVA relative to the
728     /// image.
find_function(&self, rva: u32) -> error::Result<Option<RuntimeFunction>>729     pub fn find_function(&self, rva: u32) -> error::Result<Option<RuntimeFunction>> {
730         // NB: Binary search implementation copied from std::slice::binary_search_by and adapted.
731         // Theoretically, there should be nothing that causes parsing runtime functions to fail and
732         // all access to the bytes buffer is guaranteed to be in range. However, since all other
733         // functions also return Results, this is much more ergonomic here.
734 
735         let mut size = self.len();
736         if size == 0 {
737             return Ok(None);
738         }
739 
740         let mut base = 0;
741         while size > 1 {
742             let half = size / 2;
743             let mid = base + half;
744             let offset = self.offset + mid * RUNTIME_FUNCTION_SIZE;
745             let addr = self.bytes.pread_with::<u32>(offset, scroll::LE)?;
746             base = if addr > rva { base } else { mid };
747             size -= half;
748         }
749 
750         let offset = self.offset + base * RUNTIME_FUNCTION_SIZE;
751         let addr = self.bytes.pread_with::<u32>(offset, scroll::LE)?;
752         let function = match addr.cmp(&rva) {
753             Ordering::Less | Ordering::Equal => self.get_function(base)?,
754             Ordering::Greater if base == 0 => return Ok(None),
755             Ordering::Greater => self.get_function(base - 1)?,
756         };
757 
758         if function.end_address > rva {
759             Ok(Some(function))
760         } else {
761             Ok(None)
762         }
763     }
764 
765     /// Resolves unwind information for the given function entry.
get_unwind_info( &self, mut function: RuntimeFunction, sections: &[section_table::SectionTable], ) -> error::Result<UnwindInfo<'a>>766     pub fn get_unwind_info(
767         &self,
768         mut function: RuntimeFunction,
769         sections: &[section_table::SectionTable],
770     ) -> error::Result<UnwindInfo<'a>> {
771         while function.unwind_info_address % 2 != 0 {
772             let rva = (function.unwind_info_address & !1) as usize;
773             function = self.get_function_by_rva(rva, sections)?;
774         }
775 
776         let rva = function.unwind_info_address as usize;
777         let offset = utils::find_offset(rva, sections, self.file_alignment).ok_or_else(|| {
778             error::Error::Malformed(format!("cannot map unwind rva ({:#x}) into offset", rva))
779         })?;
780 
781         UnwindInfo::parse(self.bytes, offset)
782     }
783 
get_function_by_rva( &self, rva: usize, sections: &[section_table::SectionTable], ) -> error::Result<RuntimeFunction>784     fn get_function_by_rva(
785         &self,
786         rva: usize,
787         sections: &[section_table::SectionTable],
788     ) -> error::Result<RuntimeFunction> {
789         let offset = utils::find_offset(rva, sections, self.file_alignment).ok_or_else(|| {
790             error::Error::Malformed(format!("cannot map exception rva ({:#x}) into offset", rva))
791         })?;
792 
793         self.get_function_by_offset(offset)
794     }
795 
796     #[inline]
get_function_by_offset(&self, offset: usize) -> error::Result<RuntimeFunction>797     fn get_function_by_offset(&self, offset: usize) -> error::Result<RuntimeFunction> {
798         debug_assert!((offset - self.offset) % RUNTIME_FUNCTION_SIZE == 0);
799         debug_assert!(offset < self.offset + self.size);
800 
801         Ok(self.bytes.pread_with(offset, scroll::LE)?)
802     }
803 }
804 
805 impl fmt::Debug for ExceptionData<'_> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result806     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
807         f.debug_struct("ExceptionData")
808             .field("file_alignment", &self.file_alignment)
809             .field("offset", &format_args!("{:#x}", self.offset))
810             .field("size", &format_args!("{:#x}", self.size))
811             .field("len", &self.len())
812             .finish()
813     }
814 }
815 
/// Allows iterating over a borrowed `ExceptionData` directly, for example in a `for` loop.
///
/// Each item is the result of reading one runtime function entry.
impl<'a> IntoIterator for &'_ ExceptionData<'a> {
    type Item = error::Result<RuntimeFunction>;
    type IntoIter = RuntimeFunctionIterator<'a>;

    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        // Same iterator as the one returned by `ExceptionData::functions`.
        self.functions()
    }
}
825 
826 #[cfg(test)]
827 mod tests {
828     use super::*;
829 
830     #[test]
test_size_of_runtime_function()831     fn test_size_of_runtime_function() {
832         assert_eq!(
833             std::mem::size_of::<RuntimeFunction>(),
834             RUNTIME_FUNCTION_SIZE
835         );
836     }
837 
838     // Tests disabled until there is a solution for handling binary test data
839     // See https://github.com/m4b/goblin/issues/185
840 
841     // macro_rules! microsoft_symbol {
842     //     ($name:literal, $id:literal) => {{
843     //         use std::fs::File;
844     //         use std::path::Path;
845 
846     //         let path = Path::new(concat!("cache/", $name));
847     //         if !path.exists() {
848     //             let url = format!(
849     //                 "https://msdl.microsoft.com/download/symbols/{}/{}/{}",
850     //                 $name, $id, $name
851     //             );
852 
853     //             let mut response = reqwest::get(&url).expect(concat!("get ", $name));
854     //             let mut target = File::create(path).expect(concat!("create ", $name));
855     //             response
856     //                 .copy_to(&mut target)
857     //                 .expect(concat!("download ", $name));
858     //         }
859 
860     //         std::fs::read(path).expect(concat!("open ", $name))
861     //     }};
862     // }
863 
864     // lazy_static::lazy_static! {
865     //     static ref PE_DATA: Vec<u8> = microsoft_symbol!("WSHTCPIP.DLL", "4a5be0b77000");
866     // }
867 
868     // #[test]
869     // fn test_parse() {
870     //     let pe = PE::parse(&PE_DATA).expect("parse PE");
871     //     let exception_data = pe.exception_data.expect("get exception data");
872 
873     //     assert_eq!(exception_data.len(), 19);
874     //     assert!(!exception_data.is_empty());
875     // }
876 
877     // #[test]
878     // fn test_iter_functions() {
879     //     let pe = PE::parse(&PE_DATA).expect("parse PE");
880     //     let exception_data = pe.exception_data.expect("get exception data");
881 
882     //     let functions: Vec<RuntimeFunction> = exception_data
883     //         .functions()
884     //         .map(|result| result.expect("parse runtime function"))
885     //         .collect();
886 
887     //     assert_eq!(functions.len(), 19);
888 
889     //     let expected = RuntimeFunction {
890     //         begin_address: 0x1355,
891     //         end_address: 0x1420,
892     //         unwind_info_address: 0x4019,
893     //     };
894 
895     //     assert_eq!(functions[4], expected);
896     // }
897 
898     // #[test]
899     // fn test_get_function() {
900     //     let pe = PE::parse(&PE_DATA).expect("parse PE");
901     //     let exception_data = pe.exception_data.expect("get exception data");
902 
903     //     let expected = RuntimeFunction {
904     //         begin_address: 0x1355,
905     //         end_address: 0x1420,
906     //         unwind_info_address: 0x4019,
907     //     };
908 
909     //     assert_eq!(
910     //         exception_data.get_function(4).expect("find function"),
911     //         expected
912     //     );
913     // }
914 
915     // #[test]
916     // fn test_find_function() {
917     //     let pe = PE::parse(&PE_DATA).expect("parse PE");
918     //     let exception_data = pe.exception_data.expect("get exception data");
919 
920     //     let expected = RuntimeFunction {
921     //         begin_address: 0x1355,
922     //         end_address: 0x1420,
923     //         unwind_info_address: 0x4019,
924     //     };
925 
926     //     assert_eq!(
927     //         exception_data.find_function(0x1400).expect("find function"),
928     //         Some(expected)
929     //     );
930     // }
931 
932     // #[test]
933     // fn test_find_function_none() {
934     //     let pe = PE::parse(&PE_DATA).expect("parse PE");
935     //     let exception_data = pe.exception_data.expect("get exception data");
936 
937     //     // 0x1d00 is the end address of the last function.
938 
939     //     assert_eq!(
940     //         exception_data.find_function(0x1d00).expect("find function"),
941     //         None
942     //     );
943     // }
944 
945     // #[test]
946     // fn test_get_unwind_info() {
947     //     let pe = PE::parse(&PE_DATA).expect("parse PE");
948     //     let exception_data = pe.exception_data.expect("get exception data");
949 
950     //     // runtime function #0 directly refers to unwind info
951     //     let rt_function = RuntimeFunction {
952     //         begin_address: 0x1010,
953     //         end_address: 0x1090,
954     //         unwind_info_address: 0x25d8,
955     //     };
956 
957     //     let unwind_info = exception_data
958     //         .get_unwind_info(rt_function, &pe.sections)
959     //         .expect("get unwind info");
960 
961     //     // Unwind codes just used to assert that the right unwind info was resolved
962     //     let expected = &[4, 98];
963 
964     //     assert_eq!(unwind_info.code_bytes, expected);
965     // }
966 
967     // #[test]
968     // fn test_get_unwind_info_redirect() {
969     //     let pe = PE::parse(&PE_DATA).expect("parse PE");
970     //     let exception_data = pe.exception_data.expect("get exception data");
971 
972     //     // runtime function #4 has a redirect (unwind_info_address & 1).
973     //     let rt_function = RuntimeFunction {
974     //         begin_address: 0x1355,
975     //         end_address: 0x1420,
976     //         unwind_info_address: 0x4019,
977     //     };
978 
979     //     let unwind_info = exception_data
980     //         .get_unwind_info(rt_function, &pe.sections)
981     //         .expect("get unwind info");
982 
983     //     // Unwind codes just used to assert that the right unwind info was resolved
984     //     let expected = &[
985     //         28, 100, 15, 0, 28, 84, 14, 0, 28, 52, 12, 0, 28, 82, 24, 240, 22, 224, 20, 208, 18,
986     //         192, 16, 112,
987     //     ];
988 
989     //     assert_eq!(unwind_info.code_bytes, expected);
990     // }
991 
992     #[test]
test_iter_unwind_codes()993     fn test_iter_unwind_codes() {
994         let unwind_info = UnwindInfo {
995             version: 1,
996             size_of_prolog: 4,
997             frame_register: Register(0),
998             frame_register_offset: 0,
999             chained_info: None,
1000             handler: None,
1001             code_bytes: &[4, 98],
1002         };
1003 
1004         let unwind_codes: Vec<UnwindCode> = unwind_info
1005             .unwind_codes()
1006             .map(|result| result.expect("parse unwind code"))
1007             .collect();
1008 
1009         assert_eq!(unwind_codes.len(), 1);
1010 
1011         let expected = UnwindCode {
1012             code_offset: 4,
1013             operation: UnwindOperation::Alloc(56),
1014         };
1015 
1016         assert_eq!(unwind_codes[0], expected);
1017     }
1018 }
1019