//! A library to help grow the stack when it runs out of space.
//!
//! This is an implementation of manually instrumented segmented stacks where points in a program's
//! control flow are annotated with "maybe grow the stack here". Each point of annotation indicates
//! how far away from the end of the stack it's allowed to be, plus the amount of stack to allocate
//! if it does reach the end.
//!
//! Once a program has reached the end of its stack, a temporary stack on the heap is allocated and
//! is switched to for the duration of a closure.
//!
//! For a set of lower-level primitives, consider the `psm` crate.
//!
//! # Examples
//!
//! ```
//! // Grow the stack if we are within the "red zone" of 32K, and if we do need
//! // to allocate a new stack, allocate 1MB of stack space.
//! //
//! // If we're already in bounds, just run the provided closure on the current stack.
//! stacker::maybe_grow(32 * 1024, 1024 * 1024, || {
//!     // guaranteed to have at least 32K of stack
//! });
//! ```

#![allow(improper_ctypes)]

#[macro_use]
extern crate cfg_if;
extern crate libc;
#[cfg(windows)]
extern crate winapi;
#[macro_use]
extern crate psm;

use std::cell::Cell;

/// Grows the call stack if necessary.
///
/// This function is intended to be called at manually instrumented points in a program where
/// recursion is known to happen quite a bit. This function will check to see if we're within
/// `red_zone` bytes of the end of the stack, and if so it will allocate a new stack of at least
/// `stack_size` bytes.
///
/// The closure `callback` is guaranteed to run on a stack with at least `red_zone` bytes, and it
/// will be run on the current stack if there's space available.
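///
/// # Example
///
/// A sketch of the intended usage pattern: a recursive walk that asks for more stack at each
/// level of recursion. The `Node` type and `depth` function are illustrative only, not part of
/// this crate.
///
/// ```
/// struct Node { children: Vec<Node> }
///
/// fn depth(node: &Node) -> usize {
///     // Within 32K of the end of the stack? Switch to a fresh 1MB stack before recursing.
///     stacker::maybe_grow(32 * 1024, 1024 * 1024, || {
///         1 + node.children.iter().map(depth).max().unwrap_or(0)
///     })
/// }
///
/// assert_eq!(depth(&Node { children: vec![] }), 1);
/// ```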
#[inline(always)]
pub fn maybe_grow<R, F: FnOnce() -> R>(red_zone: usize, stack_size: usize, callback: F) -> R {
    // if we can't guess the remaining stack (unsupported on some platforms) we immediately grow
    // the stack and then cache the new stack size (which we do know now, because we allocated it).
    let enough_space = match remaining_stack() {
        Some(remaining) => remaining >= red_zone,
        None => false,
    };
    if enough_space {
        callback()
    } else {
        grow(stack_size, callback)
    }
}

/// Always creates a new stack for the passed closure to run on.
/// The closure will still be on the same thread as the caller of `grow`.
/// This will allocate a new stack with at least `stack_size` bytes.
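///
/// # Example
///
/// A minimal sketch of calling `grow` directly; the closure body is a placeholder.
///
/// ```
/// // Unconditionally run the closure on a freshly allocated 1MB stack.
/// let answer = stacker::grow(1024 * 1024, || {
///     // deeply recursive work would go here
///     42
/// });
/// assert_eq!(answer, 42);
/// ```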
pub fn grow<R, F: FnOnce() -> R>(stack_size: usize, callback: F) -> R {
    // To avoid monomorphizing `_grow()` and everything it calls,
    // we convert the generic callback to a dynamic one.
    let mut opt_callback = Some(callback);
    let mut ret = None;
    let ret_ref = &mut ret;

    // This wrapper around `callback` achieves two things:
    // * It converts the `impl FnOnce` to a `dyn FnMut`.
    //   `dyn` because we want it to not be generic, and
    //   `FnMut` because we can't pass a `dyn FnOnce` around without boxing it.
    // * It eliminates the generic return value, by writing it to the stack of this function.
    //   Otherwise the closure would have to return an unsized value, which isn't possible.
    let dyn_callback: &mut dyn FnMut() = &mut || {
        let taken_callback = opt_callback.take().unwrap();
        *ret_ref = Some(taken_callback());
    };

    _grow(stack_size, dyn_callback);
    ret.unwrap()
}

/// Queries the amount of remaining stack as interpreted by this library.
///
/// This function will return the amount of stack space left, which is used
/// to determine whether a stack switch should be made or not.
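///
/// # Example
///
/// A sketch of checking the headroom by hand; the 64K threshold is arbitrary and only for
/// illustration.
///
/// ```
/// match stacker::remaining_stack() {
///     Some(remaining) if remaining > 64 * 1024 => {
///         // plenty of room left on the current stack
///     }
///     _ => {
///         // unknown or low: a caller might choose to `grow` here
///     }
/// }
/// ```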
pub fn remaining_stack() -> Option<usize> {
    let current_ptr = current_stack_ptr();
    get_stack_limit().map(|limit| current_ptr - limit)
}

psm_stack_information! (
    yes {
        fn current_stack_ptr() -> usize {
            psm::stack_pointer() as usize
        }
    }
    no {
        #[inline(always)]
        fn current_stack_ptr() -> usize {
            unsafe {
                let mut x = std::mem::MaybeUninit::<u8>::uninit();
                // Unlikely to be ever exercised. As a fallback we execute a volatile write to a
                // local (to hopefully defeat the optimisations that would make this local a static
                // global) and take its address. This way we get a very approximate address of the
                // current frame.
                x.as_mut_ptr().write_volatile(42);
                x.as_ptr() as usize
            }
        }
    }
);

thread_local! {
    static STACK_LIMIT: Cell<Option<usize>> = Cell::new(unsafe {
        guess_os_stack_limit()
    })
}

#[inline(always)]
fn get_stack_limit() -> Option<usize> {
    STACK_LIMIT.with(|s| s.get())
}

#[inline(always)]
#[allow(unused)]
fn set_stack_limit(l: Option<usize>) {
    STACK_LIMIT.with(|s| s.set(l))
}

psm_stack_manipulation! {
    yes {
        struct StackRestoreGuard {
            new_stack: *mut std::ffi::c_void,
            stack_bytes: usize,
            old_stack_limit: Option<usize>,
        }

        impl StackRestoreGuard {
            #[cfg(target_arch = "wasm32")]
            unsafe fn new(stack_bytes: usize, _page_size: usize) -> StackRestoreGuard {
                let layout = std::alloc::Layout::from_size_align(stack_bytes, 16).unwrap();
                let ptr = std::alloc::alloc(layout);
                assert!(!ptr.is_null(), "unable to allocate stack");
                StackRestoreGuard {
                    new_stack: ptr as *mut _,
                    stack_bytes,
                    old_stack_limit: get_stack_limit(),
                }
            }

            #[cfg(not(target_arch = "wasm32"))]
            unsafe fn new(stack_bytes: usize, page_size: usize) -> StackRestoreGuard {
                let new_stack = libc::mmap(
                    std::ptr::null_mut(),
                    stack_bytes,
                    libc::PROT_NONE,
                    libc::MAP_PRIVATE |
                    libc::MAP_ANON,
                    -1, // Some implementations assert fd = -1 if MAP_ANON is specified
                    0
                );
                if new_stack == libc::MAP_FAILED {
                    panic!("unable to allocate stack")
                }
                let guard = StackRestoreGuard {
                    new_stack,
                    stack_bytes,
                    old_stack_limit: get_stack_limit(),
                };
                let above_guard_page = new_stack.add(page_size);
                #[cfg(not(target_os = "openbsd"))]
                let result = libc::mprotect(
                    above_guard_page,
                    stack_bytes - page_size,
                    libc::PROT_READ | libc::PROT_WRITE
                );
                #[cfg(target_os = "openbsd")]
                let result = if libc::mmap(
                        above_guard_page,
                        stack_bytes - page_size,
                        libc::PROT_READ | libc::PROT_WRITE,
                        libc::MAP_FIXED | libc::MAP_PRIVATE | libc::MAP_ANON | libc::MAP_STACK,
                        -1,
                        0) == above_guard_page {
                    0
                } else {
                    -1
                };
                if result == -1 {
                    drop(guard);
                    panic!("unable to set stack permissions")
                }
                guard
            }
        }

        impl Drop for StackRestoreGuard {
            fn drop(&mut self) {
                #[cfg(target_arch = "wasm32")]
                unsafe {
                    std::alloc::dealloc(
                        self.new_stack as *mut u8,
                        std::alloc::Layout::from_size_align_unchecked(self.stack_bytes, 16),
                    );
                }
                #[cfg(not(target_arch = "wasm32"))]
                unsafe {
                    // FIXME: check the error code and decide what to do with it.
                    // Perhaps a debug_assertion?
                    libc::munmap(self.new_stack, self.stack_bytes);
                }
                set_stack_limit(self.old_stack_limit);
            }
        }

        fn _grow(stack_size: usize, callback: &mut dyn FnMut()) {
            // Calculate a number of pages we want to allocate for the new stack.
            // For maximum portability we want to produce a stack that is aligned to a page and has
            // a size that’s a multiple of page size. Furthermore we want to allocate two extra pages
            // for the stack guard. To achieve that we do our calculations in number of pages and
            // convert to bytes last.
            let page_size = page_size();
            let requested_pages = stack_size
                .checked_add(page_size - 1)
                .expect("unreasonably large stack requested") / page_size;
            let stack_pages = std::cmp::max(1, requested_pages) + 2;
            let stack_bytes = stack_pages.checked_mul(page_size)
                .expect("unreasonably large stack requested");
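            // As a worked example (assuming a 4096-byte page size): requesting 1MB of stack
            // gives requested_pages = 256, stack_pages = 258 and stack_bytes = 1056768, of
            // which the lowest page is left PROT_NONE by `StackRestoreGuard::new` and acts as
            // a guard page for the downward-growing stack.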

            // Next, there are a couple of approaches to how we allocate the new stack. We take the
            // most obvious path and use `mmap`. We also `mprotect` a guard page into our
            // allocation.
            //
            // We use a guard pattern to ensure we deallocate the allocated stack when we leave
            // this function and also try to uphold various safety invariants required by `psm`
            // (such as not unwinding from the callback we pass to it).
            //
            // Other than that this code has no meaningful gotchas.
            unsafe {
                let guard = StackRestoreGuard::new(stack_bytes, page_size);
                let above_guard_page = guard.new_stack.add(page_size);
                set_stack_limit(Some(above_guard_page as usize));
                let panic = psm::on_stack(above_guard_page as *mut _, stack_size, move || {
                    std::panic::catch_unwind(std::panic::AssertUnwindSafe(callback)).err()
                });
                drop(guard);
                if let Some(p) = panic {
                    std::panic::resume_unwind(p);
                }
            }
        }

        fn page_size() -> usize {
            // FIXME: consider caching the page size.
            #[cfg(not(target_arch = "wasm32"))]
            unsafe { libc::sysconf(libc::_SC_PAGE_SIZE) as usize }
            #[cfg(target_arch = "wasm32")]
            { 65536 }
        }
    }

    no {
        #[cfg(not(windows))]
        fn _grow(stack_size: usize, callback: &mut dyn FnMut()) {
            drop(stack_size);
            callback();
        }
    }
}

cfg_if! {
    if #[cfg(windows)] {
        use std::ptr;
        use std::io;

        use winapi::shared::basetsd::*;
        use winapi::shared::minwindef::{LPVOID, BOOL};
        use winapi::shared::ntdef::*;
        use winapi::um::fibersapi::*;
        use winapi::um::memoryapi::*;
        use winapi::um::processthreadsapi::*;
        use winapi::um::winbase::*;

        // Make sure the libstacker.a (implemented in C) is linked.
        // See https://github.com/rust-lang/rust/issues/65610
        #[link(name="stacker")]
        extern {
            fn __stacker_get_current_fiber() -> PVOID;
        }

        struct FiberInfo<F> {
            callback: std::mem::MaybeUninit<F>,
            panic: Option<Box<dyn std::any::Any + Send + 'static>>,
            parent_fiber: LPVOID,
        }

        unsafe extern "system" fn fiber_proc<F: FnOnce()>(data: LPVOID) {
            // This function is the entry point to our inner fiber, and as argument we get an
            // instance of `FiberInfo`. We will set up the "runtime" for the callback and execute
            // it.
            let data = &mut *(data as *mut FiberInfo<F>);
            let old_stack_limit = get_stack_limit();
            set_stack_limit(guess_os_stack_limit());
            let callback = data.callback.as_ptr();
            data.panic = std::panic::catch_unwind(std::panic::AssertUnwindSafe(callback.read())).err();

            // Restore to the previous fiber
            set_stack_limit(old_stack_limit);
            SwitchToFiber(data.parent_fiber);
            return;
        }

        fn _grow(stack_size: usize, callback: &mut dyn FnMut()) {
            // Fibers (or stackful coroutines) are the only official way to create new stacks on the
            // same thread on Windows. So in order to extend the stack we create a fiber and switch
            // to it so we can use its stack. After running `callback` within our fiber, we switch
            // back to the current stack and destroy the fiber and its associated stack.
            unsafe {
                let was_fiber = IsThreadAFiber() == TRUE as BOOL;
                let mut data = FiberInfo {
                    callback: std::mem::MaybeUninit::new(callback),
                    panic: None,
                    parent_fiber: {
                        if was_fiber {
                            // Get a handle to the current fiber. We need to use a C implementation
                            // for this as GetCurrentFiber is a header-only function.
                            __stacker_get_current_fiber()
                        } else {
                            // Convert the current thread to a fiber, so we are able to switch back
                            // to the current stack. Threads converted to fibers still act like
                            // regular threads, but they have associated fiber data. We later
                            // convert it back to a regular thread and free the fiber data.
                            ConvertThreadToFiber(ptr::null_mut())
                        }
                    },
                };

                if data.parent_fiber.is_null() {
                    panic!("unable to convert thread to fiber: {}", io::Error::last_os_error());
                }

                let fiber = CreateFiber(
                    stack_size as SIZE_T,
                    Some(fiber_proc::<&mut dyn FnMut()>),
                    &mut data as *mut FiberInfo<&mut dyn FnMut()> as *mut _,
                );
                if fiber.is_null() {
                    panic!("unable to allocate fiber: {}", io::Error::last_os_error());
                }

                // Switch to the fiber we created. This changes stacks and starts executing
                // fiber_proc on it. fiber_proc will run `callback` and then switch back to run the
                // next statement.
                SwitchToFiber(fiber);
                DeleteFiber(fiber);

                // Clean-up.
                if !was_fiber {
                    if ConvertFiberToThread() == 0 {
                        // FIXME: Perhaps should not panic here?
                        panic!("unable to convert back to thread: {}", io::Error::last_os_error());
                    }
                }
                if let Some(p) = data.panic {
                    std::panic::resume_unwind(p);
                }
            }
        }

        #[inline(always)]
        fn get_thread_stack_guarantee() -> usize {
            let min_guarantee = if cfg!(target_pointer_width = "32") {
                0x1000
            } else {
                0x2000
            };
            let mut stack_guarantee = 0;
            unsafe {
                // Read the current thread stack guarantee.
                // This is the stack reserved for stack overflow
                // exception handling.
                // This doesn't return the true value so we need
                // some further logic to calculate the real stack
                // guarantee. This logic is what is used on x86-32 and
                // x86-64 Windows 10. Other versions and platforms may differ.
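                // As a worked example (illustrative numbers only): if the call below reports a
                // guarantee of 0 on 64-bit Windows, the result is max(0, 0x2000) + 0x1000 =
                // 0x3000 bytes treated as reserved.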
                SetThreadStackGuarantee(&mut stack_guarantee)
            };
            std::cmp::max(stack_guarantee, min_guarantee) as usize + 0x1000
        }

        #[inline(always)]
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            // Query the allocation which contains our stack pointer in order
            // to discover the size of the stack
            //
            // FIXME: we could read stack base from the TIB, specifically the 3rd element of it.
            type QueryT = winapi::um::winnt::MEMORY_BASIC_INFORMATION;
            let mut mi = std::mem::MaybeUninit::<QueryT>::uninit();
            VirtualQuery(
                psm::stack_pointer() as *const _,
                mi.as_mut_ptr(),
                std::mem::size_of::<QueryT>() as SIZE_T,
            );
            Some(mi.assume_init().AllocationBase as usize + get_thread_stack_guarantee() + 0x1000)
        }
    } else if #[cfg(any(target_os = "linux", target_os="solaris", target_os = "netbsd"))] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            let mut attr = std::mem::MaybeUninit::<libc::pthread_attr_t>::uninit();
            assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);
            assert_eq!(libc::pthread_getattr_np(libc::pthread_self(),
                                                attr.as_mut_ptr()), 0);
            let mut stackaddr = std::ptr::null_mut();
            let mut stacksize = 0;
            assert_eq!(libc::pthread_attr_getstack(
                attr.as_ptr(), &mut stackaddr, &mut stacksize
            ), 0);
            assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0);
            Some(stackaddr as usize)
        }
    } else if #[cfg(any(target_os = "freebsd", target_os = "dragonfly"))] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            let mut attr = std::mem::MaybeUninit::<libc::pthread_attr_t>::uninit();
            assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);
            assert_eq!(libc::pthread_attr_get_np(libc::pthread_self(), attr.as_mut_ptr()), 0);
            let mut stackaddr = std::ptr::null_mut();
            let mut stacksize = 0;
            assert_eq!(libc::pthread_attr_getstack(
                attr.as_ptr(), &mut stackaddr, &mut stacksize
            ), 0);
            assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0);
            Some(stackaddr as usize)
        }
    } else if #[cfg(target_os = "openbsd")] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            let mut stackinfo = std::mem::MaybeUninit::<libc::stack_t>::uninit();
            assert_eq!(libc::pthread_stackseg_np(libc::pthread_self(), stackinfo.as_mut_ptr()), 0);
            Some(stackinfo.assume_init().ss_sp as usize - stackinfo.assume_init().ss_size)
        }
    } else if #[cfg(target_os = "macos")] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            Some(libc::pthread_get_stackaddr_np(libc::pthread_self()) as usize -
                libc::pthread_get_stacksize_np(libc::pthread_self()) as usize)
        }
    } else {
        // The fallback for other platforms is to always increase the stack if we're on
        // the root stack. After we've increased the stack once, we know the new stack
        // size and don't need this pessimization anymore.
        #[inline(always)]
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            None
        }
    }
}