//! A library to help grow the stack when it runs out of space.
//!
//! This is an implementation of manually instrumented segmented stacks where points in a program's
//! control flow are annotated with "maybe grow the stack here". Each point of annotation indicates
//! how far away from the end of the stack it's allowed to be, plus the amount of stack to allocate
//! if it does reach the end.
//!
//! Once a program has reached the end of its stack, a temporary stack on the heap is allocated and
//! is switched to for the duration of a closure.
//!
//! For a set of lower-level primitives, consider the `psm` crate.
//!
//! # Examples
//!
//! ```
//! // Grow the stack if we are within the "red zone" of 32K, and if we do need
//! // a new stack, allocate 1MB of stack space.
//! //
//! // If we're already in bounds, just run the provided closure on the current stack.
//! stacker::maybe_grow(32 * 1024, 1024 * 1024, || {
//!     // guaranteed to have at least 32K of stack
//! });
//! ```

#![allow(improper_ctypes)]

#[macro_use]
extern crate cfg_if;
extern crate libc;
#[cfg(windows)]
extern crate winapi;
#[macro_use]
extern crate psm;

use std::cell::Cell;

/// Grows the call stack if necessary.
///
/// This function is intended to be called at manually instrumented points in a program where
/// recursion is known to happen quite a bit. This function will check to see if we're within
/// `red_zone` bytes of the end of the stack, and if so it will allocate a new stack of at least
/// `stack_size` bytes.
///
/// The closure `callback` is guaranteed to run on a stack with at least `red_zone` bytes, and it
/// will be run on the current stack if there's space available.
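///
/// # Example
///
/// A minimal sketch of the typical pattern: wrap the body of a recursive function so the
/// check runs before each level of recursion. The function and sizes here are illustrative,
/// not prescriptive.
///
/// ```
/// fn count_down(n: u64) -> u64 {
///     stacker::maybe_grow(32 * 1024, 1024 * 1024, || {
///         if n == 0 { 0 } else { count_down(n - 1) + 1 }
///     })
/// }
/// assert_eq!(count_down(1000), 1000);
/// ```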
#[inline(always)]
pub fn maybe_grow<R, F: FnOnce() -> R>(red_zone: usize, stack_size: usize, callback: F) -> R {
    // If we can't guess the remaining stack (unsupported on some platforms) we immediately grow
    // the stack and then cache the new stack size (which we do know now, because we allocated it).
    let enough_space = match remaining_stack() {
        Some(remaining) => remaining >= red_zone,
        None => false,
    };
    if enough_space {
        callback()
    } else {
        grow(stack_size, callback)
    }
}

/// Always creates a new stack for the passed closure to run on.
/// The closure will still be on the same thread as the caller of `grow`.
/// This will allocate a new stack with at least `stack_size` bytes.
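///
/// # Example
///
/// A minimal sketch of running a closure on a freshly allocated stack and getting its
/// result back; the 4MB size is an arbitrary illustrative choice.
///
/// ```
/// let answer = stacker::grow(4 * 1024 * 1024, || {
///     // This closure runs on the new stack.
///     40 + 2
/// });
/// assert_eq!(answer, 42);
/// ```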
pub fn grow<R, F: FnOnce() -> R>(stack_size: usize, callback: F) -> R {
    // To avoid monomorphizing `_grow()` and everything it calls,
    // we convert the generic callback to a dynamic one.
    let mut opt_callback = Some(callback);
    let mut ret = None;
    let ret_ref = &mut ret;

    // This wrapper around `callback` achieves two things:
    // * It converts the `impl FnOnce` to a `dyn FnMut`.
    //   `dyn` because we want it to not be generic, and
    //   `FnMut` because we can't pass a `dyn FnOnce` around without boxing it.
    // * It eliminates the generic return value, by writing it to the stack of this function.
    //   Otherwise the closure would have to return an unsized value, which isn't possible.
    let dyn_callback: &mut dyn FnMut() = &mut || {
        let taken_callback = opt_callback.take().unwrap();
        *ret_ref = Some(taken_callback());
    };

    _grow(stack_size, dyn_callback);
    ret.unwrap()
}

/// Queries the amount of remaining stack as interpreted by this library.
///
/// This function will return the amount of stack space left, which will be used
/// to determine whether a stack switch should be made or not.
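///
/// # Example
///
/// A small sketch of inspecting the estimate; platforms where the limit cannot be guessed
/// yield `None`.
///
/// ```
/// if let Some(remaining) = stacker::remaining_stack() {
///     println!("roughly {} bytes of stack remain", remaining);
/// }
/// ```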
pub fn remaining_stack() -> Option<usize> {
    let current_ptr = current_stack_ptr();
    get_stack_limit().map(|limit| current_ptr - limit)
}

psm_stack_information! (
    yes {
        fn current_stack_ptr() -> usize {
            psm::stack_pointer() as usize
        }
    }
    no {
        #[inline(always)]
        fn current_stack_ptr() -> usize {
            unsafe {
                let mut x = std::mem::MaybeUninit::<u8>::uninit();
                // Unlikely to be ever exercised. As a fallback we execute a volatile write to a
                // local (to hopefully defeat the optimisations that would make this local a static
                // global) and take its address. This way we get a very approximate address of the
                // current frame.
                x.as_mut_ptr().write_volatile(42);
                x.as_ptr() as usize
            }
        }
    }
);

thread_local! {
    static STACK_LIMIT: Cell<Option<usize>> = Cell::new(unsafe {
        guess_os_stack_limit()
    })
}

#[inline(always)]
fn get_stack_limit() -> Option<usize> {
    STACK_LIMIT.with(|s| s.get())
}

#[inline(always)]
#[allow(unused)]
fn set_stack_limit(l: Option<usize>) {
    STACK_LIMIT.with(|s| s.set(l))
}

psm_stack_manipulation! {
    yes {
        struct StackRestoreGuard {
            new_stack: *mut std::ffi::c_void,
            stack_bytes: usize,
            old_stack_limit: Option<usize>,
        }

        impl StackRestoreGuard {
            #[cfg(target_arch = "wasm32")]
            unsafe fn new(stack_bytes: usize, _page_size: usize) -> StackRestoreGuard {
                let layout = std::alloc::Layout::from_size_align(stack_bytes, 16).unwrap();
                let ptr = std::alloc::alloc(layout);
                assert!(!ptr.is_null(), "unable to allocate stack");
                StackRestoreGuard {
                    new_stack: ptr as *mut _,
                    stack_bytes,
                    old_stack_limit: get_stack_limit(),
                }
            }

            #[cfg(not(target_arch = "wasm32"))]
            unsafe fn new(stack_bytes: usize, page_size: usize) -> StackRestoreGuard {
                let new_stack = libc::mmap(
                    std::ptr::null_mut(),
                    stack_bytes,
                    libc::PROT_NONE,
                    libc::MAP_PRIVATE | libc::MAP_ANON,
                    -1, // Some implementations assert fd = -1 if MAP_ANON is specified
                    0,
                );
                if new_stack == libc::MAP_FAILED {
                    panic!("unable to allocate stack")
                }
                let guard = StackRestoreGuard {
                    new_stack,
                    stack_bytes,
                    old_stack_limit: get_stack_limit(),
                };
                let above_guard_page = new_stack.add(page_size);
                #[cfg(not(target_os = "openbsd"))]
                let result = libc::mprotect(
                    above_guard_page,
                    stack_bytes - page_size,
                    libc::PROT_READ | libc::PROT_WRITE,
                );
                #[cfg(target_os = "openbsd")]
                let result = if libc::mmap(
                    above_guard_page,
                    stack_bytes - page_size,
                    libc::PROT_READ | libc::PROT_WRITE,
                    libc::MAP_FIXED | libc::MAP_PRIVATE | libc::MAP_ANON | libc::MAP_STACK,
                    -1,
                    0,
                ) == above_guard_page {
                    0
                } else {
                    -1
                };
                if result == -1 {
                    drop(guard);
                    panic!("unable to set stack permissions")
                }
                guard
            }
        }

        impl Drop for StackRestoreGuard {
            fn drop(&mut self) {
                #[cfg(target_arch = "wasm32")]
                unsafe {
                    std::alloc::dealloc(
                        self.new_stack as *mut u8,
                        std::alloc::Layout::from_size_align_unchecked(self.stack_bytes, 16),
                    );
                }
                #[cfg(not(target_arch = "wasm32"))]
                unsafe {
                    // FIXME: check the error code and decide what to do with it.
                    // Perhaps a debug_assertion?
                    libc::munmap(self.new_stack, self.stack_bytes);
                }
                set_stack_limit(self.old_stack_limit);
            }
        }

        fn _grow(stack_size: usize, callback: &mut dyn FnMut()) {
            // Calculate the number of pages we want to allocate for the new stack.
            // For maximum portability we want to produce a stack that is aligned to a page and has
            // a size that’s a multiple of the page size. Furthermore we want to allocate two extra
            // pages for the stack guard. To achieve that we do our calculations in number of pages
            // and convert to bytes last.
            let page_size = page_size();
            let requested_pages = stack_size
                .checked_add(page_size - 1)
                .expect("unreasonably large stack requested") / page_size;
            let stack_pages = std::cmp::max(1, requested_pages) + 2;
            let stack_bytes = stack_pages.checked_mul(page_size)
                .expect("unreasonably large stack requested");
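
            // Worked example under an assumed 4 KiB page size (purely illustrative): a
            // request for 10_000 bytes rounds up to 3 usable pages, plus the 2 extra pages
            // reserved for the guard, giving stack_bytes = 5 * 4096 = 20_480.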

            // Next, there are a couple of approaches to how we allocate the new stack. We take the
            // most obvious path and use `mmap`. We also `mprotect` a guard page into our
            // allocation.
            //
            // We use a guard pattern to ensure we deallocate the allocated stack when we leave
            // this function and also try to uphold various safety invariants required by `psm`
            // (such as not unwinding from the callback we pass to it).
            //
            // Other than that this code has no meaningful gotchas.
            unsafe {
                let guard = StackRestoreGuard::new(stack_bytes, page_size);
                let above_guard_page = guard.new_stack.add(page_size);
                set_stack_limit(Some(above_guard_page as usize));
                let panic = psm::on_stack(above_guard_page as *mut _, stack_size, move || {
                    std::panic::catch_unwind(std::panic::AssertUnwindSafe(callback)).err()
                });
                drop(guard);
                if let Some(p) = panic {
                    std::panic::resume_unwind(p);
                }
            }
        }

        fn page_size() -> usize {
            // FIXME: consider caching the page size.
            #[cfg(not(target_arch = "wasm32"))]
            unsafe { libc::sysconf(libc::_SC_PAGE_SIZE) as usize }
            #[cfg(target_arch = "wasm32")]
            { 65536 }
        }
    }

    no {
        #[cfg(not(windows))]
        fn _grow(stack_size: usize, callback: &mut dyn FnMut()) {
            drop(stack_size);
            callback();
        }
    }
}

cfg_if! {
    if #[cfg(windows)] {
        use std::ptr;
        use std::io;

        use winapi::shared::basetsd::*;
        use winapi::shared::minwindef::{LPVOID, BOOL};
        use winapi::shared::ntdef::*;
        use winapi::um::fibersapi::*;
        use winapi::um::memoryapi::*;
        use winapi::um::processthreadsapi::*;
        use winapi::um::winbase::*;

        // Make sure the libstacker.a (implemented in C) is linked.
        // See https://github.com/rust-lang/rust/issues/65610
        #[link(name = "stacker")]
        extern {
            fn __stacker_get_current_fiber() -> PVOID;
        }

        struct FiberInfo<F> {
            callback: std::mem::MaybeUninit<F>,
            panic: Option<Box<dyn std::any::Any + Send + 'static>>,
            parent_fiber: LPVOID,
        }

        unsafe extern "system" fn fiber_proc<F: FnOnce()>(data: LPVOID) {
            // This function is the entry point to our inner fiber, and as argument we get an
            // instance of `FiberInfo`. We will set up the "runtime" for the callback and execute
            // it.
            let data = &mut *(data as *mut FiberInfo<F>);
            let old_stack_limit = get_stack_limit();
            set_stack_limit(guess_os_stack_limit());
            let callback = data.callback.as_ptr();
            data.panic = std::panic::catch_unwind(std::panic::AssertUnwindSafe(callback.read())).err();

            // Restore to the previous fiber.
            set_stack_limit(old_stack_limit);
            SwitchToFiber(data.parent_fiber);
            return;
        }

        fn _grow(stack_size: usize, callback: &mut dyn FnMut()) {
            // Fibers (or stackful coroutines) are the only official way to create new stacks on
            // the same thread on Windows. So in order to extend the stack we create a fiber and
            // switch to it so we can use its stack. After running `callback` within our fiber, we
            // switch back to the current stack and destroy the fiber and its associated stack.
            unsafe {
                let was_fiber = IsThreadAFiber() == TRUE as BOOL;
                let mut data = FiberInfo {
                    callback: std::mem::MaybeUninit::new(callback),
                    panic: None,
                    parent_fiber: {
                        if was_fiber {
                            // Get a handle to the current fiber. We need to use a C implementation
                            // for this as GetCurrentFiber is a header-only function.
                            __stacker_get_current_fiber()
                        } else {
                            // Convert the current thread to a fiber, so we are able to switch back
                            // to the current stack. Threads converted to fibers still act like
                            // regular threads, but they have associated fiber data. We later
                            // convert it back to a regular thread and free the fiber data.
                            ConvertThreadToFiber(ptr::null_mut())
                        }
                    },
                };

                if data.parent_fiber.is_null() {
                    panic!("unable to convert thread to fiber: {}", io::Error::last_os_error());
                }

                let fiber = CreateFiber(
                    stack_size as SIZE_T,
                    Some(fiber_proc::<&mut dyn FnMut()>),
                    &mut data as *mut FiberInfo<&mut dyn FnMut()> as *mut _,
                );
                if fiber.is_null() {
                    panic!("unable to allocate fiber: {}", io::Error::last_os_error());
                }

                // Switch to the fiber we created. This changes stacks and starts executing
                // fiber_proc on it. fiber_proc will run `callback` and then switch back to run the
                // next statement.
                SwitchToFiber(fiber);
                DeleteFiber(fiber);

                // Clean-up.
                if !was_fiber {
                    if ConvertFiberToThread() == 0 {
                        // FIXME: Perhaps should not panic here?
                        panic!("unable to convert back to thread: {}", io::Error::last_os_error());
                    }
                }
                if let Some(p) = data.panic {
                    std::panic::resume_unwind(p);
                }
            }
        }

        #[inline(always)]
        fn get_thread_stack_guarantee() -> usize {
            let min_guarantee = if cfg!(target_pointer_width = "32") {
                0x1000
            } else {
                0x2000
            };
            let mut stack_guarantee = 0;
            unsafe {
                // Read the current thread stack guarantee.
                // This is the stack reserved for stack overflow
                // exception handling.
                // This doesn't return the true value, so we need
                // some further logic to calculate the real stack
                // guarantee. This logic is what is used on x86-32 and
                // x86-64 Windows 10. Other versions and platforms may differ.
                SetThreadStackGuarantee(&mut stack_guarantee)
            };
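            // Worked example with illustrative values: if no explicit guarantee has been
            // set, `stack_guarantee` stays 0 on x86-64, so the result below is
            // max(0, 0x2000) + 0x1000 = 0x3000 bytes.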
            std::cmp::max(stack_guarantee, min_guarantee) as usize + 0x1000
        }

        #[inline(always)]
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            // Query the allocation which contains our stack pointer in order
            // to discover the size of the stack.
            //
            // FIXME: we could read the stack base from the TIB, specifically the 3rd element of it.
            type QueryT = winapi::um::winnt::MEMORY_BASIC_INFORMATION;
            let mut mi = std::mem::MaybeUninit::<QueryT>::uninit();
            VirtualQuery(
                psm::stack_pointer() as *const _,
                mi.as_mut_ptr(),
                std::mem::size_of::<QueryT>() as SIZE_T,
            );
            Some(mi.assume_init().AllocationBase as usize + get_thread_stack_guarantee() + 0x1000)
        }
    } else if #[cfg(any(target_os = "linux", target_os = "solaris", target_os = "netbsd"))] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            let mut attr = std::mem::MaybeUninit::<libc::pthread_attr_t>::uninit();
            assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);
            assert_eq!(libc::pthread_getattr_np(libc::pthread_self(),
                                                attr.as_mut_ptr()), 0);
            let mut stackaddr = std::ptr::null_mut();
            let mut stacksize = 0;
            assert_eq!(libc::pthread_attr_getstack(
                attr.as_ptr(), &mut stackaddr, &mut stacksize
            ), 0);
            assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0);
            Some(stackaddr as usize)
        }
    } else if #[cfg(any(target_os = "freebsd", target_os = "dragonfly"))] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            let mut attr = std::mem::MaybeUninit::<libc::pthread_attr_t>::uninit();
            assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);
            assert_eq!(libc::pthread_attr_get_np(libc::pthread_self(), attr.as_mut_ptr()), 0);
            let mut stackaddr = std::ptr::null_mut();
            let mut stacksize = 0;
            assert_eq!(libc::pthread_attr_getstack(
                attr.as_ptr(), &mut stackaddr, &mut stacksize
            ), 0);
            assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0);
            Some(stackaddr as usize)
        }
    } else if #[cfg(target_os = "openbsd")] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            let mut stackinfo = std::mem::MaybeUninit::<libc::stack_t>::uninit();
            assert_eq!(libc::pthread_stackseg_np(libc::pthread_self(), stackinfo.as_mut_ptr()), 0);
            Some(stackinfo.assume_init().ss_sp as usize - stackinfo.assume_init().ss_size)
        }
    } else if #[cfg(target_os = "macos")] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            Some(libc::pthread_get_stackaddr_np(libc::pthread_self()) as usize -
                 libc::pthread_get_stacksize_np(libc::pthread_self()) as usize)
        }
    } else {
        // The fallback for other platforms is to always increase the stack if we're on
        // the root stack. After we have grown the stack once, we know the new stack
        // size and don't need this pessimization anymore.
        #[inline(always)]
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            None
        }
    }
}