// Copyright (c) 2007 Robert Perricone
// Copyright (c) 2007-2016 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

#if !(defined(__x86_64__) || defined(__amd64__))
#error This file is for x86 CPUs only.
#endif

#if !defined(__GNUC__)
#error This file requires compilation with gcc.
#endif

// RDI is &from.sp
// RSI is to.sp
//
// This is the simplest version of swapcontext.
// It saves registers on the old stack, saves the old stack pointer,
// loads the new stack pointer, pops registers from the new stack
// and returns to the new caller.
//
// RDI is set to be the parameter for the function to be called.
// The first time RDI is the first parameter of the trampoline.
// Otherwise it is simply discarded.
//
// NOTE: This function should work on any x86-64 (AMD64 / Intel 64) CPU.
// NOTE: The biggest penalty is the last jump, which
//       will always be mis-predicted (~50 cycles on P4).
//
// We try to make its address available as soon as possible
// to try to reduce the penalty. Doing a return instead of a
//
//    'add $8, %rsp'
//    'jmp *%rcx'
//
// really kills performance.
//
// NOTE: popq is slightly better than mov+add to pop registers,
//       and so is pushq rather than mov+sub.

// HPX_COROUTINE_TYPE_DIRECTIVE(name)
//
// Expands to the ".type <name>, @function" assembler directive, which marks
// the symbol as a function. It expands to nothing when __APPLE__ is defined
// (presumably because the Mach-O assembler does not accept ".type").
#if defined(__APPLE__)
#define HPX_COROUTINE_TYPE_DIRECTIVE(name)
#else
#define HPX_COROUTINE_TYPE_DIRECTIVE(name) ".type " #name ", @function\n\t"
#endif

// HPX_COROUTINE_SWAPCONTEXT(name)
//
// Defines the global symbol `name` with top-level asm (AT&T syntax). The
// routine switches from the current stack to the one passed in %rsi.
//
//   In:  %rdi = address at which the outgoing stack pointer is stored
//        %rsi = stack pointer of the context to switch to
//
// Frame expected at %rsi (byte offsets):
//     0..56  values popped into r15, r14, r13, r12, rdx, rax, rbx, rbp
//    64      address that is jumped to
//    72      %rsp points here when the jump is taken
//    80      value loaded into %rdi right before the jump
//
// The eight pushes below build the same frame on the outgoing stack, directly
// underneath whatever was on top of the stack on entry (the return address,
// assuming the routine was entered through a regular `call`).
//
// NOTE(review): rax and rdx are not callee-saved in the System V AMD64 ABI;
// they are presumably saved to keep the frame layout in sync with the code
// that prepares new stacks -- confirm before removing them.
//
// Alignment: `.p2align 4` requests a 2^4 = 16 byte boundary on every target.
// Plain `.align 4` is target dependent in GNU as: it means 2^4 bytes on
// Mach-O but only 4 bytes on x86 ELF targets, so it did not give the
// intended 16-byte alignment there.
//
// Only block comments may be used inside the macro body: a line comment
// would swallow the continuation lines that follow it.

#define HPX_COROUTINE_SWAPCONTEXT(name)                                        \
    asm (                                                                      \
        ".text \n\t"                                                           \
        ".p2align 4\n"                                                         \
        ".globl " #name "\n\t"                                                 \
        HPX_COROUTINE_TYPE_DIRECTIVE(name)                                     \
        #name ":\n\t"                                                          \
        /* fetch the jump target early: 8 saved registers * 8 bytes = 64 */    \
        "movq  64(%rsi), %rcx\n\t"                                             \
        /* save the outgoing register set on the current stack */              \
        "pushq %rbp\n\t"                                                       \
        "pushq %rbx\n\t"                                                       \
        "pushq %rax\n\t"                                                       \
        "pushq %rdx\n\t"                                                       \
        "pushq %r12\n\t"                                                       \
        "pushq %r13\n\t"                                                       \
        "pushq %r14\n\t"                                                       \
        "pushq %r15\n\t"                                                       \
        /* publish the old stack pointer, then switch to the new stack */      \
        "movq  %rsp, (%rdi)\n\t"                                               \
        "movq  %rsi, %rsp\n\t"                                                 \
        /* restore the incoming register set (reverse push order) */           \
        "popq  %r15\n\t"                                                       \
        "popq  %r14\n\t"                                                       \
        "popq  %r13\n\t"                                                       \
        "popq  %r12\n\t"                                                       \
        "popq  %rdx\n\t"                                                       \
        "popq  %rax\n\t"                                                       \
        "popq  %rbx\n\t"                                                       \
        "popq  %rbp\n\t"                                                       \
        /* %rsi still holds the new frame base: load the argument for rdi */   \
        "movq  80(%rsi), %rdi\n\t"                                             \
        /* step over the slot holding the jump target, then jump to it */      \
        "add   $8, %rsp\n\t"                                                   \
        "jmp   *%rcx\n\t"                                                      \
        /* never reached: traps if execution ever falls through the jmp */     \
        "ud2\n\t"                                                              \
    )                                                                          \
/**/

HPX_COROUTINE_SWAPCONTEXT(swapcontext_stack);
HPX_COROUTINE_SWAPCONTEXT(swapcontext_stack2);

#undef HPX_COROUTINE_SWAPCONTEXT
#undef HPX_COROUTINE_TYPE_DIRECTIVE
