Index: Stackless/platf/switch_amd64_unix.h =================================================================== --- Stackless/platf/switch_amd64_unix.h (revision 88911) +++ Stackless/platf/switch_amd64_unix.h (working copy) @@ -2,6 +2,8 @@ * this is the internal transfer function. * * HISTORY + * 24-Oct-11 Anselm Kruis + * Reworked based on the AMD64 ABI spec. * 26-Jul-10 Jeff Senn * Got this to work (rather than crash consistently) * on OS-X 10.6 by adding more registers to save set. @@ -36,26 +38,95 @@ #define STACK_MAGIC 0 -#define REGS_TO_SAVE "rdx", "rbx", "r12", "r13", "r14", "r15", "r9", "r8", "rdi", "rsi", "rcx", "rbp" +/* + * In order to switch the stack, we use the fact, that the compiler + * already knows how to preserve registers accross function calls. + * + * The relevant AMD64 ABI specifigation pecisely defines which registers + * must be preserved and which registers may be modified. + * We use a gcc inline assembly feature to pretend that the inline + * assembly block modifies the registers to be preserved. As a result, + * the compiler emits code to preserve those registers. + * + * The AMD64 ABI Draft 0.99.5, Figure 3.4 "Register usage" + * defines the following registers as "Preserved across + * function calls": + * %rbx callee-saved register; optionally used as base pointer + * %rsp stack pointer + * %rbp callee-saved register; optional used as frame pointer + * %r12 - %r15 callee-saved registers + * %mxcsr SSE2 control and status word + * %fcw x87 control word + * + * The compiler always preserves the %rsp register accros a function call. + * + * Depending on the usage of a frame pointer, which is optional + * for the AMD64 ABI, the compiler already preserves the %rbp + * register. Unfortunately, we must not add "rbp" to the clobber list, if + * rbp is used as a frame pointer (won't compile). Therefore we save + * rbp manually. + * + * For the other registers (except %mxcsr and %fcw) we tell the compiler, + * that we are going to clobber the registers. The compiler will then save the registers + * for us. (However the compiler gives no guarantee, when it will restore + * the registers.) And the compiler only preserves those registers, that must + * be preserved according to the calling convention. It does not preserve any other + * register, that may be modified during a function call. Therefore specifying additional + * registers has no effect at all. Take a look at the generated assembly code! + * + * If we want more control, we need to preserve the + * registers explicitly similar to %mxcsr, %fcw and %rbp. + * + */ -static int +#if 1 +/* Registers marked as clobbered, minimum set according to the ABI spec. */ +#define REGS_CLOBBERED "rbx", "r12", "r13", "r14", "r15" +#else +/* Maximum possible clobber list. It gives the same assembly code as the minimum list. + If the ABI evolves, it might be necessary to add some of these registers */ +#define REGS_CLOBBERED "memory", "rax", "rbx", "rcx", "rdx", "rsi", "rdi", \ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ + "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)", \ + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", \ + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" +#endif + +/* + * You may want to make the function static enable optimizations. + * However, the ABI SPEC does not apply to static functions. Therefore + * I make slp_switch a regular global function. + */ +#if 0 +static +#endif +int slp_switch(void) { register long *stackref, stsizediff; - __asm__ volatile ("" : : : REGS_TO_SAVE); + void * rbp; int mxcsr; short x87cw; + __asm__ volatile ( + "fstcw %0\n\t" + "stmxcsr %1\n\t" + "movq %%rbp, %2\n\t" + : "=m" (x87cw), "=m" (mxcsr), "=m" (rbp) : : REGS_CLOBBERED ); __asm__ ("movq %%rsp, %0" : "=g" (stackref)); { SLP_SAVE_STATE(stackref, stsizediff); __asm__ volatile ( - "addq %0, %%rsp\n" - "addq %0, %%rbp\n" + "addq %0, %%rsp\n\t" + "addq %0, %%rbp\n\t" : : "r" (stsizediff) ); SLP_RESTORE_STATE(); + __asm__ volatile ( + "movq %2, %%rbp\n\t" + "ldmxcsr %1\n\t" + "fldcw %0\n\t" + : : "m" (x87cw), "m" (mxcsr), "m" (rbp)); return 0; } - __asm__ volatile ("" : : : REGS_TO_SAVE); } #endif