lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


On Thursday 28, Mike Pall wrote:
> Robert G. Jakabosky wrote:
> > Also attached is a patch to 'lcoco.c' to add x86_64 assembly coroutine
> > support to LuaCoco.  It saves 9 64bit registers (It might be possible to
> > lower that count, since the assembly code is inlined and the parent
> > function might not use all those registers).
>
> You can't avoid to save all callee-saves, since the grandparent
> may use them. And there is an inconsistency: +D forces rdi, but
> then you save it. The x64 ABI doesn't require rdi to be saved.
Correct, saving rdi wasn't needed, and setting it on each context switch is a 
waste too.  Since we only need to set it once, when the coroutine first starts 
running on the new stack, I have come up with a better method.

Attached is a new patch.  This version only saves/restores 8 registers each 
context switch.  In order to pass the "lua_State *L" parameter 
to 'coco_main', a wrapper function written in assembly is used to pre-fill 
rdi before jumping to 'coco_main'.

> I've got a comprehensive collection of Lua benchmarks and have
> considered packaging them up, but I've never gotten around to it.
> Anyway, I proactively reserve the name LuaBench for it. :-)
Fine by me, just don't take too long to release it. ;-)


-- 
Robert G. Jakabosky
--- LuaJIT-1.1.4/src/lcoco.c	2008-02-05 08:00:00.000000000 -0800
+++ llvm-lua/src/lcoco.c	2008-08-30 00:31:39.000000000 -0700
@@ -134,6 +134,50 @@
   coco->arg0 = (size_t)(a0);
 #define COCO_STATE_HEAD		size_t arg0;
 
+#elif defined(__x86_64__)
+
+/* Entry trampoline for a fresh coroutine context: sets register %rdi == lua_State (copied from %r13) and jumps to the function address held in %r12; COCO_MAKECTX preloads both registers. */
+void coco_wrap_main();
+__asm__ (
+"\t.text\n"
+".local coco_wrap_main\n"
+"\t.type coco_wrap_main, @function\n"
+"coco_wrap_main:\n"
+"\n"
+"\tmovq %r12, %rax\n"  /* rax = jump target (the func stored in slot 4 by COCO_MAKECTX) */
+"\tmovq %r13, %rdi\n"  /* rdi = argument value (the a0 stored in slot 5 by COCO_MAKECTX) */
+"\tjmpq *%rax\n"  /* plain jump, not a call: no return address is pushed here */
+);
+
+typedef void *coco_ctx[8];  /* saved context, slot order: rip, rsp, rbp, rbx, r12, r13, r14, r15 */
+static inline void coco_switch(coco_ctx from, coco_ctx to)  /* store the running context in `from`, then load `to` and jump to its saved rip */
+{
+  __asm__ __volatile__ (
+    "leaq 1f(%%rip), %%rax\n\t"  /* rax = address of local label 1 below, i.e. where `from` resumes later */
+    "movq %%rax, (%0)\n\t" "movq %%rsp, 8(%0)\n\t" "movq %%rbp, 16(%0)\n\t"  /* save rip, rsp, rbp into from[0..2] */
+		"movq %%rbx, 24(%0)\n\t" "movq %%r12, 32(%0)\n\t" "movq %%r13, 40(%0)\n\t"  /* save rbx, r12, r13 into from[3..5] */
+		"movq %%r14, 48(%0)\n\t" "movq %%r15, 56(%0)\n\t"  /* save r14, r15 into from[6..7] */
+    "movq 56(%1), %%r15\n\t" "movq 48(%1), %%r14\n\t" "movq 40(%1), %%r13\n\t"  /* load r15, r14, r13 from to[7..5] */
+		"movq 32(%1), %%r12\n\t" "movq 24(%1), %%rbx\n\t" "movq 16(%1), %%rbp\n\t"  /* load r12, rbx, rbp from to[4..2] */
+		"movq 8(%1), %%rsp\n\t"  /* switch to the target stack pointer */
+		"jmpq *(%1)\n"  /* continue at the target's saved rip (to[0]) */
+		"1:\n"  /* resume point recorded in from[0] by the leaq above */
+    : "+S" (from), "+D" (to) : : "rax", "rcx", "rdx", "r8", "r9", "r10", "r11", "memory", "cc");  /* "S"/"D" pin from/to to rsi/rdi; the listed scratch registers, memory and flags are declared clobbered */
+}
+
+#define COCO_CTX		coco_ctx  /* context buffer type used for this architecture */
+#define COCO_SWITCH(from, to)	coco_switch(from, to);  /* note: the expansion already ends with a semicolon */
+#define COCO_MAKECTX(coco, buf, func, stack, a0) /* fill buf so the first switch to it starts func(a0) on stack via coco_wrap_main */ \
+  buf[0] = (void *)(coco_wrap_main); /* rip == wrap function */ \
+  buf[1] = (void *)(stack); /* rsp == top of stack */ \
+  buf[2] = (void *)0; /* rbp == 0 */ \
+  buf[3] = (void *)0; /* rbx == 0 */ \
+  buf[4] = (void *)(func); /* r12 == coco_main function */ \
+  buf[5] = (void *)(a0); /* r13 == lua_State */ \
+  buf[6] = (void *)0; /* r14 == 0 */ \
+  buf[7] = (void *)0; /* r15 == 0 */ \
+  stack[0] = 0xdeadc0c0deadc0c0;  /* Dummy return address at the initial rsp: func is entered by jmp, so no real one exists. */ \
+
 #elif __mips && _MIPS_SIM == _MIPS_SIM_ABI32 && !defined(__mips_eabi)
 
 /* No way to avoid the function prologue with inline assembler. So use this: */