lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


Updated patch attached 'emergency_gc-5.1.3.patch'.  I fixed a few bugs with 
doing a step collection from the allocator instead of a full collection. 

Also the attached 'lua_memlimit.c' program has support for full collection or 
step collection from the allocator and a '-m' option for setting the 
memlimit from the command line instead of the hard-coded 64k limit.  The 
default is still 64k.  The step collection mode will keep calling lua_gc() 
until the required amount of free space has been freed or until the collector 
finishes two full collection cycles.

On Wednesday 07, Bogdan Marinescu wrote:
> ...and the latest results (taken from Lua with the latest patch) are
> attached. More tests this time (but some of them are probably too
> simple to be relevant, like the Fibonacci implementations). I'm going
> to remove the 'fixed' allocator from the tests (as it's clearly
> inferior to the seggregated allocators), so the final battle shall
> happen between dl and tlsf (they seem to be quite similar in
> performance). And I also have to do some actual memory limiting tests,
> all I do now is give the script an amount of memory and test if it
> runs in that.
Why is the Min.size of 'f' mode almost twice the size of 'a' mode for the 
life.lua script?  Is the allocator's memory block becoming fragmented?

I did some simple tests with the life.lua script using lua_memlimit.c to 
compare full gc vs. step gc.

Always Full collection:
max_memused=128000
peak_memused=60518
time=153.72 secs

Full collection at memlimit:
max_memused=65536
peak_memused=65533
time=12.75 secs

Always Step collection with step size multiplier 1:
max_memused=128000
peak_memused=67353
time=15.34 secs

Always Step collection with step size multiplier 2:
max_memused=128000
peak_memused=64682
time=17.88 secs

Step collection at memlimit:
max_memused=65536
peak_memused=65535
time=10.55 secs

Step collection at memlimit (memlimit lowered to 62000):
max_memused=62000
peak_memused=61999
time=27.52 secs

It looks like a step collection is much better than a full collection.

-- 
Robert G. Jakabosky
diff --git a/src/lapi.c b/src/lapi.c
index 9a39513..24e6675 100644
--- a/src/lapi.c
+++ b/src/lapi.c
@@ -656,14 +656,14 @@ LUA_API void lua_settable (lua_State *L, int idx) {
 
 LUA_API void lua_setfield (lua_State *L, int idx, const char *k) {
   StkId t;
-  TValue key;
   lua_lock(L);
   api_checknelems(L, 1);
   t = index2adr(L, idx);
   api_checkvalidindex(L, t);
-  setsvalue(L, &key, luaS_new(L, k));
-  luaV_settable(L, t, &key, L->top - 1);
-  L->top--;  /* pop value */
+  setsvalue2s(L, L->top, luaS_new(L, k));
+  api_incr_top(L);
+  luaV_settable(L, t, L->top - 1, L->top - 2);
+  L->top -= 2;  /* pop key and value */
   lua_unlock(L);
 }
 
@@ -903,11 +903,11 @@ LUA_API int lua_gc (lua_State *L, int what, int data) {
   g = G(L);
   switch (what) {
     case LUA_GCSTOP: {
-      g->GCthreshold = MAX_LUMEM;
+      set_block_gc(L);
       break;
     }
     case LUA_GCRESTART: {
-      g->GCthreshold = g->totalbytes;
+      unset_block_gc(L);
       break;
     }
     case LUA_GCCOLLECT: {
@@ -924,6 +924,7 @@ LUA_API int lua_gc (lua_State *L, int what, int data) {
       break;
     }
     case LUA_GCSTEP: {
+      if(is_block_gc(L)) break;
       lu_mem a = (cast(lu_mem, data) << 10);
       if (a <= g->totalbytes)
         g->GCthreshold = g->totalbytes - a;
diff --git a/src/ldo.c b/src/ldo.c
index 8de05f7..4c32b93 100644
--- a/src/ldo.c
+++ b/src/ldo.c
@@ -494,6 +494,7 @@ static void f_parser (lua_State *L, void *ud) {
   struct SParser *p = cast(struct SParser *, ud);
   int c = luaZ_lookahead(p->z);
   luaC_checkGC(L);
+  set_block_gc(L);  /* stop collector during parsing */
   tf = ((c == LUA_SIGNATURE[0]) ? luaU_undump : luaY_parser)(L, p->z,
                                                              &p->buff, p->name);
   cl = luaF_newLclosure(L, tf->nups, hvalue(gt(L)));
@@ -502,6 +503,7 @@ static void f_parser (lua_State *L, void *ud) {
     cl->l.upvals[i] = luaF_newupval(L);
   setclvalue(L, L->top, cl);
   incr_top(L);
+  unset_block_gc(L);
 }
 
 
diff --git a/src/lfunc.c b/src/lfunc.c
index 813e88f..d2ce63d 100644
--- a/src/lfunc.c
+++ b/src/lfunc.c
@@ -66,7 +66,6 @@ UpVal *luaF_findupval (lua_State *L, StkId level) {
   }
   uv = luaM_new(L, UpVal);  /* not found: create a new one */
   uv->tt = LUA_TUPVAL;
-  uv->marked = luaC_white(g);
   uv->v = level;  /* current value lives in the stack */
   uv->next = *pp;  /* chain it in the proper position */
   *pp = obj2gco(uv);
@@ -74,6 +73,7 @@ UpVal *luaF_findupval (lua_State *L, StkId level) {
   uv->u.l.next = g->uvhead.u.l.next;
   uv->u.l.next->u.l.prev = uv;
   g->uvhead.u.l.next = uv;
+  luaC_marknew(L, obj2gco(uv));
   lua_assert(uv->u.l.next->u.l.prev == uv && uv->u.l.prev->u.l.next == uv);
   return uv;
 }
diff --git a/src/lgc.c b/src/lgc.c
index d9e0b78..cd72428 100644
--- a/src/lgc.c
+++ b/src/lgc.c
@@ -232,8 +232,10 @@ static void traverseclosure (global_State *g, Closure *cl) {
     int i;
     lua_assert(cl->l.nupvalues == cl->l.p->nups);
     markobject(g, cl->l.p);
-    for (i=0; i<cl->l.nupvalues; i++)  /* mark its upvalues */
-      markobject(g, cl->l.upvals[i]);
+    for (i=0; i<cl->l.nupvalues; i++) { /* mark its upvalues */
+      if(cl->l.upvals[i])
+        markobject(g, cl->l.upvals[i]);
+    }
   }
 }
 
@@ -258,6 +260,7 @@ static void traversestack (global_State *g, lua_State *l) {
   CallInfo *ci;
   markvalue(g, gt(l));
   lim = l->top;
+  if(l->stack == NULL) return; /* no stack to traverse */
   for (ci = l->base_ci; ci <= l->ci; ci++) {
     lua_assert(ci->top <= l->stack_last);
     if (lim < ci->top) lim = ci->top;
@@ -419,8 +422,6 @@ static GCObject **sweeplist (lua_State *L, GCObject **p, lu_mem count) {
     else {  /* must erase `curr' */
       lua_assert(isdead(g, curr) || deadmask == bitmask(SFIXEDBIT));
       *p = curr->gch.next;
-      if (curr == g->rootgc)  /* is the first element of the list? */
-        g->rootgc = curr->gch.next;  /* adjust first */
       freeobj(L, curr);
     }
   }
@@ -437,7 +438,10 @@ static void checkSizes (lua_State *L) {
   /* check size of buffer */
   if (luaZ_sizebuffer(&g->buff) > LUA_MINBUFFER*2) {  /* buffer too big? */
     size_t newsize = luaZ_sizebuffer(&g->buff) / 2;
-    luaZ_resizebuffer(L, &g->buff, newsize);
+    /* make sure newsize is larger then the buffer's in use size. */
+    newsize = (luaZ_bufflen(&g->buff) > newsize) ? luaZ_bufflen(&g->buff) : newsize;
+    if(newsize < luaZ_sizebuffer(&g->buff))
+      luaZ_resizebuffer(L, &g->buff, newsize);
   }
 }
 
@@ -609,10 +613,14 @@ static l_mem singlestep (lua_State *L) {
 
 void luaC_step (lua_State *L) {
   global_State *g = G(L);
+  if(is_block_gc(L)) return;
+  set_block_gc(L);
   l_mem lim = (GCSTEPSIZE/100) * g->gcstepmul;
   if (lim == 0)
     lim = (MAX_LUMEM-1)/2;  /* no limit */
   g->gcdept += g->totalbytes - g->GCthreshold;
+  if(g->estimate > g->totalbytes)
+    g->estimate = g->totalbytes;
   do {
     lim -= singlestep(L);
     if (g->gcstate == GCSpause)
@@ -630,11 +638,14 @@ void luaC_step (lua_State *L) {
     lua_assert(g->totalbytes >= g->estimate);
     setthreshold(g);
   }
+  unset_block_gc(L);
 }
 
 
 void luaC_fullgc (lua_State *L) {
   global_State *g = G(L);
+  if(is_block_gc(L)) return;
+  set_block_gc(L);
   if (g->gcstate <= GCSpropagate) {
     /* reset sweep marks to sweep all elements (returning them to white) */
     g->sweepstrgc = 0;
@@ -656,6 +667,7 @@ void luaC_fullgc (lua_State *L) {
     singlestep(L);
   }
   setthreshold(g);
+  unset_block_gc(L);
 }
 
 
@@ -683,6 +695,14 @@ void luaC_barrierback (lua_State *L, Table *t) {
 }
 
 
+void luaC_marknew (lua_State *L, GCObject *o) {
+  global_State *g = G(L);
+  o->gch.marked = luaC_white(g);
+  if (g->gcstate == GCSpropagate)
+    reallymarkobject(g, o);  /* mark new objects as gray during propagate state. */
+}
+
+
 void luaC_link (lua_State *L, GCObject *o, lu_byte tt) {
   global_State *g = G(L);
   o->gch.next = g->rootgc;
diff --git a/src/lgc.h b/src/lgc.h
index 5a8dc60..a623703 100644
--- a/src/lgc.h
+++ b/src/lgc.h
@@ -37,6 +37,18 @@
 #define test2bits(x,b1,b2)	testbits(x, (bit2mask(b1, b2)))
 
 
+/*
+** Possible Garbage Collector flags.
+** Layout for bit use in 'gsflags' field in global_State structure.
+** bit 0 - Protect GC from recursive calls.
+*/
+#define GCFlagsNone		0
+#define GCBlockGCBit	0
+
+
+#define is_block_gc(L) testbit(G(L)->gcflags, GCBlockGCBit)
+#define set_block_gc(L) l_setbit(G(L)->gcflags, GCBlockGCBit)
+#define unset_block_gc(L) resetbit(G(L)->gcflags, GCBlockGCBit)
 
 /*
 ** Layout for bit use in `marked' field:
@@ -101,6 +113,7 @@ LUAI_FUNC void luaC_callGCTM (lua_State *L);
 LUAI_FUNC void luaC_freeall (lua_State *L);
 LUAI_FUNC void luaC_step (lua_State *L);
 LUAI_FUNC void luaC_fullgc (lua_State *L);
+LUAI_FUNC void luaC_marknew (lua_State *L, GCObject *o);
 LUAI_FUNC void luaC_link (lua_State *L, GCObject *o, lu_byte tt);
 LUAI_FUNC void luaC_linkupval (lua_State *L, UpVal *uv);
 LUAI_FUNC void luaC_barrierf (lua_State *L, GCObject *o, GCObject *v);
diff --git a/src/lstate.c b/src/lstate.c
index 4313b83..16c2e85 100644
--- a/src/lstate.c
+++ b/src/lstate.c
@@ -119,6 +119,8 @@ static void close_state (lua_State *L) {
 lua_State *luaE_newthread (lua_State *L) {
   lua_State *L1 = tostate(luaM_malloc(L, state_size(lua_State)));
   luaC_link(L, obj2gco(L1), LUA_TTHREAD);
+  setthvalue(L, L->top, L1); /* put thread on stack */
+  incr_top(L);
   preinit_state(L1, G(L));
   stack_init(L1, L);  /* init stack */
   setobj2n(L, gt(L1), gt(L));  /* share table of globals */
@@ -126,7 +128,8 @@ lua_State *luaE_newthread (lua_State *L) {
   L1->basehookcount = L->basehookcount;
   L1->hook = L->hook;
   resethookcount(L1);
-  lua_assert(iswhite(obj2gco(L1)));
+  lua_assert(!isdead(G(L), obj2gco(L1)));
+  L->top--; /* remove thread from stack */
   return L1;
 }
 
@@ -167,6 +170,7 @@ LUA_API lua_State *lua_newstate (lua_Alloc f, void *ud) {
   luaZ_initbuffer(L, &g->buff);
   g->panic = NULL;
   g->gcstate = GCSpause;
+  g->gcflags = GCFlagsNone;
   g->rootgc = obj2gco(L);
   g->sweepstrgc = 0;
   g->sweepgc = &g->rootgc;
diff --git a/src/lstate.h b/src/lstate.h
index 3bc575b..9de3195 100644
--- a/src/lstate.h
+++ b/src/lstate.h
@@ -71,6 +71,7 @@ typedef struct global_State {
   void *ud;         /* auxiliary data to `frealloc' */
   lu_byte currentwhite;
   lu_byte gcstate;  /* state of garbage collector */
+  lu_byte gcflags;  /* flags for the garbage collector */
   int sweepstrgc;  /* position of sweep in `strt' */
   GCObject *rootgc;  /* list of all collectable objects */
   GCObject **sweepgc;  /* position of sweep in `rootgc' */
diff --git a/src/lstring.c b/src/lstring.c
index 4911315..a84cfab 100644
--- a/src/lstring.c
+++ b/src/lstring.c
@@ -53,6 +53,9 @@ static TString *newlstr (lua_State *L, const char *str, size_t l,
   stringtable *tb;
   if (l+1 > (MAX_SIZET - sizeof(TString))/sizeof(char))
     luaM_toobig(L);
+  tb = &G(L)->strt;
+  if ((tb->nuse + 1) > cast(lu_int32, tb->size) && tb->size <= MAX_INT/2)
+    luaS_resize(L, tb->size*2);  /* too crowded */
   ts = cast(TString *, luaM_malloc(L, (l+1)*sizeof(char)+sizeof(TString)));
   ts->tsv.len = l;
   ts->tsv.hash = h;
@@ -61,13 +64,10 @@ static TString *newlstr (lua_State *L, const char *str, size_t l,
   ts->tsv.reserved = 0;
   memcpy(ts+1, str, l*sizeof(char));
   ((char *)(ts+1))[l] = '\0';  /* ending 0 */
-  tb = &G(L)->strt;
   h = lmod(h, tb->size);
   ts->tsv.next = tb->hash[h];  /* chain new entry */
   tb->hash[h] = obj2gco(ts);
   tb->nuse++;
-  if (tb->nuse > cast(lu_int32, tb->size) && tb->size <= MAX_INT/2)
-    luaS_resize(L, tb->size*2);  /* too crowded */
   return ts;
 }
 
diff --git a/src/ltable.c b/src/ltable.c
index ec84f4f..31162fe 100644
--- a/src/ltable.c
+++ b/src/ltable.c
@@ -358,6 +358,8 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) {
 Table *luaH_new (lua_State *L, int narray, int nhash) {
   Table *t = luaM_new(L, Table);
   luaC_link(L, obj2gco(t), LUA_TTABLE);
+  sethvalue2s(L, L->top, t); /* put table on stack */
+  incr_top(L);
   t->metatable = NULL;
   t->flags = cast_byte(~0);
   /* temporary values (kept only if some malloc fails) */
@@ -367,6 +369,7 @@ Table *luaH_new (lua_State *L, int narray, int nhash) {
   t->node = cast(Node *, dummynode);
   setarrayvector(L, t, narray);
   setnodevector(L, t, nhash);
+  L->top--; /* remove table from stack */
   return t;
 }
 
diff --git a/src/lvm.c b/src/lvm.c
index ee3256a..8b5085b 100644
--- a/src/lvm.c
+++ b/src/lvm.c
@@ -295,6 +295,7 @@ void luaV_concat (lua_State *L, int total, int last) {
         if (l >= MAX_SIZET - tl) luaG_runerror(L, "string length overflow");
         tl += l;
       }
+      G(L)->buff.n = tl;
       buffer = luaZ_openspace(L, &G(L)->buff, tl);
       tl = 0;
       for (i=n; i>0; i--) {  /* concat all strings */
@@ -303,6 +304,7 @@ void luaV_concat (lua_State *L, int total, int last) {
         tl += l;
       }
       setsvalue2s(L, top-n, luaS_newlstr(L, buffer, tl));
+      luaZ_resetbuffer(&G(L)->buff);
     }
     total -= n-1;  /* got `n' strings to create 1 new */
     last -= n-1;
@@ -723,6 +725,7 @@ void luaV_execute (lua_State *L, int nexeccalls) {
         p = cl->p->p[GETARG_Bx(i)];
         nup = p->nups;
         ncl = luaF_newLclosure(L, nup, cl->env);
+        setclvalue(L, ra, ncl);
         ncl->l.p = p;
         for (j=0; j<nup; j++, pc++) {
           if (GET_OPCODE(*pc) == OP_GETUPVAL)
@@ -732,7 +735,6 @@ void luaV_execute (lua_State *L, int nexeccalls) {
             ncl->l.upvals[j] = luaF_findupval(L, base + GETARG_B(*pc));
           }
         }
-        setclvalue(L, ra, ncl);
         Protect(luaC_checkGC(L));
         continue;
       }
/*
 * Run Lua scripts with a memory limit.
 *
 * compile:
 * gcc -Wall -o lua_memlimit lua_memlimit.c -llua
 *
 */
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <lua.h>
#include <lauxlib.h>
#include <lualib.h>

/*
 * DEBUG_LEVEL -
 * 1  - non-debug GC allocator, minimal debug messages & no checks.  See function 'script_alloc'
 *      it is a very basic memlimit + GC allocator.
 * 5  - debug GC allocator, with some checks enabled.
 * 10 - enables some more debug messages.
 * 20 - enables block size counter & block header.  Extra leak checking + block size validator.
 * 40 - enables a lot of debug messages.
 * 50 - enables all debug messages.
 *
 * ZERO_FREE_MEM -
 * 1 - fills all freed & allocated memory with zeros.
 *
 * ALWAYS_GC -
 * 1 - Do a full garbage collect on all allocations (where nsize > 0)
 * 0 - Only run GC when script hits its memlimit.
 *
 * ENABLE_GC -
 * 1 - Enables full garbage collection when max memory limit is hit.
 * 0 - No GC just returns NULL when max memory limit is hit.
 *
 * ALLOW_TEMP_ALLOCS -
 * 1 - Allow the script to temporarily allocate more memory than its limit during a full GC.
 * 0 - Use hardlimit and return NULL if script tries to pass its limit during a full GC.
 *
 * FULL_GC -
 * 1 - Run full collection cycle.
 * 0 - Run small step collection.
 *
 * GC_STEP_SIZE_MUL -
 * Step size = "need size in kilobytes" * GC_STEP_SIZE_MUL
 */
#define DEBUG_LEVEL 5
#define ZERO_FREE_MEM 0
#define ALWAYS_GC 0
#define ENABLE_GC 1
#define ALLOW_TEMP_ALLOCS 0
#define FULL_GC 0
#define GC_STEP_SIZE_MUL 1

/* Sizes at or above this share the last block_count slot; parenthesized so the macro is safe in any expression. */
#define MAX_BLOCK_SIZE (128 * 1024)
#if DEBUG_LEVEL >= 20
#define BLOCK_HEADER	(sizeof(size_t)*2 + 16)
#else
#define BLOCK_HEADER	(0)
#endif
/*
 * Per-script bookkeeping handed to the Lua allocator as its user-data pointer.
 */
typedef struct {
	/* script file name; passed to luaL_loadfile() and used as the prefix of status messages. */
	char			*name;
	/* Lua state owning this allocator; stored by create_newstate() once lua_newstate() has returned. */
	lua_State *L;
	/* bytes currently accounted for by the allocator (reduced by osize, increased by nsize). */
	size_t		memused;
	/* highest value memused has reached. */
	size_t		peak_memused;
	/* memory limit; a growing request that brings memused up to this value triggers GC or fails. */
	size_t		max_memused;
	/* set to ALLOW_TEMP_ALLOCS while script_run_gc() is running, 0 otherwise;
	 * non-zero lets the allocators skip the limit handling. */
	int				allow_tmp_allocs;
#if DEBUG_LEVEL >= 20
	/* block being resized when the GC was started; asserted against to catch a
	 * second allocator call on the same pointer. */
	void			*gc_ptr;
	/* block_count[i] = number of live blocks of size i (i < MAX_BLOCK_SIZE);
	 * the last slot accumulates the total bytes of all larger blocks. */
	int				block_count[MAX_BLOCK_SIZE + 1];
	/* largest block size recorded; bounds the scan in script_check_blocks(). */
	size_t		max_blocksize;
#endif
} script_info_t;

#if DEBUG_LEVEL >= 20
/*
 * Cross-check the per-size block counters against info->memused.
 *
 * Sums size * count over every tracked block size up to info->max_blocksize,
 * adds the byte total kept in the overflow slot, shrinks info->max_blocksize
 * to the largest size still in use and asserts that the sum equals the
 * allocator's running total.
 */
static void script_check_blocks(script_info_t *info)
{
	size_t accounted = 0;
	size_t largest_in_use = 0;
	int size;
	int count;

	for(size = 1; size < MAX_BLOCK_SIZE && size <= info->max_blocksize; size++) {
		count = info->block_count[size];
		if(count <= 0) continue;
#if DEBUG_LEVEL >= 50
		printf("block size=%d, count=%d\n", size, count);
#endif
		accounted += size * count;
		largest_in_use = size;
	}

	/* the overflow slot already holds a byte total, not a block count. */
	count = info->block_count[MAX_BLOCK_SIZE];
	if(count > 0) {
#if DEBUG_LEVEL >= 50
		printf("block size >= %d, count=%d\n", MAX_BLOCK_SIZE, count);
#endif
		accounted += count;
		largest_in_use = MAX_BLOCK_SIZE;
	}

	info->max_blocksize = largest_in_use;
#if DEBUG_LEVEL >= 40
	printf("mem_total=%zd, max_blocksize=%zd\n", accounted, info->max_blocksize);
#endif
	assert(accounted == info->memused);
}

/*
 * Record that a block changed from osize bytes to nsize bytes.
 *
 * osize == 0 means no block is released, nsize == 0 means no block is added.
 * Blocks smaller than MAX_BLOCK_SIZE are counted per exact size; larger
 * blocks add/subtract their byte size in the shared overflow slot.
 *
 * The counter updates are performed outside of assert() so the bookkeeping
 * stays correct when the program is built with NDEBUG.
 */
static void script_update_block_counts(script_info_t *info, size_t osize, size_t nsize)
{
	if(osize > 0) {
		if(osize < MAX_BLOCK_SIZE) {
			info->block_count[osize]--;
			assert(info->block_count[osize] >= 0);
		} else {
			info->block_count[MAX_BLOCK_SIZE] -= osize;
			assert(info->block_count[MAX_BLOCK_SIZE] >= 0);
		}
	}
	if(nsize > 0) {
		if(nsize > info->max_blocksize) info->max_blocksize = nsize;
		if(nsize < MAX_BLOCK_SIZE) {
			info->block_count[nsize]++;
			assert(info->block_count[nsize] >= 0);
		} else {
			info->block_count[MAX_BLOCK_SIZE] += nsize;
			assert(info->block_count[MAX_BLOCK_SIZE] >= 0);
		}
	}
}
#endif

/*
 * Ask the Lua collector to free memory on behalf of the allocator.
 *
 * info - allocator bookkeeping; info->L is the state to collect.
 * need - number of additional bytes the caller wants to allocate.
 *
 * With FULL_GC a complete collection is run.  Otherwise incremental steps of
 * (need in KiB + 1) * GC_STEP_SIZE_MUL are run until memused + need drops
 * below max_memused, or until lua_gc() has reported a finished cycle twice.
 *
 * Does nothing while info->L is still NULL: the allocator is already in use
 * inside lua_newstate(), before create_newstate() has stored the state.
 */
static void script_run_gc(script_info_t *info, size_t need)
{
	if(info->L == NULL) {
		(void)need;
		return;
	}
#if FULL_GC
	(void)need;
	lua_gc(info->L, LUA_GCCOLLECT, 0);
#else
	{
		size_t step_size = ((need >> 10) + 1) * GC_STEP_SIZE_MUL;
		int cycle_count = 0;

		/* NOTE(review): if lua_gc() keeps returning 0 without freeing memory
		 * this loop never exits -- confirm the collector can always make
		 * progress when it is called from inside the allocator. */
		do {
			if(lua_gc(info->L, LUA_GCSTEP, (int)step_size)) {
				/* only allow completing the last cycle and starting a new cycle. */
				if((++cycle_count) > 1) break;
			}
		} while((info->memused + need) >= info->max_memused);
	}
#endif
}

/*
 * Debugging lua_Alloc implementation with a memory limit.
 *
 * ud    - the script_info_t bookkeeping record.
 * ptr   - block to resize/free, or NULL for a new allocation.
 * osize - current size of the block (0 for a new allocation).
 * nsize - requested size (0 frees the block).
 *
 * Returns the (re)allocated block, or NULL after a free, when the memory
 * limit cannot be met, or when realloc() itself fails.
 *
 * When a growing request brings memused up to max_memused (or on every
 * request with ALWAYS_GC) and ENABLE_GC is set, script_run_gc() is called
 * first and the limit is checked again afterwards.  At DEBUG_LEVEL >= 20 each
 * block carries a BLOCK_HEADER holding its size, which is validated against
 * osize, and per-size block counters are maintained and verified.
 *
 * If realloc() fails the old block is still valid, so all accounting
 * (memused, block counters) is rolled back and the peak is left untouched.
 */
static void *script_debug_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
{
	script_info_t *info=(script_info_t *)ud;
	size_t old_size = info->memused;
	void *nptr;
#if ENABLE_GC
	int run_gc = 0;
#endif
#if DEBUG_LEVEL >= 20
	size_t bsize = -1;
#endif

#if DEBUG_LEVEL >= 40
	if(osize > info->memused) {
		printf("osize(%zd) > memused(%zd)\n", osize, info->memused);
	}
#endif
	assert(osize <= info->memused);

#if DEBUG_LEVEL >= 20
	if(ptr) {
		/* step back to the real start of the block and validate its recorded size. */
		ptr -= BLOCK_HEADER;
		bsize = *((size_t *)ptr);
		assert(bsize == osize);
		assert(info->gc_ptr != ptr); /* check for second alloc call on same ptr. */
	}
#endif
	info->memused -= osize;
	if (nsize == 0) {
#if DEBUG_LEVEL >= 40
		if(ptr) printf("free 1 (%p), osize=%zd, bsize=%zd\n", ptr, osize, bsize);
#endif
#if DEBUG_LEVEL >= 20
		script_update_block_counts(info, osize, 0);
		script_check_blocks(info);
#endif
#if ZERO_FREE_MEM
		if(osize > 0) {
#if DEBUG_LEVEL >= 45
			printf("1 memset(%p,0,%zd)\n", ptr, osize + BLOCK_HEADER);
#endif
			memset(ptr, 0, osize + BLOCK_HEADER);
		}
#endif
		free(ptr);
		return NULL;
	}
	info->memused += nsize;
	if(nsize > osize && info->memused >= info->max_memused) {
#if ENABLE_GC
		run_gc = 1;
#if ALWAYS_GC
	} else if(info->L != NULL) {
		run_gc = 1;
#endif
	}
	if(run_gc && info->allow_tmp_allocs == 0) {
#if DEBUG_LEVEL >= 40
		printf("LOW MEM: 1 osize=%zd, nsize=%zd, used=%zu, peak=%zu, need=%zd\n", osize, nsize,
			info->memused, info->peak_memused, (info->memused - info->max_memused));
#endif
		/* undo the accounting so the collector sees the state before this request. */
		info->memused = old_size;
#if DEBUG_LEVEL >= 20
		info->gc_ptr = ptr;
#endif
		/* try to free memory by collecting garbage. */
		info->allow_tmp_allocs = ALLOW_TEMP_ALLOCS;
		script_run_gc(info, (nsize > osize)?(nsize - osize):0);
		info->allow_tmp_allocs = 0;
#if DEBUG_LEVEL >= 20
		info->gc_ptr = NULL;
#endif
#if DEBUG_LEVEL >= 40
		printf("LOW MEM: 2 used=%zu, peak=%zu\n", info->memused, info->peak_memused);
#endif
		/* check memory usage again. */
		old_size = info->memused;
		info->memused -= osize;
		info->memused += nsize;
		if(info->memused >= info->max_memused) {
			info->memused = old_size;
			printf("OUT OF MEMORY: memused=%zd, osize=%zd, nsize=%zd\n", info->memused, osize, nsize);
			return NULL;
		}
#else
		info->memused = old_size;
		return NULL;
#endif
	}
#if ZERO_FREE_MEM
	if(osize > nsize) {
#if DEBUG_LEVEL >= 45
		printf("2 memset(%p,0,%zd)\n", ptr + nsize + BLOCK_HEADER, osize - nsize);
#endif
		memset(ptr + nsize + BLOCK_HEADER, 0, osize - nsize);
	}
#endif
#if DEBUG_LEVEL >= 40
	if(ptr) printf("free 2 (%p), osize=%zd, bsize=%zd\n", ptr, osize, bsize);
#endif
#if DEBUG_LEVEL >= 20
	script_update_block_counts(info, osize, nsize);
	script_check_blocks(info);
#endif
	nptr = realloc(ptr, nsize + BLOCK_HEADER);
	if(nptr == NULL) {
		/* the old block is untouched: restore the accounting done above.
		 * old_size is memused as it was before this request was applied. */
#if DEBUG_LEVEL >= 20
		script_update_block_counts(info, nsize, osize);
#endif
		info->memused = old_size;
		return NULL;
	}
	ptr = nptr;
	/* only a successful allocation may raise the recorded peak. */
	if(info->memused > info->peak_memused) info->peak_memused = info->memused;
#if ZERO_FREE_MEM
	if(osize < nsize) {
#if DEBUG_LEVEL >= 45
		printf("3 memset(%p,0,%zd)\n", ptr + osize + BLOCK_HEADER, nsize - osize);
#endif
		memset(ptr + osize + BLOCK_HEADER, 0, nsize - osize);
	}
#endif
#if DEBUG_LEVEL >= 40
	if(ptr) printf("alloc  (%p), nsize=%zd\n", ptr, nsize);
#endif
#if DEBUG_LEVEL >= 20
	/* record the block size in the header and hand out the payload address. */
	*((size_t *)ptr) = nsize;
	ptr += BLOCK_HEADER;
#endif
	return ptr;
}

/*
 * Minimal lua_Alloc implementation with a memory limit.
 *
 * ud    - the script_info_t bookkeeping record.
 * ptr   - block to resize/free, or NULL for a new allocation.
 * osize - current size of the block (0 for a new allocation).
 * nsize - requested size (0 frees the block).
 *
 * Returns the (re)allocated block, or NULL after a free, when the request
 * would reach the memory limit even after garbage collection (or at once if
 * ENABLE_GC is 0), or when realloc() fails.
 *
 * memused and peak_memused are only left changed when the request actually
 * succeeded; a failed realloc() leaves the old block valid, so the
 * accounting is rolled back.
 */
static void *script_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
{
	script_info_t *info=(script_info_t *)ud;
	size_t old_size = info->memused;
	void *nptr;

	info->memused -= osize;
	if (nsize == 0) {
		free(ptr);
		return NULL;
	}
	info->memused += nsize;
	if(nsize > osize && info->memused >= info->max_memused && info->allow_tmp_allocs == 0) {
#if ENABLE_GC
		/* undo the accounting so the collector sees the state before this request. */
		info->memused = old_size;
		/* try to free memory by collecting garbage. */
		info->allow_tmp_allocs = ALLOW_TEMP_ALLOCS;
		script_run_gc(info, (nsize > osize)?(nsize - osize):0);
		info->allow_tmp_allocs = 0;
		/* check memory usage again. */
		old_size = info->memused;
		info->memused -= osize;
		info->memused += nsize;
		if(info->memused >= info->max_memused) {
			info->memused = old_size;
#if DEBUG_LEVEL >= 1
			printf("OUT OF MEMORY: memused=%zd, osize=%zd, nsize=%zd\n", info->memused, osize, nsize);
#endif
			return NULL;
		}
#else
		info->memused = old_size;
		return NULL;
#endif
	}
	nptr = realloc(ptr, nsize);
	if(nptr == NULL) {
		/* old_size is memused as it was before this request was applied. */
		info->memused = old_size;
		return NULL;
	}
	if(info->memused > info->peak_memused) info->peak_memused = info->memused;
	return nptr;
}

/*
 * Lua C function: print the allocator statistics of the calling state.
 *
 * The bookkeeping record is retrieved through lua_getallocf().  At
 * DEBUG_LEVEL >= 10 the numbers are printed before and after a collection
 * requested with the current memused as the needed size.  Pushes no results.
 */
static int print_memstats(lua_State *L)
{
	void *ud = NULL;
	script_info_t *stats;

	lua_getallocf(L, &ud);
	stats = (script_info_t *)ud;

#if DEBUG_LEVEL >= 10
	printf("%s: before GC memused=%zd, peak_memused=%zd\n", stats->name,
		stats->memused, stats->peak_memused);
	script_run_gc(stats, stats->memused);
	printf("%s: after GC  memused=%zd, peak_memused=%zd\n", stats->name,
		stats->memused, stats->peak_memused);
#else
	printf("%s: memused=%zd, peak_memused=%zd\n", stats->name,
		stats->memused, stats->peak_memused);
#endif
	return 0;
}

/*
 * Lua C function: array_fill(s, n) -> table
 *
 * Returns a new table whose array slots 1..n all hold the string s.
 *
 * Slots are filled with Lua's 1-based indices so the values land in the
 * array part pre-sized by lua_createtable(); the string is pushed with its
 * explicit length so embedded zero bytes are preserved.  Raises an argument
 * error when n is negative.
 */
static int array_fill(lua_State *L)
{
	size_t len;
	const char *s = luaL_checklstring(L, 1, &len);
	int n = luaL_checkint(L, 2);
	int i;

	luaL_argcheck(L, n >= 0, 2, "count must not be negative");
	lua_createtable(L, n, 0);
	for(i = 1; i <= n; i++) {
		lua_pushlstring(L, s, len);
		lua_rawseti(L, -2, i);
	}
	return 1;
}

/*
 * Create a Lua state whose memory is managed by the limiting allocator.
 *
 * The debug allocator is used at DEBUG_LEVEL >= 5, the plain one otherwise.
 * On success the state is stored in info->L, the standard libraries are
 * opened and the helpers print_memstats and array_fill are registered as
 * globals.  Returns NULL when lua_newstate() fails.
 */
static lua_State *create_newstate(script_info_t *info)
{
#if DEBUG_LEVEL >= 5
	lua_Alloc allocator = script_debug_alloc;
#else
	lua_Alloc allocator = script_alloc;
#endif
	lua_State *state = lua_newstate(allocator, info);

	if(state != NULL) {
		info->L = state;
		/* load libs */
		luaL_openlibs(state);
		lua_register(state, "print_memstats", print_memstats);
		lua_register(state, "array_fill", array_fill);
	}
	return state;
}

/*
 * Load and run one Lua script under a memory limit.
 *
 * script_name - path of the script to run.
 * mem_limit   - memory limit in bytes handed to the allocator.
 *
 * Returns 0 when the script was loaded and executed (a runtime error inside
 * the script is reported on stderr but still returns 0), -1 when the
 * bookkeeping record or the Lua state could not be created or the script
 * failed to load.  After lua_close() the allocator must report zero bytes in
 * use; anything else is treated as a leak and trips an assert.
 */
static int run_script(char *script_name, int mem_limit)
{
	script_info_t *info;
	lua_State *L;
	int status;

	/* calloc() leaves L NULL and every counter/flag at zero. */
	info = calloc(1, sizeof *info);
	if(info == NULL) {
		fprintf(stderr,"%s: failed to allocate script info\n", script_name);
		return -1;
	}
	info->max_memused = (size_t)mem_limit;
	info->name = script_name;

	/* create lua state & load script file. */
	L=create_newstate(info);
	if(L == NULL) {
		/* happens when the limit is too small for lua_newstate() itself. */
		fprintf(stderr,"%s: failed to create Lua state (mem_limit=%d)\n",
			script_name, mem_limit);
		free(info);
		return -1;
	}
	status = luaL_loadfile(L, info->name);
	if(status != 0) {
		fprintf(stderr,"Failed to load script: %s\n",lua_tostring(L,-1));
		lua_close(L);
		free(info);
		return -1;
	}
	/* execute script. */
	status = lua_pcall(L, 0, LUA_MULTRET, 0);
	if(status != 0) {
		fprintf(stderr,"%s: %s\n", script_name,lua_tostring(L,-1));
	}
	lua_close(L);
	printf("%s: memused=%zd, peak_memused=%zd\n", info->name,
		info->memused, info->peak_memused);
	/* check for memory leak. */
	assert(info->memused == 0);
	free(info);
	return 0;
}

/*
 * Entry point: run every script named on the command line, in order.
 *
 * "-m <bytes>" changes the memory limit (default 64 KiB) for the scripts
 * that follow it.  The value must be a positive decimal number that fits in
 * an int; a missing or malformed value is reported and stops processing.
 * Processing also stops at the first script that run_script() fails on.
 *
 * Returns 0 when everything succeeded, 1 otherwise.
 */
int main(int argc, char *argv[])
{
	int mem_limit = 64 * 1024;
	int rc = 0;
	int i = 0;

	if(argc < 2) {
		printf("usage: %s [-m <bytes>] <lua script 1> [<lua script 2>]\n", argv[0]);
		exit(1);
	}

	/* run scripts. */
	for(i=1; i < argc && rc == 0; i++) {
		if(argv[i][0] == '-') {
			switch(argv[i][1]) {
			case 'm': {
				char *end = NULL;
				long value;

				i++;
				if(i >= argc) {
					fprintf(stderr, "option '-m' requires a value\n");
					rc = -1;
					break;
				}
				errno = 0;
				value = strtol(argv[i], &end, 10);
				if(end == argv[i] || *end != '\0' || errno == ERANGE ||
						value <= 0 || value > INT_MAX) {
					fprintf(stderr, "invalid memory limit '%s'\n", argv[i]);
					rc = -1;
					break;
				}
				mem_limit = (int)value;
				printf("new mem_limit = %d\n", mem_limit);
				break;
			}
			default:
				printf("unkown option '%s'\n", argv[i]);
				break;
			}
			continue;
		}
		rc = run_script(argv[i], mem_limit);
	}

	return (rc == 0) ? 0 : 1;
}