lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]

On Sat, Oct 31, 2009 at 08:03:14PM +0100, Edgar Toernig wrote:
> and if profiling shows that it's too slow or generates too much
> garbage (the {...} table), write a C function that puts the
> binary representation into a string:
>     int bundle(lua_State *L)
>     {
>         lua_Number buf[32]; // assume no padding
>         int i, n;
>         n = lua_gettop(L);
>         if (n > 32)
>             luaL_error(L, "sorry pal, too many arguments");
>         for (i = 0; i < n; ++i)
>             buf[i] = luaL_checknumber(L, i+1);
>         lua_pushlstring(L, (char*)buf, n * sizeof(*buf));
>         return 1;
>     }

Nice idea. The arguments aren't restricted to numbers, but I could use
this technique to encode whatever the arguments are:
    switch (lua_type(L, i)) {
        case LUA_TNUMBER:
            // write LUA_TNUMBER, then the number, to buf
        case LUA_TTABLE:
            // write LUA_TTABLE, then lua_topointer(L, i), to buf
        // etc

If this needs to be fully general, what should I do for strings? I'd
have to stuff the whole string into buf, wouldn't I? Perhaps one word to
indicate that it's a string, and one word to indicate the length
(I guess these could be combined, if I store the length + an offset
higher than any LUA_TYPE), followed by the whole string. (I'm sure this
would be reinventing the wheel...what do people standardly use for
compact, needn't-be-human-readable serialization?)

Now, I did say in my original email let's not worry for now about weak
keys. However, I do in fact want to have my cache tables weak-keyed too.
And this strategy wouldn't work in that case.

Here's what I have come up with. It involves one userdatum + one table
per self-deleting table, plus two userdatums and four table rows per
key,value pair in a self-deleting table. Not horrible; but that is some
real overhead. Not sure if it's worth it.

Here's the code. The self-deleting table is implemented as a userdatum
with __index and __newindex methods. It will start off with size 0, but
won't delete itself until its size is incremented and then decreases
again to zero. This version has the self-deleting table fully weak (both
with keys and values); and non-nil entries can't be overwritten; and
when its size gets to zero, it deletes itself from a parent table
specified when it was created. This can all be changed.

Another way to do the deletion would be to have the parent table be
weak-valued, but have a strong ref stored elsewhere to the self-deleting
userdatum. When the userdatum's size drops to zero, it deletes that
strong ref and then just lets itself be dropped from the parent table
and gc'd normally.

local pairs, collectgarbage, setmetatable, getmetatable, assert, newproxy = pairs, collectgarbage, setmetatable, getmetatable, assert, newproxy
require "debug";  local getfenv, setfenv = debug.getfenv, debug.setfenv
local print = print
local format, tostring = string.format, tostring

-- Single shared metatable that makes any table weak-keyed: entries whose
-- key is otherwise unreferenced become collectable.
local wk_mt = {__mode = 'k'}

--- Create a fresh, empty weak-keyed table.
local function weakkey()
	local t = {}
	return setmetatable(t, wk_mt)
end

-- Registry linking proxies: uk -> uv and uv -> original key.
-- Fully weak ('kv') so it never keeps either proxy alive by itself.
local mappings_mt = {__mode = 'kv'}
local selfdel_mappings = setmetatable({}, mappings_mt)

-- Registry holding each uv proxy's stored value; weak-valued ('v') so the
-- value can be collected once nothing else references it.
local values_mt = {__mode = 'v'}
local selfdel_values = setmetatable({}, values_mt)

-- Prototype userdatum for per-value proxies ("uv"); newproxy(baseuv) clones
-- share its metatable, so this __gc runs when any uv proxy is collected.
-- (newproxy and userdata __gc are Lua 5.1 facilities.)
local baseuv = newproxy(true)
local uv_mt = getmetatable(baseuv)
-- NOTE(review): this function is truncated in the archive rendering -- the
-- `if` branches below are never closed with `end`, an `else` arm appears to
-- be missing (the `k == nil` guard contradicts the `env[k]` lookup inside
-- it), and the function's own `end` is gone. Recover the original post
-- before using this code; comments here only map out the visible intent.
function uv_mt.__gc(self)
	if self ~= baseuv then
		-- k is the original key this uv proxy's entry was stored under.
		local k = selfdel_mappings[self]
		-- env is the per-self-deleting-table state table (set via debug.setfenv).
		local env = getfenv(self)
		if k == nil then
			-- value is being gc'd before key
			local uk = env[k]
			assert(uk ~= nil and selfdel_mappings[uk] == nil)
			env[k] = nil
		-- we only decrement size in the uv.__gc, in case both uk and uv are gc'd in the same collection cycle 
		local parent = env[PARENTKEY]
		assert(parent[env[KEYKEY]] ~= nil)
		local size = env[SIZEKEY]
		if size == 1 then
			-- last entry removed: delete the whole table from its parent.
			parent[env[KEYKEY]] = nil
			env[SIZEKEY] = size - 1
-- reuse baseuv as a sentinel for the env table
SIZEKEY = baseuv

-- Prototype userdatum for per-key proxies ("uk"); clones share its
-- metatable, so this __gc runs when any uk proxy is collected.
local baseuk = newproxy(true)
local uk_mt = getmetatable(baseuk)
-- NOTE(review): truncated in the archive -- the branches below are never
-- closed and the `uv == nil` guard contradicts the asserts inside it
-- (indexing selfdel_values with nil cannot be non-nil); an `else` arm and
-- several `end`s were evidently dropped. Recover the original post before use.
function uk_mt.__gc(self)
	if self ~= baseuk then
		-- uv is the value-proxy paired with this key-proxy.
		local uv = selfdel_mappings[self]
		assert(selfdel_mappings[uv] == nil)
		local env = getfenv(self)
		if uv == nil then
			-- this key,value already deleted
			-- parent[key] may still exist, if it has other elements
			assert(selfdel_values[uv] ~= nil)
			selfdel_values[uv] = nil
-- reuse baseuk as a sentinel for the env table
KEYKEY = baseuk

-- Prototype for the self-deleting table userdatum itself ("u").
local baseu = newproxy(true)
local u_mt = getmetatable(baseu)
-- Lookup chain: key k -> uk proxy (stored in env) -> uv proxy
-- (selfdel_mappings) -> value (selfdel_values).
-- NOTE(review): truncated in the archive -- the closing `end` is missing,
-- and the sentinel comment below suggests a dropped `PARENTKEY = baseu`
-- assignment (PARENTKEY is referenced elsewhere but never defined here).
function u_mt.__index(self, k)
	local env = getfenv(self)
	local uk = env[k]
	local uv = selfdel_mappings[uk]
	local v = selfdel_values[uv]
	return v
-- reuse baseu as a sentinel for the env table

-- Store k -> v by creating a uk/uv proxy pair whose __gc handlers drive
-- the weak-key/weak-value deletion machinery.
-- NOTE(review): truncated in the archive -- the `if` and the function are
-- never closed with `end`. Recover the original post before use.
function u_mt.__newindex(self, k, v)
	local env = getfenv(self)
	assert(env, "already deleted")
	-- Existing entries may not be overwritten (stated design choice above).
	assert(env[k] == nil, "can't modify existing entry")
	if v ~= nil then
		env[SIZEKEY] = env[SIZEKEY] + 1
		-- Key proxy: collected when k becomes unreachable (env is weak-keyed).
		local uk = newproxy(baseuk)
		setfenv(uk, env)
		env[k] = uk
		-- Value proxy: collected when v becomes unreachable (values table is weak-valued).
		local uv = newproxy(baseuv)
		setfenv(uv, env)
		selfdel_values[uv] = v
		selfdel_mappings[uk] = uv
		selfdel_mappings[uv] = k

--- Create a self-deleting table and store it at parent[key].
-- The returned value is a userdatum whose __index/__newindex (on baseu's
-- metatable) emulate a table; its per-instance state lives in a weak-keyed
-- environment table attached via debug.setfenv. Starts with size 0; once
-- its size has grown and then drops back to zero, the __gc machinery
-- removes it from `parent`.
-- @param parent table the self-deleting table is registered in
-- @param key    key under which it is stored in `parent`
-- @return the new self-deleting userdatum (also reachable as parent[key])
function make_selfdel(parent, key)
	local u = newproxy(baseu)
	local env = weakkey()
	-- Sentinel keys (PARENTKEY/KEYKEY/SIZEKEY) index the bookkeeping slots.
	env[PARENTKEY], env[KEYKEY], env[SIZEKEY] = parent, key, 0
	setfenv(u, env)
	parent[key] = u
	return u
end -- restored: the terminating `end` was dropped by the archive rendering

-- Exercise the self-deleting table: insert, drop references, and check
-- that entries (and finally the table itself) disappear from `parent`.
-- NOTE(review): truncated in the archive -- the function's closing `end`
-- is missing, and `collect` is defined but never visibly called; without
-- collectgarbage cycles between the `= nil` statements the GC-dependent
-- asserts below cannot hold, so the calls were evidently dropped too.
local function test()
    local parent = {}
    local u = make_selfdel(parent, 10)
    -- Collectable key/value objects with readable names for debugging.
    local alpha = setmetatable({}, {__tostring=function() return "alpha" end})
    local beta = setmetatable({}, {__tostring=function() return "beta" end})
    local gamma = setmetatable({}, {__tostring=function() return "gamma" end})
    local delta = setmetatable({}, {__tostring=function() return "delta" end})

    -- Run n full GC cycles (userdata __gc may need more than one pass).
    local function collect(n) for i=1,n do collectgarbage("collect") end end

    u[20] = alpha
    u[30] = beta
    assert(parent[10][20] ~= nil and parent[10][30] ~= nil)
    beta = nil
    assert(parent[10][20] ~= nil and parent[10][30] == nil)
    alpha = nil
    u[delta] = gamma
    assert(parent[10][20] == nil and parent[10][delta] ~= nil)
    u[20] = delta
    assert(parent[10][20] ~= nil and parent[10][delta] ~= nil)
    delta = nil
    gamma = nil
    -- All entries gone: the self-deleting table removed itself from parent.
    assert(parent[10] == nil)

I also thought up a different scheme, which needed a number of userdata
proportional to the number of non-terminal cache tables one was using,
rather than proportional to the number of leaf entries in the cache
hierarchy. It's definitely less elegant; I don't know whether it'd be
any more efficient.