  lua-l archive

• Subject: Avoiding FFI- allocations + using SSE-vectors
• From: Wolfgang Pupp <wolfgang.pupp@...>
• Date: Mon, 6 Feb 2012 01:48:31 +0100

```I just tried another approach:
A circular buffer for intermediate results- there are no more
allocations for calling arithmetic functions, *BUT* if you don't copy
results (and just keep references instead), they *will* be overwritten
(sooner or later).
I didn't find any cheap and easy way to check how many times an object
is still referenced, and I think this is a pretty decent usability vs.
performance trade-off.

I also tried to make it use SSE, and that seems to work just fine
(MinGW on Win7 32). It needs a tiny wrapper-dll, because LuaJIT can't
directly call ffi-functions with vector arguments (yet!)- so I pass
them via pointers.
I only implemented single-precision-4-float addition for now anyway-
it's just a proof of concept, I think ffi- vector operations are
somewhere on Mike's TODO-list (maybe someone will even sponsor that
and we'll have it in a blink ;).

Every kind of opinion/feedback is much appreciated- I'm learning,
after all, and sometimes even a plain "you're doing it wrong" helps a
LOT ;)

It's used like this:
v4sf = require 'v4sf'
local a, b = v4sf(1, 2, 3, 4), v4sf(1, 0, 1, 0)
print(a+b)  --intermediate result, don't keep a reference for too long!
local c = v4sf(a+b)  --when you *need* to keep the value
print(c)

Here is the code:
-- Module table. The metadata fields follow the classic Lua module convention;
-- _DESCRIPTION doubles as the text returned by tostring() on the module.
local M = {}
M._NAME = 'v4sf'
M._VERSION = '0.1'
M._DESCRIPTION = [[
Module for 4d-vectors, single precision.
Vectors are immutable once constructed.
Vectors returned by metamethods (addition, etc.) are only temporary values and
_MUST_ be copied if you want to keep a reference to them.
Construct new vectors by calling the module table itself or its new- function.
Functions:
- new(<number> a, <number> b, <number> c, <number> d)
returns <v4sf> A vector with elements a, b, c, d
- new(<v4sf> v)
returns <v4sf> A copy of vector v
Types:
- <v4sf>: A vector with 4 (single-precision-float) elements.
- __add(<v4sf> a, <v4sf> b) returns <v4sf> The sum of a and b
- __tostring(<v4sf> v)
returns <string> Temporary values are marked with a 'tmp '- prefix.
]]

local ffi = require'ffi'
--[[Can't call SSE functions directly (LuaJIT NYI), so we have to cheat a bit:
a tiny C wrapper receives its operands and its result slot through pointers.
lua_sse.c is compiled like this (MinGW gcc):
gcc -O2 -msse -shared -o lua_sse.dll lua_sse.c
and should look like this:
#include <xmmintrin.h>
void lua_mm_add_ps(__m128 *r, __m128 *a, __m128 *b) { *r = _mm_add_ps(*a, *b); }
--]]
-- Declare the 16-byte float vector type and the wrapper's prototype to the FFI.
ffi.cdef[[
typedef float m128 __attribute__ ((__vector_size__ (16)));
void lua_mm_add_ps(m128 *r, m128 *a, m128 *b);
]]

do
  -- Wrapper library built from lua_sse.c (see the build notes above the cdef).
  -- NOTE(review): the load call is not visible in the original listing; the
  -- library name is taken from the documented gcc output file -- confirm.
  local sse = ffi.load 'lua_sse'

  -- A one-element array of vectors rather than a bare vector: array cdata
  -- converts to the `m128 *` the wrapper expects, and the nested-table
  -- initializer {{a, b, c, d}} fills element 0.
  local ctype = ffi.typeof 'm128[1]'
  local VECTOR_BYTES = ffi.sizeof(ctype)

  -- Ring of preallocated result objects handed out by the arithmetic
  -- metamethods, so that an operation does not have to allocate.
  local TMP_COUNT = 10
  local cBuffer = {}
  local cBufferIdx = 1

  local metatable

  -- Builds one reusable temporary; missing components default to zero.
  local function new_tmp(a, b, c, d)
    local cdata = ctype{{a or 0, b or 0, c or 0, d or 0}}
    return setmetatable({tmp = true, cdata = cdata}, metatable)
  end

  --- Creates a persistent (non-temporary) vector.
  -- new(a, b, c, d) builds a vector from four numbers.
  -- new(v) copies an existing v4sf; use it to keep the result of an
  -- operation before its temporary slot is reused.
  -- @return a new v4sf object
  function M.new(a, b, c, d)
    if type(a) == 'table' and getmetatable(a) == metatable then
      -- Copy constructor: duplicate the 16 bytes of the source vector.
      local cdata = ctype()
      ffi.copy(cdata, a.cdata, VECTOR_BYTES)
      return setmetatable({cdata = cdata}, metatable)
    end
    return setmetatable({cdata = ctype{{a, b, c, d}}}, metatable)
  end

  metatable = {
    -- Formats the four components; temporaries get a "tmp " prefix.
    __tostring = function(v)
      local x = v.cdata[0]
      -- rawget: persistent vectors have no `tmp` field, skip __index.
      local label = rawget(v, 'tmp') and "tmp v4sf" or "v4sf"
      return ("%s: %g,%g,%g,%g"):format(label, x[0], x[1], x[2], x[3])
    end,

    -- Component-wise sum, written into the next ring slot. The returned
    -- object is overwritten once the ring wraps, so copy it with M.new to
    -- keep the value.
    __add = function(a, b)
      local tmp = cBuffer[cBufferIdx]
      sse.lua_mm_add_ps(tmp.cdata, a.cdata, b.cdata)
      -- 1-based wrap-around: yields 1..#cBuffer and never 0.
      cBufferIdx = cBufferIdx % #cBuffer + 1
      return tmp
    end,

    -- Read access to components 0..3; any other key yields nil.
    __index = function(v, k)
      if type(k) == 'number' and k >= 0 and k < 4 then
        return v.cdata[0][k]
      end
    end,

    -- Vectors are immutable: reject writes to keys that are not already set.
    __newindex = function(v, k, newValue)
      error "Assigning to vectors is not allowed"
    end
  }

  for i = 1, TMP_COUNT do cBuffer[i] = new_tmp() end

  setmetatable(M, {
    -- Calling the module table is shorthand for M.new.
    __call = function(_, a, b, c, d) return M.new(a, b, c, d) end,
    __tostring = function(m) return m._DESCRIPTION end,
  })
end

return M

```