[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: Re: Avoiding LuaJIT FFI allocations (Was: LuaJIT - Is ffi.alloca possible?)
- From: Adam Strzelecki <ono@...>
- Date: Sun, 5 Feb 2012 14:29:33 +0100
> Calling collectgarbage periodically from your matrix constructor might
> help, if that isn't a big no-no for you...
> I experimented around with this code a bit, and the concept seems to work:
Thanks for sharing that! Unfortunately, despite tuning it as well as I could, I haven't managed to make this method even as fast as regular heap allocation (almost 10s vs. 1s via the regular heap). I guess forcing a garbage collection might have too much overhead there. FYI, heap allocation seems to be on par with C/C++ malloc (comparable results when just allocating blocks like that); however, allocating the C++ class on the stack makes things run almost 20x faster. (BTW, I know these samples do not make much sense, as the calculated "v" variable is there just to ensure the allocation is not optimized out.)
heap in 0.940712 seconds
pool in 10.052809 seconds
c++ in 0.057713 seconds
-- test.cpp
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <sys/time.h>
// Plain aggregate of sixteen public floats, m11 through m44. The members are
// declared in the same order as the fields of the `mat4` struct in test.lua.
struct mat4 {
    float m11, m21, m31, m41;
    float m12, m22, m32, m42;
    float m13, m23, m33, m43;
    float m14, m24, m34, m44;
};
// Benchmark entry point: creates a stack-allocated mat4 20,000,000 times,
// adds one member of each to an accumulator, and prints the elapsed
// wall-clock time measured with gettimeofday().
// Command-line arguments are ignored. Always returns 0.
int main (int argc, char const *argv[])
{
    int i;
    // The accumulator must start at a defined value; adding to an
    // uninitialized float is undefined behavior.
    float v = 0.0f;
    struct timeval start, end;
    gettimeofday(&start, NULL);
    for(i = 0; i < 20000000; i++) {
        // Value-initialize so that m11 holds a defined value (zero) when read;
        // a default-initialized mat4 has indeterminate members.
        mat4 m = mat4();
        v += m.m11;
    }
    gettimeofday(&end, NULL);
    // NOTE(review): v is never read after the loop, so an optimizing compiler
    // may still remove the loop entirely; confirm the build flags used.
    printf("c++ in %.12g seconds\n", (double)(end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec)/1000000.0);
    return 0;
}
-- test.lua
local ffi = require 'ffi'
-- Declare the C struct type `mat4` (sixteen float fields, m11 through m44) to
-- the FFI so that the name 'mat4' can be used as a ctype further down.
ffi.cdef [[
typedef struct {
float m11, m21, m31, m41;
float m12, m22, m32, m42;
float m13, m23, m33, m43;
float m14, m24, m34, m44;
} mat4;
]]
-- Number of mat4() calls between forced full garbage collections.
local GCInterval = 10000
-- Capacity of the recycling pool, in matrices.
local mat4poolSize = 1000000
-- FFI array used as a stack of released matrices; slots [0, mat4cnt) are
-- currently occupied.
local mat4pool = ffi.new('mat4[?]', mat4poolSize)
local mat4cnt = 0
-- Counts mat4() calls since the last forced collection.
local mat4GCCnt = 0

--- Finalizer that pushes a released matrix onto the pool.
-- mat4() installs it on every matrix it returns via ffi.gc. The matrix is
-- copied by value into the next free pool slot.
-- @param m the mat4 cdata object being finalized
local function mat4release(m)
  -- FFI array indexing is not bounds-checked, so storing past the last slot
  -- would corrupt memory. When the pool is full the matrix is simply dropped.
  if mat4cnt >= mat4poolSize then
    return
  end
  mat4pool[mat4cnt] = m
  mat4cnt = mat4cnt + 1
  -- print('release')
end
-- Constructor for the 'mat4' ctype, registered with an empty metatable.
local mat4t = ffi.metatype('mat4', {})

--- Pooled matrix constructor.
-- Forces a full garbage collection once mat4GCCnt has reached GCInterval,
-- then hands out a matrix from the pool if one is available and otherwise
-- allocates a new one with mat4t(...). Every returned matrix gets
-- mat4release as its finalizer.
-- @param ... initializers, forwarded to mat4t only when a new matrix is
--   allocated; a matrix taken from the pool is returned as stored
-- @return a mat4 cdata object
function mat4(...)
  -- Decide first, collect second, then update the counter: the counter
  -- restarts at 1 after a collection and is incremented otherwise.
  local collectNow = mat4GCCnt >= GCInterval
  if collectNow then
    collectgarbage('collect')
  end
  mat4GCCnt = collectNow and 1 or mat4GCCnt + 1

  local mat
  if mat4cnt <= 0 then
    -- print('alloc')
    mat = mat4t(...)
  else
    -- Pop the most recently released slot.
    -- NOTE(review): indexing mat4pool presumably yields a reference into the
    -- pool rather than a copy, in which case a later mat4release could
    -- overwrite a matrix that is still in use - confirm.
    mat4cnt = mat4cnt - 1
    mat = mat4pool[mat4cnt]
    -- mat.m11, mat.m21, mat.m31, mat.m41, mat.m12, mat.m22, mat.m32, mat.m42, mat.m13, mat.m23, mat.m33, mat.m43, mat.m14, mat.m24, mat.m34, mat.m44 = ...
    -- print('-----')
  end

  -- ffi.gc returns the cdata object it was given.
  return ffi.gc(mat, mat4release)
end
-- Benchmark driver: times plain FFI allocation (mat4t) against the pooled
-- constructor (mat4) and prints one result line for each.

-- Same iteration count as the C++ loop in test.cpp (i = 0 .. 19999999).
-- A Lua numeric `for` includes its upper bound, so the loops run from 1.
local iterations = 20000000

-- Plain allocation through the ctype constructor.
local start = os.clock()
-- v accumulates one field of each matrix so the allocation is actually used.
local v = 0
for _ = 1, iterations do
  local m = mat4t()
  v = v + m.m11
end
print(string.format('heap in %f seconds', os.clock()-start))

-- Pooled allocation; reuses the timer and accumulator variables.
start = os.clock()
v = 0
for _ = 1, iterations do
  local m = mat4()
  v = v + m.m11
end
print(string.format('pool in %f seconds', os.clock()-start))
Regards,
--
Adam Strzelecki