lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


> Calling collectgarbage periodically from your matrix constructor might
> help, if that isn't a big no-no for you...
> I experimented around with this code a bit, and the concept seems to work:

Thanks for sharing that! Unfortunately, even after tuning it as well as I can, I haven't managed to make this method even as fast as regular heap allocation (almost 10s vs 1s via the regular heap). I guess forcing a garbage collection might have too much overhead there. FYI, heap allocation seems to be on par with C/C++ malloc (comparable results when just allocating blocks like that); however, allocating the C++ class on the stack makes things almost 20x faster. (BTW, I know these samples do not make much sense, as the calculated "v" variable is only there to ensure the allocation is not optimized out.)

heap in  0.940712 seconds
pool in 10.052809 seconds
c++  in  0.057713 seconds

-- test.cpp
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <sys/time.h>
// Plain aggregate of sixteen floats with every member publicly accessible.
// Members are named mRC; each declaration line groups the four members that
// share the same second digit, and the declaration order fixes the layout.
struct mat4 {
    float m11, m21, m31, m41;   // m*1
    float m12, m22, m32, m42;   // m*2
    float m13, m23, m33, m43;   // m*3
    float m14, m24, m34, m44;   // m*4
};
/*
 * Stack-allocation benchmark: constructs a mat4 on the stack 20,000,000
 * times, accumulates one member of each into `v`, and prints the elapsed
 * wall-clock time measured with gettimeofday().
 *
 * Returns 0. The command-line arguments are not used.
 */
int main (int argc, char const *argv[])
{
	(void)argc;
	(void)argv;

	int i;
	/* Must start at zero: accumulating into an uninitialised float is
	 * undefined behaviour. */
	float v = 0.0f;
	struct timeval start, end;
	gettimeofday(&start, NULL);
	for(i = 0; i < 20000000; i++) {
		/* Value-initialise so that reading m.m11 is well defined (all
		 * members become zero). */
		mat4 m = mat4();
		v += m.m11;
	}
	gettimeofday(&end, NULL);

	/* Consume the accumulator after timing has stopped so the loop's result
	 * is observable. NOTE(review): an optimising compiler may still fold the
	 * loop to a constant; inspect the generated code before trusting the
	 * timing. */
	volatile float sink = v;
	(void)sink;

	double elapsed = (double)(end.tv_sec - start.tv_sec)
	               + (end.tv_usec - start.tv_usec) / 1000000.0;
	printf("c++ in %.12g seconds\n", elapsed);
	return 0;
}

-- test.lua
-- Load the FFI library and declare the C struct used by the rest of this
-- script: sixteen float members, in the same order as the mat4 class in
-- test.cpp.
local ffi = require('ffi')

ffi.cdef([[
typedef struct {
  float m11, m21, m31, m41;
  float m12, m22, m32, m42;
  float m13, m23, m33, m43;
  float m14, m24, m34, m44;
} mat4;
]])

-- Recycling allocator for mat4 values.
--
-- Released matrices are copied into a fixed-size FFI array used as a stack;
-- mat4() takes from that stack when it is non-empty and otherwise allocates
-- a fresh value through mat4t. A full garbage collection is forced every
-- GCInterval calls so that finalizers (and therefore releases) actually run.
--
-- NOTE(review): indexing an FFI array of structs appears to yield a
-- reference into the array rather than a copy. If so, the value returned on
-- the reuse path aliases the slot that the next mat4release() overwrites,
-- and each reuse still creates a new GC-tracked cdata plus an ffi.gc
-- registration. Confirm against the LuaJIT FFI semantics; a sound pool would
-- need to track free slot indices instead.

local GCInterval   = 10000     -- mat4() calls between forced collections
local mat4poolsize = 1000000   -- capacity of the recycle stack, in elements
local mat4pool     = ffi.new('mat4[?]', mat4poolsize)
local mat4cnt      = 0         -- number of recycled values currently stored
local mat4GCCnt    = 0         -- mat4() calls since the last forced collection
local mat4size     = ffi.sizeof('mat4')

--- Finalizer: stores the contents of a dying matrix on the recycle stack.
-- When the stack is already full the value is simply dropped; writing past
-- the end of the FFI array would corrupt memory.
-- @param m the mat4 cdata being finalized
local function mat4release(m)
  if mat4cnt >= mat4poolsize then
    return
  end
  mat4pool[mat4cnt] = m
  mat4cnt = mat4cnt + 1
end

local mat4t = ffi.metatype('mat4', {})

--- Creates a mat4, reusing a recycled slot when one is available.
-- Deliberately left global, as in the original script, so existing callers
-- keep working.
-- @param ... optional initializers, interpreted exactly as mat4t(...) would
-- @return a mat4 cdata whose finalizer is mat4release
function mat4(...)
  if mat4GCCnt >= GCInterval then
    collectgarbage 'collect'
    mat4GCCnt = 1
  else
    mat4GCCnt = mat4GCCnt + 1
  end
  local m
  if mat4cnt > 0 then
    mat4cnt = mat4cnt - 1
    -- Re-initialise the slot so both paths honour the constructor arguments
    -- instead of handing back whatever the previous owner left behind.
    if select('#', ...) > 0 then
      mat4pool[mat4cnt] = mat4t(...)
    else
      ffi.fill(mat4pool + mat4cnt, mat4size)
    end
    m = mat4pool[mat4cnt]
  else
    m = mat4t(...)
  end
  ffi.gc(m, mat4release)
  return m
end
-- Benchmark driver: times plain FFI allocation (mat4t) against the pooled
-- constructor (mat4) and prints one line per variant.
--
-- Each loop runs exactly `iterations` times, matching the 20,000,000
-- iterations of the C++ loop in test.cpp (the previous `0,20000000` bounds
-- ran one extra iteration). os.clock() reports CPU time, whereas the C++
-- program measures wall-clock time with gettimeofday().
local iterations = 20000000

-- `v` only exists to give each allocation an observable use.
local start = os.clock()
local v = 0
for _ = 1, iterations do
  local m = mat4t()
  v = v + m.m11
end
print(string.format('heap in %f seconds', os.clock()-start))

-- Reuse the same locals for the second measurement instead of shadowing them.
start = os.clock()
v = 0
for _ = 1, iterations do
  local m = mat4()
  v = v + m.m11
end
print(string.format('pool in %f seconds', os.clock()-start))

Regards,
-- 
Adam Strzelecki