lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


Hi All,

I have worked on vector and matrix "classes" using FFI, and views for them, see
code below for implementation, benchmarks, and tests (please remember to change
the module name used in require).

Pseudo usage code:

local v = alg.Vec(10)
v[1] = v[2] 
local view = v:sub(2, 4) -- From element 2 to element 4
local n = v:size()

local m = alg.Mat(2,4)
m[1][1] = m[2][2]
n = m:rows()
n = m:cols()
local r = m:row(2) -- Vector view of the second row
local c = m:col(2)
view = m:sub(1, 2, 3, 4) -- Sub matrix view, rows from 1 to 2, cols from 3 to 4

The offset (default is first element indexed by 1) can be changed with the local
variable offset.
The bounds checks can be enabled/disabled via the local variable checkbounds
(more on this below).

In summary, the performance is very close to the C-style implementation of
vector and matrixes (try the benchmark :P).
For some reason there is a very minor difference (even with bounds checking
disabled) between the vector class and the C-style equivalent which is not
present in the matrix case. It's very minor but I am curious about it, maybe I
made a stupid mistake :)
 

At the moment, the only issue is that I cannot enable bound checks on the column
index in
m[row][col]
as this would require having
m[i]
be defined (for numeric types) as
m:row(i)
At the moment the (temporary) vector view that is created is not optimized away. 
It is my understanding that there is a plan to implement this kind of
optimizations in LuaJIT in the future. Or is the struct object vecview_t too
complex to be optimized away in any case?
Any suggestion on alternative approaches?

Code should work, but has not been tested extensively.

Any comment/question is more than welcome!


Cheers,
KR

-- Module for vector and matrix algebra.
local _M = {}
local ffi = require("ffi")

ffi.cdef[[
void *malloc(size_t size);
void free(void *ptr);

typedef struct { 
  double* m_ptr;
  int32_t m_size;
  int32_t m_stride;
} vec_t;

typedef struct { 
  double* m_ptr;
  int32_t m_size;
  int32_t m_stride;
} vecview_t;

typedef struct { 
  double* m_ptr;
  int32_t m_rows;
  int32_t m_cols;
  int32_t m_stride;
} mat_t;

typedef struct { 
  double* m_ptr;
  int32_t m_rows;
  int32_t m_cols;
  int32_t m_stride;
} matview_t;  
]]

-- TODO: Implement :copy() functionality.
local offset = 1
local checkbounds = true

function copy_tbl(t)
  local t2 = {}
  for k,v in pairs(t) do
    t2[k] = v
  end
  return t2
end

------------------------- Vector ---------------------
local vec_mtdispatch

local function vec__len(self) return self.m_size end

local function vec_unchecked__index(self, i)
  if type(i)=="number" then 
    return self.m_ptr[(i-offset)*self.m_stride] 
  else 
    return vec_mtdispatch[i] -- For member methods.
  end 
end
local function vec__index(self, i)
  if type(i)=="number" then assert(0 <= i-offset and i-offset < self.m_size) end
  return vec_unchecked__index(self, i)
end

local function vec_unchecked__newindex(self, i, v)
  -- Only numbers can be assigned. 
  self.m_ptr[(i-offset)*self.m_stride] = v 
end
local function vec__newindex(self, i, v)
  assert(0 <= i-offset and i-offset < self.m_size)
  vec_unchecked__newindex(self, i, v)
end

local function vec__tostring(self)
  local tbl = {}  
  for i=offset,#self-1+offset do tbl[i] = tostring(self[i]) end      
  return table.concat(tbl, " ")
end

local function block__gc(self) ffi.C.free(self.m_ptr) end

local vecview_mt = {
  __len = vec__len,
  __tostring = vec__tostring,
}
if checkbounds then
  vecview_mt.__index = vec__index
  vecview_mt.__newindex = vec__newindex
else
  vecview_mt.__index = vec_unchecked__index
  vecview_mt.__newindex = vec_unchecked__newindex
end

local vec_mt = copy_tbl(vecview_mt)
vec_mt.__gc = block__gc

local vec_ct = ffi.metatype("vec_t", vec_mt)
local vecview_ct = ffi.metatype("vecview_t", vecview_mt)

local function vec_unchecked_sub(self, first, last)
  return vecview_ct(self.m_ptr +first -offset, last-first +1, self.m_stride)
end
local function vec_sub(self, first, last)
  assert(first <= last and 0 <= first-offset and last-offset < self.m_size)
  return vec_unchecked_sub(self, first, last)
end

vec_mtdispatch = {
  stride = function(self) return self.m_stride end,
  offset = function(self) return offset end,
}
if checkbounds then
  vec_mtdispatch.sub = vec_sub
else 
  vec_mtdispatch.sum = vec_unchecked_sub
end

local function vec_ctor(size)
   -- Allocate array + struct.
  return vec_ct(ffi.C.malloc(size*8), size, 1)
end
------------------------- Vector ---------------------
 
------------------------- Matrix ---------------------
local mat_mtdispatch

local function mat_unchecked__index(self, i)
  if type(i)=="number" then 
    return self.m_ptr + (i-offset)*self.m_stride - offset
  else 
    return mat_mtdispatch[i] 
  end 
end
local function mat__index(self, i)
  if type(i)=="number" then assert(0 <= i-offset and i-offset < self.m_rows) end
  return mat_unchecked__index(self, i)
end

local function mat__newindex(self, i, v) error("Should not be called") end

local function mat__tostring(self)
  local tbl = {}  
  for row=offset,self.m_rows-1+offset do 
    local rowTbl = {}
    for col=offset,self.m_cols-1+offset do 
      rowTbl[col] = tostring(self[row][col]) 
    end      
    tbl[row] = table.concat(rowTbl, " ")
  end      
  return table.concat(tbl, "\n")
end

local matview_mt = {
  __tostring = mat__tostring,
  __newindex = mat__newindex
}
if checkbounds then
  matview_mt.__index = mat__index
else
  matview_mt.__index = mat_unchecked__index
end

local mat_mt = copy_tbl(matview_mt)
mat_mt.__gc = block_gc

local mat_ct = ffi.metatype("mat_t", mat_mt)
local matview_ct = ffi.metatype("matview_t", matview_mt)

local function mat_unchecked_row(self, i) 
  return vecview_ct(self.m_ptr + (i-offset)*self.m_stride, self.m_cols, 1)
end
local function mat_row(self, i) 
  assert(0 <= i-offset and i-offset < self.m_rows)
  return mat_unchecked_row(self, i)
end 

local function mat_unchecked_col(self, i) 
  return vecview_ct(self.m_ptr +i -offset, self.m_rows, self.m_stride)
end
local function mat_col(self, i) 
  assert(0 <= i-offset and i-offset < self.m_cols) 
  return mat_unchecked_col(self, i)
end

local function mat_unchecked_sub(self, firstRow, lastRow, firstCol, lastCol)
  return matview_ct(self.m_ptr+(firstRow-offset)*self.m_stride
  + firstCol-offset, lastRow-firstRow+1, lastCol-firstCol+1
  , self.m_stride)
end
local function mat_sub(self, firstRow, lastRow, firstCol, lastCol)
  assert(firstRow <= lastRow and 0 <= firstRow-offset)
  assert(lastRow-offset < self.m_rows and firstCol <= lastCol)
  assert(0 <= firstCol-offset and lastCol-offset < self.m_cols)
  return mat_unchecked_sub(self, firstRow, lastRow, firstCol, lastCol)
end

mat_mtdispatch = {
  rows = function(self) return self.m_rows end,
  cols = function(self) return self.m_cols end,
  stride = function(self) return self.m_stride end,
  offset = function(self) return offset end  
}
if checkbounds then 
  mat_mtdispatch.row = mat_row
  mat_mtdispatch.col = mat_col
  mat_mtdispatch.sub = mat_sub
else
  mat_mtdispatch.row = mat_unchecked_row
  mat_mtdispatch.col = mat_unchecked_col
  mat_mtdispatch.sub = mat_unchecked_sub
end

local function mat_ctor(rows, cols)
   return mat_ct(ffi.C.malloc(rows*cols*8), rows, cols, cols)
end
------------------------- Matrix ---------------------

_M.Vec = vec_ctor
_M.Mat = mat_ctor

return _M

-- Module for vector and matrix algebra END

--------------- TEST
local alg = require("algebra") -- Change this!
local ffi = require("ffi")

local function test_vec(x)
  print("print(vec): ", x)
  print("#vec: ", #x)
  print("vec:offset()", x:offset())
  print("vec:stride()", x:stride())
end

local function test_mat(x)
  print("print(mat): ", print(x))
  print("mat:rows() ", x:rows())
  print("mat:cols() ", x:cols())
  print("mat:stride() ", x:stride())
  print("mat:offset() ", x:offset())
  for i=1,x:rows() do
    print("\nview = x:row(i), i: ",i) 
    local view = x:row(i)
    test_vec(view)
  end
  for i=1,x:cols() do
    print("\nview = x:col(i), i: ",i) 
    local view = x:col(i)
    test_vec(view)
  end
end


local vec = alg.Vec(8)
for i=1,#vec do
  vec[i] = i
end
print("vec = alg.Vec(8), filled from 1 to 8")
test_vec(vec)

local subvec = vec:sub(3,5)
print("\nsubvec = vec:sub(3,5)")
test_vec(subvec)

local mat = alg.Mat(5, 5)
print("\nmat = alg.Mat(5, 5), filled from 1 increasing row-wise")
local incr = 1
for row=1,mat:rows() do
  for col=1,mat:cols() do
    mat[row][col] = incr
    incr = incr + 1
  end
end
test_mat(mat)

local matSub = mat:sub(2, 3, 3, 5) 
print("\nmatSub = mat:sub(2, 3, 3, 5)")
test_mat(matSub)
--------------- TEST END

--------------- BENCHMARK
local alg = require("algebra") -- Change this!
local ffi = require("ffi")

local n = 1e6
local incr = 0.000156
local rep = 500
local start
local y = 0.0
local z = 0.0
local c = 0.0
local nrows = 1e3
local ncols = 1e3

function bench_vec(x)
  for irep=1,rep do 
	  --write
	  for i=1,n do
		x[i] = incr
	  end
	  --read
	  for i=1,n do
		y = y + x[i]
	  end
  end
end

function bench_mat(x)
  for irep=1,rep do 
	  --write
	  for i=1,nrows do
      for j=1,ncols do
        x[i][j] = incr
      end
		end
	  --read
	  for i=1,nrows do
      for j=1,ncols do
        z = z + x[i][j]
      end
		end
  end
end

function bench_matplain(x)
  for irep=1,rep do 
	  --write
	  for i=1,nrows do
      for j=1,ncols do
        x[(i-1)*nrows + j-1] = incr
      end
		end
	  --read
	  for i=1,nrows do
      for j=1,ncols do
        z = z + x[(i-1)*nrows + j-1]
      end
		end
  end
end

assert(n==nrows*ncols)

-- Check
for i=1,2*n*rep do c = c + incr end
local checkdiff = incr-c/(2*n*rep)

local vecplain = ffi.new("double[?]", n)
print("\nC-style vec(n) run")
start = os.clock()
bench_vec(vecplain)
print("secs: ", os.clock() - start)

local vec = alg.Vec(n)
print("\nVec(n) run")
start = os.clock()
bench_vec(vec)
print("secs: ", os.clock() - start)

assert(incr-y/(2*n*rep)==checkdiff)

local matplain = ffi.new("double[?]", n)
print("\nC-style mat(nrows, ncols) run")
start = os.clock()
bench_matplain(matplain)
print("secs: ", os.clock() - start)

local mat = alg.Mat(nrows, ncols)
print("\nMat(nrows, ncols) run")
start = os.clock()
bench_mat(mat)
print("secs: ", os.clock() - start)

assert(incr-z/(2*n*rep)==checkdiff)
----------- BENCHMARK END