[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: Re: LuaJIT2 performance for number crunching
- From: Mike Pall <mikelu-1102@...>
- Date: Sat, 19 Feb 2011 17:11:51 +0100
Francesco Abbate wrote:
> For the other size it seems that there is a small problem because the
> performance of LuaJIT2 are in this case below my expectations.
Fixed implementation attached. Much faster.
--Mike
# order = 5
$(include 'ode-defs.lua.in')
# ah = { 1.0/4.0, 3.0/8.0, 12.0/13.0, 1.0, 1.0/2.0 }
# b3 = { 3.0/32.0, 9.0/32.0 }
# b4 = { 1932.0/2197.0, -7200.0/2197.0, 7296.0/2197.0}
# b5 = { 8341.0/4104.0, -32832.0/4104.0, 29440.0/4104.0, -845.0/4104.0}
# b6 = { -6080.0/20520.0, 41040.0/20520.0, -28352.0/20520.0, 9295.0/20520.0, -5643.0/20520.0}
# c1 = 902880.0/7618050.0
# c3 = 3953664.0/7618050.0
# c4 = 3855735.0/7618050.0
# c5 = -1371249.0/7618050.0
# c6 = 277020.0/7618050.0
-- These are the differences of fifth and fourth order coefficients
-- for error estimation */
# ec = { 0.0, 1.0 / 360.0, 0.0, -128.0 / 4275.0, -2197.0 / 75240.0, 1.0 / 50.0, 2.0 / 55.0 }
# y_err_only = (a_dydt == 0)
local function rkf45_evolve(s, f, t1)
local t, h = s.t, s.h
local hadj, inc
local $(VL'y')
local $(VL'k1') = $(AL's.dydt')
if t + h > t1 then h = t1 - t end
while h > 0 do
$(VL'y') = $(AL's.y')
local rmax = 0
do
# for i = 0, N-1 do
local ytmp_$(i) = y_$(i) + $(ah[1]) * h * k1_$(i)
# end
-- k2 step
local $(VL'k2') = f(t + $(ah[1]) * h, $(VL'ytmp'))
# for i = 0, N-1 do
ytmp_$(i) = y_$(i) + h * ($(b3[1]) * k1_$(i) + $(b3[2]) * k2_$(i))
# end
-- k3 step
local $(VL'k3') = f(t + $(ah[2]) * h, $(VL'ytmp'))
# for i = 0, N-1 do
ytmp_$(i) = y_$(i) + h * ($(b4[1]) * k1_$(i) + $(b4[2]) * k2_$(i) + $(b4[3]) * k3_$(i))
# end
-- k4 step
local $(VL'k4') = f(t + $(ah[3]) * h, $(VL'ytmp'))
# for i = 0, N-1 do
ytmp_$(i) = y_$(i) + h * ($(b5[1]) * k1_$(i) + $(b5[2]) * k2_$(i) + $(b5[3]) * k3_$(i) + $(b5[4]) * k4_$(i))
# end
-- k5 step
local $(VL'k5') = f(t + $(ah[4]) * h, $(VL'ytmp'))
# for i = 0, N-1 do
ytmp_$(i) = y_$(i) + h * ($(b6[1]) * k1_$(i) + $(b6[2]) * k2_$(i) + $(b6[3]) * k3_$(i) + $(b6[4]) * k4_$(i) + $(b6[5]) * k5_$(i))
# end
-- k6 step and final sum
-- since k2 is no more used we can use k2 to store k6
local $(VL'k6') = f(t + $(ah[5]) * h, $(VL'ytmp'))
local di
# for i = 0, N-1 do
di = $(c1) * k1_$(i) + $(c3) * k3_$(i) + $(c4) * k4_$(i) + $(c5) * k5_$(i) + $(c6) * k6_$(i)
y_$(i) = y_$(i) + h * di
# end
# if not y_err_only then
local $(VL'dydt') = f(t + h, $(VL'y'))
# for i = 0, N-1 do
s.dydt[$(i)] = dydt_$(i)
# end
# end
local yerr, r, d0
# for i = 0, N-1 do
yerr = h * ($(ec[2]) * k1_$(i) + $(ec[4]) * k3_$(i) + $(ec[5]) * k4_$(i) + $(ec[6]) * k5_$(i) + $(ec[7]) * k6_$(i))
# if y_err_only then
d0 = $(eps_rel) * ($(a_y) * abs(y_$(i))) + $(eps_abs)
# else
d0 = $(eps_rel) * ($(a_y) * abs(y_$(i)) + $(a_dydt) * abs(h * dydt_$(i))) + $(eps_abs)
# end
r = abs(yerr) / abs(d0)
rmax = max(r, rmax)
# end
end
hadj, inc = hadjust(rmax, h)
if inc >= 0 then break end
h = hadj
end
# if y_err_only then
local $(VL'dydt') = f(t + h, $(VL'y'))
# for i = 0, N-1 do
s.dydt[$(i)] = dydt_$(i)
# end
# end
# for i = 0, N-1 do
s.y[$(i)] = y_$(i)
# end
s.t = t + h
s.h = hadj
return h
end
return {new= ode_new, init= ode_init, evolve= rkf45_evolve}