[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: Load large amount of data fast
- From: Alexander Gladysh <agladysh@...>
- Date: Sun, 17 Oct 2010 03:14:23 +0400
Hi, list!
Apologies for a lazy question, I have not done my own homework.
I've got a large file (3M entries, 250 MB) with data.
Each entry is one line with a small Lua table:
{ foo = 1; bar = 2; baz = 'text' };
(Actually, there are two different entry formats, but that does not matter.)
I need to load this data fast enough. (Faster than my original loader,
which has been running on LJ2 for several hours and still has not finished.)
So, if you know a better implementation than the ad-hoc, unoptimized one below,
please share.
Alexander.
--- Load a file in which every line is a Lua table constructor, e.g.
--     { foo = 1; bar = 2; baz = 'text' };
-- and return all the tables as one array, in file order.
--
-- The file is never held in memory as a whole: it is streamed to `load`
-- through a reader function. Each data line is prefixed with `A_`, turning it
-- into a call `A_{ ... };` that appends the table to the result. Lines are
-- grouped into anonymous `(function() ... end)();` wrappers of at most
-- MAX_LINES_IN_CHUNK lines each, so that no single generated function has to
-- hold the constants of the entire file.
--
-- Lines that contain only whitespace are skipped. (Handing an empty string to
-- `load` from the reader would be taken as end of input and truncate the
-- load.)
--
-- NOTE(review): the file content is executed as Lua code. The environment
-- only exposes `A_`, but a hostile file can still loop forever or exhaust
-- memory; use on trusted data only.
--
-- @param filename path of the data file
-- @return array of the loaded tables
-- Raises an error if the file cannot be opened, if the generated code does
-- not compile (the message is the one produced by `load`), or if running it
-- fails.
local load_huge_table_list = function(filename)
  local MAX_LINES_IN_CHUNK = 1024

  local result = { }
  local env =
  {
    -- Called once per data line by the generated code.
    A_ = function(t) result[#result + 1] = t end;
  }

  local f = assert(io.open(filename, "r"))

  local function_open = false -- Is a "(function()" wrapper currently open?
  local lines_in_chunk = 0    -- Data lines emitted into the open wrapper.

  -- Small queue of code pieces waiting to be handed to `load`.
  -- It starts with the file header, which caches A_ in a local.
  local pending = { "local A_=A_;\n" }
  local first, last = 1, 1

  local push = function(piece)
    last = last + 1
    pending[last] = piece
  end

  local reader = function()
    -- Refill the queue from the file whenever it runs dry.
    while first > last do
      first, last = 1, 0

      if not f then
        return nil -- EOF was already reported; nothing more to emit.
      end

      local line = f:read("*l")
      if not line then
        f:close()
        f = nil
        if function_open then
          function_open = false
          push("end)();\n")
        end
      elseif line:find("%S") then
        -- Count data lines (not reader calls) to decide when to start a
        -- fresh wrapper function.
        if function_open and lines_in_chunk >= MAX_LINES_IN_CHUNK then
          function_open = false
          push("end)();\n")
        end
        if not function_open then
          function_open = true
          lines_in_chunk = 0
          push("(function()\n")
        end
        push("A_")
        push(line)
        push("\n")
        lines_in_chunk = lines_in_chunk + 1
      end
    end

    local piece = pending[first]
    first = first + 1
    return piece
  end

  -- The two extra arguments (mode, env) are used by Lua 5.2+ and LuaJIT and
  -- are ignored by plain Lua 5.1, where setfenv below does the same job.
  local chunk, err = load(reader, "=" .. filename, "t", env)

  -- On a syntax error `load` stops before the reader reaches EOF; make sure
  -- the file handle does not leak in that case.
  if f then
    f:close()
    f = nil
  end

  assert(chunk, err)

  if setfenv then
    setfenv(chunk, env)
  end

  chunk()

  return result
end