lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


Hi, list!

Apologies for a lazy question, I have not done my own homework.

I've got a large file  (3M entries, 250 MB) with data.
Each entry is one line with a small Lua table:

{ foo = 1; bar = 2; baz = 'text' };

(Actually, there are two different entry formats, but that does not matter.)

I need to load this data fast enough. (Faster than my original loader,
which has been running on LJ2 for several hours and still has not finished.)

So, if you know a better implementation than the ad-hoc, unoptimized one
below, please share.

Alexander.

  local load_huge_table_list = function(filename)
    local result = { }

    local env =
    {
      A_ = function(t) result[#result + 1] = t end; -- TODO: Fragile?
    }

    local f = assert(io.open(filename, "r"))

    local MAX_LINES_IN_CHUNK = 1024
    local cur_lines_in_chunk = 0

    local function_open = false
    local header = "local A_=A_;\n" -- File header
    local next_line = nil
    local need_open_line = false
    local need_close_line = false

    -- local w = function(f) return function() local s = f()
io.write(s or "") return s end end

    local chunk = assert(
        load(
            function()
              cur_lines_in_chunk = cur_lines_in_chunk + 1 -- TODO: I
suspect off-by-one error here.
              while true do
                if header then
                  local str = header
                  header = nil
                  return str
                end
                if next_line then
                  if need_open_line then
                    need_open_line = false
                    return "A_"
                  elseif need_close_line then
                    need_close_line = false
                    next_line = false
                    return "\n"
                  else
                    need_close_line = true
                    return next_line
                  end
                end

                if not f then
                  return nil
                end

                next_line = f:read("*l") -- TODO: Read larger chunks?
                if not next_line then
                  f:close()
                  f = nil

                  if function_open then
                    function_open = false
                    return "end)();\n"
                  end

                  return nil -- EOF
                end

                need_open_line = true

                if not function_open then
                  function_open = true
                  return "(function()\n"
                elseif cur_lines_in_chunk > MAX_LINES_IN_CHUNK then --
TODO: And here symmetrical off-by-one?
                  assert(function_open)
                  cur_lines_in_chunk = 0
                  return "end)();\n(function()\n"
                end
              end
            end,
            "="..filename
          )
      )

    assert(not f)

    setfenv(chunk, env)
    chunk()

    return result
  end