lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


Can you not build an index and look up data on the fly with an __index metamethod?

On 2010-10-16 5:16 PM, "Alexander Gladysh" <agladysh@gmail.com> wrote:

Hi, list!

Apologies for a lazy question, I have not done my own homework.

I've got a large file  (3M entries, 250 MB) with data.
Each entry is one line with a small Lua table:

{ foo = 1; bar = 2; baz = 'text' };

(Actually, there are two different entry formats, but that does not matter.)

I need to load this data fast enough. (Faster than the several hours that
my original loader has been running on LJ2, and it still has not finished.)

So, if you know a better implementation than the ad-hoc unoptimized one below,
please share.

Alexander.

 local load_huge_table_list = function(filename)
   local result = { }

   local env =
   {
     A_ = function(t) result[#result + 1] = t end; -- TODO: Fragile?
   }

   local f = assert(io.open(filename, "r"))

   local MAX_LINES_IN_CHUNK = 1024
   local cur_lines_in_chunk = 0

   local function_open = false
   local header = "local A_=A_;\n" -- File header
   local next_line = nil
   local need_open_line = false
   local need_close_line = false

   -- local w = function(f) return function() local s = f()
io.write(s or "") return s end end

   local chunk = assert(
       load(
           function()
             cur_lines_in_chunk = cur_lines_in_chunk + 1 -- TODO: I
suspect off-by-one error here.
             while true do
               if header then
                 local str = header
                 header = nil
                 return str
               end
               if next_line then
                 if need_open_line then
                   need_open_line = false
                   return "A_"
                 elseif need_close_line then
                   need_close_line = false
                   next_line = false
                   return "\n"
                 else
                   need_close_line = true
                   return next_line
                 end
               end

               if not f then
                 return nil
               end

               next_line = f:read("*l") -- TODO: Read larger chunks?
               if not next_line then
                 f:close()
                 f = nil

                 if function_open then
                   function_open = false
                   return "end)();\n"
                 end

                 return nil -- EOF
               end

               need_open_line = true

               if not function_open then
                 function_open = true
                 return "(function()\n"
               elseif cur_lines_in_chunk > MAX_LINES_IN_CHUNK then --
TODO: And here symmetrical off-by-one?
                 assert(function_open)
                 cur_lines_in_chunk = 0
                 return "end)();\n(function()\n"
               end
             end
           end,
           "="..filename
         )
     )

   assert(not f)

   setfenv(chunk, env)
   chunk()

   return result
 end