Read Defined Chunks

lua-users home
wiki

Showing revision 12
With this function you can read defined chunks (everything up to a given delimiter) from a file or from io.stdin. It is a complete rewrite of the first version: it is now fast, doesn't concatenate strings unnecessarily, needs less memory and is flexible. The purpose is to process a) files with tons of megabytes and b) mixed-format input, e.g. MIME multipart messages, which are a mix of \r\n-terminated lines and binary data. Please note a simple non-standard Lua enhancement: I use the number variable lua.maxread to set the chunk size used for the io reads (chunks of bytes) at a central point. Replace this variable with your preferred chunk size, e.g. 2^13 for 8 KB.


   -- io.readuntil(Filehandle,Delimiter,Collect,Limit)
   --
   -- Creates a coroutine that reads <Filehandle> chunk by chunk and yields the
   -- data found between occurrences of <Delimiter>.
   --
   -- Filehandle (userdata); anything that supports Filehandle:read(<number of bytes>)
   -- Delimiter (string, optional); searched as plain text (no pattern matching); must not be
   --           empty and must not be longer than the chunk size; optional here because
   --           coroutine.resume() also accepts <delimiter>
   -- Collect (boolean, optional) = true; keep reading until <delimiter> is found or end of file
   --                                     or <limit> is reached and yield the string at once
   --                             = false; also yield the data read so far (with the flag false)
   --                                      every time a chunk is exhausted without a delimiter
   --                             = nil (omitted); treated like false by the code (the original
   --                                      header comment claimed true was the default)
   -- Limit (number, optional); number of bytes to read from <filehandle>; default is unlimited
   --
   -- Argument shortcuts: a boolean in the <delimiter> position is taken as <collect>;
   -- a number in the <delimiter> or <collect> position is taken as <limit>.
   --
   -- Chunk size: read from the non-standard global lua.maxread at the first resume;
   -- if that global does not exist, 8192 bytes (2^13) is used.
   --
   -- <ok>,<string>,<found> = coroutine.resume(Thread,Delimiter,Collect)
   --
   -- Thread (thread); returned from io.readuntil()
   -- Delimiter (string, optional); replaces the current delimiter for the following searches
   -- Collect (boolean, optional); replaces the current collect mode (false is honoured);
   --         may also be passed in the <delimiter> position
   --
   -- return (boolean); = true; no error
   --                   = false; an error occurred and the second value is the error message
   --        (string or nil) = nil; end of file
   --        (boolean) = true; delimiter found
   --                  = false; delimiter not found
   --
   -- note: if the coroutine returns true,<string>,false then
   --          if <collect> = false it does not have to be the end of file
   --                       = true  the end of file (or <limit>) is reached and the next
   --                               coroutine.resume returns true,nil(,nil)
   function io.readuntil(Filehandle,Delimiter,Collect,Limit)

      -- sort out the optional arguments (see "Argument shortcuts" above)
      if type(Delimiter)=='boolean' then
         Collect,Delimiter = Delimiter,Collect
      end
      if type(Delimiter)=='number' then
         Limit,Delimiter = Delimiter,nil
      end
      if type(Collect)=='number' then
         Limit,Collect = Collect,nil
      end

      return coroutine.create(function(NewDelimiter,NewCollect)

         -- take over the values handed in through coroutine.resume();
         -- nil means "keep the current value", so an explicit false for
         -- <collect> really switches collecting off
         local function Update(NewDelimiter,NewCollect)
            if type(NewDelimiter)=='boolean' then
               NewCollect,NewDelimiter = NewDelimiter,nil
            end
            if NewDelimiter~=nil then
               Delimiter=NewDelimiter
            end
            if NewCollect~=nil then
               Collect=NewCollect
            end
         end

         Update(NewDelimiter,NewCollect)

         -- lua.maxread is a non-standard global; fall back to 8KB when it is absent
         local Chunksize = type(lua)=='table' and lua.maxread or 8192
         local Chunk,Length,First,Second,SearchFrom,GetFrom = {},0,1,2,1,1

         -- read the next chunk without exceeding <limit>; returns nil when
         -- the limit or the end of file is reached
         local function ReadChunk()
            local Data
            if Limit and Length+Chunksize>Limit then
               Data=Limit-Length>0 and Filehandle:read(Limit-Length) or nil
            else
               Data=Filehandle:read(Chunksize)
            end
            if Data then
               Length=Length+string.len(Data)
            end
            return Data
         end

         -- validate the current delimiter and return its length; called before
         -- every search because the delimiter may change with each resume
         local function DelimiterLength()
            if Delimiter==nil then
               error('io.readuntil: delimiter missing')
            end
            local Size=string.len(Delimiter)
            if Size==0 then
--             an empty delimiter would match forever without consuming input
               error('io.readuntil: delimiter is empty')
            end
            if Size>Chunksize then
               error('io.readuntil: delimiter to long')
            end
            return Size
         end

         Chunk[First]=ReadChunk()

         if Chunk[First] then

            while true do

               local Size=DelimiterLength()

               local FoundFrom,FoundTo = string.find(Chunk[First],Delimiter,SearchFrom,true)
               if FoundFrom then
--                delimiter found in first chunk
                  Update(coroutine.yield(string.sub(Chunk[First],GetFrom,FoundFrom-1),true))
                  SearchFrom,GetFrom = FoundTo+1,FoundTo+1
               else

                  Chunk[Second]=ReadChunk()

                  if not Chunk[Second] then
--                   no delimiter found and no further input
                     break
                  end

--                concatenate end of first chunk with start of second chunk so that a possibly split delimiter must be found;
--                the tail never starts before GetFrom: bytes in front of it were already returned or belong to a previous delimiter
                  local FirstLength=string.len(Chunk[First])
                  local TailFrom=math.max(FirstLength-Size+2,GetFrom)
                  local TailLength=FirstLength-TailFrom+1

                  FoundFrom,FoundTo = string.find(string.sub(Chunk[First],TailFrom)..string.sub(Chunk[Second],1,Size-1),Delimiter,1,true)
                  if FoundFrom then
--                   delimiter is split between first and second chunk
                     Update(coroutine.yield(string.sub(Chunk[First],GetFrom,TailFrom+FoundFrom-2),true))
                     First,Second = Second,First
--                   translate the end of the match into a position inside the second chunk
                     SearchFrom,GetFrom = FoundTo-TailLength+1,FoundTo-TailLength+1
                  elseif Collect then
--                   keep only the data not yet returned and append the new chunk;
--                   searching continues at the start of the appended chunk
                     local Pending=string.sub(Chunk[First],GetFrom)
                     SearchFrom=string.len(Pending)+1
                     Chunk[First]=Pending..Chunk[Second]
                     GetFrom=1
                  else
--                   hand out the rest of the first chunk and continue with the second one
                     if FirstLength>=GetFrom then
                        Update(coroutine.yield(string.sub(Chunk[First],GetFrom),false))
                     end
                     First,Second = Second,First
                     SearchFrom,GetFrom = 1,1
                  end
               end
            end

            if string.len(Chunk[First])>=GetFrom then
--             return rest of first chunk
               coroutine.yield(string.sub(Chunk[First],GetFrom),false)
            end

         end

      end)

-- return (thread); a coroutine

   end

RecentChanges · preferences
edit · history · current revision
Edited July 26, 2004 12:59 am GMT (diff)