Showing revision 12
With this function you can read defined chunks (until a given delimiter is found) from a file or from io.stdin.
It's a complete rewrite of the first version: now it's fast, doesn't concatenate strings unnecessarily, needs less memory and is flexible.
The purpose is to process a) files that are many megabytes in size and b) mixed-format input, e.g. MIME multipart messages, which are a mix of \r\n-terminated lines and binary data.
Please note a simple, non-standard Lua enhancement: I use the number variable lua.maxread to set, at a central point, the chunk size used for Filehandle:read(number of bytes). Set or replace this variable with your preferred chunk size, e.g. 2^13 for 8 KB.
--- Create a coroutine that reads delimiter-separated records from a file.
--
-- Arguments after Filehandle are recognised by type, so these calls work:
--   io.readuntil(fh, "\n")            io.readuntil(fh, true, "\n")
--   io.readuntil(fh, "\n", 100)       io.readuntil(fh, "\n", true, 100)
--
-- Filehandle (userdata or any object with a :read(n) method)
-- Delimiter  (string, optional here); must be non-empty and no longer than
--            the chunk size; optional because coroutine.resume() also
--            accepts a delimiter
-- Collect    (boolean, optional)
--            = true;  buffer input until <delimiter> is found, the end of
--                     file or <limit> is reached, and return the record at once
--            = false or omitted; also return partial strings before
--                     <delimiter> is found (streaming)
--            NOTE: earlier documentation named true as the default, but the
--            implementation has always streamed when Collect is omitted;
--            that behaviour is kept for existing callers.
-- Limit      (number, optional); maximum number of bytes to read from
--            <filehandle>; default is unlimited
--
-- The chunk size is lua.maxread when the global table `lua` defines it,
-- otherwise 8192 bytes.
--
-- ok, data, found = coroutine.resume(Thread, Delimiter, Collect)
--   Delimiter (string, optional);  replaces the current delimiter
--   Collect   (boolean, optional); replaces the current collect mode; an
--             explicit false switches collecting off again. A boolean may
--             also be passed as the only extra argument.
--   ok    (boolean) = true;  no error
--                   = false; an error occurred, the second value is the message
--   data  (string or nil) = nil; end of input
--   found (boolean) = true;  <data> was terminated by the delimiter
--                   = false; delimiter not found; in streaming mode this need
--                            not be the end of input, in collect mode it is
--                            the last record
--
-- return (thread); a coroutine
function io.readuntil(Filehandle,Delimiter,Collect,Limit)
  -- Sort the optional arguments by type.
  if type(Delimiter)=='boolean' then
    Collect,Delimiter = Delimiter,Collect
  end
  if type(Delimiter)=='number' then
    Limit,Delimiter = Delimiter,nil
  end
  if type(Collect)=='number' then
    Limit,Collect = Collect,nil
  end
  return coroutine.create(function(NewDelimiter,NewCollect)
    -- Merge values handed to coroutine.resume() with the current settings.
    -- Explicit nil checks are required: `NextCollect or Collect` would make
    -- it impossible to switch collecting off with an explicit false.
    local function Next(NextDelimiter,NextCollect)
      if type(NextDelimiter)=='boolean' then
        NextCollect,NextDelimiter = NextDelimiter,nil
      end
      if NextDelimiter==nil then
        NextDelimiter=Delimiter
      end
      if NextCollect==nil then
        NextCollect=Collect
      end
      return NextDelimiter,NextCollect
    end
    Delimiter,Collect = Next(NewDelimiter,NewCollect)
    -- lua.maxread is a non-standard global; fall back to 8 KB without it.
    local Chunksize = type(lua)=='table' and lua.maxread or 8192
    local Chunk,Length,First,Second,SearchFrom,GetFrom = {},0,1,2,1,1
    local FoundFrom,FoundTo
    -- Read the next chunk, never reading past <Limit> bytes in total.
    -- Returns nil at end of input or when the limit is exhausted.
    local function ReadChunk()
      local Want=Chunksize
      if Limit and Length+Chunksize>Limit then
        Want=Limit-Length
        if Want<=0 then
          return nil
        end
      end
      local Data=Filehandle:read(Want)
      if Data then
        Length=Length+string.len(Data)
      end
      return Data
    end
    Chunk[First]=ReadChunk()
    if not Chunk[First] then
      return
    end
    while true do
      -- The delimiter may change on every resume, so validate it each round.
      -- An empty delimiter would match at every position and never advance.
      if type(Delimiter)~='string' or Delimiter=='' then
        error('io.readuntil: delimiter missing or empty')
      end
      local DelimiterLength=string.len(Delimiter)
      if DelimiterLength>Chunksize then
        error('io.readuntil: delimiter to long')
      end
      FoundFrom,FoundTo = string.find(Chunk[First],Delimiter,SearchFrom,true)
      if FoundFrom then
        -- delimiter found in first chunk
        Delimiter,Collect = Next(coroutine.yield(string.sub(Chunk[First],GetFrom,FoundFrom-1),true))
        SearchFrom,GetFrom = FoundTo+1,FoundTo+1
      else
        Chunk[Second]=ReadChunk()
        if not Chunk[Second] then
          -- no delimiter found and no further input
          break
        end
        local FirstLength=string.len(Chunk[First])
        -- Join the unconsumed end of the first chunk with the start of the
        -- second chunk so that a delimiter split across both is found.
        -- The tail must not start before GetFrom: bytes in front of it were
        -- already returned or belong to a delimiter that was already matched.
        local TailFrom=math.max(FirstLength-DelimiterLength+2,GetFrom)
        local TailLength=FirstLength-TailFrom+1
        FoundFrom,FoundTo = string.find(string.sub(Chunk[First],TailFrom)..string.sub(Chunk[Second],1,DelimiterLength-1),Delimiter,1,true)
        if FoundFrom then
          -- delimiter is split between first and second chunk
          Delimiter,Collect = Next(coroutine.yield(string.sub(Chunk[First],GetFrom,TailFrom+FoundFrom-2),true))
          First,Second = Second,First
          -- translate the end of the match into an index of the second chunk
          SearchFrom,GetFrom = FoundTo-TailLength+1,FoundTo-TailLength+1
        elseif Collect then
          -- Keep only the bytes not returned yet, so the buffer holds a
          -- single pending record instead of everything read so far.
          local Pending=string.sub(Chunk[First],GetFrom)
          SearchFrom,GetFrom = string.len(Pending)+1,1
          Chunk[First]=Pending..Chunk[Second]
        else
          -- streaming: hand out the rest of the first chunk right away
          if FirstLength>=GetFrom then
            Delimiter,Collect = Next(coroutine.yield(string.sub(Chunk[First],GetFrom),false))
          end
          First,Second = Second,First
          SearchFrom,GetFrom = 1,1
        end
      end
    end
    if string.len(Chunk[First])>=GetFrom then
      -- return rest of first chunk
      coroutine.yield(string.sub(Chunk[First],GetFrom),false)
    end
  end)
end
RecentChanges · preferences
edit · history · current revision
Edited July 26, 2004 12:59 am GMT (diff)