[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: Re: Lua Performance, IO / Regexp
- From: Bradley Smith <gmane@...>
- Date: Thu, 25 Oct 2007 01:01:07 -0700
-- Based on
-- http://dalkescientific.com/writings/diary/archive/2007/10/07/wide_finder.html#dalke-wf-11
-- Lua pattern for a real article request. The single capture is the article
-- path (four digits/two digits/two digits/name); the name may contain neither
-- a space nor a dot, and must be followed by a space.
local PATTERN = "GET /ongoing/When/%d%d%dx/(%d%d%d%d/%d%d/%d%d/[^ .]+) "

-- Tally of approximate matches. Keys that were never assigned read as 0, so
-- a counter can be incremented without initialising it first.
local count = setmetatable({}, {
  __index = function()
    return 0
  end,
})

-- Captured article path -> hit count.
local hits = {}

-- Keep the string library functions in locals for the scanning loops.
local match, find, sub = string.match, string.find, string.sub

local CHUNKSIZE = 25 * 1024 -- tunable parameter
-- First pass: read the file in chunks and count all approximate matches.
-- An approximate match is the text from the request prefix up to and
-- including the next space; it is checked against PATTERN afterwards.
local PREFIX = 'GET /ongoing/When/'

-- assert() turns a failed open into an error that carries io.open's message
-- instead of failing later on a nil file handle.
local file = assert(io.open(arg[1], "rb"))
while true do
  local text = file:read(CHUNKSIZE)
  if not text then
    break
  end
  -- Extend the chunk to the end of the current line so a request is never
  -- split across two chunks.
  text = text .. (file:read("*l") or "")

  local i = find(text, PREFIX, 1, true)
  while i do
    -- NOTE(review): PREFIX is 18 characters long, so i + 19 starts the search
    -- one character past the prefix; kept as in the original.
    local j = find(text, ' ', i + 19, true)
    if not j then
      -- No terminating space in the rest of this chunk. Stop here: passing
      -- nil as the start position to find() would restart the search at the
      -- beginning of the chunk and loop forever.
      break
    end
    local get = sub(text, i, j)
    count[get] = count[get] + 1
    i = find(text, PREFIX, j, true)
  end
end
file:close()
-- Second pass: filter approximates for proper matches.
-- Only keys matching PATTERN are kept, indexed by the captured article path.
-- Distinct keys can produce the same capture (the "%d%d%dx" segment is not
-- part of it), so counts are summed rather than overwritten; overwriting
-- would keep whichever key pairs() happened to visit last.
for request, n in pairs(count) do
  local article = match(request, PATTERN)
  if article then
    hits[article] = (hits[article] or 0) + n
  end
end
-- Print the (at most) ten articles with the highest hit counts, most hits
-- first, one per line as "<rank>. <article> : <count>".
local hits_index = {}
for key in pairs(hits) do
  hits_index[#hits_index + 1] = key
end

-- pairs() order is unspecified and table.sort is not stable, so articles
-- with equal counts are ordered by key to keep the output reproducible.
table.sort(hits_index, function(x, y)
  if hits[x] ~= hits[y] then
    return hits[x] > hits[y]
  end
  return x < y
end)

for i = 1, math.min(10, #hits_index) do
  local key = hits_index[i]
  print(i .. ". " .. key .. " : " .. hits[key])
end