lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


-- Based on http://dalkescientific.com/writings/diary/archive/2007/10/07/wide_finder.html#dalke-wf-11

-- Lua pattern for a genuine article request; the capture is the article path
-- (yyyy/mm/dd/name) that follows the three-digit "NNNx" path segment.
local PATTERN = "GET /ongoing/When/%d%d%dx/(%d%d%d%d/%d%d/%d%d/[^ .]+) "

-- Cache the string library functions used by the script in locals.
local match, find, sub = string.match, string.find, string.sub

-- Number of bytes requested from the input file per read (tunable parameter).
local CHUNKSIZE = 25 * 1024

-- count: candidate request string -> occurrences. Keys that were never set
-- read as 0, so entries can be incremented without initialising them first.
local count = setmetatable({}, {__index = function() return 0 end})

-- hits: captured article path -> number of requests.
local hits = {}

-- First pass: read the file in chunks and count all approximate matches.
--
-- An approximate match is the text from a 'GET /ongoing/When/' prefix up to
-- and including the next space. The exact PATTERN is applied afterwards, once
-- per distinct candidate rather than once per occurrence.
local PREFIX = 'GET /ongoing/When/'

-- assert() turns a failed open into an error carrying the OS message instead
-- of a later "attempt to index a nil value".
local file = assert(io.open(arg[1], "rb"))
while true do
  local text = file:read(CHUNKSIZE)
  if not text then
    break
  end
  -- Extend the chunk to the end of the current line so that a request is
  -- never split across two chunks.
  text = text .. (file:read("*l") or "")
  -- The searches are literal, so use plain mode (4th argument) throughout.
  local i = find(text, PREFIX, 1, true)
  while i do
    local j = find(text, ' ', i+19, true)
    if not j then
      -- No terminating space anywhere in the rest of the chunk (e.g. a
      -- truncated last line). Such a candidate can never satisfy PATTERN,
      -- which requires the trailing space, and continuing with a nil
      -- position would restart the search at 1 and loop forever.
      break
    end
    local get = sub(text, i, j)
    count[get] = count[get] + 1
    i = find(text, PREFIX, j, true)
  end
end
file:close()

-- Second pass: filter approximates for proper matches.
-- Each distinct candidate string is tested against PATTERN once; its count is
-- credited to the captured article path.
for k, v in pairs(count) do
  local m = match(k, PATTERN)
  if m then
    -- Accumulate rather than assign: the "NNNx" segment is outside the
    -- capture, so different candidates can yield the same path and their
    -- counts must be added together.
    hits[m] = (hits[m] or 0) + v
  end
end

-- Report: print the ten most requested articles, most popular first.
-- Collect the article paths into a sequence so they can be sorted.
local hits_index = {}
for key in pairs(hits) do
  hits_index[#hits_index+1] = key
end

-- Order by descending hit count. Equal counts fall back to comparing the
-- paths so the ranking is reproducible: pairs() order is unspecified and
-- table.sort is not stable, so tied entries could otherwise swap between runs.
table.sort(hits_index, function(x, y)
  local hx, hy = hits[x], hits[y]
  if hx ~= hy then
    return hx > hy
  end
  return x < y
end)

-- Print at most ten lines of the form "rank. path : count".
for i = 1, math.min(10, #hits_index) do
  local key = hits_index[i]
  print(i .. ". " .. key .. " : " .. hits[key])
end