|
#!/usr/bin/lua
--- NGramsCreator: count character n-grams in a payload file and append one
-- "label id:count id:count ..." record to an SVM data file.
--
-- Usage: NGramsCreator.lua <Payload File> <SVMOutput File> <Number of NGrams> <Ham Flag>

-- `arg` is only set by the standalone interpreter; fall back to an empty list
-- so a missing table leads to the usage message instead of an indexing error.
local argv = arg or {}

--- Convert a value to a usable gram length.
-- @param Value number or numeric string
-- @return the length as a number when it is an integer >= 1, otherwise nil
local function ToGramLength(Value)
  local Number = tonumber(Value)
  if Number == nil or Number < 1 or Number % 1 ~= 0 then
    return nil
  end
  return Number
end

-- Script-level state shared by the functions below.
PayloadFile = argv[1]
SVMOutput = argv[2]
-- Command-line values are strings; convert once so later arithmetic and
-- comparisons use a number. nil when missing or not a positive integer.
NNGrams = ToGramLength(argv[3])
HamFlag = argv[4]

-- Use ASCII characters 32-126
ASCII = ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'

GramTable = {}   -- not used by this script; kept because it is an existing global
GramCounter = {} -- gram -> number of occurrences seen so far
GramID = {}      -- gram -> numeric feature id
GramIDR = {}     -- numeric feature id -> gram
GramResults = {} -- sorted feature ids produced by the last PrintResults call
Counter = 1      -- next feature id to hand out

--- Give a gram a feature id unless it already has one.
-- Existing ids and counts are left alone so that repeated generation never
-- renumbers features or discards occurrences that were already counted.
local function RegisterGram(Gram)
  if GramID[Gram] ~= nil then
    return
  end
  GramID[Gram] = Counter
  GramIDR[Counter] = Gram
  GramCounter[Gram] = GramCounter[Gram] or 0
  Counter = Counter + 1
end

--- Register every string made of Part followed by Length symbols.
-- Ids are assigned in generation order: the first symbol varies slowest.
-- @param Symbols string whose characters form the alphabet
-- @param Length  number of symbols to append (number or numeric string, >= 0)
-- @param Part    optional prefix, defaults to ""
function GenerateNGrams(Symbols, Length, Part)
  local Remaining = tonumber(Length)
  if Remaining == nil or Remaining < 0 or Remaining % 1 ~= 0 then
    -- A non-integer or negative length would never reach the base case.
    error("GenerateNGrams: Length must be a non-negative integer", 2)
  end

  local function Expand(Prefix, Left)
    if Left == 0 then
      RegisterGram(Prefix)
      return
    end
    for Index = 1, #Symbols do
      Expand(Prefix .. Symbols:sub(Index, Index), Left - 1)
    end
  end

  Expand(Part or "", Remaining)
end

--- Add the n-grams of one payload line to the running counts.
-- The gram length is read from the global NNGrams.
-- @param Payload   text to scan
-- @param SVMOutput unused; kept for interface compatibility
-- @param HamFlag   unused; kept for interface compatibility
function CreateSVMData(Payload, SVMOutput, HamFlag)
  local GramLength = ToGramLength(NNGrams)
  if GramLength == nil then
    error("CreateSVMData: NNGrams must be a positive integer", 2)
  end

  -- Build the id tables only once. Rebuilding them for every line would
  -- reset the counts gathered from earlier lines and renumber the features.
  if next(GramID) == nil then
    GenerateNGrams(ASCII, GramLength)
  end

  -- The last full gram starts at #Payload - GramLength + 1, which also covers
  -- the final character when GramLength is 1.
  for Start = 1, #Payload - GramLength + 1 do
    local Gram = Payload:sub(Start, Start + GramLength - 1)
    GramCounter[Gram] = (GramCounter[Gram] or 0) + 1
  end
end

--- Append one record to the file named by SVMOutput.
-- The record is HamFlag followed by "id:count " for every registered gram in
-- ascending id order, then a newline. Grams that were counted but have no id
-- (they contain characters outside ASCII) are skipped.
-- NOTE(review): grams with a zero count are written as well; confirm whether
-- the consumer of this file relies on that before filtering them out.
function PrintResults()
  -- Start from an empty list so a second call does not repeat earlier ids.
  GramResults = {}
  for Gram in pairs(GramCounter) do
    local Id = GramID[Gram]
    if Id ~= nil then
      GramResults[#GramResults + 1] = Id
    end
  end
  table.sort(GramResults)

  -- Fail loudly if the file cannot be opened, and write through the handle
  -- so the process-wide default output is never redirected or closed.
  local Out = assert(io.open(SVMOutput, "a"))
  Out:write(HamFlag, " ")
  for _, Id in ipairs(GramResults) do
    Out:write(Id, ":", GramCounter[GramIDR[Id]], " ")
  end
  Out:write("\n")
  Out:close()
end

--- Count the grams of every line in PayloadFile, then write the record.
-- Lines are processed independently, so grams never span a line break.
function main()
  for Line in io.lines(PayloadFile) do
    CreateSVMData(Line, SVMOutput, HamFlag)
  end
  PrintResults()
end

-- Ensure that we received the expected parameters before doing any work.
if PayloadFile == nil or SVMOutput == nil or NNGrams == nil or HamFlag == nil then
  print("Usage: NGramsCreator.lua <Payload File> <SVMOutput File> <Number of NGrams> <Ham Flag>")
else
  main()
end