#!/usr/bin/lua
-- Command-line arguments. They arrive as strings and are nil when omitted.
PayloadFile = arg[1]  -- file whose lines are scanned for n-grams
SVMOutput = arg[2]    -- file the result line is appended to
NNGrams = arg[3]      -- gram size; converted to a number below
HamFlag = arg[4]      -- written verbatim as the first field of the output line
-- Use ASCII characters 32-126
ASCII=' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'
-- Shared state used by the functions in this file:
GramTable = {};    -- not referenced by any active code in this file
GramCounter = {};  -- gram string -> number of occurrences counted so far
GramID = {};       -- gram string -> numeric ID assigned by GenerateNGrams
GramIDR = {};      -- numeric ID -> gram string (reverse of GramID)
GramResults = {};  -- list of IDs collected by PrintResults
Counter = 1        -- next numeric ID that GenerateNGrams will hand out
-- Ensure that we received the expected parameters
if (PayloadFile == nil or SVMOutput == nil or NNGrams == nil or HamFlag == nil) then
  print("Usage: NGramsCreator.lua <Payload File> <SVMOutput File> <Number of NGrams> <Ham Flag>")
  return
end
-- Validate the gram size once, up front. Left as a raw string, a value such
-- as "0" or "abc" would only fail much later (runaway recursion or an
-- arithmetic error) inside the gram generation.
NNGrams = tonumber(NNGrams)
if NNGrams == nil or NNGrams < 1 or NNGrams % 1 ~= 0 then
  print("Error: <Number of NGrams> must be a positive integer")
  return
end
-- Normalise values such as 2.0 to an integral number.
NNGrams = math.floor(NNGrams)
-- The output file is opened (in append mode) inside PrintResults(), not here
--io.output(io.open(SVMOutput,"a"));
--- Recursively enumerate every gram of the requested length and register it.
-- Each complete gram gets the next free numeric ID (global `Counter`), is
-- recorded in `GramID` (gram -> ID) and `GramIDR` (ID -> gram), and has its
-- entry in `GramCounter` set to 0. Grams are produced in the order of the
-- characters in `Symbols`, with the leftmost position varying slowest.
-- @param Symbols string whose individual characters form the alphabet
-- @param Length  number of characters still to append; may be a numeric
--                string (e.g. straight from the command line)
-- @param Part    prefix built so far (optional, defaults to "")
function GenerateNGrams(Symbols, Length, Part)
  Part = Part or ""
  -- Coerce before comparing: a string such as "0" is never equal to the
  -- number 0, so without this the base case below could be skipped and the
  -- recursion would never stop.
  local remaining = tonumber(Length)
  if remaining == nil or remaining < 0 or remaining % 1 ~= 0 then
    error("GenerateNGrams: Length must be a non-negative integer, got "
      .. tostring(Length), 2)
  end
  if remaining == 0 then
    -- The gram is complete: register it under the next ID.
    GramID[Part] = Counter
    GramIDR[Counter] = Part
    GramCounter[Part] = 0
    Counter = Counter + 1
    return
  end
  -- The loop index deliberately has its own name so it cannot be confused
  -- with the global ID counter `Counter`.
  for index = 1, #Symbols do
    GenerateNGrams(Symbols, remaining - 1, Part .. Symbols:sub(index, index))
  end
end
--- Count every gram of size `NNGrams` that occurs in one payload string.
-- Counts are accumulated in the global `GramCounter`, so calling this once
-- per input line yields totals over all lines. The gram/ID tables are built
-- on the first call only; rebuilding them on every call would zero the
-- counts gathered so far and hand out new IDs for the same grams.
-- Grams containing characters outside `ASCII` are still counted, but they
-- have no entry in `GramID`.
-- @param Payload   string to scan
-- @param SVMOutput unused here; kept so existing callers keep working
-- @param HamFlag   unused here; kept so existing callers keep working
function CreateSVMData(Payload, SVMOutput, HamFlag)
  -- NNGrams may still be the raw command-line string, so convert it once.
  local gram_size = tonumber(NNGrams)
  if gram_size == nil or gram_size < 1 or gram_size % 1 ~= 0 then
    error("CreateSVMData: NNGrams must be a positive integer, got "
      .. tostring(NNGrams))
  end
  -- Build the gram tables only if nothing has populated them yet.
  if next(GramID) == nil then
    GenerateNGrams(ASCII, gram_size)
  end
  -- The last full gram starts at #Payload - gram_size + 1. This also covers
  -- the final character when gram_size is 1, and the loop does not run at
  -- all when the payload is shorter than one gram.
  local last_start = #Payload - gram_size + 1
  for start = 1, last_start do
    local gram = Payload:sub(start, start + gram_size - 1)
    GramCounter[gram] = (GramCounter[gram] or 0) + 1
  end
end
--- Append one result line to the file named by `SVMOutput`.
-- The line has the form `<HamFlag> <id>:<count> <id>:<count> ... \n`, with
-- IDs in ascending numeric order. Every gram that has an ID is written,
-- including grams whose count is 0. Grams counted without an ID (not present
-- in `GramID`) are skipped.
-- Raises an error if the output file cannot be opened.
function PrintResults()
  -- Use a private handle: a failed open is reported instead of silently
  -- writing to the default output, and the default stream is left alone.
  local out, open_err = io.open(SVMOutput, "a")
  if not out then
    error("PrintResults: cannot open output file: " .. tostring(open_err))
  end

  -- Start from an empty list so a repeated call does not emit duplicates.
  GramResults = {}
  for gram in pairs(GramCounter) do
    local id = GramID[gram]
    if id ~= nil then
      GramResults[#GramResults + 1] = id
    end
  end
  table.sort(GramResults)

  out:write(HamFlag, " ")
  -- Walk the sorted list by index; `pairs` gives no ordering guarantee.
  for index = 1, #GramResults do
    local id = GramResults[index]
    out:write(id, ":", GramCounter[GramIDR[id]], " ")
  end
  out:write("\n")
  out:close()
end
--- Script entry point: pass each line of `PayloadFile` to CreateSVMData,
-- then emit the result once via PrintResults.
function main()
  -- Pull lines from the iterator by hand; it yields nil at end of file.
  local next_line = io.lines(PayloadFile)
  local payload = next_line()
  while payload ~= nil do
    CreateSVMData(payload, SVMOutput, HamFlag)
    payload = next_line()
  end
  PrintResults()
end
main()