Lua Project List To Xml |
|
WARNING: The program downloads and parses the HTML of http://www.lua.org/uses.html. Any future change to the format of that page *WILL* break the parser.
#!/usr/bin/env lua
--- Convert the "Lua uses" project list into XML.
--
-- Downloads http://www.lua.org/uses.html with wget into a local file,
-- normalizes the HTML, and prints one <use> element per project entry on
-- standard output. Diagnostics go to standard error so they never mix with
-- the generated document.
--
-- The parser is purely pattern based and depends on the page layout
-- described in the main loop below.

local fname = "uses.html"

-- string.gfind (Lua 5.0) was renamed string.gmatch in Lua 5.1; use whichever
-- the running interpreter provides.
local gmatch = string.gmatch or string.gfind

--- Return the first two captures of `pattern` in `text` (nil when no match).
local function capture(text, pattern)
  local _, _, first, second = string.find(text, pattern)
  return first, second
end

--- Escape the characters that are markup-significant in XML character data.
local function xml_escape(text)
  -- "&" must be handled first, otherwise the "&" of the entities produced
  -- for "<" and ">" would be escaped again.
  text = string.gsub(text, "&", "&amp;")
  text = string.gsub(text, "<", "&lt;")
  text = string.gsub(text, ">", "&gt;")
  return text
end

--- Print `msg` on stderr and terminate with exit status 1.
local function die(msg)
  io.stderr:write(msg, "\n")
  os.exit(1)
end

-- Fetch the page. Lua 5.1 returns the numeric exit status (0 on success),
-- Lua 5.2+ returns true on success and nil on failure.
local status = os.execute("wget -q -O " .. fname .. " http://www.lua.org/uses.html")
if status ~= 0 and status ~= true then
  die("Error downloading 'http://www.lua.org/uses.html'.")
end

local fp = io.open(fname, "r")
if fp == nil then
  die("Error opening file '" .. fname .. "'.")
end
local s = fp:read("*a")
fp:close()

-- Remove optional spaces from the tags.
s = string.gsub(s, "\n", " ")
s = string.gsub(s, " *< *", " <")
s = string.gsub(s, " *> *", "> ")
s = string.gsub(s, "> *<", "><")

-- Put all the tags in lowercase.
s = string.gsub(s, "(<[^ >]+)", string.lower)

-- Remove images, scripts, etc.
s = string.gsub(s, "<img[^>]*>", "")
s = string.gsub(s, "<script[^>]*>.-</script>", "")

-- "Normalize" links for future use: lowercase the HREF attribute, then drop
-- every attribute that precedes it so anchors always start with `<a href=`.
s = string.gsub(s, "(<a[^>]*HREF *=)", string.lower)
s = string.gsub(s, "<a[^>]*href *= *", "<a href=")

print("<?xml version=\"1.0\" encoding=\"iso-8859-1\" ?>")
print("<luauses>")

for tmp in gmatch(s, "<h3>.-<hr>") do
  -- Current data format (without spaces and line-breaks):
  --   <h3>
  --     <a NAME="1" HREF="APPURL">APPNAM</a>
  --     <br><small><em>USER</em></small>
  --   </h3>
  --   DESCR [can have html here]
  --   <p> Contact: <a HREF="EMAIL">CONTACT</a>
  --   <hr>

  -- Every field is local to the entry and defaults to "" so that a failed
  -- match neither raises on concatenation nor reuses the previous entry's
  -- value.
  local appnam, appurl = "", ""
  local name, email = "", ""

  -- Application name and URL come from the anchor inside the heading.
  local app = capture(tmp, "<h3>(.-)</h3>")
  if app then
    app = string.gsub(app, "</?em>", "")
    app = string.gsub(app, "<br>", "")
    local url, nam = capture(app, "<a href=\"([^\"> ]*)\"[^>]*>([^<]*)<")
    if url == nil then
      -- Anchor without a usable href: keep only its text.
      nam = capture(app, "<a[^>]*>([^<]*)</a>")
      url = ""
    end
    appurl = url
    appnam = nam or ""
  end

  -- User: text of the <small> element with any nested tags stripped.
  local user = capture(tmp, "<small>(.-)</small>")
  if user then
    user = string.gsub(user, "</?.->", "")
    user = string.gsub(user, "&", "&amp;")
  else
    user = ""
  end

  -- Description: everything between the heading and the rule; a trailing
  -- "Contact:" paragraph is split off into contact name and e-mail.
  local desc = capture(tmp, "</h3>(.-)<hr>")
  if desc then
    local cont = capture(desc, "<p> *Contact: *(.*)")
    if cont then
      desc = string.gsub(desc, "<p> *Contact:(.*)", "")
      cont = string.gsub(cont, "<p> *Contact: *", "")
      email, name = capture(cont, "<a href=\"([^ \"]+)\"[^>]*>([^<]+)<")
      if name == nil then
        -- No link in the contact paragraph: use its raw text as the name.
        name = cont
        email = ""
      else
        email = string.gsub(email, "mailto:/?/?", "")
      end
    end
    -- The description may contain HTML; escape it so it is emitted as text.
    desc = xml_escape(desc)
  else
    desc = ""
  end

  print(" <use>")
  print(" <app>" .. appnam .. "</app>")
  print(" <url>" .. appurl .. "</url>")
  print(" <user>" .. user .. "</user>")
  print(" <desc>" .. desc .. "</desc>")
  print(" <contact>" .. name .. "</contact>")
  print(" <email>" .. email .. "</email>")
  print(" </use>")
end

print("</luauses>")