[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: Re: Lua vs. XML
- From: Richard Hundt <richardhundt@...>
- Date: Mon, 01 Feb 2010 12:43:12 +0100
Alexander Gladysh wrote:
Hi, list!
I need to load some trivial XML files into Lua data at run-time. The files
are trivial, but they do use UTF-8, CDATA and fancy entities, so the library
must handle them well.
I wanted to use some luarocks-installable module, but failed to find
one that would install on my Ubuntu box.
Both luaexpat (which also looks too heavy for my needs) and
lua-xmlreader (which looks stalled at version 0.1) rocks fail to build
(I may provide bug reports if anyone interested).
All other Lua XML bindings I know about are not listed in the
available rocks list. (And I'd like to avoid trying them one after
another only to find they just don't work for me...)
Is the situation with XML handling in Lua really as dire as I
imagine it, or am I just having a bad day? What do you use to handle
XML in Lua?
By the way, in my opinion, lua-users Wiki page on XML really sucks. :(
http://lua-users.org/wiki/LuaXml
(No, I'm not volunteering to fix it — I do not have enough data.)
Alexander.
I've found an old XML lpeg grammar I had lying around. It's a bit raw in
that it doesn't do the whole DOM API thing and it's not wrapped up in a
module, but it's tweakable, so in case you can't find anything else...
here it is.
-Rich
require "lpeg"
-- Prototype tables for the three kinds of objects the parser hands out.
-- Each one is used as a metatable by the create_* callbacks and points
-- __index back at itself, so methods added here become available on every
-- node of that kind. Extend them to build a DOM-like API if you want.
Node, Document, Element = { }, { }, { }
Node.__index, Document.__index, Element.__index = Node, Document, Element
-- alternatively, make these callbacks do what you need
--- Turns the table captured for a whole document into a Document object.
-- Applied by the grammar's `xml` rule to the table collected by `document`.
-- @tparam table xdoc table of top-level nodes
-- @treturn table the same table, now carrying `Document` as its metatable
local function create_document(xdoc)
  setmetatable(xdoc, Document)
  return xdoc
end
--- Finishes the table captured by the grammar's `doctype` rule.
-- @tparam table node capture table (the grammar fills in `data` and `type`)
-- @treturn table the same table, named "#doctype-declaration" and given
--   `Node` as its metatable
local function create_doctype_node(node)
  node["name"] = "#doctype-declaration"
  setmetatable(node, Node)
  return node
end
--- Turns the table captured by the grammar's `element` rule into an Element.
-- Unlike the other node callbacks it does not touch `name`: for elements the
-- grammar itself captures the tag name into that field.
-- @tparam table node capture table for one element
-- @treturn table the same table, now carrying `Element` as its metatable
local function create_element_node(node)
  setmetatable(node, Element)
  return node
end
--- Finishes the table captured by the grammar's `cdata` rule.
-- @tparam table node capture table (the grammar fills in `data` and `type`)
-- @treturn table the same table, named "#cdata" and given `Node` as its
--   metatable
local function create_cdata_node(node)
  node["name"] = "#cdata"
  setmetatable(node, Node)
  return node
end
--- Finishes the table captured by the grammar's `text` rule.
-- @tparam table node capture table (the grammar fills in `data` and `type`)
-- @treturn table the same table, named "#text" and given `Node` as its
--   metatable
local function create_text_node(node)
  node["name"] = "#text"
  setmetatable(node, Node)
  return node
end
--- Finishes the table captured by the grammar's `comment` rule.
-- @tparam table node capture table (the grammar fills in `data` and `type`)
-- @treturn table the same table, named "#comment" and given `Node` as its
--   metatable
local function create_comment_node(node)
  node["name"] = "#comment"
  setmetatable(node, Node)
  return node
end
--- Finishes the table captured by the grammar's `procins` rule.
-- @tparam table node capture table (the grammar fills in `data` and `type`)
-- @treturn table the same table, named "#processing-instruction" and given
--   `Node` as its metatable
local function create_procins_node(node)
  node["name"] = "#processing-instruction"
  setmetatable(node, Node)
  return node
end
-- below is the LPeg grammar, tweak as needed, otherwise it
-- calls the callback functions above as it collects nodes
-- Bind the library through the value returned by require rather than through
-- a global named `lpeg`: LPeg only registers that global when built for
-- Lua 5.1, so on Lua 5.2 and later the bare name would be nil here.
-- require caches loaded modules, so calling it again is cheap.
local m = require "lpeg"
--- Aborts parsing with a message quoting the input at the failure point.
-- Used inside the grammar as a match-time function, so it is called with the
-- subject string and the current position. It never returns.
-- @tparam string s the subject being parsed
-- @tparam number i position in `s` where parsing could not continue
-- @raise a string of the form "parse error near '<snippet>'", where the
--   snippet is at most 21 characters of input starting at `i`, followed by
--   "..." only when more input exists beyond the snippet
local function parse_error(s, i)
  local last = i + 20                 -- snippet covers s[i .. i+20]
  local snippet = s:sub(i, last)      -- sub() clamps at the end of s
  -- Add the ellipsis only if something was actually cut off; previously an
  -- input with exactly 21 characters left was shown in full yet still got
  -- "..." appended.
  if #s > last then
    snippet = snippet .. "..."
  end
  error(("parse error near '%s'"):format(snippet), 2)
end
--- Match-time check that the subject continues with a previously captured
-- string.
-- @tparam string s the subject being parsed
-- @tparam number i current position in `s`
-- @param c the captured value to look for; anything but a string fails
-- @treturn ?number the position just past the matched text, or nil when `c`
--   is not a string or `s` does not contain `c` at position `i`
local function backref(s, i, c)
  if type(c) == "string" then
    local after = i + #c
    if s:sub(i, after - 1) == c then
      return after
    end
  end
  return nil
end
--- Builds a pattern that matches the text previously captured under a
-- named group.
-- The value of the group is fetched with m.Cb and handed to `backref`,
-- which compares it against the subject at the current position.
-- @tparam string name name of the group capture to refer back to
-- @return an LPeg pattern
local function Cbr(name)
  local captured = m.Cb(name)
  return m.Cmt(captured, backref)
end
--- Converts a flat { key1, value1, key2, value2, ... } array into a map.
-- When a key occurs more than once the last value wins.
-- @tparam table list alternating keys and values
-- @treturn table a new table mapping each key to the value that follows it
local function fold_attributes(list)
  local folded = { }
  local pos, total = 1, #list
  while pos <= total do
    local key, value = list[pos], list[pos + 1]
    folded[key] = value
    pos = pos + 2
  end
  return folded
end
-- LPeg grammar that turns an XML string into a tree of Lua tables.
-- XML:match(str) returns the document table (see create_document) whose
-- array part holds the top-level nodes in source order. Every node table
-- carries a `type` field; non-element nodes carry their raw text in `data`,
-- elements carry `name`, `attributes` (a name -> value map) and their
-- children in the array part. Malformed input raises via parse_error.
-- Entities are not decoded and a DOCTYPE with an internal subset (one that
-- contains '>') is not supported.
XML = m.P{ "xml";
  -- `document` either succeeds or raises, so the `eos` alternative is a
  -- leftover fallback kept from the original definition.
  xml = m.V"document" / create_document + m.V"eos";

  -- Text, comments and processing instructions (e.g. an <?xml ...?>
  -- declaration) may precede the DOCTYPE; previously a DOCTYPE was only
  -- recognised as the very first bytes of the input. Nodes are still
  -- collected in source order, so input that parsed before yields the
  -- same tree.
  document = m.Ct(
    m.V"misc"^0 * m.V"doctype"^-1 * m.V"markup"
  ) * (m.V"eos" + parse_error);

  misc = m.V"text"    / create_text_node
       + m.V"comment" / create_comment_node
       + m.V"procins" / create_procins_node;

  doctype = m.Ct(
    "<!DOCTYPE" * m.Cg((1 - m.P">")^1, "data") *
    m.Cg(m.Cc"DOCTYPE_DECLARATION", "type") * ">"
  ) / create_doctype_node;

  -- Zero or more child nodes of any kind.
  markup = (
      m.V"element" / create_element_node
    + m.V"cdata"   / create_cdata_node
    + m.V"text"    / create_text_node
    + m.V"comment" / create_comment_node
    + m.V"procins" / create_procins_node
  )^0;

  -- Either <tag ...> children </tag> or the self-closing <tag ... />.
  -- Once an opening tag has matched, a missing or mismatched closing tag is
  -- a hard error rather than a backtrack.
  element = m.Ct(
    m.V"tagOpen" * m.V"markup" *
    (#m.V"tagClose" + parse_error) *
    m.V"tagClose" + m.V"tagOpenEmpty"
  );

  tagOpen = "<" * (
      m.V"tagName" * m.V"attrs" * m.V"ws"^0 * ">"
    + m.V"ws"^0 * ">"
  );

  tagOpenEmpty = "<" * (
      m.V"tagName" * m.V"attrs" * m.V"ws"^0 * "/>"
    + m.V"ws"^0 * "/>"
  );

  -- The closing tag must repeat the name captured by the matching tagOpen.
  tagClose = "</" * Cbr("name") * m.V"ws"^0 * ">";

  tagName = m.Cg(m.V"ident", "name") * m.Cg(m.Cc"ELEMENT_NODE", "type");

  -- Character data runs up to the next '<'. A literal '>' is legal in XML
  -- text and used to abort the parse; it is now kept as part of the text.
  text = m.Ct(
    m.Cg((1 - m.P"<")^1, "data") * m.Cg(m.Cc"TEXT_NODE", "type")
  );

  cdata = m.Ct(
    "<![CDATA[" * m.Cg((1 - m.P"]]>")^0, "data") * "]]>" *
    m.Cg(m.Cc"CDATA_SECTION_NODE", "type")
  );

  comment = m.Ct(
    "<!--" * m.Cg((1 - m.P"-->")^0, "data") * "-->" *
    m.Cg(m.Cc"COMMENT_NODE", "type")
  );

  procins = m.Ct(
    "<?" * m.Cg((1 - m.P"?>")^0, "data") * "?>" *
    m.Cg(m.Cc"PROCESSING_INSTRUCTION_NODE", "type")
  );

  -- Attribute names and values are captured as a flat list and folded into
  -- a map stored under `attributes`.
  attrs = m.Cg(
    m.Ct(m.V"attr"^0) / fold_attributes, "attributes"
  );

  -- XML permits whitespace on either side of '=' (name = "value").
  attr = m.V"ws"^1 * m.C(m.V"ident") *
         m.V"ws"^0 * "=" * m.V"ws"^0 * m.V"str";

  -- Quoted value; the capture excludes the surrounding quotes.
  str = ('"' * m.C((1 - m.P'"')^0) * '"')
      + ("'" * m.C((1 - m.P"'")^0) * "'");

  ident = (m.R("az", "AZ", "09") + m.S"_.:-")^1;
  ws = m.S" \t\r\n";
  eos = -m.P(1);
}
--- Smoke test: parses a small XHTML document with the XML grammar and
-- checks a few nodes of the resulting tree. Raises (via assert or an
-- indexing error) if the tree does not have the expected shape.
function test()
  local xmlstr = [=[
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta content="895" name="ku-session-keeper" id="ku-session-keeper" />
<script src="/kudu/jslib/ku/core.js" type="text/javascript" />
<link rel="stylesheet" href="/kudu/css/default.css" />
</head>
<body>
<!--this is a comment baby-->
<div ku:replace="header" />
<div ku:include="content" />
<form ku:action="doit">...</form>
<script type="text/javascript" id="my-script"><![CDATA[
if ( foo > 42 ) {
alert("cheese");
}
]]>
</script>
<div foo="bar" baz="quux">This is My Div</div>
</body>
</html>
]=]

  -- Returns the first child of `parent` whose name equals `wanted`,
  -- or nil when there is none.
  local function first_child_named(parent, wanted)
    for _, child in ipairs(parent) do
      if child.name == wanted then
        return child
      end
    end
    return nil
  end

  local xmldoc = XML:match(xmlstr)

  -- [1] is the doctype, [2] the newline text after it, [3] the <html> root
  local html = xmldoc[3]
  assert(html.attributes.xmlns == "http://www.w3.org/1999/xhtml")

  -- body[1] is the newline text following <body>, body[2] the comment
  local body = first_child_named(html, "body")
  assert(body[2].data == "this is a comment baby")

  -- the first <script> inside <body> starts directly with a CDATA section
  local script = first_child_named(body, "script")
  assert(script[1].data)
  assert(script[1].type == "CDATA_SECTION_NODE")
end
-- uncomment to run the tests
--test()
-- this code is free software and may be used and/or distributed under
-- the same terms as Lua itself