lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


Alexander Gladysh wrote:
Hi, list!

I need to load some trivial XML files into Lua data at run-time. The files
are trivial, but they do use UTF-8, CDATA and fancy entities, so the library
must handle them well.

I wanted to use some luarocks-installable module, but failed to find
one that would install on my Ubuntu box.

Both the luaexpat rock (which also looks too heavy for my needs) and the
lua-xmlreader rock (which looks stalled at version 0.1) fail to build
(I can provide bug reports if anyone is interested).

All other Lua XML bindings I know about are not listed in the
available rocks list. (And I'd like to avoid trying them one after
another only to find that they just don't work for me...)

Is the situation with XML handling in Lua really as dire as I
imagine it, or am I just having a bad day? What do you use to handle
XML in Lua?

By the way, in my opinion, lua-users Wiki page on XML really sucks. :(

http://lua-users.org/wiki/LuaXml

(No, I'm not volunteering to fix it — I do not have enough data.)

Alexander.


I've found an old XML lpeg grammar I had lying around. It's a bit raw in that it doesn't do the whole DOM API thing and it's not wrapped up in a module, but it's tweakable, so in case you can't find anything else... here it is.

-Rich
require "lpeg"

-- Prototype tables ("classes") used as metatables for parsed nodes.
-- They are globals; add methods to them to build a DOM-style API.
Node, Document, Element = { }, { }, { }

-- each prototype indexes itself, so methods added to it are found
-- on every table that carries it as a metatable
Node.__index = Node
Document.__index = Document
Element.__index = Element

-- alternatively, make these callbacks do what you need
--- Callback applied to the table captured for the whole document.
-- Attaches the Document metatable to it in place.
-- @param xdoc table of top-level nodes collected by the grammar
-- @return xdoc itself
local function create_document(xdoc)
   setmetatable(xdoc, Document)
   return xdoc
end
--- Callback applied to the table captured by the grammar's `doctype` rule.
-- Labels the node "#doctype-declaration" and attaches the Node metatable.
-- @param node capture table for the doctype declaration
-- @return node itself
local function create_doctype_node(node)
   node["name"] = "#doctype-declaration"
   setmetatable(node, Node)
   return node
end
--- Callback applied to the table captured by the grammar's `element` rule.
-- Attaches the Element metatable; the node's fields are left untouched.
-- @param node capture table for the element
-- @return node itself
local function create_element_node(node)
   setmetatable(node, Element)
   return node
end
--- Callback applied to the table captured by the grammar's `cdata` rule.
-- Labels the node "#cdata" and attaches the Node metatable.
-- @param node capture table for the CDATA section
-- @return node itself
local function create_cdata_node(node)
   node["name"] = "#cdata"
   setmetatable(node, Node)
   return node
end
--- Callback applied to the table captured by the grammar's `text` rule.
-- Labels the node "#text" and attaches the Node metatable.
-- @param node capture table for the text run
-- @return node itself
local function create_text_node(node)
   node["name"] = "#text"
   setmetatable(node, Node)
   return node
end
--- Callback applied to the table captured by the grammar's `comment` rule.
-- Labels the node "#comment" and attaches the Node metatable.
-- @param node capture table for the comment
-- @return node itself
local function create_comment_node(node)
   node["name"] = "#comment"
   setmetatable(node, Node)
   return node
end
--- Callback applied to the table captured by the grammar's `procins` rule.
-- Labels the node "#processing-instruction" and attaches the Node metatable.
-- @param node capture table for the processing instruction
-- @return node itself
local function create_procins_node(node)
   node["name"] = "#processing-instruction"
   setmetatable(node, Node)
   return node
end

-- below is the LPeg grammar, tweak as needed, otherwise it
-- calls the callback functions above as it collects nodes
--
-- Bind the module from require's return value instead of reading a
-- global `lpeg`: LPeg only registers that global on Lua 5.1 builds, so
-- on newer Lua versions the global would be nil. require caches loaded
-- modules, so this does not load the library a second time.
local m = require "lpeg"

--- Raise a parse error showing the input around the failure position.
-- Used as a function pattern inside the grammar, so it is called with the
-- subject string and the current match position.
-- At most 21 characters of the remaining input are quoted; "..." is
-- appended only when the remainder was actually cut short.
-- @param s subject string being parsed
-- @param i position in s where parsing could not continue
-- @raise always raises "parse error near '<excerpt>'"
local function parse_error(s, i)
   local excerpt_len = 21
   local remaining = #s - i + 1
   local excerpt
   if remaining > excerpt_len then
      -- more input than is shown: truncate and mark the truncation
      excerpt = s:sub(i, i + excerpt_len - 1) .. "..."
   else
      -- the whole remainder fits, so no ellipsis
      excerpt = s:sub(i)
   end
   -- level 2 attributes the error to the caller, not to this helper
   error(("parse error near '%s'"):format(excerpt), 2)
end

--- Match-time helper: check that the subject continues with a given string.
-- @param s subject string
-- @param i current position in s
-- @param c text expected at position i; any non-string value fails the match
-- @return the position just past the matched text, or nil when the subject
--         does not continue with c
local function backref(s, i, c)
   if type(c) == "string" then
      local stop = i + #c
      if s:sub(i, stop - 1) == c then
         return stop
      end
   end
   return nil
end

--- Build a pattern that matches the text of a previously captured group.
-- Combines a back capture of the group `name` with the `backref`
-- match-time check, so the pattern succeeds only if the subject continues
-- with that captured string.
-- @param name name of the group capture to refer back to
-- @return an LPeg pattern
local function Cbr(name)
   local previous = m.Cb(name)
   return m.Cmt(previous, backref)
end

--- Turn a flat { key1, value1, key2, value2, ... } list into a map.
-- When a key occurs more than once, the later value replaces the earlier.
-- @param list sequence of alternating keys and values
-- @return new table mapping each key to the value that follows it
local function fold_attributes(list)
   local atts = { }
   local pos, last = 1, #list
   while pos <= last do
      local key, value = list[pos], list[pos + 1]
      atts[key] = value
      pos = pos + 2
   end
   return atts
end


-- LPeg grammar for a simplified XML dialect, stored in the global XML.
-- A successful match yields a table of node tables, each passed through the
-- matching create_* callback. Input left over after the markup, or an open
-- tag without a matching close tag, calls parse_error.
XML = m.P{ "xml";
   -- entry rule: a document (passed to create_document) or, failing
   -- that, end of input
   xml = m.V"document" / create_document + m.V"eos";

   -- optional doctype followed by markup, collected into one table;
   -- anything left over afterwards triggers parse_error
   document = m.Ct(
      m.V"doctype"^-1 * m.V"markup"
   ) * (m.V"eos" + parse_error);

   -- "<!DOCTYPE" up to the first ">"; the raw text goes in `data`
   doctype = m.Ct(
      "<!DOCTYPE" * m.Cg((1 - m.P">")^1, "data") *
      m.Cg(m.Cc"DOCTYPE_DECLARATION", "type") * ">"
   ) / create_doctype_node;

   -- zero or more nodes; the alternatives are tried in the order listed
   markup = (
      m.V"element"   / create_element_node
      + m.V"cdata"   / create_cdata_node
      + m.V"text"    / create_text_node
      + m.V"comment" / create_comment_node
      + m.V"procins" / create_procins_node
   )^0;

   -- either open tag + child markup + close tag, or an empty tag.
   -- The lookahead on tagClose calls parse_error when the close tag
   -- is missing, before tagClose is actually consumed.
   element = m.Ct(
      m.V"tagOpen" * m.V"markup" *
      (#m.V"tagClose" + parse_error) *
      m.V"tagClose" + m.V"tagOpenEmpty"
   );

   -- "<name attrs>"; the second alternative also accepts a tag with no
   -- name at all (optional whitespace between "<" and ">")
   tagOpen = "<" * (
      m.V"tagName" * m.V"attrs" * m.V"ws"^0 * ">"
      + m.V"ws"^0 * ">"
   );
   -- same as tagOpen but terminated by "/>"
   tagOpenEmpty = "<" * (
      m.V"tagName" * m.V"attrs" * m.V"ws"^0 * "/>"
      + m.V"ws"^0 * "/>"
   );
   -- "</" followed by the text of the back-referenced "name" group
   -- (presumably the enclosing open tag's name; see LPeg's Cb rules)
   tagClose = "</" * Cbr("name") * m.V"ws"^0 * ">";
   -- stores the identifier in `name` and "ELEMENT_NODE" in `type`
   tagName = m.Cg(m.V"ident", "name") *m.Cg(m.Cc"ELEMENT_NODE", "type");
   -- one or more characters other than "<" and ">"
   text = m.Ct(
      m.Cg((1 - m.S"><")^1, "data") * m.Cg(m.Cc"TEXT_NODE", "type")
   );
   -- "<![CDATA[" ... "]]>"; the content (without delimiters) goes in `data`
   cdata = m.Ct(
      "<![CDATA[" * m.Cg((1 - m.P"]]>")^0, "data") * "]]>" *
      m.Cg(m.Cc"CDATA_SECTION_NODE", "type")
   );
   -- "<!--" ... "-->"; the content goes in `data`
   comment = m.Ct(
      "<!--" * m.Cg((1 - m.P"-->")^0, "data") * "-->" *
      m.Cg(m.Cc"COMMENT_NODE", "type")
   );
   -- "<?" ... "?>"; the content goes in `data`
   procins = m.Ct(
      "<?" * m.Cg((1 - m.P"?>")^0, "data") * "?>" *
      m.Cg(m.Cc"PROCESSING_INSTRUCTION_NODE", "type")
   );
   -- zero or more attributes, folded into a name -> value table that is
   -- stored in the element's `attributes` field
   attrs = m.Cg(
      m.Ct(m.V"attr"^0) / fold_attributes, "attributes"
   );
   -- leading whitespace, identifier, "=", quoted value (no whitespace is
   -- accepted around the "=")
   attr = m.V"ws"^1 * m.C(m.V"ident") * m.P"=" * m.V"str";
   -- double- or single-quoted string; captures the content without quotes
   str = ('"' * m.C((1 - m.P'"')^0) * '"')
      + ("'" * m.C((1 - m.P"'")^0) * "'");
   -- ASCII letters, digits and the characters _ . : -
   ident = (m.R("az", "AZ", "09") + m.S"_.:-")^1;
   -- a single whitespace character
   ws = m.S" \t\r\n";
   -- end of the subject string
   eos = -m.P(1);
}

--- Smoke test for the XML grammar: parses a small XHTML document and
-- checks a few of the resulting nodes. Raises on the first failed check.
-- The sample URLs no longer carry the stray ";" that a mail archiver had
-- appended after them; those made this function a syntax error and the
-- sample document unparsable.
function test()
   local xmlstr = [=[
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
    <meta content="895" name="ku-session-keeper" id="ku-session-keeper" />
    <script src="/kudu/jslib/ku/core.js" type="text/javascript" />
    <link rel="stylesheet" href="/kudu/css/default.css" />
</head>
<body>
    <!--this is a comment baby-->
    <div ku:replace="header" />
    <div ku:include="content" />
    <form ku:action="doit">...</form>
    <script type="text/javascript" id="my-script"><![CDATA[
    if ( foo > 42 ) {
        alert("cheese");
    }
    ]]>
    </script>
    <div foo="bar" baz="quux">This is My Div</div>
</body>
</html>
]=]

   -- first child of `parent` whose `name` field equals `name`, or nil
   local function find_child(parent, name)
      for _, node in ipairs(parent) do
         if node.name == name then
            return node
         end
      end
      return nil
   end

   local xmldoc = XML:match(xmlstr)
   -- [1] is the doctype node, [2] the whitespace text node that follows it
   local html = xmldoc[3]
   assert(html.attributes.xmlns == "http://www.w3.org/1999/xhtml")

   local body = find_child(html, "body")
   -- body[1] is the whitespace text node before the comment
   assert(body[2].data == "this is a comment baby")

   -- the only non-empty <script> in <body> wraps a CDATA section
   local script = find_child(body, "script")
   assert(script[1].data)
   assert(script[1].type == "CDATA_SECTION_NODE")
end

-- uncomment to run the tests
--test()

-- this code is free software and may be used and/or distributed under
-- the same terms as Lua itself