[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: Re: xml pull parser
- From: Rici Lake <lua@...>
- Date: Mon, 21 Mar 2005 08:40:18 -0500
On 21-Mar-05, at 7:54 AM, PA wrote:
Alternatively, somebody, somewhere, somehow must have written this a
dozen times already. Is there not a little code sample somewhere to
show how to decode an XML string in Lua? Sigh...
Probably. Although the issues are subtle. I don't address any of them
here; this is simply a working reimplementation of the same
transformation.
do
  -- Named entities recognised by decode(): the five predefined XML ones.
  -- Any other name is left in the text untouched.
  local ents = {
    lt = '<',
    gt = '>',
    amp = '&',
    quot = '"',
    apos = "'"
  }

  -- Longest digit runs (after leading zeros are dropped) that are ever
  -- converted: 6 hex digits covers 10FFFF, 7 decimal digits covers 1114111.
  -- Longer runs are rejected without calling tonumber, so an over-long
  -- reference can never be wrapped or rounded into a small code.
  local MAX_HEX_DIGITS = 6
  local MAX_DEC_DIGITS = 7

  -- Parse the body of a numeric character reference, i.e. the text between
  -- '&#' and ';'.
  -- Accepts only 'x' followed by hex digits, or a run of decimal digits.
  -- Other numerals that the general tonumber() would accept ('1e2', '0x41',
  -- hex floats, ...) are rejected.
  -- Returns the code as a number, or nil when the body is not a well-formed
  -- reference or has too many significant digits.
  local function parse_charref(body)
    local _, _, hex = string.find(body, '^x0*(%x+)$')
    if hex then
      if string.len(hex) > MAX_HEX_DIGITS then
        return nil
      end
      return tonumber(hex, 16)
    end
    local _, _, dec = string.find(body, '^0*(%d+)$')
    if dec then
      if string.len(dec) > MAX_DEC_DIGITS then
        return nil
      end
      return tonumber(dec, 10)
    end
    return nil
  end

  -- gsub callback: receives the optional '#' and the reference body, and
  -- returns the replacement text.
  -- Numeric references below 256 become the single byte with that value;
  -- known entity names become their character. Everything else is returned
  -- exactly as it appeared in the input.
  local function entity2char(hash, str)
    if hash == '#' then
      local code = parse_charref(str)
      if code and code < 256 then
        return string.char(code)
      end
    elseif ents[str] then
      return ents[str]
    end
    return '&'..hash..str..';'
  end

  -- Decode entity and numeric character references in str.
  -- Returns the decoded string only: the 'and' expression drops gsub's
  -- substitution count. A nil or false argument is returned as is.
  function decode(str)
    return str and string.gsub(str, '&(#?)(%w+);', entity2char)
  end
end
--- some tests
=decode '&apos; is how you write ''
=decode '&amp; is an ampersand.'
=decode '"I said 'Stop right there!' & I
<strong>meant it!</strong>" the webmaster shouted,
htmlifying instinctively'
-- Codes and noncodes
=decode 'Some invalid numeric escapes include b2;, $g;'
=decode "Take out the &garbage;! Don't leave it for mañana! The
sooner the β!"
-- What was I saying about iso-8859-1?
=decode 'mañana or mañana?'
-->
> =decode '&apos; is how you write ''
' is how you write '
> =decode '&amp; is an ampersand.'
& is an ampersand.
> =decode '"I said 'Stop right there!' & I
<strong>meant it!</strong>" the webmaster shouted,
htmlifying instinctively'
"I said 'Stop right there!' & I <strong>meant it!</strong>" the
webmaster shouted, htmlifying instinctively
> -- Codes and noncodes
> =decode 'Some invalid numeric escapes include b2;, $g;'
Some invalid numeric escapes include b2;, $g;
> =decode "Take out the &garbage;! Don't leave it for mañana! The
sooner the β!"
Take out the &garbage;! Don't leave it for ma?ana! The sooner the
β!
> -- What was I saying about iso-8859-1?
> =decode 'mañana or mañana?'
ma?ana or mañana?