[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: Re: Yieldable LPEG Parser
- From: William Ahern <william@...>
- Date: Wed, 1 Feb 2012 16:27:16 -0800
On Wed, Feb 01, 2012 at 03:51:41PM -0200, Roberto Ierusalimschy wrote:
> > [Hmm, that brings to mind another question: How much of the input
> > string does it have to accumulate in memory? Can it start discarding
> > earlier portions of it at some point? If not, it wouldn't be so useful
> > for the use I have in mind: parsing really big files without needing to
> > have them entirely in memory.]
>
> That is an important point. Also, do you have benchmarks comparing your
> patch to standard LPeg?
>
Attached is my LPeg JSON library used for testing. The numbers below are
from the timetrials.lua utility included with JSON4Lua, tweaked for Lua 5.2,
to take the module name as an argument, and to increase the outer loop
counts from 100 to 500. The LPeg versions were compiled identically.
I get very roughly the same runtime, though admittedly slower. I'll abstain
from making any wild guesses on the cause as I'll invariably be wrong, I'm
sure.
% for M in json json-10-2; do for I in 1 2 3; do \
time ./timetrials.lua ${M} >/dev/null; \
done; done
lpeg 0.10 (yieldable)
./timetrials.lua ${M} > /dev/null 3.53s user 0.01s system 99% cpu 3.564
total
lpeg 0.10 (yieldable)
./timetrials.lua ${M} > /dev/null 3.53s user 0.01s system 99% cpu 3.557
total
lpeg 0.10 (yieldable)
./timetrials.lua ${M} > /dev/null 3.53s user 0.01s system 99% cpu 3.551
total
lpeg 0.10
./timetrials.lua ${M} > /dev/null 3.45s user 0.01s system 99% cpu 3.495
total
lpeg 0.10
./timetrials.lua ${M} > /dev/null 3.45s user 0.01s system 99% cpu 3.480
total
lpeg 0.10
./timetrials.lua ${M} > /dev/null 3.45s user 0.01s system 99% cpu 3.486
total
--
-- JSON Parser
--
local lpeg = require("lpeg")
local function utf8(cap)
local code = tonumber(cap, 16)
local c1, c2, c3
if bit32.band(code, 0xF800) ~= 0 then
c1 = bit32.bor(0xE0, bit32.band(0x0F, bit32.rshift(code, 12)))
c2 = bit32.bor(0x80, bit32.band(0x3F, bit32.rshift(code, 6)))
c3 = bit32.bor(0x80, bit32.band(0x3F, code))
return string.char(c1, c2, c3)
elseif bit32.band(code, 0x0780) ~= 0 then
c1 = bit32.bor(0xC0, bit32.band(0x1F, bit32.rshift(code, 6)))
c2 = bit32.bor(0x80, bit32.band(0x3F, code))
return string.char(c1, c2)
else
return string.char(code)
end
end -- utf8
local xdigit = lpeg.R("09", "AF", "af")
local unicode = lpeg.P"u" * ((xdigit * xdigit * xdigit * xdigit) / utf8)
local escaped = (lpeg.P'"' * lpeg.Cc('"'))
+ (lpeg.P"\\" * lpeg.Cc("\\"))
+ (lpeg.P"/" * lpeg.Cc("/"))
+ (lpeg.P"b" * lpeg.Cc("\b"))
+ (lpeg.P"f" * lpeg.Cc("\f"))
+ (lpeg.P"n" * lpeg.Cc("\n"))
+ (lpeg.P"r" * lpeg.Cc("\r"))
+ (lpeg.P"t" * lpeg.Cc("\t"))
local encoded = lpeg.P"\\" * (unicode + escaped)
local regular = lpeg.C((-lpeg.S'\\"' * lpeg.P(1)))
local string = lpeg.Ct(lpeg.P'"' * (regular + encoded)^0 * lpeg.P'"') / function (t) return table.concat(t) end
local number = ((lpeg.S"+-"^-1 * lpeg.R"09"^1 * lpeg.P(lpeg.P"." * lpeg.R"09"^1)^-1 * lpeg.P(lpeg.S"eE" * lpeg.S"+-"^-1 * lpeg.R"09"^1)^-1) / function (cap) return tonumber(cap) end)
local boolean = ((lpeg.P"true" * lpeg.Cc(true)) + (lpeg.P"false" * lpeg.Cc(false)))
local null = lpeg.P"null" * lpeg.Cc(nil)
local simple = string + number + boolean + null
local space = lpeg.S(" \t\r\n")^0
local t2o = function(caps)
local t = {}
for i = 0, #caps/2 - 1 do
t[caps[i * 2 + 1]] = caps[i * 2 + 2]
end
return t
end
local Key, Value, Object, Array, Simple = lpeg.V"Key", lpeg.V"Value", lpeg.V"Object", lpeg.V"Array", lpeg.V"Simple"
local Grammar = lpeg.P{ "Value",
Key = space * string * space,
Value = space * (Object + Array + Simple) * space,
Array = lpeg.Ct("[" * (lpeg.P","^-1 * Value)^0 * "]"),
Object = lpeg.Ct("{" * (lpeg.P","^-1 * Key * lpeg.P":" * Value)^0 * "}") / t2o,
Simple = simple,
}
local function decode(string)
return lpeg.match(Grammar, string)
end
--
-- JSON Composer
--
local string = _G.string
local ugly = (lpeg.P'"' * lpeg.Cc('\\"'))
+ (lpeg.P'\\' * lpeg.Cc('\\\\'))
+ (lpeg.P'\b' * lpeg.Cc('\\b'))
+ (lpeg.P'\f' * lpeg.Cc('\\f'))
+ (lpeg.P'\n' * lpeg.Cc('\\n'))
+ (lpeg.P'\r' * lpeg.Cc('\\r'))
+ (lpeg.P'\t' * lpeg.Cc('\\t'))
local safe = lpeg.C((-ugly * lpeg.P(1))^1)
-- Match a prefix of safe characters, followed by end-of-string or a series
-- of ugly + safe matches. Tries to optimize for the case where no escaping
-- is necessary.
local both = (safe + lpeg.Cc("")) * (-lpeg.P(1) + lpeg.Ct((ugly + safe)^0))
local function quote(val)
local one, rem = lpeg.match(both, val)
if not rem then
return '"' .. one .. '"'
else
return '"' .. one .. table.concat(rem) .. '"'
end
end
local function isarray(t)
local k, lk
for k in function() lk = next(t, lk); return lk end do
if type(k) ~= "number" then
return false
end
end
return true
end
local function pretty(io, val, depth)
if type(val) == "string" then
io:write(quote(val))
elseif type(val) == "number" then
io:write(val)
elseif type(val) == "boolean" then
if val then
io:write("true")
else
io:write("false")
end
elseif type(val) == "table" then
if isarray(val) then
io:write("[\n")
for i = 1, #val do
io:write(string.rep("\t", depth + 1))
pretty(io, val[i], depth + 1)
io:write(",\n")
end
io:write(string.rep("\t", depth), "]")
else
io:write("{\n")
for k, v in pairs(val) do
io:write(string.rep("\t", depth + 1))
pretty(io, k, depth + 1)
io:write(" : ")
pretty(io, v, depth + 1)
io:write(",\n")
end
io:write(string.rep("\t", depth), "}")
end
else
io:write("null")
end
if depth == 0 then
io:write("\n")
end
end -- pretty()
local function compact(io, val)
if type(val) == "string" then
io:write(quote(val))
elseif type(val) == "number" then
io:write(val)
elseif type(val) == "boolean" then
if val then
io:write("true")
else
io:write("false")
end
elseif type(val) == "table" then
if isarray(val) then
io:write("[")
for i = 1, #val do
compact(io, val[i])
io:write(",")
end
io:write("]")
else
io:write("{")
for k, v in pairs(val) do
compact(io, k)
io:write(":")
compact(io, v)
io:write(",")
end
io:write("}")
end
else
io:write("null")
end
end -- compact()
local buffer = {
write = function(self, data, ...)
if data then
table.insert(self, data)
self:write(...)
end
end
}
local function encode(t, _pretty)
local buffer = setmetatable({}, { __index = buffer })
if _pretty then
pretty(buffer, t, 0)
else
compact(buffer, t, 0)
end
return table.concat(buffer)
end
return { decode = decode, encode = encode }