lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


On Wed, Feb 01, 2012 at 03:51:41PM -0200, Roberto Ierusalimschy wrote:
> > [Hmm, that brings to mind another question:  How much of the input
> > string does it have to accumulate in memory?  Can it start discarding
> > earlier portions of it at some point?  If not, it wouldn't be so useful
> > for the use I have in mind: parsing really big files without needing to
> > have them entirely in memory.]
> 
> That is an important point. Also, do you have benchmarks comparing your
> patch to standard LPeg?
> 

Attached is my LPeg JSON library used for testing. The numbers below are
from the timetrials.lua utility included with JSON4Lua, tweaked for Lua 5.2,
to take the module name as an argument, and to increase the outer loop
counts from 100 to 500. The LPeg versions were compiled identically.

I get very roughly the same runtime, though admittedly slower. I'll abstain
from making any wild guesses on the cause as I'll invariably be wrong, I'm
sure.

% for M in json json-10-2; do for I in 1 2 3; do \
	time ./timetrials.lua ${M} >/dev/null; \
done; done
lpeg 0.10 (yieldable)
./timetrials.lua ${M} > /dev/null  3.53s user 0.01s system 99% cpu 3.564
total
lpeg 0.10 (yieldable)
./timetrials.lua ${M} > /dev/null  3.53s user 0.01s system 99% cpu 3.557
total
lpeg 0.10 (yieldable)
./timetrials.lua ${M} > /dev/null  3.53s user 0.01s system 99% cpu 3.551
total
lpeg 0.10
./timetrials.lua ${M} > /dev/null  3.45s user 0.01s system 99% cpu 3.495
total
lpeg 0.10
./timetrials.lua ${M} > /dev/null  3.45s user 0.01s system 99% cpu 3.480
total
lpeg 0.10
./timetrials.lua ${M} > /dev/null  3.45s user 0.01s system 99% cpu 3.486
total
--
-- JSON Parser
--
local lpeg = require("lpeg")

local function utf8(cap)
	local code = tonumber(cap, 16)
	local c1, c2, c3

	if bit32.band(code, 0xF800) ~= 0 then
		c1 = bit32.bor(0xE0, bit32.band(0x0F, bit32.rshift(code, 12)))
		c2 = bit32.bor(0x80, bit32.band(0x3F, bit32.rshift(code, 6)))
		c3 = bit32.bor(0x80, bit32.band(0x3F, code))

		return string.char(c1, c2, c3)
	elseif bit32.band(code, 0x0780) ~= 0 then
		c1 = bit32.bor(0xC0, bit32.band(0x1F, bit32.rshift(code, 6)))
		c2 = bit32.bor(0x80, bit32.band(0x3F, code))

		return string.char(c1, c2)
	else
		return string.char(code)
	end
end -- utf8

local xdigit  = lpeg.R("09", "AF", "af")
local unicode = lpeg.P"u" * ((xdigit * xdigit * xdigit * xdigit) / utf8)
local escaped = (lpeg.P'"' * lpeg.Cc('"'))
              + (lpeg.P"\\" * lpeg.Cc("\\"))
              + (lpeg.P"/" * lpeg.Cc("/"))
              + (lpeg.P"b" * lpeg.Cc("\b"))
              + (lpeg.P"f" * lpeg.Cc("\f"))
              + (lpeg.P"n" * lpeg.Cc("\n"))
              + (lpeg.P"r" * lpeg.Cc("\r"))
              + (lpeg.P"t" * lpeg.Cc("\t"))
local encoded = lpeg.P"\\" * (unicode + escaped)
local regular = lpeg.C((-lpeg.S'\\"' * lpeg.P(1)))
local string  = lpeg.Ct(lpeg.P'"' * (regular + encoded)^0 * lpeg.P'"') / function (t) return table.concat(t) end
local number  = ((lpeg.S"+-"^-1 * lpeg.R"09"^1 * lpeg.P(lpeg.P"." * lpeg.R"09"^1)^-1 * lpeg.P(lpeg.S"eE" * lpeg.S"+-"^-1 * lpeg.R"09"^1)^-1) / function (cap) return tonumber(cap) end)
local boolean = ((lpeg.P"true" * lpeg.Cc(true)) + (lpeg.P"false" * lpeg.Cc(false)))
local null    = lpeg.P"null" * lpeg.Cc(nil)
local simple  = string + number + boolean + null
local space   = lpeg.S(" \t\r\n")^0

local t2o = function(caps) 
	local t = {}
	for i = 0, #caps/2 - 1 do
		t[caps[i * 2 + 1]] = caps[i * 2 + 2]
	end
	return t
end

local Key, Value, Object, Array, Simple = lpeg.V"Key", lpeg.V"Value", lpeg.V"Object", lpeg.V"Array", lpeg.V"Simple"

local Grammar = lpeg.P{ "Value",
	Key = space * string * space,
	Value = space * (Object + Array + Simple) * space,
	Array = lpeg.Ct("[" * (lpeg.P","^-1 * Value)^0 * "]"),
	Object = lpeg.Ct("{" * (lpeg.P","^-1 * Key * lpeg.P":" * Value)^0 * "}") / t2o,
	Simple = simple,
}


local function decode(string)
	return lpeg.match(Grammar, string)
end


--
-- JSON Composer
--
local string = _G.string


local ugly = (lpeg.P'"' * lpeg.Cc('\\"'))
           + (lpeg.P'\\' * lpeg.Cc('\\\\'))
           + (lpeg.P'\b' * lpeg.Cc('\\b'))
           + (lpeg.P'\f' * lpeg.Cc('\\f'))
           + (lpeg.P'\n' * lpeg.Cc('\\n'))
           + (lpeg.P'\r' * lpeg.Cc('\\r'))
           + (lpeg.P'\t' * lpeg.Cc('\\t'))
local safe = lpeg.C((-ugly * lpeg.P(1))^1)

-- Match a prefix of safe characters, followed by end-of-string or a series
-- of ugly + safe matches. Tries to optimize for the case where no escaping
-- is necessary.
local both = (safe + lpeg.Cc("")) * (-lpeg.P(1) + lpeg.Ct((ugly + safe)^0))

local function quote(val)
	local one, rem = lpeg.match(both, val)

	if not rem then
		return '"' .. one .. '"'
	else
		return '"' .. one .. table.concat(rem) .. '"'
	end
end


local function isarray(t)
	local k, lk

	for k in function() lk = next(t, lk); return lk end do
		if type(k) ~= "number" then
			return false
		end
	end

	return true
end


local function pretty(io, val, depth)
	if type(val) == "string" then
		io:write(quote(val))
	elseif type(val) == "number" then
		io:write(val)
	elseif type(val) == "boolean" then
		if val then
			io:write("true")
		else
			io:write("false")
		end
	elseif type(val) == "table" then
		if isarray(val) then
			io:write("[\n")

			for i = 1, #val do
				io:write(string.rep("\t", depth + 1))
				pretty(io, val[i], depth + 1)
				io:write(",\n")
			end

			io:write(string.rep("\t", depth), "]")
		else
			io:write("{\n")

			for k, v in pairs(val) do
				io:write(string.rep("\t", depth + 1))
				pretty(io, k, depth + 1)
				io:write(" : ")
				pretty(io, v, depth + 1)
				io:write(",\n")
			end

			io:write(string.rep("\t", depth), "}")
		end
	else
		io:write("null")
	end

	if depth == 0 then
		io:write("\n")
	end
end -- pretty()


local function compact(io, val)
	if type(val) == "string" then
		io:write(quote(val))
	elseif type(val) == "number" then
		io:write(val)
	elseif type(val) == "boolean" then
		if val then
			io:write("true")
		else
			io:write("false")
		end
	elseif type(val) == "table" then
		if isarray(val) then
			io:write("[")

			for i = 1, #val do
				compact(io, val[i])
				io:write(",")
			end

			io:write("]")
		else
			io:write("{")

			for k, v in pairs(val) do
				compact(io, k)
				io:write(":")
				compact(io, v)
				io:write(",")
			end

			io:write("}")
		end
	else
		io:write("null")
	end
end -- compact()


local buffer = {
	write = function(self, data, ...)
		if data then
			table.insert(self, data)
			self:write(...)
		end
	end
}

local function encode(t, _pretty)
	local buffer = setmetatable({}, { __index = buffer })
	
	if _pretty then
		pretty(buffer, t, 0)
	else
		compact(buffer, t, 0)
	end

	return table.concat(buffer)
end

return { decode = decode, encode = encode }