lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


On Sat, Aug 28, 2010 at 3:43 PM, David Kolf <kolf@gmx.de> wrote:
> Today I learned how to use the LPeg module and in my local test version
> of dkjson I already added optional support for it. The speed up for
> decoding is significant, so I will probably release a version 2.0 soon.

I haven't used it in a while, but while learning to use the 're' layer
on top of LPeg I wrote this JSON parser:



local re = require ("re")

-- Maps the text of a JSON literal token to the Lua value it decodes to.
-- Assigning nil stores nothing, so 'null' is simply an absent key and a
-- lookup of builtin['null'] yields nil.
local builtin = {}
builtin['true'] = true
builtin['false'] = false
builtin['null'] = nil

--- Store a flat list of alternating keys and values into a table.
-- The real argument count is taken from select('#', ...), so a value of
-- `false` or `nil` no longer ends processing early: `false` is stored,
-- and a `nil` value leaves its key absent while later pairs are still
-- handled.
-- @param t    table that receives the pairs (modified in place)
-- @param ...  k1, v1, k2, v2, ... ; a trailing key without a value is ignored
-- @return     the same table `t`
local function pair2table (t, ...)
	local count = select ('#', ...)
	local args = {...}
	for i = 1, count, 2 do
		local k = args[i]
		-- a nil key cannot index a table; skip it instead of raising
		if k ~= nil then
			t[k] = args[i + 1]
		end
	end
	return t
end

--- Encode a Unicode code point as a UTF-8 byte string.
-- Range boundaries follow the UTF-8 definition:
--   U+0000..U+007F     1 byte
--   U+0080..U+07FF     2 bytes
--   U+0800..U+FFFF     3 bytes
--   U+10000..U+10FFFF  4 bytes
-- Surrogate code points (U+D800..U+DFFF) are not special-cased; they are
-- encoded with the generic 3-byte form.
-- @param n  code point as a non-negative number
-- @return   the encoded string; nothing is returned when n > 0x10FFFF
local function n2utf8 (n)
	local char,floor = string.char, math.floor
	if n < 0x80 then
		return char (n)
	elseif n < 0x800 then
		return char (0xC0+floor (n/0x40),
					0x80+n%0x40)
	elseif n < 0x10000 then
		-- strict '<': U+10000 needs four bytes, not three
		return char (0xE0+floor (n/0x1000),
					0x80+floor ((n%0x1000)/0x40),
					0x80+n%0x40)
	elseif n < 0x110000 then
		-- strict '<': the last valid code point is U+10FFFF
		return char (0xF0+floor (n/0x40000),
					0x80+floor ((n%0x40000)/0x1000),
					0x80+floor ((n%0x1000)/0x40),
					0x80+n%0x40)
	end
end

-- JSON decoder expressed as an LPeg 're' grammar. `json` is deliberately
-- left as a global because it is the public name of this snippet; the
-- compiled pattern is used through LPeg's match interface.
--
-- Grammar notes:
--   * `value` is the start rule; surrounding whitespace is skipped.
--   * `object` hands its captured key/value list to gotObj, `array`
--     collects its values with a table capture.
--   * `string` uses a substitution capture, so inside it the `escape`
--     rule replaces a backslash escape with the character it stands for
--     (looked up in the `escapes` table) and the `unicode` rule replaces
--     \uXXXX with the UTF-8 encoding of that code point.
--   * `unicode` accepts hexadecimal digits only, so hex2utf8 always
--     receives a string that tonumber(h, 16) can parse.
--   * Each \uXXXX is converted on its own; UTF-16 surrogate pairs are not
--     combined into a single code point.
--   * gotBuiltin returns nil for 'null' (see the `builtin` table).
json = re.compile ([[
	value	<- (%s* ( <object>
					/ <array>
					/ <builtin>
					/ <string>
					/ <number> ) %s* )
	object	<- ('{' (<pair> (',' <pair>)*)? %s* '}')	-> gotObj
	pair	<- (<string> ':' <value>)				--	-> gotPair
	array	<- ('[' (<value> (',' <value>)*)?  %s*']')	-> {}
	builtin	<- ('true' / 'false' / 'null')				-> gotBuiltin
	string	<- (%s* '"' {~ ( <escape>
						/ <unicode>
						/ [^\"] )* ~} '"' %s*)			-> gotString
	escape	<- ('\' {["\/bfnrt]})						-> escapes
	unicode	<- '\u'->'' [0-9a-fA-F]^4 -> hex2utf8
	number	<- ( '-'? %d+ ('.' %d+)? ([eE][-+]? %d+)? )	-> gotNumber
]],{
	gotObj = function (...) return pair2table ({}, ...) end,
	gotBuiltin = function (tk) return builtin[tk] end,
	gotString = function (s) return tostring(s) end,
	gotNumber = function (n) return tonumber(n) end,
	hex2utf8 = function (h) return n2utf8(tonumber(h, 16)) end,
	-- query-capture table: character following the backslash -> decoded character
	escapes = {
		['"'] = '"',
		['\\'] = '\\',
		['/'] = '/',
		b = '\b',
		f = '\f',
		n = '\n',
		r = '\r',
		t = '\t',
	},
})



As you can see, both LPeg's power and JSON's simplicity bring the task
well within the realm of "left as an exercise to the reader".



-- 
Javier