Lua Balanced

lua-users home
wiki

Difference (from prior major revision) (author diff)

Changed: 42c42
=== Tests/Examples ===
== Source ==

Changed: 44,475c44

-- luabalanced_test.lua
-- test for luabalanced.lua

local lb = require "luabalanced"
local tuple = require "tuple"

-- Test-suite helper: raise an error unless <a> and <b> compare equal.
-- The comparison uses `==`, so an __eq metamethod (e.g. on tuples) is
-- honoured.  The error is raised at level 2 so that it is attributed to
-- the caller of asserteq, not to asserteq itself.
local function asserteq(a, b)
  if a == b then
    return
  end
  local message = tostring(a) .. ' == ' .. tostring(b) .. ' failed'
  error(message, 2)
end

-- utility function (wrap function: store return in tuple and protect)
--
-- Returns a function g(s, pos) that calls f(s, pos) under pcall.
--   * If f raises, g returns the string 'error' (the message is dropped).
--   * Otherwise g returns a tuple holding all of f's return values
--     (including nils, since the count is taken from the tuple's `n`).
local function wrap2(f)
  -- `unpack` is a global in Lua 5.1 but lives in `table` from 5.2 on;
  -- pick whichever exists so the test suite runs on both.
  local unpack = table.unpack or unpack
  return function(s, pos)
    -- res[1] is pcall's status flag; res[2..res.n] are f's results.
    local res = tuple(pcall(f, s, pos))
    if not res[1] then
      return 'error'
    end
    return tuple(unpack(res, 2, res.n))
  end
end

--## match_bracketed tests

-- test wrapper for lb.match_bracketed
-- (wrap2 reports a raised error as the string 'error' and a normal
-- return as a tuple, so every case can be compared with asserteq)
local mb = wrap2(lb.match_bracketed)

-- trivial tests
-- Expected: input that does not start with an opening bracket gives
-- (nil, 1); unterminated or mismatched brackets are expected to raise.
asserteq(mb'', tuple(nil, 1))
asserteq(mb'a', tuple(nil, 1))
asserteq(mb'{', 'error')
asserteq(mb'}', tuple(nil, 1))
asserteq(mb'{[}]', 'error')
asserteq(mb('[{}]'), tuple('[{}]', 5))

-- test with pos
-- Matching starts at the given position (3) and the second result is the
-- position just after the closing bracket.
asserteq(mb('[][a(a)a].', 3), tuple('[a(a)a]', 10))

-- test with strings
-- Brackets inside quoted or long-bracket strings must not affect nesting;
-- a leading long-bracket string is itself returned as the match.
asserteq(mb('[ "]" ]'), tuple('[ "]" ]', 8))
asserteq(mb("[ '[' ]"), tuple("[ '[' ]", 8))
asserteq(mb("[ [=[ ]=] ]"), tuple("[ [=[ ]=] ]", 12))
asserteq(mb("[[ ] ]]"), tuple("[[ ] ]]", 8))
asserteq(mb("[=[ [ ]=]"), tuple("[=[ [ ]=]", 10))

--## match_expression tests

-- test wrapper for lb.match_expression
-- Each case lists the expected matched text (note that whitespace before
-- the token that ends the expression is part of the match) and the
-- expected position following the match.
local me = wrap2(lb.match_expression)
asserteq(me'a', tuple('a', 2))
asserteq(me'a b=c', tuple('a ', 3))
-- word operators (and/or/not) continue the expression
asserteq(me'a and b', tuple('a and b', 8))
asserteq(me'a and b ', tuple('a and b ', 9))
asserteq(me'a and b c', tuple('a and b ', 9))
asserteq(me'a+b', tuple('a+b', 4))
asserteq(me'a+b b=c', tuple('a+b ', 5))
-- an identifier that follows a completed term ends the expression
asserteq(me'{function()end}+b c', tuple('{function()end}+b ', 19))
asserteq(me'{} e', tuple('{} ', 4))
asserteq(me'() e', tuple('() ', 4))
asserteq(me'"" e', tuple('"" ', 4))
asserteq(me"'' e", tuple("'' ", 4))
asserteq(me'a[1] e', tuple('a[1] ', 6))
asserteq(me'ab.cd e', tuple('ab.cd ', 7))
asserteq(me'ab:cd() e', tuple('ab:cd() ', 9))
asserteq(me'(x) (y) z', tuple('(x) (y) ', 9))
-- '=' inside a comparison operator does not end the expression
asserteq(me'x >= y', tuple('x >= y', 7))

-- numbers
-- exponent forms, with and without sign or leading digits
asserteq(me'1e2 a', tuple('1e2 ', 5))
asserteq(me'1e+2 a', tuple('1e+2 ', 6))
asserteq(me'1.2e+2 a', tuple('1.2e+2 ', 8))
asserteq(me'.2e+2 a', tuple('.2e+2 ', 7))

-- comments
-- a comment after a binary operator is skipped and the expression
-- continues; a comment after a completed term does not hide the fact
-- that the next identifier starts something new.
asserteq(me'a+ -- b\nc', tuple('a+ -- b\nc', 10))
asserteq(me'a --[[]] b', tuple('a --[[]] ', 10))
asserteq(me'a+ --[[]] b', tuple('a+ --[[]] b', 12))
asserteq(me'a --[[]] + b', tuple('a --[[]] + b', 13))
asserteq(me'a+ --[[]] --[=[]=] b', tuple('a+ --[[]] --[=[]=] b', 21))
asserteq(me'a+ -- b\n -- b\n b c', tuple('a+ -- b\n -- b\n b ', 18))

-- check for exceptions giving lots of possibly not syntactically
-- correct data.
--
-- Every byte offset of this test file is used as a start position for
-- lb.match_expression.  The only error that is tolerated is one whose
-- message contains 'syntax error'; anything else is re-raised.
--
-- The call goes through pcall directly rather than through wrap2:
-- wrap2 collapses every failure to the bare string 'error' and drops the
-- message, so the message could never be inspected through it.
local fh = assert(io.open('luabalanced_test.lua'))
local text = fh:read('*a')
fh:close()
for i=1,#text do
  local ok, err = pcall(lb.match_expression, text, i)
  if not ok and not tostring(err):match('syntax error') then
    error(err)
  end
end

--## match_explist tests

-- Protected wrapper around lb.match_explist (see wrap2).
local explist_wrapped = wrap2(lb.match_explist)

-- Calls the wrapped matcher and flattens the returned expression array
-- (first element of the result) into one '|'-separated string so the
-- result can be compared against a tuple with asserteq.
local function ml(...)
  local res = explist_wrapped(...)
  local joined = table.concat(res[1], '|')
  res[1] = joined
  return res
end
asserteq(ml ' d', tuple(' ', 2))
asserteq(ml 'a+b,b*c d', tuple('a+b|b*c ', 9))

--## match_namelist tests

-- Protected wrapper around lb.match_namelist (see wrap2).
local namelist_wrapped = wrap2(lb.match_namelist)

-- Calls the wrapped matcher and flattens the returned name array (first
-- element of the result) into one '|'-separated string so the result can
-- be compared against a tuple with asserteq.
local function ml(...)
  local res = namelist_wrapped(...)
  local joined = table.concat(res[1], '|')
  res[1] = joined
  return res
end
asserteq(ml ' ', tuple('', 1))
asserteq(ml 'a b', tuple('a', 3))
asserteq(ml 'a,b d', tuple('a|b', 5))
asserteq(ml 'a,b+d', tuple('a|b', 4))


--## gsub tests

-- lb.gsub is called directly (not through wrap2); it returns a string.
local ls = lb.gsub

-- Replacement callback: tags every snippet with its type code <u>
-- ('e', 's' or 'c') so the expected strings below show how the input
-- was split.
local function f(u, s)
return '[' .. u .. ':' .. s .. ']'
end

-- empty input produces no snippets at all
asserteq(ls('', f), '')
asserteq(ls(' ', f), '[e: ]')
-- strings and comments are split out from the surrounding code
asserteq(ls(' "z" ;', f), '[e: ][s:"z"][e: ;]')
asserteq(ls(' --[[z]] ;', f), '[e: ][c:--[[z]]][e: ;]')
-- a line comment includes its terminating newline when there is one
asserteq(ls(' --z\n ;', f), '[e: ][c:--z\n][e: ;]')
asserteq(ls(' --z', f), '[e: ][c:--z]')
-- a lone '[' that does not open a long string stays in the code snippet
asserteq(ls('[][=[ ] ]=] ;', f), '[e:[]][s:[=[ ] ]=]][e: ;]')
-- a single '-' is an operator, and '--' inside a string is not a comment
asserteq(ls('a - b --[[d]] .. "--"', f), '[e:a - b ][c:--[[d]]][e: .. ][s:"--"]')

print 'DONE'



=== Implementation ===


-- luabalanced.lua
-- Extracts delimited Lua sequences from strings.[1]
-- Inspired by Damian Conway's Text::Balanced[2] in Perl.
--
-- [1] http://lua-users.org/wiki/LuaBalanced
-- [2] http://search.cpan.org/dist/Text-Balanced/lib/Text/Balanced.pm
--
-- (c) 2008, David Manura, Licensed under the same terms as Lua (MIT license).
--

local M = {}

local assert = assert
local table_concat = table.concat

-- map opening brace <-> closing brace.
-- `ends` is keyed by the opening character and gives the closing one.
local ends = { ['('] = ')', ['{'] = '}', ['['] = ']' }
-- `begins` is the inverse of `ends` (closing character -> opening one),
-- built by swapping each key/value pair.
local begins = {}; for k,v in pairs(ends) do begins[v] = k end


-- Match Lua string in string <s> starting at position <pos>
-- (default 1).
-- Returns <string>, <posnew>, where <string> is the matched
-- string (or nil on no match) and <posnew> is the character
-- following the match (or <pos> on no match).
-- Supports all Lua string syntax: "...", '...', [[...]], [=[...]=], etc.
-- Raises an error whose message contains 'syntax error' when a string
-- is opened at <pos> but never terminated.
local function match_string(s, pos)
  pos = pos or 1
  local posa = pos                 -- start of the candidate string
  local c = s:sub(pos,pos)
  if c == '"' or c == "'" then
    -- Quoted string: hop between occurrences of the quote character and
    -- backslashes until the unescaped closing quote is found.
    pos = pos + 1
    while 1 do
      pos = assert(s:find("[" .. c .. "\\]", pos), 'syntax error')
      if s:sub(pos,pos) == c then
        return s:sub(posa, pos), pos + 1
      end
      -- Found a backslash: skip it together with the escaped character.
      pos = pos + 2
    end
  else
    -- Long-bracket string: <sc> captures the run of '=' between the two
    -- '[' so that the closing bracket of the same level can be located.
    local sc = s:match("^%[(=*)%[", pos)
    if sc then
      local _, pose = s:find("%]" .. sc .. "%]", pos)
      -- Use the same 'syntax error' wording as the quoted branch so that
      -- callers can recognise both kinds of failure the same way.
      assert(pose, 'syntax error: long string not terminated')
      return s:sub(posa, pose), pose + 1
    else
      return nil, pos
    end
  end
end
M.match_string = match_string


-- Match bracketed Lua expression, e.g. "(...)", "{...}", "[...]", "[[...]]",
-- [=[...]=], etc.
-- Function interface is similar to match_string:
-- returns <string>, <posnew> where <string> is the balanced text starting
-- at <pos> (default 1), or nil (with <posnew> == <pos>) when the
-- character at <pos> is not an opening bracket.
-- Quoted strings, long strings and comments inside the brackets are
-- skipped, so brackets or quotes appearing in them do not affect nesting.
-- Raises an error whose message contains 'syntax error' on unbalanced or
-- unterminated input.
local function match_bracketed(s, pos)
  pos = pos or 1
  local posa = pos
  local ca = s:sub(pos,pos)
  if not ends[ca] then
    return nil, pos
  end
  local stack = {}                 -- currently open bracket characters
  while 1 do
    -- Jump to the next bracket, quote, or '-' (possible comment start).
    pos = s:find('[%(%{%[%)%}%]\"\'%-]', pos)
    assert(pos, 'syntax error: unbalanced')
    local c = s:sub(pos,pos)
    if c == '"' or c == "'" then
      local part; part, pos = match_string(s, pos)
      assert(part)
    elseif c == '-' then
      if s:sub(pos+1,pos+1) == '-' then
        -- Comment: skip it entirely so that its text is never mistaken
        -- for a bracket or the start of a string.
        local level = s:match('^%[(=*)%[', pos+2)
        if level then
          local _, pose = s:find(']' .. level .. ']', pos+2, true)
          assert(pose, 'syntax error: long comment not terminated')
          pos = pose + 1
        else
          -- Line comment: runs to the newline or the end of input.
          pos = (s:find('\n', pos+2, true) or #s) + 1
        end
      else
        pos = pos + 1              -- a lone '-' is just an operator
      end
    elseif ends[c] then -- open
      local mid, posb
      if c == '[' then mid, posb = s:match('^%[(=*)%[()', pos) end
      if mid then
        -- '[' opens a long string: skip to just past its closing bracket.
        pos = s:match('%]' .. mid .. '%]()', posb)
        assert(pos, 'syntax error: long string not terminated')
        if #stack == 0 then
          -- The long string is itself the whole bracketed match.
          local part = s:sub(posa, pos-1)
          return part, pos
        end
      else
        stack[#stack+1] = c
        pos = pos + 1
      end
    else -- close
      -- The closer must pair with the most recently opened bracket.
      assert(stack[#stack] == assert(begins[c]), 'syntax error: unbalanced')
      stack[#stack] = nil
      if #stack == 0 then
        local part = s:sub(posa, pos)
        return part, pos+1
      end
      pos = pos + 1
    end
  end
end
M.match_bracketed = match_bracketed


-- Match Lua comment, e.g. "--...\n", "--[[...]]", "--[=[...]=]", etc.
-- Function interface is similar to match_string:
-- returns <comment>, <posnew>, or nil, <pos> when the text at <pos>
-- (default 1) does not start with "--".
-- A line comment includes its terminating newline when there is one.
-- Raises an error containing 'syntax error' when a long comment is
-- opened but never closed.
local function match_comment(s, pos)
  pos = pos or 1
  if s:sub(pos, pos+1) ~= '--' then
    return nil, pos
  end
  local body = pos + 2             -- first character after the "--"
  -- Only a long-bracket opener directly after "--" makes a long comment.
  -- Quote characters have no special meaning here: "--'tis" is an
  -- ordinary line comment, not a comment wrapping a quoted string.
  local level = s:match('^%[(=*)%[', body)
  if level then
    local _, close = s:find(']' .. level .. ']', body, true)
    assert(close, 'syntax error: long comment not terminated')
    return s:sub(pos, close), close + 1
  end
  -- Line comment: everything up to and including the next newline, or
  -- up to the end of input.
  local line, posnew = s:match('^([^\n]*\n?)()', body)
  return '--' .. line, posnew
end


-- Match Lua expression, e.g. "a + b * c[e]".
-- Function interface is similar to match_string:
-- returns <string>, <posnew> where <string> is the text from <pos>
-- (default 1) up to, but not including, the token that ends the
-- expression, and <posnew> is the position of that token (or #s+1 when
-- the expression runs to the end of <s>).
-- The scan does not parse the expression; it only looks at strings,
-- comments, brackets, '=' and word-like tokens to decide where it ends.

-- Keywords that act as operators: an identifier equal to one of these
-- never starts a new statement, and a word before it does not end a term.
local wordop = {['and']=true, ['or']=true, ['not']=true}
-- Characters that, directly before '=', make it part of a two-character
-- comparison operator (>=, <=, ~=) rather than an assignment.
local is_compare = {['>']=true, ['<']=true, ['~']=true}
local function match_expression(s, pos)
pos = pos or 1
local posa = pos
-- most recent word-like token seen (identifier, keyword or number part)
local lastident
-- start of the latest run of comments, and the position just after that
-- run and its trailing whitespace
local poscs, posce
while pos do
local c = s:sub(pos,pos)
-- quoted string, or '[' followed by '=' or '[' (treated as a long string)
if c == '"' or c == "'" or c == '[' and s:find('^[=%[]', pos+1) then
local part; part, pos = match_string(s, pos)
assert(part, 'syntax error')
elseif c == '-' and s:sub(pos+1,pos+1) == '-' then
-- note: handle adjacent comments in loop to properly support
-- backtracking (poscs/posce).
poscs = pos
while s:sub(pos,pos+1) == '--' do
local part; part, pos = match_comment(s, pos)
assert(part)
pos = s:match('^%s*()', pos)
posce = pos
end
elseif c == '(' or c == '{' or c == '[' then
-- balanced group: skipped as a unit
local part; part, pos = match_bracketed(s, pos)
elseif c == '=' and s:sub(pos+1,pos+1) == '=' then
pos = pos + 2 -- skip over two-char op containing '='
elseif c == '=' and is_compare[s:sub(pos-1,pos-1)] then
pos = pos + 1 -- skip over two-char op containing '='
elseif c:match'^[%)%}%];,=]' then
-- a closing bracket, ';', ',' or a lone '=' ends the expression
local part = s:sub(posa, pos-1)
return part, pos
elseif c:match'^[%w_]' then
-- word-like token (digits match %w too, so number parts land here)
local newident,newpos = s:match('^([%w_]+)()', pos)
if pos ~= posa and not wordop[newident] then -- non-first ident
-- Look at the last non-space character before this token; if the token
-- directly follows a comment run, look before that run instead.
local pose = ((posce == pos) and poscs or pos) - 1
while s:match('^%s', pose) do pose = pose - 1 end
local ce = s:sub(pose,pose)
-- If that character completes a term (closing bracket, quote, or the end
-- of a word that is not a word operator), two terms are adjacent with no
-- operator between them, so the expression ends before this token.
if ce:match'[%)%}\'\"%]]' or
ce:match'[%w_]' and not wordop[lastident]
then
local part = s:sub(posa, pos-1)
return part, pos
end
end
lastident, pos = newident, newpos
else
-- e.g. a single '-' operator: nothing to do, step past it
pos = pos + 1
end
-- advance to the next character that can influence the decision;
-- pos becomes nil at end of input, which ends the loop
pos = s:find('[%(%{%[%)%}%]\"\';,=%w_%-]', pos)
end
-- no terminating token found: the expression extends to the end of <s>
local part = s:sub(posa, #s)
return part, #s+1
end
M.match_expression = match_expression


-- Match name list (zero or more names). E.g. "a,b,c"
-- Function interface is similar to match_string,
-- but returns array as match: <names>, <posnew>.
-- The first name must begin exactly at <pos> (default 1); later names
-- are introduced by a comma with optional surrounding whitespace.
-- Whitespace following each name is consumed.  When no name is found
-- the array is empty and <posnew> equals <pos>.
local function match_namelist(s, pos)
  local name_pat = '([%a_][%w_]*)%s*()'
  local names = {}
  local cursor = pos or 1
  local name, after = s:match('^' .. name_pat, cursor)
  while name do
    names[#names+1] = name
    cursor = after
    name, after = s:match('^%s*,%s*' .. name_pat, cursor)
  end
  return names, cursor
end
M.match_namelist = match_namelist


-- Match expression list (zero or more expressions). E.g. "a+b,b*c".
-- Function interface is similar to match_string,
-- but returns array as match: <expressions>, <posnew>.
-- One expression is always matched first (via match_expression); further
-- expressions are matched for as long as a comma follows.
local function match_explist(s, pos)
  local exprs = {}
  local cursor = pos or 1
  repeat
    local expr
    expr, cursor = match_expression(s, cursor)
    assert(expr, 'syntax error')
    exprs[#exprs+1] = expr
    -- A comma (with optional whitespace around it) announces another item.
    local after_comma = s:match('^%s*,%s*()', cursor)
    if after_comma then
      cursor = after_comma
    end
  until not after_comma
  return exprs, cursor
end
M.match_explist = match_explist


-- Replace snippets of code in Lua code string <s>
-- using replacement function f(u,sin) --> sout.
-- <u> is the type of snippet ('c' = comment, 's' = string,
-- 'e' = any other code).
-- Snippet is replaced with <sout> (unless <sout> is nil or false, in
-- which case the original snippet is kept)
-- This is somewhat analogous to string.gsub .
-- Returns the rebuilt string.  <f> is called once per snippet, in source
-- order; empty 'e' snippets are not reported.
local function gsub(s, f)
  local pos = 1      -- scan cursor
  local posa = 1     -- start of the pending 'e' (plain code) snippet
  -- Output pieces are collected and joined once at the end, instead of
  -- growing a string with repeated concatenation.
  local out = {}
  local function emit(u, snippet)
    out[#out+1] = f(u, snippet) or snippet
  end
  while 1 do
    -- Only '-', quotes and '[' can start a comment or a string.
    pos = s:find('[%-\'\"%[]', pos)
    if not pos then break end
    if s:match('^%-%-', pos) then
      local exp = s:sub(posa, pos-1)
      if #exp > 0 then emit('e', exp) end
      local comment; comment, pos = match_comment(s, pos)
      emit('c', assert(comment))
      posa = pos
    else
      -- match_string returns nil for a lone '-' or a '[' that does not
      -- open a long string; those characters stay in the code snippet.
      local str, posn = match_string(s, pos)
      if str then
        local exp = s:sub(posa, pos-1)
        if #exp > 0 then emit('e', exp) end
        emit('s', str)
        pos = posn
        posa = posn
      else
        pos = pos + 1
      end
    end
  end
  -- Whatever follows the last string/comment is a final code snippet.
  local exp = s:sub(posa)
  if #exp > 0 then emit('e', exp) end
  return table_concat(out)
end
M.gsub = gsub


return M



The following file is used by the test suite:


-- tuple.lua
-- Simple tuple implementation using tables.
-- (c) 2008, David Manura, Licensed under the same terms as Lua (MIT license).

local select = select
local tostring = tostring
local setmetatable = setmetatable
local table_concat = table.concat

-- Metatable shared by every tuple; provides tostring and == support.
local mt = {}

-- Build a tuple from the given values.  The element count is stored in
-- field `n`, so nil elements (including trailing ones) are preserved.
local function tuple(...)
  return setmetatable({n = select('#', ...), ...}, mt)
end

-- Render as 'tuple(v1,v2,...)'; string elements are shown quoted (%q).
function mt.__tostring(self)
  local parts = {}
  for i = 1, self.n do
    local v = self[i]
    if type(v) == 'string' then
      parts[i] = string.format('%q', v)
    else
      parts[i] = tostring(v)
    end
  end
  return 'tuple(' .. table_concat(parts, ',') .. ')'
end

-- Two tuples are equal when they have the same length and every pair of
-- corresponding elements compares equal.
function mt.__eq(a, b)
  local same = a.n == b.n
  local i = 1
  while same and i <= a.n do
    same = a[i] == b[i]
    i = i + 1
  end
  return same
end

return tuple


Downloadable from [github].

LuaBalanced provides functions for matching delimited snippets of Lua code in a string.

Home page: http://lua-users.org/wiki/LuaBalanced (this page)

Description

This module can, for example, match a Lua string, Lua comment, or Lua expression. It is useful in particular for source filters or parsing Lua snippets embedded in another language. It is inspired by Damian Conway's Text::Balanced [1] in Perl. The unique feature of this implementation is that it does not rigorously lex and parse the Lua grammar. It doesn't need to. It assumes during the parse that the Lua code is syntactically correct (which can be verified later using loadstring). By assuming this, extraction of delimited sequences is significantly simplified yet can still be robust, and it also supports supersets of the Lua grammar. The code, which is written entirely in Lua, is just under 200 lines of Lua code (compare to Yueliang used in MetaLua, where the lexer alone is a few hundred lines).

Projects using this module: ListComprehensions

Examples

local lb = require "luabalanced"

-- Extract Lua expression starting at position 4.
print(lb.match_expression("if x^2 + x > 5 then print(x) end", 4))
--> x^2 + x > 5     16

-- Extract Lua string starting at (default) position 1.
print(lb.match_string([["test\"123" .. "more"]]))
--> "test\"123"     12

-- Break Lua code into code types.
lb.gsub([[
  local x = 1  -- test
  print("x=", x)
]], function(u, s)
  print(u .. '[' .. s .. ']')
end)
--[[output:
e[  local x = 1  ]
c[-- test
]
e[  print(]
s["x="]
e[, x)
]
]]

Source

Downloadable from [github].

Author

DavidManura

Status

This module is new and likely still has some bugs.


RecentChanges · preferences
edit · history
Last edited September 11, 2010 5:11 pm GMT (diff)