require "lpeg"
-- A nicer syntax for local variables in LPeg patterns.
set = function (name, o)
if getmetatable(o) == getmetatable(lpeg.P"") -- if o is a pattern
then return o:Cg(name) -- then name := eval(o)
else return lpeg.Cc(o):Cg(name) -- else name := o (constant)
end
end
get = function (name) return lpeg.Cb(name) end
group = function (cmds) return lpeg.Cg(cmds) end
-- A trivial example:
patt = set("x", "2") *
get("x") *
group(set("x", "3") *
get("x")
) *
get("x")
print(patt:match"") --> 2 3 2
-- A big example:
-- binary operators, with precedence.
--
-- Suppose that E is an _expression_ in tree form - for example, this:
--
-- +
-- / \
-- E = * <
-- / \ / \
-- 2 3 4 5
--
-- Then to find out how to print E in infix notation with need to
-- determine, for each of its "wires", whether we need or not to add
-- parentheses around the subexpression under it. The best way to do
-- that is to associate to each operator three numbers, that we will
-- call "top", "left" and "right" (op.t, op.l, or.r). We get his:
-- ___
-- /19 \
-- / + \
-- |_18_20_|
-- / \()
-- ___ ___
-- /21 \ /17 \
-- / * \ / < \
-- |_20_22_| |_18_18_|
-- / \ / \
-- 2 3 4 5
--
-- The wires that need parentheses are the ones in which the number
-- above is higher than the number below, and we get:
--
-- 2 * 3 + (4 < 5)
--
-- This idea can be easily adapted to parsing. After reading the "*"
-- we need to parse its second argument - the longest "_expression_
-- under the `*'" possible. Let's look at a bigger example:
--
-- &______________.
-- | |
-- <=_. >__.
-- | | | |
-- 2 +__. 7 8
-- /------------------------\ | |
-- /----------------\ 3 *_____.
-- /-----------\ | |
-- /-------\ *__. 6
-- /---\ /---\ | |
-- 2 <= 3 + 4 * 5 * 6 & 7 > 8 4 5
--
-- The second argument for the `+' in that _expression_ is the longest
-- "_expression_ under the `+'" possible, starting at the "4". In the
-- grammar below, this will be an "_expression_ without complements" -
-- just the "4" - followed by two "complements", "* 5" and "* 6",
-- which we will be added to the "4" using lpeg.Cf:
--
--
-- Note: I took the idea of op.t, op.l and op.r from Andrej Bauer's
-- PLZoo (but he uses that only for printing, not for parsing)...
-- See:
--
ops = {}
newbinop = function (l, t, r, str)
for op in str:gmatch("[^ ]+") do ops[op] = {l=l, t=t, r=r} end
end
newbinop(10, 11, 12, "->")
newbinop(12, 13, 14, "or")
newbinop(14, 15, 16, "&")
newbinop(18, 17, 18, "== <= >= < > != <-")
newbinop(18, 19, 20, "+ -")
newbinop(20, 21, 22, "* /")
pack_bin = function (e1, op, e2) return "("..op.." "..e1.." "..e2..")" end
apply_all = function (e, T) return T[#T](e, unpack(T, 1, #T-1)) end
above = function (lop, rop) return lop == "" or ops[lop].r < ops[rop].t end
is_binop = function (subj, pos, c) if ops[c] then return true, c end end
is_under = function (subj, pos, lop, op)
if above(lop, op) then return true, op end
end
P = lpeg.P
V = lpeg.V
Cc = lpeg.Cc
sp = lpeg.S" \t\n"^0
sP = function (str) return sp * P(str) end
Expr_grammar = {
"Expr", -- initial rule name
Num = sp * (lpeg.R"09"^1):C(),
Parenexpr = sP"(" * V"Expr" * sP")",
--
Binop_any = sp * ((lpeg.P(2):Cmt(is_binop) +
lpeg.P(1):Cmt(is_binop))),
Binop_under = (get"lop" * V"Binop_any"):Cmt(is_under),
Binop = set("lop", V"Binop_under") * get("lop"),
--
Complement = group(V"Binop" * V"Expr_under" * Cc(pack_bin)):Ct(),
--
Expr_woc = V"Num" + V"Parenexpr",
Expr_under = (V"Expr_woc" * V"Complement"^0):Cf(apply_all),
Expr = group(set("lop", "") * V"Expr_under"),
}
Expr_pattern = lpeg.P(Expr_grammar)
print(Expr_pattern:match"2 <= 3 + 4 * 5 * 6 & 7 > 8")
--> (& (<= 2 (+ 3 (* (* 4 5) 6))) (> 7 8))