[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: LPEG: character escapes in 're' module
- From: Andrew Zhilin <andrew_zhilin@...>
- Date: Tue, 10 Jun 2008 09:30:08 -0700 (PDT)
Hi!
Today I tried to write a simple grammar (a PEG grammar, in fact) in
'lpeg' and then rewrite it in 're' syntax. I was slightly disappointed
with the fact that the 're' module syntax does not support escapes in
literals and in character classes - i.e., I was not able to find a
way to express "EndOfLine <- '\r\n' / '\n' / '\r'". As a result I
took the liberty of making some changes to re.lua.
All old tests from test.lua still work, but some constructions like
eqcharset(compile[[ '\' ]], m.P"\\")
eqcharset(compile[[ [\] ]], m.P"\\")
are not relevant any more; and
eqcharset(compile[[ '\\' ]], m.P"\\")
eqcharset(compile[[ '\r' ]], m.P"\r")
eqcharset(compile[[ '\073']], m.P"I")
eqcharset(compile[[ [\n-A] ]], m.R"\nA")
eqcharset(compile[[ [\n-\A] ]], m.R"\n\A")
eqcharset(compile[[ [\r] ]], m.P"\r")
eqcharset(compile[[ [\n\r\t\'\"\[\]\\] ]], m.S"\n\r\t\'\"[]\\")
are now possible.
Patch follows.
Cheers,
zOOn
========================================================================
--- re.lua 2008-03-07 20:24:00.000000000 +0300
+++ rex.lua 2008-06-10 16:38:41.777236700 +0400
@@ -5,7 +5,7 @@
local tonumber, type, print, error = tonumber, type, print, error
local mt = getmetatable(m.P(0))
-module "re"
+module(...)
local any = m.P(1)
@@ -82,10 +82,42 @@
local num = m.C(m.R"09"^1) * S / tonumber
-local String = "'" * m.C((any - "'")^0) * "'" +
- '"' * m.C((any - '"')^0) * '"'
+local function unescChar(c2)
+ if c2 == 'a' then return '\a'
+ elseif c2 == 'b' then return '\b'
+ elseif c2 == 'f' then return '\f'
+ elseif c2 == 'n' then return '\n'
+ elseif c2 == 't' then return '\t'
+ elseif c2 == 'r' then return '\r'
+ elseif c2 == 'v' then return '\v'
+ end
+ return c2
+end
+
+local char = _G.string.char
+
+local function unescDecimal(ddd)
+ local n = tonumber(ddd)
+ if n <= 255 then
+ return char(n)
+ else
+ error(("out of range escape sequence: \\%d"):format(n));
+ end
+end
+
+local DecimalCode =
+ m.R"09"*m.R"09"*m.R"09" + -- NOTE: for getting errors like in Lua
+ m.R"09"*m.R"09"^-1
+
+local Char = '\\'*(DecimalCode/unescDecimal + any/unescChar) + m.C(any-'\\')
-local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / m.R
+local StringBuffer =
+ "'"*m.Ct((Char - "'")^0)*"'"
++ '"'*m.Ct((Char - '"')^0)*'"'
+
+local String = StringBuffer / _G.table.concat
+
+local Range = (Char * '-' * (Char-']')) / function(l,r) return m.R(l..r) end
local Cat = "%" * Identifier / function (c,Defs)
local cat = Defs and Defs[c] or Predef[c]
@@ -93,8 +125,7 @@
return cat
end
-
-local item = Cat + Range + m.C(any)
+local item = Cat + Range + Char
local Class =
"["