lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


Hi! 

 Today I tried to write a simple grammar (a PEG grammar, in fact) in
'lpeg' and then rewrite it in 're' syntax. I was slightly disappointed
by the fact that the 're' module syntax does not support escapes in
literals and in character classes - for example, I was not able to find
a way to express "EndOfLine <- '\r\n' / '\n' / '\r'".  As a result I
took the liberty of making some changes to re.lua.

All old tests from test.lua still work, but some constructions like

    eqcharset(compile[[ '\' ]], m.P"\\")
    eqcharset(compile[[ [\] ]], m.P"\\")

are no longer relevant (a backslash now always starts an escape
sequence), while

    eqcharset(compile[[ '\\' ]], m.P"\\")
    eqcharset(compile[[ '\r' ]], m.P"\r")
    eqcharset(compile[[ '\073']], m.P"I")

    eqcharset(compile[[ [\n-A] ]], m.R"\nA")
    eqcharset(compile[[ [\n-\A] ]], m.R"\n\A")
    eqcharset(compile[[ [\r] ]], m.P"\r")
    eqcharset(compile[[ [\n\r\t\'\"\[\]\\] ]], m.S"\n\r\t\'\"[]\\")
 
are now possible.

Patch follows. 

Cheers,
      zOOn

========================================================================
--- re.lua    2008-03-07 20:24:00.000000000 +0300
+++ rex.lua    2008-06-10 16:38:41.777236700 +0400
@@ -5,7 +5,7 @@
 local tonumber, type, print, error = tonumber, type, print, error
 local mt = getmetatable(m.P(0))
 
-module "re"
+module(...)
 
 local any = m.P(1)
 
@@ -82,10 +82,42 @@
 
 local num = m.C(m.R"09"^1) * S / tonumber
 
-local String = "'" * m.C((any - "'")^0) * "'" +
-               '"' * m.C((any - '"')^0) * '"'
+local function unescChar(c2)
+  if     c2 == 'a'  then return '\a'
+  elseif c2 == 'b'  then return '\b'
+  elseif c2 == 'f'  then return '\f'
+  elseif c2 == 'n'  then return '\n'
+  elseif c2 == 't'  then return '\t'
+  elseif c2 == 'r'  then return '\r'
+  elseif c2 == 'v'  then return '\v'
+  end
+  return c2
+end
+
+local char = _G.string.char
+
+local function unescDecimal(ddd)
+  local n = tonumber(ddd)
+  if n <= 255 then 
+    return char(n) 
+  else 
+    error(("out of range escape sequence: \\%d"):format(n));
+  end
+end
+
+local DecimalCode = 
+  m.R"09"*m.R"09"*m.R"09" + -- NOTE: for geting errors like in lua
+  m.R"09"*m.R"09"^-1
+
+local Char = '\\'*(DecimalCode/unescDecimal + any/unescChar) + m.C(any-'\\')
 
-local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / m.R
+local StringBuffer =
+  "'"*m.Ct((Char - "'")^0)*"'"
++ '"'*m.Ct((Char - '"')^0)*'"'
+
+local String = StringBuffer / _G.table.concat
+
+local Range = (Char * '-' * (Char-']')) / function(l,r) return m.R(l..r) end
 
 local Cat = "%" * Identifier / function (c,Defs)
   local cat =  Defs and Defs[c] or Predef[c]
@@ -93,8 +125,7 @@
   return cat
 end
 
-
-local item = Cat + Range + m.C(any)
+local item = Cat + Range + Char
 
 local Class =
     "["