lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


For some reason the initial email shows up as encrypted in my mail client (which I am pretty sure is a bug), can you guys see it? :)

sorry about that. thanks!

On Jul 6, 2017, at 17:11, Matthias Dörfelt <lists@mokafolio.de> wrote:

Hi All,

I am trying to write a lexer using LPeg for a very simple markup language that is closely inspired by https://pugjs.org/ . It uses indentation to represent a hierarchy, which, as far as I can tell, is something that is not easily dealt with in LPeg. Here is a basic working prototype just to see if I can get the hierarchical aspect to work:

It parses the following input:

el
    child1
        child11
    child2

and puts its basic AST representation into a lua table that looks something like this:

{
    children =
    {
        name = el
        children =
        {
            name = child1
            children =
            {
                name = child11
            }
        },
        {
            name = child2
        }
    }
}

While my implementation works, I feel like I am circumventing a lot of the actual capturing mechanisms of LPeg in order to deal with back-referencing to build the hierarchy. I’d like to get some feedback on whether there are any better ways to approach this kind of grammar using LPeg. I am new to it in general, so I might be missing something obvious here. Thank you very much!! Here is my Lua code; it should just run if you have LPeg installed:

local lpeg = require("lpeg")

-- Primitive LPeg patterns shared by the rest of the script.
local Indent = lpeg.P("    ") -- one indentation step: exactly four spaces
local Newline = lpeg.S("\n\r") -- a single line-feed or carriage-return character

-- Mutable state that handleLine updates while the input is processed:
--   root          - nameless top-level container that owns all parsed nodes
--   currentParent - node that receives children at the current indent level
--   lastNode      - node that was inserted most recently
--   currentIndent - indent level of the most recently processed line
local root = {children = {}}
local currentParent, lastNode = root, root
local currentIndent = 0

-- Inserts one line of input into the node tree according to its indentation.
--
-- The indent level is the number of leading four-space Indent units. The new
-- node is attached to the closest previously inserted node whose level is
-- strictly smaller, or to root when there is none. This handles dedents of
-- any number of levels as well as lines that are indented by more than one
-- level relative to their predecessor.
--
-- Side effects: updates currentParent, currentIndent and lastNode, and appends
-- a node {parent, children, name, level} to the chosen parent's children.
--
-- @param _str  the text of a single line, without line-break characters
-- @return      the node name, i.e. _str with the leading indent units removed
local function handleLine(_str)
    -- (Indent ^ 0):match returns the position just past the leading run of
    -- indent units, so idx - 1 is the number of indentation characters.
    local idx = (Indent ^ 0):match(_str)
    local level = (idx - 1) / 4

    -- Climb from the most recently inserted node until we reach a node that
    -- is shallower than this line. root carries no level field, so the climb
    -- always stops there at the latest.
    local parent = lastNode
    while parent.level ~= nil and parent.level >= level do
        parent = parent.parent
    end

    -- set the current parent and level
    currentParent = parent
    currentIndent = level

    -- create the new node; level is stored so later lines can locate their
    -- parent no matter how many levels they step back
    local name = _str:sub(idx)
    local node = {parent = parent, children = {}, name = name, level = level}
    -- and insert it into the hierarchy
    table.insert(parent.children, node)
    lastNode = node

    return name
end

-- A single non-empty line: one or more characters that are not line breaks,
-- captured as a string.
local LineText = lpeg.C((1 - Newline) ^ 1)

-- Zero or more lines. Line breaks in front of a line are skipped, and the
-- captured line text is handed to handleLine.
local Line = (Newline ^ 0 * LineText / handleLine) ^ 0

-- the test string to lex
local testString = "el\n    child1\n        child11\n    child2"

-- perform the lexing
-- NOTE: the values produced by match are ignored here; handleLine builds the
-- hierarchy manually as a side effect :(
Line:match(testString)

-- Recursively prints a node and its descendants as a brace-delimited listing.
-- @param _node    table with a `children` array and an optional `name`
-- @param _indent  nesting depth; each depth adds four spaces of padding
local function printHierarchy(_node, _indent)
    local pad = string.rep("    ", _indent)
    local kids = _node.children

    print(pad .. "{")

    -- the name line is omitted for nodes without a name
    if _node.name then
        print(pad .. "    name = " .. _node.name)
    end

    -- the children section is omitted entirely when there are none
    if #kids > 0 then
        print(pad .. "    children = ")
        for _, child in ipairs(kids) do
            printHierarchy(child, _indent + 1)
        end
    end

    print(pad .. "},")
end

-- print the resulting hierarchy, starting at the nameless root container
-- with an indentation depth of 0
printHierarchy(root, 0)

Matthias