lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


Hi,

This is my first post, please be gentle. I am trying to make a simple
imitation of AWK in Lua 5.1 using a PC.

My aim is to write a table of instructions each comprising a pattern and
action couple - written in Lua. I am not aiming to emulate the full AWK
syntax or make Lua read AWK scripts.

From AWK it seems sensible to recreate BEGIN, END, RS, FS, ORS, OFS and
FNR, and I have added FN - a Lua table to hold field names.

I would then pass the instructions table to awk.lua with a source for
processing. e.g.

-- Example rule set: named keys carry the special entries, the array part
-- holds {pattern, action} pairs written as Lua source strings.
instructions = {
    BEGIN = "print('Test run') print('\\n')",
    RS    = "\r?\n",
    FS    = "%s",
    ORS   = "+",
    OFS   = "_",
    END   = "print('\\nCompleted @'..os.date():sub(9))",
    -- {"",""}
    {
        [[(string.match(line,"roof") and string.match(line,"On"))]],
        [[print("\n"..FNR.."\t"..line)]],
    },
}

-- Run the same instructions over each kind of source in turn:
-- a string, a table of records, and a file name.
local sources = {
    "The Cat sat\nOn the roof\nAnd smiled",
    {"The Cat sat","On the roof","And frowned"},
    "input.txt",
}
for _, src in ipairs(sources) do
    print(awk(src, instructions))
end

As you can see I would like to read the source from a string, table or
file. I will assume the table is correctly broken into records and
ignore the RS (input record separator).

This script needs a string.split() function (which I have slightly
modified to return a table and the number of fields) and an io.exists()
function:

--- Split a string into fields on a set of separator characters.
-- Adapted from https://stackoverflow.com/questions/1426954/split-string-in-lua
-- @param inputstr string to split
-- @param sep separator characters; the value is spliced into a negated
--            character class ("[^" .. sep .. "]+"), so it is read as a set of
--            characters, not as a sequence (default "%s")
-- @return table of fields in order; empty when no field is found
-- @return number of fields in that table
function string.split(inputstr, sep)
    if sep == nil then
        sep = "%s"
    end

    local t = {}
    local n = 0

    for str in string.gmatch(inputstr, "([^"..sep.."]+)") do
        n = n + 1
        t[n] = str
    end

    -- Always hand back a table plus the real field count, so callers can
    -- pass the result straight to ipairs() or table.concat().
    return t, n
end

--- Report whether a path exists.
-- Source: http://stackoverflow.com/questions/4990990/lua-check-if-a-file-exists
-- @param filename path to probe; any non-string value yields false
-- @return true when renaming the path onto itself succeeds, false otherwise
function io.exists(filename)
    if type(filename)~="string" then
       return false
    end
    -- os.rename returns nil on failure; coerce the result to a strict boolean
    return os.rename(filename,filename) and true or false
end

So to my first attempt at awk.lua.

The pattern() function may return any true value to trigger the action,
as in AWK it doesn't have to be a text match.

For default output and print(), I have used a custom function extending
io.write() to accept multiple parameters, add ORS, suppress the extra
EOL character and to allow redirection.

I have coded the default instructions {"",""} and {"","print(line)"}.
You will see I have altered the "line" variable in the ENV for action()
to substitute the FS with OFS. This may cause problems if you manipulate
the line variable in an action() and expect it to be the original from
the input stream.

Other actions written in Lua should work within the assigned ENV.

        --- Run AWK-style pattern/action rules over a set of records.
        -- Written for Lua 5.1: uses loadstring() and setfenv(). Relies on the
        -- io.exists() and string.split() helpers defined alongside it.
        --
        -- Named entries of `instructions` (BEGIN, END, RS, FS, ORS, OFS, FN)
        -- configure the run; its array part holds {pattern, action} pairs of
        -- Lua source strings. An empty pattern always matches; an empty action
        -- prints the record's fields joined with OFS.
        --
        -- Rules run in a sandbox exposing ORS, OFS, RS, FS, FN, FILENAME,
        -- output, FNR, NF, line, field, print and the string, math, os, io and
        -- table libraries. A pattern sees `line` as read; an action sees `line`
        -- rebuilt from the fields joined with OFS. While records are processed
        -- the sandbox print writes through io.write, appending ORS except on
        -- the last record. BEGIN runs with the standard print.
        --
        -- RS and FS are handed to string.split(), which treats them as a set
        -- of separator characters rather than as a sequence.
        --
        -- @param source       file name, string of records, or table of records
        -- @param instructions table of settings and {pattern, action} rules
        -- @param output       optional name of an existing file to write to
        function awk(source,instructions,output)

            -- Check for instructions table
            if type(instructions)~="table" then
                print("Error: Instructions need to be in a table")
                return
            end

            -- Check for special table entries or apply defaults.
            -- The default RS lists only the newline characters: a "?" in it
            -- would also split records on literal question marks, because
            -- string.split() places the separator inside a character class.
            local RS  = instructions.RS or "\r\n"
            local FS  = instructions.FS or "%s"
            local ORS = instructions.ORS or "\n"
            local OFS = instructions.OFS or " "
            local FN  = instructions.FN or {}   -- field names e.g. CSV data
            local BEGIN = assert(loadstring(instructions.BEGIN or "return "))
            local END   = assert(loadstring(instructions.END or "return "))

            -- Resolve the source into a table of records
            local FILENAME = "Lua string"
            if io.exists(source) then
                FILENAME = source
                -- Load the whole file; it is split with RS below
                local f = assert(io.open(source, "r"))
                source = f:read("*all")
                f:close()
            end
            if type(source)=="string" then
                local records = string.split(source,RS)
                -- Guard against a splitter that returns a non-table when it
                -- finds no records
                source = type(records)=="table" and records or {}
            elseif type(source)~="table" then
                print("Error: Source needs to be a string, table or file name")
                return
            end
            -- A table source is used as given: assumes RS is already applied

            -- Set up the sandbox for pattern and action functions
            local awkENV = {
                ORS      = ORS,
                OFS      = OFS,
                RS       = RS,
                FS       = FS,
                FN       = FN,
                FILENAME = FILENAME,
                output   = output,
                print    = print,
                string   = string,
                math     = math,
                os       = os,
                io       = io,
                table    = table,
            }

            -- Compile every rule once, up front, instead of once per record
            local rules = {}
            for i,v in ipairs(instructions) do
                local pattern = v[1]
                if pattern=="" then
                    pattern = "true"
                end
                local action = v[2]
                if action=="" then
                    action = "print(table.concat(field,OFS))"
                end

                local test, perr = loadstring("return "..pattern)
                if not test then
                    error("Could not load pattern: "..pattern.." ("..perr..")", 0)
                end

                -- Actions are statements; fall back to the expression form so
                -- a bare expression is still accepted
                local act, aerr = loadstring(action)
                if not act then
                    act = loadstring("return "..action)
                end
                if not act then
                    error("Could not load action: "..action.." ("..aerr..")", 0)
                end

                setfenv(test,awkENV)
                setfenv(act,awkENV)
                rules[i] = {test = test, act = act}
            end

            -- Check output: redirect the default output to the named file
            local out_file, prev_out
            if output then
                if io.exists(output) then
                    out_file = assert(io.open(output, "w"))
                    prev_out = io.output()
                    io.output(out_file)
                else
                    print("Can't find the output file: "..tostring(output))
                    return
                end
            end

            -- Sandbox print: tab-joined arguments written with io.write
            local function join_args(...)
                local parts = {}
                for i = 1, select("#", ...) do
                    parts[i] = tostring((select(i, ...)))
                end
                return table.concat(parts,"\t")
            end
            local function print_record(...)
                io.write(join_args(...)..ORS)
            end
            local function print_last(...)
                io.write(join_args(...))
            end

            local function process()
                -- Call the BEGIN function
                setfenv(BEGIN,awkENV)
                BEGIN()

                -- Process the source records
                local last = #source
                for FNR,line in ipairs(source) do
                    -- Process the record into a table of fields
                    local field = string.split(line,FS)
                    if type(field)~="table" then
                        field = {}
                    end
                    local joined = table.concat(field,OFS)

                    -- Add to the sandboxed ENV
                    awkENV.FNR = FNR
                    awkENV.NF = #field
                    awkENV.field = field
                    awkENV.line = line

                    -- Don't append ORS on the last source record
                    if #rules>0 then
                        awkENV.print = (FNR==last) and print_last or print_record
                    end

                    for _,rule in ipairs(rules) do
                        -- The pattern tests the record as read ...
                        awkENV.line = line
                        local matched = rule.test()
                        -- ... the action sees it with FS replaced by OFS
                        awkENV.line = joined
                        if matched then
                            rule.act()
                        end
                    end
                end

                -- Call the END function
                setfenv(END,awkENV)
                END()
            end

            -- Always restore the previous default output, even on error
            local ok, err = pcall(process)
            if out_file then
                io.output(prev_out)
                out_file:close()
            end
            if not ok then
                error(err, 0)
            end

        end

I would be grateful for some advice about how to manage the ORS. You
will see that for the last source line I avoid appending the ORS. However,
if the last line doesn't generate any output I can't get back the previously
output ORS!

For a generalized approach I would also value some advice about creating
an iterator to process a line at a time, while preserving the ability to
send a string or table or file name.

Kind Regards

Gavin