lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


Happy new year to everyone!

I'll try to write simpler code in 2005... so here's a new version of my tokens implementation for APLC! :-)

If no one complains, I'll submit it shortly; it seems to function OK.

--
Wim
-- no filename on the command line: print a short usage text and stop
if not arg[1] then
	local script = arg[0]
	print("usage: " .. script .. " filename\n\ncount the number of tokens in a Lua script file.")
	return
end

-- running count of the tokens recognised so far
local tokens = 0

-- the lexer state functions and the table of token patterns refer to each
-- other, so all of them are declared here and filled in further down
local patterns
local main, num, multi, quot

-- Main lexer state: recognise the token that starts at `index` in `str`.
--   str    the complete source text
--   index  position at which lexing continues
-- Whitespace in front of the token is skipped.  The entries of `patterns`
-- are tried in order; the first one that matches bumps the token count
-- (every entry counts, the comment patterns included) and lexing continues,
-- by tail call, in the state function stored with that entry.
-- Returns nothing once the end of `str` has been reached.
-- Raises an error when no pattern matches at the current position.
function main(str, index)
	-- step over whitespace in front of the next token
	local _, ws_end = string.find(str, "^%s+", index)
	if ws_end then
		index = ws_end + 1
	end

	-- end of input: nothing left to lex
	if index > string.len(str) then
		return
	end

	for _, entry in ipairs(patterns) do
		local pattern, next_state = entry[1], entry[2]
		local _, tok_end, capture = string.find(str, pattern, index)
		if tok_end then
			-- a match: count it and hand over to the entry's state function,
			-- passing along the pattern's first capture (if any)
			tokens = tokens + 1
			return next_state(str, tok_end + 1, capture)
		end
	end

	error("unrecognised token: " .. string.sub(str, index, index + 10) .. "...")
end

-- Number state: entered right after the mantissa of a numeric literal.
--   str    the complete source text
--   index  position just past the mantissa
-- Consumes an optional exponent part (e or E, optional sign, digits) and
-- then continues in the main state.
function num(str, index)
	local _, exp_end = string.find(str, "^[eE][%+%-]?[0-9]+", index)
	if exp_end then
		-- an exponent is present: resume behind it
		index = exp_end + 1
	end
	return main(str, index)
end

-- Long bracket state: entered right after an opening [[ (string or comment).
--   str    the complete source text
--   index  position just past the opening [[
-- Scans for further [[ and ]] pairs while tracking the nesting depth; when
-- the ]] that closes the outermost level is found, lexing continues in the
-- main state right behind it.
-- Raises "]] expected" when the text ends before the brackets are balanced.
function multi(str, index)
	local depth = 1
	repeat
		-- the pattern matches a doubled bracket, either [[ or ]]
		local _, last, bracket = string.find(str, "([%[%]])%1", index)
		if not bracket then
			error "]] expected"
		end
		if bracket == "[" then
			depth = depth + 1
		else
			depth = depth - 1
		end
		index = last + 1
	until depth == 0
	return main(str, index)
end

-- Quoted string state: entered right after an opening quote character.
--   str    the complete source text
--   index  position just past the opening quote
--   char   the quote character that opened the string (' or ")
-- Scans forward to the matching closing quote and continues in the main
-- state behind it.
-- Raises an error when a newline or the end of the text is reached first.
function quot(str, index, char)
	-- characters of interest: backslash, newline and both quote characters
	local special = "([\\\n'\"])"

	local _, pos, hit = string.find(str, special, index)
	while hit and hit ~= "\n" and hit ~= char do
		if hit == "\\" then
			-- a backslash escapes the next character: skip both
			index = pos + 2
		else
			-- the other kind of quote is ordinary string content
			index = pos + 1
		end
		_, pos, hit = string.find(str, special, index)
	end

	if not hit or hit == "\n" then
		error("partial string: " .. char .. " expected")
	end
	return main(str, pos + 1)
end

-- Token patterns of the main lexer state.  Each entry is
--   { pattern, state }
-- where `pattern` is anchored at the current position and `state` is the
-- lexer state function that takes over behind the match.  The main state
-- tries the entries in order and takes the first match, so longer tokens
-- must come before their prefixes (e.g. "--[[" before "--" before "-").
patterns = {
	{"^[_%a][_%w]*", main},                     -- names and keywords
	{"^%.%.%.?", main},                         -- .. and ...
	{"^[~=<>]=", main},                         -- ~= == <= >=
	{"^[0-9]+%.?[0-9]*", num},                  -- number starting with a digit
	{"^%.[0-9]+", num},                         -- number starting with a dot
	{"^%-%-%[%[", multi},                       -- multi-line comment
	{"^%[%[", multi},                           -- multi-line string
	-- A line comment runs up to, but not including, the end of the line.
	-- Not requiring the newline itself means a comment on an unterminated
	-- last line is recognised too; the newline, when present, is skipped
	-- as whitespace by the main state.
	{"^%-%-[^\n]*", main},
	{"^[~%^%*%(%)%-%+={}%[%]:;<>,%./]", main},  -- single character tokens
	{"^(['\"])", quot},                         -- opening quote of a string
}

-- Lex a file as a single string.
--   filename  path of the Lua script whose tokens are to be counted
-- The file is read through its own handle, which is closed again before
-- lexing starts, so the process-wide default input is left untouched and no
-- handle stays open.
-- Raises an error when the file cannot be opened, and passes on any error
-- raised by the lexer.
local function lex(filename)
	local file = assert(io.open(filename, "r"))
	local source = file:read "*a"
	file:close()
	main(source, 1)
end

-- run the lexer in protected mode, then print either the token count or
-- the error message it stopped with
local ok, message = pcall(lex, arg[1])
if ok then
	message = tokens .. " tokens"
end
print(message)