lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


In a private thread about lua-users.org, John Belmonte, Nick Trout and
myself were talking about automatically doing syntax highlighting of Lua files.

I want to make a Lua syntax highlighter for Lua code, mostly for the
pleasure of the challenge.
I take the SciTE (www.Scintilla.org) generated HTML as a base for ideas. It
uses SPAN tags to mark the elements to highlight, using CSS to define fonts
and colors. This may exclude some old browsers, but it is flexible (the user
can choose a CSS file to their liking or define their own) and probably
smaller than using FONT tags.

Anyway, I played a bit with gsub, as a proof of concept.
Since lexing is heavily context dependent, gsub isn't really suited for this
task, since it doesn't see if a word is inside a string or a comment, for
example.
I will probably take the byte-by-byte approach used in the Scintilla lexers.
It may not even be slower, since gsub must parse the whole file several times.
Speed isn't a major concern on an individual PC, but it may be if the script
is used as a CGI.

Anyway, I made a prototype that produces decent output, so if you want to
play with it, it is yours:

-- Parse a Lua file and output a primitive syntax highlighted HTML file
-- by Philippe Lhoste <PhiLho@GMX.net> http://jove.prohosting.com/~philho/
-- v. 1.0 -- 2001/08/06

-- Input and output file names come from the first two command-line
-- arguments; "-" stands for standard input / standard output and is the
-- fallback when an argument (or the whole arg table) is missing.
filenameIn  = "-"
filenameOut = "-"
if arg then
  filenameIn  = arg[1] or "-"
  filenameOut = arg[2] or "-"
end

-- HTML replacement for one tab character: four non-breaking spaces
tabulation = strrep("&nbsp;", 4)

-- Set of operator / punctuation tokens (token text -> 1), grouped by role.
tokens =
{
  -- Comparison and assignment
  ["=="]=1, ["~="]=1, ["<="]=1, [">="]=1, ["<"]=1, [">"]=1, ["="]=1,
  -- Arithmetic
  ["+"]=1, ["-"]=1, ["*"]=1, ["/"]=1, ["^"]=1, ["%"]=1,
  -- Brackets
  ["("]=1, [")"]=1, ["{"]=1, ["}"]=1, ["["]=1, ["]"]=1,
  -- Separators, field access, concatenation and varargs
  [";"]=1, [","]=1, [":"]=1, ["."]=1, [".."]=1, ["..."]=1,
}
-- Set of words rendered with the keyword style (word -> 1).
-- Besides the reserved words it also lists "arg" and "self", which are
-- highlighted the same way.
keywords =
{
  -- Logical operators and the nil value
  ["and"]=1, ["not"]=1, ["or"]=1, ["nil"]=1,
  -- Declarations
  ["function"]=1, ["global"]=1, ["local"]=1,
  -- Conditionals
  ["if"]=1, ["then"]=1, ["else"]=1, ["elseif"]=1,
  -- Loops and blocks
  ["do"]=1, ["end"]=1, ["for"]=1, ["in"]=1, ["while"]=1,
  ["repeat"]=1, ["until"]=1, ["break"]=1, ["return"]=1,
  -- Implicit variables
  ["arg"]=1, ["self"]=1,
}
-- Set of predefined global variable names rendered with the
-- "internal variable" style (name -> 1).
internalVariables =
{
  -- Error / alert hooks and interpreter prompt
  ["_ALERT"]=1, ["_ERRORMESSAGE"]=1, ["_PROMPT"]=1,
  -- Current and standard file handles
  ["_INPUT"]=1, ["_OUTPUT"]=1,
  ["_STDIN"]=1, ["_STDOUT"]=1, ["_STDERR"]=1,
  -- Math constant
  ["PI"]=1,
}
-- Set of standard library function names rendered with the
-- "library function" style (name -> 1), grouped like the reference manual.
libraryFunctions =
{
  -- Basic Functions
  ["assert"]=1, ["call"]=1, ["collectgarbage"]=1, ["copytagmethods"]=1,
  ["dofile"]=1, ["dostring"]=1, ["error"]=1, ["foreach"]=1, ["foreachi"]=1,
  ["gcinfo"]=1, ["getglobal"]=1, ["getn"]=1, ["gettagmethod"]=1,
  ["globals"]=1, ["newtag"]=1, ["next"]=1, ["print"]=1, ["rawget"]=1,
  ["rawset"]=1, ["setglobal"]=1, ["settagmethod"]=1, ["sort"]=1, ["tag"]=1,
  ["tonumber"]=1, ["tostring"]=1, ["tinsert"]=1, ["tremove"]=1, ["type"]=1,
  -- String Manipulation
  ["strbyte"]=1, ["strchar"]=1, ["strfind"]=1, ["strlen"]=1, ["strlower"]=1,
  ["strrep"]=1, ["strsub"]=1, ["strupper"]=1, ["format"]=1, ["gsub"]=1,
  -- Mathematical Functions
  ["abs"]=1, ["acos"]=1, ["asin"]=1, ["atan"]=1, ["atan2"]=1,
  ["ceil"]=1, ["cos"]=1, ["deg"]=1, ["exp"]=1, ["floor"]=1,
  ["log"]=1, ["log10"]=1, ["max"]=1, ["min"]=1, ["mod"]=1,
  ["rad"]=1, ["sin"]=1, ["sqrt"]=1, ["tan"]=1, ["frexp"]=1,
  ["ldexp"]=1, ["random"]=1, ["randomseed"]=1,
  -- I/O Facilities
  ["openfile"]=1, ["closefile"]=1, ["readfrom"]=1, ["writeto"]=1,
  ["appendto"]=1, ["remove"]=1, ["rename"]=1, ["flush"]=1, ["seek"]=1,
  ["tmpname"]=1, ["read"]=1, ["write"]=1,
  -- System Facilities
  ["clock"]=1, ["date"]=1, ["execute"]=1, ["exit"]=1, ["getenv"]=1,
  ["setlocale"]=1,
}

-- Convert Lua source text to a primitive syntax-highlighted HTML fragment
-- and write it to the current output.
--   fileContent: the whole source as a single string, or nil.
-- Returns nil (writing nothing) when fileContent is nil, 0 otherwise.
-- Words are matched without any lexical context, so a keyword or library
-- name appearing inside a string or a comment is highlighted as well.
function ParseFile(fileContent)
  if not fileContent then
    return nil
  end
  -- Escape special HTML characters
  fileContent = gsub(fileContent, "&", "&amp;")  -- Must be first!
  fileContent = gsub(fileContent, "<", "&lt;")
  fileContent = gsub(fileContent, ">", "&gt;")
  -- Transform tabulations to fixed number of spaces
  fileContent = gsub(fileContent, "\t", tabulation)
  -- Transform 2 or more successive spaces to non-breakable /
  -- non-collapsable spaces.
  -- No need to transform single spaces, it wastes space and is less
  -- readable.
  fileContent = gsub(fileContent, " ( +)",
    function (spaces)
      -- The capture leaves out the first space of the run, hence the + 1
      return strrep("&nbsp;", strlen(spaces) + 1)
    end
  )
  -- Mark end of lines
  fileContent = gsub(fileContent, "\n", "<br>\n")
  -- Mark special words.
  -- The text already holds the entities and <br> tags inserted above; their
  -- names (amp, lt, gt, nbsp, br) are matched here too, but are returned
  -- untouched because none of them appears in the lookup tables.
  fileContent = gsub(fileContent, "([%w_]+)",
    function (w)
      if libraryFunctions[w] then
        return "<span class=LF>" .. w .. "</span>"
      elseif keywords[w] then
        return "<span class=K>" .. w .. "</span>"
      elseif internalVariables[w] then
        return "<span class=IV>" .. w .. "</span>"
      end
      -- Ordinary word: keep it as is
      return w
    end
  )
  write(fileContent)
  return 0
end

-- Read the input named by filenameIn, and write a complete HTML page with
-- the highlighted source to the output named by filenameOut.
-- "-" selects the current input (_INPUT) / the default output.
-- Returns the result of ParseFile and nil on success, or nil followed by
-- the name of the file that could not be opened.
function ProcessFile()
  local fileInHandle
  if filenameIn ~= "-" then
    -- I use openfile instead of readfrom because I plan to use binary mode
    fileInHandle = openfile(filenameIn, "rt")
    if not fileInHandle then
      return nil, filenameIn
    end
  else
    fileInHandle = _INPUT
  end
  if filenameOut ~= "-" then
    if not writeto(filenameOut) then
      -- Do not leave the input handle open when bailing out
      closefile(fileInHandle)
      return nil, filenameOut
    end
  end
  local fileContent = read(fileInHandle, "*a")  -- Read the whole file
  closefile(fileInHandle)
  -- The file name ends up inside <title>, so it needs the same HTML
  -- escaping as the file content ("&" first). Assigning to a local also
  -- drops the substitution count that gsub returns as a second value.
  local title = gsub(filenameIn, "&", "&amp;")
  title = gsub(title, "<", "&lt;")
  title = gsub(title, ">", "&gt;")
  write('<html>\n<head>\n<title>', title, '</title>\n')
  write('<meta name="GENERATOR" content="SyntaxHighlight.lua 1.0">\n')
  write('<link rel="stylesheet" type="text/css" href="Lua.css">\n')
  write('</head>\n<body>\n')
  local result = ParseFile(fileContent)
  write('</body>\n</html>\n')
  writeto()     -- Restore default writing
  return result, nil
end

-- Main: run the conversion; on failure, report the file name handed back
-- by ProcessFile.
result, fn = ProcessFile()
if not result then
  local culprit = fn or 'nil'
  print("Error in parameter: " .. culprit)
end

-------
Lua.css
-------
/* I keep the style names short, to avoid bloating the HTML file */
/* Every font-family ends with a generic family, so that the page still
   renders in a comparable font when the named one is not installed. */
BODY {
  background: #fff;
  margin: 0px;
  /* Border: External margin */
  border-width: 0px;
  /* Padding: Internal margin */
  padding: 0px;
  /* Default font (same as for span) */
  font-family: "Andale Mono", monospace;
  font-size: 8pt;
  color: #000000;
}
/* White space */
.WS
{
  color: #FF0000;
}
/* Comment */
.C
{
  font-family: Verdana, sans-serif;
  color: #8080A0;
}
/* Number */
.N
{
  color: #8000FF;
}
/* Operator (or token) */
.O
{
  color: #008040;
}
/* Keyword */
.K
{
  color: #0000A0;
  font-weight: bold;
}
/* Internal variables */
.IV
{
  color: #0000A0;
  font-style: italic;
}
/* Library functions */
.LF
{
  color: #0000A0;
}
/* Double quote string */
.DQS
{
  color: #006080;
}
/* Single quote string */
.SQS
{
  color: #0080A0;
}
/* Literal string */
.LS
{
  font-family: "Lucida Console", monospace;
  color: #006080;
  background: #E0FFE0;
}
/* End of line where string is not closed */
.ES
{
  background: #E0C0E0;
}
/* Preprocessor (obsolete in 4.0) */
.P
{
  color: #8080FF;
}
/* Default span attributes */
SPAN
{
  font-family: "Andale Mono", monospace;
  font-size: 8pt;
  color: #000000;
}

Regards.

-- 
--._.·´¯`·._.·´¯`·._.·´¯`·._.·´¯`·._.·´¯`·._.·´¯`·._.--
Philippe Lhoste (Paris -- France)
Professional programmer and amateur artist
http://jove.prohosting.com/~philho/
--´¯`·._.·´¯`·._.·´¯`·._.·´¯`·._.·´¯`·._.·´¯`·._.·´¯`--

Sent through GMX FreeMail - http://www.gmx.net