Scite Unicode Input

lua-users home
wiki

This script replaces the hexadecimal unicode codepoint before the cursor with its utf-8 encoding. If the script is hooked up to the Ctrl+U keyboard combination, then typing 2200 followed by Ctrl+U replaces 2200 with ∀.

This script is also provided as a GPLv2 project on sourceforge: http://sourceforge.net/projects/emitunicodeinscite/

Enjoy!


-- DESCRIPTION:
-- 
-- This lua script adds utf8 unicode input, to the scite text editor.
-- 
-- The scite text editor should be set to use the UTF-8 encoding,
-- because this script inserts utf8 bytes into the text buffer of the
-- scite editor. Select File->Encoding->UTF-8 from the
-- menu bar of scite.
-- 
-- For example, you can type 2200 followed by CTRL+U,
-- and 2200 is replaced with ∀ (U+2200) in the scite editor.
-- 
-- ______________________________________________________________________________
-- 
-- INSTALL:
-- 
-- To have scite run this script each time you press Ctrl+U, add the following
-- lines to your ~/SciTEUser.properties file, where ~ is your home directory.
-- FILE ~/SciTEUser.properties:
--[[
ext.lua.startup.script=$(SciteUserHome)/emitUtf8UnicodeIntoTheSciteEditor.lua
command.name.12.*=Emit UTF8 Unicode
command.subsystem.12.*=3
command.12.*=emitUtf8UnicodeIntoTheSciteEditor
command.mode.12.*=savebefore:no
command.shortcut.12.*=Ctrl+U
--]]
-- ______________________________________________________________________________
-- THE LUA CODE:
-- 
-- Next is the definition of the lua function that is called by scite
-- when CTRL+U is pressed, to replace the hexadecimal unicode codepoint
-- with the utf8 encoding of that codepoint.
-- ______________________________________________________________________________


-- Single-byte utf8 form, intended for codepoints 0 <= u <= 0x7f.
-- 
-- The value is reduced modulo 0x80 and emitted as one byte.
-- 
-- @param unicodeValue the unicode codepoint u
-- 
-- @return a one-byte string holding the utf8 encoding of u
function case1UnicodeToUtf8(unicodeValue)
  -- Only the low seven bits are kept.
  local onlyByte = unicodeValue % 0x80
  return string.char(onlyByte)
end

-- ______________________________________________________________________________
-- Two-byte utf8 form, intended for codepoints 0x80 <= u <= 0x7ff.
-- 
-- The low six bits go into the trailing byte (prefix 0x80); the next
-- five bits go into the leading byte (prefix 0xc0).
-- 
-- @param unicodeValue the unicode codepoint u
-- 
-- @return a two-byte string holding the utf8 encoding of u
function case2UnicodeToUtf8(unicodeValue)
  local lowSix = unicodeValue % 0x40
  local nextFive = math.floor(unicodeValue / 0x40) % 0x20
  return string.char(0xc0 + nextFive, 0x80 + lowSix)
end

-- ______________________________________________________________________________
-- Three-byte utf8 form, intended for codepoints 0x800 <= u <= 0xffff.
-- 
-- Two continuation bytes (prefix 0x80, six bits each) are peeled off
-- from the low end; the remaining four bits go into the leading byte
-- (prefix 0xe0).
-- 
-- @param unicodeValue the unicode codepoint u
-- 
-- @return a three-byte string holding the utf8 encoding of u
function case3UnicodeToUtf8(unicodeValue)
  local remaining = unicodeValue
  local continuation = {}
  -- Fill the continuation bytes from last to first.
  for slot = 2, 1, -1 do
    continuation[slot] = 0x80 + (remaining % 0x40)
    remaining = math.floor(remaining / 0x40)
  end
  local lead = 0xe0 + (remaining % 0x10)
  return string.char(lead, continuation[1], continuation[2])
end

-- ______________________________________________________________________________
-- Four-byte utf8 form, intended for codepoints 0x10000 <= u <= 0x10ffff.
-- 
-- Three continuation bytes (prefix 0x80, six bits each) are peeled off
-- from the low end; the remaining three bits go into the leading byte
-- (prefix 0xf0).
-- 
-- @param unicodeValue the unicode codepoint u
-- 
-- @return a four-byte string holding the utf8 encoding of u
function case4UnicodeToUtf8(unicodeValue)
  local remaining = unicodeValue
  local continuation = {}
  -- Fill the continuation bytes from last to first.
  for slot = 3, 1, -1 do
    continuation[slot] = 0x80 + (remaining % 0x40)
    remaining = math.floor(remaining / 0x40)
  end
  local lead = 0xf0 + (remaining % 0x8)
  return string.char(lead, continuation[1], continuation[2], continuation[3])
end

-- ______________________________________________________________________________
-- Converts a unicode integer value, into a utf8 string value.
-- 
-- The unicode integer value must be an integer in the range
-- 0 .. 0x10ffff, excluding the surrogate range 0xd800 .. 0xdfff
-- (surrogates are not valid in utf8).
-- 
-- The utf8 string value is a string that is a sequence of
-- 8 bits characters that give the utf8 encoding of the
-- unicode codepoint given by the unicode integer value.
-- 
-- @param unicodeValue the unicode integer value;
-- a unicode codepoint
-- 
-- @return the utf8 encoding of the unicode codepoint
-- provided by the unicodeValue input argument, or nil when
-- unicodeValue is not a number, is not an integer, is out of
-- range, or is a surrogate
function unicodeToUtf8(unicodeValue)
  local u = unicodeValue
  -- Reject anything that is not an integral number: the encoders
  -- below assume whole values.
  if (type(u) ~= "number") or (u ~= math.floor(u))
  then
    return nil
  end
  if ((u < 0x0) or (u > 0x10ffff))
  then
    return nil
  end
  -- Surrogate codepoints must never be encoded in utf8.
  if ((0xd800 <= u) and (u <= 0xdfff))
  then
    return nil
  end
  -- The ranges are tested in ascending order, so each test only needs
  -- its upper bound.
  if (u <= 0x7f)
  then
    return case1UnicodeToUtf8(u)
  end
  if (u <= 0x7ff)
  then
    return case2UnicodeToUtf8(u)
  end
  if (u <= 0xffff)
  then
    return case3UnicodeToUtf8(u)
  end
  return case4UnicodeToUtf8(u)
end

-- ______________________________________________________________________________
-- Reads the character code at position i from the global Scite
-- `editor` object (editor.CharAt[i]) and interprets it as a single
-- hexadecimal digit.
-- 
-- Codes below 0 or above 0xff yield nil without further inspection.
-- 
-- @param i position in the Scite Editor
-- @return the numeric value (0..15) of the hex digit at position i,
-- or nil when the character there is not a hex digit
function peekHexdigit(i)
  local code = editor.CharAt[i]
  if (code < 0) or (code > 0xff)
  then
    return nil
  end
  -- tonumber with base 16 gives nil for anything that is not a hex digit.
  local digit = tonumber(string.char(code), 0x10)
  return digit
end

-- ______________________________________________________________________________
-- Reads up to 5 characters immediately to the left of the cursor
-- in the Scite Editor.
-- Takes the longest suffix of that sequence that consists only of hex
-- digits, interprets it as a hex number, and replaces that suffix in
-- the editor with the utf8 encoding of the number.
-- 
-- NOTE(review): with at most 5 digits the largest reachable value is
-- 0xfffff, so codepoints 0x100000 .. 0x10ffff cannot be entered.
-- 
-- @return true when a suffix of length 1..5 existed and was replaced
-- with the utf8 encoding of the number it represented
-- 
-- nil when the buffer is empty, the cursor is at position 0,
-- no hex digit precedes the cursor, or unicodeToUtf8 returned nil
function emitUtf8Unicode()
  local e = editor
  local cursor = e.CurrentPos
  local maxlen = 5
  if ((0 == e.TextLength) or (1 > cursor))
  then
    return nil -- Nothing to the left of the cursor.
  end
  local digitCount = 0
  local u = 0
  local thePower = 1
  -- Walk leftwards from the cursor; the first digit met is the least
  -- significant one, so each further digit is worth 16 times more.
  while ((digitCount < maxlen) and (0 <= (cursor - digitCount - 1)))
  do
    local hexDigit = peekHexdigit(cursor - digitCount - 1)
    if (nil == hexDigit)
    then
      break -- out of the while loop
    end
    u = (u + (thePower * hexDigit))
    thePower = (0x10 * thePower)
    digitCount = digitCount + 1
  end
  if (0 == digitCount)
  then
    return nil -- Failure. No unicode
  end
  -- Kept local: a global named `utf8` would overwrite the standard
  -- utf8 library on Lua 5.3 and later.
  local encoded = unicodeToUtf8(u)
  if (nil == encoded)
  then
    return nil -- Failure. Unicode to utf8 conversion failed.
  end
  e:SetSel(cursor - digitCount, cursor)
  e:ReplaceSel(encoded)
  return true -- Success.
end

-- ______________________________________________________________________________
-- Entry point bound to the Scite command: runs emitUtf8Unicode to
-- replace the hex codepoint at the left of the cursor with its utf8
-- encoding.
-- 
-- When emitUtf8Unicode reports no success (a false or nil result),
-- a failure message is printed.
-- 
function emitUtf8UnicodeIntoTheSciteEditor()
  if emitUtf8Unicode()
  then
    return
  end
  print("Failed to encode unicode into text editor.")
end

-- ______________________________________________________________________________
-- 
-- Following web pages were useful in writing the lua scite script.
-- 
-- http://lua-users.org/wiki/UsingLuaWithScite
-- http://www.scintilla.org/PaneAPI.html
-- http://www.lua.org/manual/5.1/manual.html#pdf-tonumber
-- https://en.wikipedia.org/wiki/UTF-8
-- 
-- http://lua-users.org/lists/lua-l/2007-08/msg00276.html
-- http://keplerproject.github.io/luadoc/ 


RecentChanges · preferences
edit · history
Last edited August 8, 2013 4:26 pm GMT (diff)