String Recipes
This is intended to be just short snippets, not fully developed functions which would belong in a library. Please feel free to add your favourites.
-- Upper-case the first character, but only when the string starts with a
-- lower-case letter. Assigning to a single variable keeps only gsub's first
-- result (the new string) and drops the substitution count.
str = str:gsub("^%l", string.upper)
-- Upper-case the first letter found anywhere in the string; the final
-- argument 1 limits gsub to a single substitution.
str = str:gsub("%a", string.upper, 1)
-- Lower-case every run that starts with "<" and extends up to the next
-- whitespace character or ">" (for example the names of HTML tags).
str = str:gsub("<[^%s>]+", string.lower)
--- Title-case one word.
-- @param first the first letter of the word (first capture of the pattern)
-- @param rest  the remaining characters of the word (second capture)
-- @return first in upper case followed by rest in lower case
local function tchelper(first, rest)
  return first:upper() .. rest:lower()
end

-- Add extra characters to the pattern if you need to. _ and ' are found in the
-- middle of identifiers and English words.
-- We must also put %w_' into [%w_'] to make it handle normal stuff and extra
-- stuff the same.
-- This also turns hex numbers into, e.g., 0Xa7d4
str = str:gsub("(%a)([%w_']*)", tchelper)
Example:
> str = "foo"
> str = str:gsub("^%l", string.upper)
> =str
Foo
> str = "_foo"
> str = str:gsub("^%l", string.upper)
> =str
_foo
> str = str:gsub("%a", string.upper, 1)
> =str
_Foo
See also SplitJoin, MakingLuaLikePhp (explode).
-- Each loop below visits successive matches of a pattern in str; put the
-- per-item work where the "..." placeholder comment is.

-- words and numbers
for word in str:gmatch("%w+") do
  -- ... use word ...
end

-- identifiers in typical programming languages
for id in str:gmatch("[_%a][_%w]*") do
  -- ... use id ...
end

-- whitespace-separated components (without handling quotes)
for id in str:gmatch("%S+") do
  -- ... use id ...
end

-- lines: runs of characters other than CR and LF (empty lines produce no match)
for line in str:gmatch("[^\r\n]+") do
  -- ... use line ...
end
Any of the above can also be done as a function iterator:
-- Call func with each word in a string. gsub is used purely as an iterator
-- here: its results (the rewritten string and the match count) are discarded.
str:gsub("%w+", func)
If the number of fields is known:
-- Match nsep consecutive sep-terminated fields and return each field as a
-- separate capture. The pattern is one "([^sep]*)sep" group repeated nsep
-- times; sep is spliced in as-is, both inside the character class and as the
-- terminator.
str:match( ("([^"..sep.."]*)"..sep):rep(nsep) )
If the number of fields is not known:
-- Build the capture pattern from str itself: gsub rewrites every
-- "field + sep" run into a capturing group followed by sep, and str is then
-- matched against that generated pattern. The captures are collected into the
-- table fields. The inner parentheses around gsub drop its second result (the
-- substitution count) so that only the pattern is passed to match.
fields = {str:match((str:gsub("[^"..sep.."]*"..sep, "([^"..sep.."]*)"..sep)))}
Some might call the above a hack :) sep will need to be escaped if it is a pattern metacharacter,
and you'd probably be better off precomputing and/or memoizing the patterns.
Yet another method:
-- Append every sep-terminated field of str to fields. gsub serves only as an
-- iterator: the callback returns nothing, so no replacement takes place and
-- gsub's results are discarded. Text after the last sep is not followed by
-- sep and is therefore not collected.
fields = {}
str:gsub("([^"..sep.."]*)"..sep, function(c) table.insert(fields, c) end)
This is intended for strings without newlines in them (i.e. after reflowing the text and breaking it into paragraphs.)
--- Word-wrap a single paragraph.
-- Walks over every "whitespace + word" pair in str and turns the whitespace
-- into a line break whenever the word would end past the limit.
-- @param str     the text to wrap
-- @param limit   maximum line width (defaults to 72)
-- @param indent  string placed at the start of every continuation line
--                (defaults to "")
-- @param indent1 string placed at the start of the first line
--                (defaults to indent)
-- @return the wrapped text as a single string
function wrap(str, limit, indent, indent1)
  if not indent then indent = "" end
  if not indent1 then indent1 = indent end
  if not limit then limit = 72 end

  -- Index in str where the current output line begins, shifted left by the
  -- width of that line's indent so the indent counts towards the limit.
  local line_start = 1 - #indent1

  local function break_if_needed(_, word_start, word, word_end)
    if word_end - line_start <= limit then
      return nil -- keep the original whitespace and word
    end
    line_start = word_start - #indent
    return "\n" .. indent .. word
  end

  -- Assigning to one local keeps only gsub's first result (the new string).
  local wrapped = str:gsub("(%s+)()(%S+)()", break_if_needed)
  return indent1 .. wrapped
end
This builds on wrap to do a quick-and-dirty reflow: paragraphs are defined as lines starting with a space, or having a blank line between them:
--- Quick-and-dirty reflow built on wrap.
-- Three passes over str:
--   1. any whitespace run that contains a newline followed by at least one
--      more whitespace character is replaced by a single newline;
--   2. every remaining run of two or more whitespace characters becomes one
--      space;
--   3. each newline-free stretch is passed through wrap.
-- @param str     the text to reflow
-- @param limit   forwarded to wrap
-- @param indent  forwarded to wrap
-- @param indent1 forwarded to wrap
-- @return the reflowed text as a single string
function reflow(str, limit, indent, indent1)
  local function wrap_stretch(stretch)
    return wrap(stretch, limit, indent, indent1)
  end

  -- Each assignment keeps only gsub's first result (the string).
  local text = str:gsub("%s*\n%s+", "\n")
  text = text:gsub("%s%s+", " ")
  text = text:gsub("[^\n]+", wrap_stretch)
  return text
end
-- True when deleting every match of pat from str leaves the empty string,
-- i.e. str consists solely of matches of pat.
"" == str:gsub(pat, "")
-- True when deleting every match of pat leaves nothing but whitespace
-- (no non-space character can be found in the remainder).
not str:gsub(pat, ""):find"%S"
Example: split a string into words, or return nil
--- Split a string into words, or fail if anything but words and whitespace
-- is present.
-- @param str the string to examine
-- @return a table holding every %w+ run of str in order when the rest of the
--         string is only whitespace; nothing otherwise
function justWords(str)
  local words = {}

  -- Strip each word out of the string while recording it.
  local remainder = str:gsub("%w+", function(word)
    words[#words + 1] = word
    return ""
  end)

  -- Anything non-blank that survived was not a word: give up.
  if remainder:find("%S") then
    return
  end
  return words
end
-- True when str consists solely of matches of pat and ok(s) is truthy for
-- every one of them: accepted matches are deleted, rejected ones are replaced
-- by "*", so any rejection (or any unmatched text) leaves a non-empty result.
-- The and/or idiom is safe here because "" is a truthy value in Lua.
"" == str:gsub(pat, function(s) return ok(s) and "" or "*" end)
This splits a string using the pattern sep. It calls func for each segment. When func is called, the first argument is the segment and the remaining arguments are the captures from sep, if any. On the last segment, func will be called with just one argument. (This could be used as a flag, or you could use two different functions). sep must not match the empty string. Enhancements are left as an exercise :)
-- gsub calls func once for every segment that is followed by a match of sep,
-- passing the segment, the matched separator and any captures inside sep.
-- func is then called once more, directly, with the string gsub returns; the
-- extra pair of parentheses drops gsub's second result (the match count), so
-- this final call receives exactly one argument.
func((str:gsub("(.-)("..sep..")", func)))
Example: Split a string into lines separated by either DOS or Unix line endings, creating a table out of the results.
--- Split a string into lines separated by DOS (CR LF) or Unix (LF) endings.
-- @param str the text to split
-- @return a table of lines without their terminators; the text after the
--         last line ending is always appended as the final entry, even when
--         it is empty
function lines(str)
  local result = {}

  local function collect(line)
    result[#result + 1] = line
    return ""
  end

  -- gsub hands every terminated line to collect and removes it from the
  -- string; what is left over is the unterminated tail.
  local tail = str:gsub("(.-)\r?\n", collect)
  collect(tail)
  return result
end
-- Compatibility: Lua-5.1
--- Split a string at every match of a Lua pattern.
-- An empty field at the very start of the string is dropped, as is an empty
-- remainder after the last separator; empty fields elsewhere are kept.
-- @param str the string to split
-- @param pat Lua pattern describing the separator; it must not be able to
--            match the empty string
-- @return a table holding the fields in order
-- Raises an error if pat matches an empty string, because the scan could
-- then never advance.
function split(str, pat)
  local t = {}  -- NOTE: use {n = 0} in Lua-5.0
  local fpat = "(.-)" .. pat
  local last_end = 1
  local s, e, cap = str:find(fpat, 1)
  while s do
    -- A match that ends before the search position consumed nothing; without
    -- this guard the loop would repeat the same match forever.
    if e < last_end then
      error("split: pattern '" .. pat .. "' matched an empty string", 2)
    end
    if s ~= 1 or cap ~= "" then
      table.insert(t, cap)
    end
    last_end = e + 1
    s, e, cap = str:find(fpat, last_end)
  end
  -- Whatever follows the last separator is the final field.
  if last_end <= #str then
    cap = str:sub(last_end)
    table.insert(t, cap)
  end
  return t
end
Example: Split a file path string into components.
--- Split a file path into its components.
-- Any run of forward slashes and/or backslashes counts as one separator.
-- @param str the path to split
-- @return whatever split returns for that separator pattern
function split_path(str)
  local separators = '[\\/]+'
  return split(str, separators)
end

parts = split_path("/usr/local/bin")
--> {'usr','local','bin'}
Test Cases:
-- Expected results of split for a range of inputs.
split('foo/bar/baz/test','/')            --> {'foo','bar','baz','test'}
split('/foo/bar/baz/test','/')           --> {'foo','bar','baz','test'}
split('/foo/bar/baz/test/','/')          --> {'foo','bar','baz','test'}
split('/foo/bar//baz/test///','/')       --> {'foo','bar','','baz','test','',''}
split('//foo////bar/baz///test///','/+') --> {'foo','bar','baz','test'}
split('foo','/+')                        --> {'foo'}
split('','/+')                           --> {}
-- split('foo','')  -- oops! unsupported: a separator pattern that matches the
--                  -- empty string can never advance through the input
-- Compatibility: Lua-5.0 and later
--- Split a string at every match of a delimiter pattern, keeping empty fields.
-- @param str   the string to split
-- @param delim Lua pattern describing the delimiter
--              (NOTE: presumably without captures of its own, since the loop
--              reads exactly two captures per match; confirm before relying
--              on it)
-- @param maxNb maximum number of fields to return; nil or a value below 1
--              means no limit. When the limit is reached, the rest of the
--              string is discarded.
-- @return a table of fields; { str } when delim does not occur in str
function Split(str, delim, maxNb)
  -- Eliminate bad cases...
  if string.find(str, delim) == nil then
    return { str }
  end
  if maxNb == nil or maxNb < 1 then
    maxNb = 0 -- No limit
  end

  -- string.gfind (Lua 5.0) was renamed string.gmatch in Lua 5.1; use
  -- whichever one this interpreter provides.
  local gmatch = string.gmatch or string.gfind

  local result = {}
  -- The trailing "()" captures the position just past each delimiter.
  local pat = "(.-)" .. delim .. "()"
  local nb = 0
  local lastPos
  for part, pos in gmatch(str, pat) do
    nb = nb + 1
    result[nb] = part
    lastPos = pos
    if nb == maxNb then break end
  end
  -- Handle the last field
  if nb ~= maxNb then
    result[nb + 1] = string.sub(str, lastPos)
  end
  return result
end
Test Cases:
-- Expected results, as displayed by ShowSplit, for a range of inputs.
ShowSplit("abc", '')
--> { [1] = "", [2] = "", [3] = "", [4] = "", [5] = "" }
-- No infinite loop... but garbage in, garbage out...
ShowSplit("", ',')
--> { [1] = "" }
ShowSplit("abc", ',')
--> { [1] = "abc" }
ShowSplit("a,b,c", ',')
--> { [1] = "a", [2] = "b", [3] = "c" }
ShowSplit("a,b,c,", ',')
--> { [1] = "a", [2] = "b", [3] = "c", [4] = "" }
ShowSplit(",a,b,c,", ',')
--> { [1] = "", [2] = "a", [3] = "b", [4] = "c", [5] = "" }
ShowSplit("x,,,y", ',')
--> { [1] = "x", [2] = "", [3] = "", [4] = "y" }
ShowSplit(",,,", ',')
--> { [1] = "", [2] = "", [3] = "", [4] = "" }
ShowSplit("x!yy!zzz!@", '!', 4)
--> { [1] = "x", [2] = "yy", [3] = "zzz", [4] = "@" }
ShowSplit("x!yy!zzz!@", '!', 3)
--> { [1] = "x", [2] = "yy", [3] = "zzz" }
ShowSplit("x!yy!zzz!@", '!', 1)
--> { [1] = "x" }
ShowSplit("a:b:i:p:u:random:garbage", ":", 5)
--> { [1] = "a", [2] = "b", [3] = "i", [4] = "p", [5] = "u" }
ShowSplit("hr , br ; p ,span, div", '%s*[;,]%s*')
--> { [1] = "hr", [2] = "br", [3] = "p", [4] = "span", [5] = "div" }
--- Test whether a string begins with a given prefix.
-- @param String the string to examine
-- @param Start  the prefix to look for
-- @return true when the leading characters of String equal Start
function string.starts(String, Start)
  local prefix_length = string.len(Start)
  local head = string.sub(String, 1, prefix_length)
  return head == Start
end

--- Test whether a string ends with a given suffix.
-- @param String the string to examine
-- @param End    the suffix to look for
-- @return true when the trailing characters of String equal End
function string.ends(String, End)
  -- The empty suffix needs its own case: string.sub with a start of -0
  -- would return the whole string instead of "".
  if End == '' then
    return true
  end
  local tail = string.sub(String, -string.len(End))
  return tail == End
end
(Note that you should only decode a URL string after splitting it; this allows you to correctly process quoted "?" characters in the query string or base part, for instance.)
--- Decode a URL-encoded string.
-- Turns "+" into a space, every %XX escape into the byte with that
-- hexadecimal value, and finally CR LF pairs into plain LF.
-- @param str the encoded string
-- @return the decoded string
function url_decode(str)
  -- "+" is a pattern quantifier, so it is escaped to match it literally.
  str = string.gsub(str, "%+", " ")
  str = string.gsub(str, "%%(%x%x)", function(h)
    return string.char(tonumber(h, 16))
  end)
  str = string.gsub(str, "\r\n", "\n")
  return str
end
--- URL-encode a string.
-- Turns LF into CR LF, replaces every character other than an alphanumeric
-- or a space by a %XX escape (two upper-case hex digits), and finally turns
-- spaces into "+".
-- @param str the string to encode; a nil or false value is returned unchanged
-- @return the encoded string
function url_encode(str)
  if str then
    str = string.gsub(str, "\n", "\r\n")
    str = string.gsub(str, "([^%w ])", function(c)
      return string.format("%%%02X", string.byte(c))
    end)
    str = string.gsub(str, " ", "+")
  end
  return str
end
-- Lower-case every word that consists entirely of upper-case letters.
-- %f[%a] is a frontier that matches where a letter begins after a non-letter
-- (or at the start of the string); %f[%A] matches where the letters end.
str = string.gsub (str, "%f[%a]%u+%f[%A]", string.lower)
Note the use here of the "frontier" pattern %f. Without it, it is hard to match on a word boundary, including where the boundary is at the start or end of the string to be matched. Try it on the string "AAA bbb CCC dddEEE FFFhhh JJJ". For more details read about the FrontierPattern.
--- Rough syntactic check for an e-mail address.
-- The pattern is anchored at both ends so that the whole string has to be an
-- address; an unanchored pattern would also accept any longer text that
-- merely contains one. The final label must be at least two alphanumeric
-- characters long.
-- @param address the string to check
-- @return true when address looks like local-part@domain.tld, false otherwise
function is_valid_email(address)
  return address:match("^[A-Za-z0-9%.%%%+%-]+@[A-Za-z0-9%.%%%+%-]+%.%w%w+$") ~= nil
end

email = "alex@it-rfc.de"
if is_valid_email(email) then
  print(email .. " is a valid email address")
end
Both Ruby and Python have a short form for string formatting, using the % operator.
The following snippet adds a similar use of the mod operator to Lua:
-- Give strings a Python/Ruby-style formatting operator: "fmt" % value.
--   a: the format string (left operand)
--   b: a single value, or a table holding several values to format
-- A nil or false right operand returns the format string unchanged.
getmetatable("").__mod = function(a, b)
  if not b then
    return a
  elseif type(b) == "table" then
    -- unpack moved to table.unpack in Lua 5.2; use whichever exists.
    return string.format(a, (table.unpack or unpack)(b))
  else
    return string.format(a, b)
  end
end
Example usage:
-- Format with the % operator: a single value on the right-hand side, or a
-- table holding several values. This only works once a __mod handler for
-- strings has been installed on the string metatable.
print( "%5.2f" % math.pi ) print( "%-10.10s %04d" % { "test", 123 } )
You might like or dislike this notation; choose for yourself.