Interpolating Search

lua-users home
wiki

When searching for a value in a sorted array, the to preferring search algorithm is the binary search. In certain cases although we can be a lot faster using an interpolating search mechanism. tables optimal for a interpolating search are { 1,2,3,4,5,6,7,8,9,10 }, where as tables looking like { 1,1,1,1,1,1,1,1,1,2,10 } and searching it for the value 2 will use a alot more time than the binary search. So it depends on the scenario, but if used right one can be 3 times as fast as the binary search. I wrote an extra function for searching a reversed table, since it was the better option rather to changing all the variables and the < check.

--[[
   Interpolating Search v 0.2
   
   LUA 5.1 compatible
   
   Can only search for values of type number
   
   table.intsearch( table, value [, compval ] ), for searching a normal sorted table
   table.intsearchrev( table, value [, compval ] ), for searching a reversed sorted table
   
   If the  value is found:
      it returns a table holding all the matching indices (e.g. { startindice,endindice } )
      endindice may be the same as startindice if only one matching indice was found
   If compval is given:
      then it must be a function that takes one value and returns a second value2,
      to be compared with the searched value, e.g.:
      compvalue = function( value ) return value[1] end
   Return value:
      on success: a table holding matching indices (e.g. { startindice,endindice } )
      on failure: nil
]]--
do
   -- Avoid heap allocs for performance
   local default_fcompval = function( value ) return value end
   function table.intsearch( t,value,fcompval )
      -- Initialise functions
      local fcompval = fcompval or default_fcompval
      -- Inistialise numbers
      local ilow,ihigh = 1,#t
      -- return on empty table
      if not t[ilow] then return end
      -- get values of indices
      local _ilow,_ihigh = fcompval( t[ilow] ),fcompval( t[ihigh] )
      -- make sure slope cannot become 0
      while _ilow and _ilow < _ihigh do
         -- get interpolated position
         local pos = math.floor( (value-_ilow)*(ihigh-ilow)/(_ihigh-_ilow) ) + ilow
         -- check for out of range
         if pos < ilow or pos > ihigh then return end
         -- get compare value
         local compval = fcompval( t[pos] )
         if value == compval then
            -- insert found position
            local tfound,num = { pos,pos },pos-1
            -- get all values that match
            while value == fcompval( t[num] ) do
               tfound[1],num = num,num-1
            end
            num = pos+1
            while value == fcompval( t[num] ) do
               tfound[2],num = num,num+1
            end
            return tfound
         -- keep searching,
         -- left part of the table
         elseif value < compval then
            ihigh = pos-1
         else
            ilow = pos+1
         end
         _ilow,_ihigh = fcompval( t[ilow] ),fcompval( t[ihigh] )
      end
      if value == fcompval( t[ilow] ) then
         -- only add values --> of ilow
         local tfound,num = { ilow,ilow },ilow+1
         while value == fcompval( t[num] ) do
            tfound[2],num = num,num+1
         end
         return tfound         
      end
   end
   -- Interpolated search on a reversed table
   function table.intsearchrev( t,value,fcompval )
      -- Initialise functions
      local fcompval = fcompval or default_fcompval
      -- Inistialise numbers
      local ilow,ihigh = 1,#t
      if not t[ilow] then return end
      local _ilow,_ihigh = fcompval( t[ilow] ),fcompval( t[ihigh] )
      while _ilow and _ilow > _ihigh do
         -- get interpolated position
         local pos = math.floor( (_ihigh-value)*(ihigh-ilow)/(_ihigh-_ilow) ) + ilow
         -- check for out of range
         if pos < ilow or pos > ihigh then return end
         local compval = fcompval( t[pos] )
         if value == compval then
            -- insert found position
            local tfound,num = { pos,pos },pos-1
            -- get all values that match
            while value == fcompval( t[num] ) do
               tfound[1],num = num,num-1
            end
            num = pos+1
            while value == fcompval( t[num] ) do
               tfound[2],num = num,num+1
            end
            return tfound
            -- keep searching,
            -- left part of the table
         elseif value > compval then
            ihigh = pos-1
         else
            ilow = pos+1
         end
         _ilow,_ihigh = fcompval( t[ilow] ),fcompval( t[ihigh] )
      end
      if value == fcompval( t[ilow] ) then
         -- only add values --> of ilow
         local tfound,num = { ilow,ilow },ilow+1
         while value == fcompval( t[num] ) do
            tfound[2],num = num,num+1
         end
         return tfound         
      end
   end
end
-- CHILLCODE™
Testsuit:
-- test: table size 0
t = {}
local v = table.intsearch(t, 5); assert(v == nil)

-- test: table size 1
t = {5}
local v = table.intsearch(t, 4); assert(v == nil)
local v = table.intsearch(t, 5); assert(v[1] == 1)
local v = table.intsearch(t, 6); assert(v == nil)

-- test: table size 2
t = {4,6}
local v = table.intsearch(t, 3); assert(v == nil)
local v = table.intsearch(t, 4); assert(v[1] == 1)
local v = table.intsearch(t, 5); assert(v == nil)
local v = table.intsearch(t, 6); assert(v[1] == 2)
local v = table.intsearch(t, 7); assert(v == nil)

-- test: typical, with duplicate
t = {0,2,4,4,6,8,10}
local v = table.intsearch(t, 0); assert(v[1] == 1)
local v = table.intsearch(t, 10); assert(v[1] == 7)
local v = table.intsearch(t, 4); assert(v[1] == 3 and v[2] == 4)
local v = table.intsearch(t, 5); assert(v == nil)
local v = table.intsearch(t, 11); assert(v == nil)
local v = table.intsearch(t, -1); assert(v == nil)

-- test: identical
t = {1,1,1,1,1,1,1,1,1,1}
local v = table.intsearch(t, 1); assert(v[1] == 1 and v[2] == 10)

-- test: fcomp
t = {10,52,34,44,86,38}
local v = table.intsearch(t, 6, function(v) return v % 10 end); assert(v[1] == 5)
local v = table.intsearch(t, 4, function(v) return v % 10 end); assert(v[1] == 3 and v[2] == 4)

-- test: reverse
t = {10,8,6,4,4,2,0}
local v = table.intsearchrev(t, 6); assert(v[1] == 3)
local v = table.intsearchrev(t, 4); assert(v[1] == 4 and v[2] == 5)

print "DONE"

Interpolating Search for a String

This combines the interpolating search with the binary search to be able to search tables holding strings. This was more a test if it is actually possible, and in some cases, this method could be faster than the binary search.

--[[
   Interpolating Search on a String
   
   LUA 5.1 compatible
   
   Can only search sorted tables with value string
   
   table.intsearchstr( table, value ), for searching a normal sorted table
   table.intsearchstrrev( table, value ), for searching a reversed sorted table
   
   If the  value is found:
      it returns a table holding all the matching indices (e.g. { startindice,endindice } )
      endindice may be the same as startindice if only one matching indice was found
   Return value:
      on success: a table holding matching indices (e.g. { startindice,endindice } )
      on failure: nil
]]--
do
   -- Avoid heap allocs for performance
   local getbytevalue = function( value )
      if value then
         local num,mul = 0,1
         -- set bytedept, 4 or 5 seems appropriate
         for i = 4,1,-1 do
            local byte = string.byte( string.sub( value,i,i ) ) or -1
            byte,num,mul = byte + 1,num + mul*byte,mul*257
         end
         return num
      end
   end   
   -- Load the optimised binary function
   -- Avoid heap allocs for performance
   local fcompf = function( a,b ) return a < b end
   local fcompr = function( a,b ) return a > b end
   local function tablebinsearch( t,value,reversed,iStart,iEnd )
      -- Initialise functions
      local fcomp = reversed and fcompr or fcompf
      --  Initialise numbers
      local iStart,iEnd,iMid = iStart or 1,iEnd or #t,0
      -- Binary Search
      while iStart <= iEnd do
         -- calculate middle
         iMid = math.floor( (iStart+iEnd)/2 )
         -- get compare value
         local value2 = t[iMid]
         -- get all values that match
         if value == value2 then
            local tfound,num = { iMid,iMid },iMid - 1
            while value == t[num] do
               tfound[1],num = num,num - 1
            end
            num = iMid + 1
            while value == t[num] do
               tfound[2],num = num,num + 1
            end
            return tfound
         -- keep searching
         elseif fcomp( value,value2 ) then
            iEnd = iMid - 1
         else
            iStart = iMid + 1
         end
      end
   end
   
   -- The interpolationg Search Function on a String
   function table.intsearchstr( t,value )
      -- Inistialise numbers
      local ilow,ihigh = 1,#t
      -- return on empty table
      if not t[ilow] then return end
      -- get byte values values of indices and searched value
      local _ilow,_ihigh,_value = getbytevalue(t[ilow]),getbytevalue(t[ihigh]),getbytevalue(value)
      local oldpos = -1
      -- make sure slope cannot become 0
      while _ilow and _ilow < _ihigh do
         -- get interpolated position
         local pos = math.floor( (_value-_ilow)*(ihigh-ilow)/(_ihigh-_ilow) ) + ilow
         -- check for out of range
         if pos < ilow or pos > ihigh then return end
         -- check for real match
         if value == t[pos] then
            -- insert found position
            local tfound,num = { pos,pos },pos-1
            -- get all values that match
            while value == t[num] do
               tfound[1],num = num,num-1
            end
            num = pos+1
            while value == t[num] do
               tfound[2],num = num,num+1
            end
            return tfound
         -- keep searching,
         -- left part of the table,check for real lower
         elseif value < t[pos] then
            ihigh = pos-1
         else
            ilow = pos+1
         end
         -- check if new position is further than 1 away
         if oldpos+1 == pos or oldpos-1 == pos then
            -- start binary search
            return tablebinsearch( t,value,_,ilow,ihigh )
         end
         _ilow,_ihigh,oldpos = getbytevalue(t[ilow]),getbytevalue(t[ihigh]),pos
      end
      -- initiate binary seach function here since we could be in a flat for the interpolating search
      return tablebinsearch( t,value,_,ilow,ihigh )
   end
   -- The interpolationg Search Function on a String
   function table.intsearchstrrev( t,value )
      -- Inistialise numbers
      local ilow,ihigh = 1,#t
      -- return on empty table
      if not t[ilow] then return end
      -- get byte values values of indices and searched value
      local _ilow,_ihigh,_value = getbytevalue(t[ilow]),getbytevalue(t[ihigh]),getbytevalue(value)
      local oldpos = -1
      -- make sure slope cannot become 0
      while _ilow and _ilow > _ihigh do
         -- get interpolated position
         local pos = math.floor( (_ihigh-_value)*(ihigh-ilow)/(_ihigh-_ilow) ) + ilow
         -- check for out of range
         if pos < ilow or pos > ihigh then return end
         -- check for real match
         if value == t[pos] then
            -- insert found position
            local tfound,num = { pos,pos },pos-1
            -- get all values that match
            while value == t[num] do
               tfound[1],num = num,num-1
            end
            num = pos+1
            while value == t[num] do
               tfound[2],num = num,num+1
            end
            return tfound
         -- keep searching,
         -- left part of the table,check for real lower
         elseif value > t[pos] then
            ihigh = pos-1
         else
            ilow = pos+1
         end
         -- check if new position is further than 1 away
         if oldpos+1 == pos or oldpos-1 == pos then
            -- start binary search
            return tablebinsearch( t,value,1,ilow,ihigh )
         end
         _ilow,_ihigh,oldpos = getbytevalue(t[ilow]),getbytevalue(t[ihigh]),pos
      end
      -- initiate binary seach function here since we could be in a flat for the interpolating search
      return tablebinsearch( t,value,1,ilow,ihigh )
   end
end
-- CHILLCODE™
Test suit:
-- test: table size 0
t = { "a","a","a","a","a","a","a","a","a","a","aa","z" }
local v = table.intsearchstr(t, "aa"); assert(v[1] == 11)

-- test flat for interpolatin search function
t = { "aabb","aabbcc","aabc","aabc","aabc","aabc","aabc","aabc","aabc","aabc","aabc","aabc","aabc","aabca","aabcc" }
local v = table.intsearchstr(t, "aabca"); assert(v[1] == 14)

-- test: table size 1
t = { "a" }
local v = table.intsearchstr(t, "b"); assert(v == nil)
local v = table.intsearchstr(t, "a"); assert(v[1] == 1)
local v = table.intsearchstr(t, "c"); assert(v == nil)

-- test: table size 2
t = {"a","c"}
local v = table.intsearchstr(t, "0"); assert(v == nil)
local v = table.intsearchstr(t, "a"); assert(v[1] == 1)
local v = table.intsearchstr(t, "b"); assert(v == nil)
local v = table.intsearchstr(t, "c"); assert(v[1] == 2)
local v = table.intsearchstr(t, "d"); assert(v == nil)

-- test: typical, with duplicate
t = {"a","b","c","c","d","e","f"}
local v = table.intsearchstr(t, "a"); assert(v[1] == 1)
local v = table.intsearchstr(t, "f"); assert(v[1] == 7)
local v = table.intsearchstr(t, "c"); assert(v[1] == 3 and v[2] == 4)
local v = table.intsearchstr(t, "da"); assert(v == nil)
local v = table.intsearchstr(t, "fa"); assert(v == nil)
local v = table.intsearchstr(t, "0"); assert(v == nil)

-- test: identical
t = {"a","a","a","a","a","a","a","a","a","a"}
local v = table.intsearchstr(t, "a"); assert(v[1] == 1 and v[2] == 10)

-- test: reverse
t = {"f","e","d","c","c","b","a"}
local v = table.intsearchstrrev(t, "d"); assert(v[1] == 3)
local v = table.intsearchstrrev(t, "c"); assert(v[1] == 4 and v[2] == 5)

print "DONE"

See Also


RecentChanges · preferences
edit · history
Last edited June 10, 2007 2:59 pm GMT (diff)