lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


As promised earlier today, here is a SipHash patch for Lua 5.4.0, which replaces its default hash compression code with the fast and secure SipHash algorithm. This patch will cause Lua 5.4.0 to generate two harmless warnings while compiling lstring.c.

Here is my benchmark: No difference in speed.

SipHash compile			Stock Lua 5.4.0
real    0m23.038s		real    0m22.653s
user    0m22.414s		user    0m22.034s
sys     0m0.619s		sys     0m0.617s

real    0m23.491s		real    0m24.177s
user    0m22.859s		user    0m23.289s
sys     0m0.595s		sys     0m0.598s

real    0m23.375s		real    0m23.262s
user    0m22.803s		user    0m22.693s
sys     0m0.569s		sys     0m0.566s

To replicate this benchmark:

git clone https://github.com/Samboy/covid-19-html/
cd covid-19-html
git clone https://github.com/nytimes/covid-19-data/
cp covid-19-data/us-counties.csv data.csv
time lua examine-growth.lua benchmark

Note that this patch does not make an attempt to generate a random key for SipHash. The way to do so is implementation dependent, but *usually* can be done with /dev/random (I posted a Lua script earlier today which uses /dev/random to make a SipHash.h file with a random key).

The patch is public domain. Both this patch and a similar patch for Lua 5.1.5 are available here:

https://github.com/samboy/LUAstuff/

There’s also some Lua code there for doing stuff like sorted table iterations, splitting lines with regular expressions or comma separated data, copying files, etc. -- this is my collection of “batteries” for Lua so I can use it to solve my day to day computing problems.

-- Sam


--- lua-5.4.0/src/SipHash.h.orig	2020-08-08 19:22:44.720096348 -0700
+++ lua-5.4.0/src/SipHash.h	2020-08-08 19:22:40.433125570 -0700
@@ -0,0 +1,3 @@
+#include <stdint.h>
+uint64_t sipKey1 = 0xded6cbc72f7eeb4fULL;
+uint64_t sipKey2 = 0x81875fe84b1705d7ULL;
--- lua-5.4.0/src/lstring.c.orig	2020-08-08 20:18:16.897392042 -0700
+++ lua-5.4.0/src/lstring.c	2020-08-08 20:19:56.381714440 -0700
@@ -20,6 +20,7 @@
 #include "lobject.h"
 #include "lstate.h"
 #include "lstring.h"
+#include "SipHash.h"
 
 
 /*
@@ -52,9 +53,62 @@
 
 unsigned int luaS_hash (const char *str, size_t l, unsigned int seed,
                         size_t step) {
-  unsigned int h = seed ^ cast_uint(l);
-  for (; l >= step; l -= step)
-    h ^= ((h<<5) + (h>>2) + cast_byte(str[l - 1]));
+  unsigned int h;
+  uint64_t v0, v1, v2, v3, m;
+  int shift = 0, round = 0;
+  size_t offset = 0;
+
+  // We calculate the hash via SipHash, for security reasons
+  v0 = sipKey1 ^ 0x736f6d6570736575ULL;
+  v1 = sipKey2 ^ 0x646f72616e646f6dULL;
+  v2 = sipKey1 ^ 0x6c7967656e657261ULL;
+  v3 = sipKey2 ^ 0x7465646279746573ULL;
+  m = 0;
+  while(offset <= l) {
+    if(offset < l) {
+      m |= (uint64_t)(str[offset] & 0xff) << shift;  
+      shift += 8;
+    }
+    while(shift >= 64 || offset == l) { // "while" to avoid goto
+      if(offset == l && shift != 64) {
+        m |= (uint64_t)(l & 0xff) << 56;
+        offset++;
+      }
+      shift = 0;
+      v3 ^= m;
+      for(round = 0; round < 2; round++) {
+        v0 += v1; v2 += v3;
+        v1 = (v1 << 13) | (v1 >> 51);
+        v3 = (v3 << 16) | (v3 >> 48);
+        v1 ^= v0; v3 ^= v2;
+        v0 = (v0 << 32) | (v0 >> 32);
+        v2 += v1; v0 += v3;
+        v1 = (v1 << 17) | (v1 >> 47);
+        v3 = (v3 << 21) | (v3 >> 43);
+        v1 ^= v2; v3 ^= v0;
+        v2 = (v2 << 32) | (v2 >> 32);
+      }
+      v0 ^= m;
+      shift = 0;
+      m = 0;
+    }
+    offset++;
+  }   
+  v2 ^= 255;
+  for(round = 0; round < 4; round++) {
+    v0 += v1; v2 += v3;
+    v1 = (v1 << 13) | (v1 >> 51);
+    v3 = (v3 << 16) | (v3 >> 48);
+    v1 ^= v0; v3 ^= v2;
+    v0 = (v0 << 32) | (v0 >> 32);
+    v2 += v1; v0 += v3;
+    v1 = (v1 << 17) | (v1 >> 47);
+    v3 = (v3 << 21) | (v3 >> 43);
+    v1 ^= v2; v3 ^= v0;
+    v2 = (v2 << 32) | (v2 >> 32);
+  }
+  m =  v0 ^ v1 ^ v2 ^ v3;
+  h = (unsigned int)m;
   return h;
 }