[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: Red-Black Tree Patch (Re: Hash Table Collisions (n.runs-SA-2011.004))
- From: David Kolf <kolf@...>
- Date: Wed, 04 Jan 2012 12:22:27 +0100
My patch to handle hash table collisions seems to run now. My red-black
tree implementation is based on
<http://www.eternallyconfuzzled.com/tuts/datastructures/jsw_tut_rbtree.aspx>.
My version isn't the nicest implementation yet, but it seems to work.
Use it at your own risk.
Since the server where my old webpage was hosted is down, I hope the Lua
mailing list (and the archive) will keep the two attachments. One is the
patch for Lua-5.1.4, the other one is for Lua-5.2.0.
Should I publish my test case, too, or would that be too much help for
script kiddies? (It is trivial, but anyway...)
Best regards,
David
Index: src/lgc.c
==================================================================
--- src/lgc.c
+++ src/lgc.c
@@ -634,10 +634,103 @@
}
}
}
}
+/* RB-Tree added by DHK. See note in lstring.c. removestr: unlink string o from the red-black tree of its hash bucket (top-down deletion), free it, and store the new root (colored black) back into the bucket. */
+static void removestr (lua_State *L, GCObject *o) {
+ TString *ts = rawgco2ts(o);
+ GCObject **tree = &G(L)->strt.trees[lmod(ts->tsv.hash, G(L)->strt.size)]; /* root slot of o's bucket */
+ GCObject head, *root = *tree, *n, *q, *p, *g, *pn; /* head: dummy node above the root; q/p/g: current node, its parent, its grandparent; n: node to delete once found; pn: parent of n */
+ int dir = 1; /* the real root hangs off head.child[1] */
+ const char *str = getstr(ts);
+ size_t l = ts->tsv.len;
+
+ if (!root) return; /* empty tree: nothing is unlinked and o is not freed on this path. NOTE(review): presumably unreachable because every string is inserted on creation -- confirm */
+ head.ts.tsv.isred = 0;
+ head.ts.tsv.child[0] = NULL;
+ head.ts.tsv.child[1] = root;
+ q = &head; g = p = n = pn = NULL;
+
+ while (q->ts.tsv.child[dir]) { /* walk down until there is no child in the search direction */
+ int last = dir; /* direction taken from p to reach the new q */
+ g = p;
+ p = q;
+ q = q->ts.tsv.child[dir];
+ if (l < q->ts.tsv.len) dir = 0; /* tree order: shorter strings to the left ... */
+ else if (l > q->ts.tsv.len) dir = 1; /* ... longer strings to the right ... */
+ else dir = memcmp(str, getstr(&q->ts), l * sizeof(char)) > 0; /* ... equal lengths by byte order; an equal string yields dir 0 */
+
+ if (q == o) { /* found the node to delete (pointer identity); keep descending */
+ n = q; pn = p;
+ }
+
+ if (!isred (q) && !isred (q->ts.tsv.child[dir])) { /* q and the next node on the path are both black: recolor/rotate before stepping down */
+ if (isred (q->ts.tsv.child[!dir])) { /* q's other child is red: rotate it above q */
+ p = p->ts.tsv.child[last] = luaS_rottree (L, q, dir);
+ } else {
+ GCObject *s = p->ts.tsv.child[!last]; /* sibling of q */
+ if (s) {
+ if (!isred (s->ts.tsv.child[0]) && !isred (s->ts.tsv.child[1])) { /* sibling has no red child: color flip only */
+ p->ts.tsv.isred = 0;
+ s->ts.tsv.isred = 1;
+ q->ts.tsv.isred = 1;
+ } else {
+ int dir2 = g->ts.tsv.child[1] == p; /* which child of g is p */
+ if (isred (s->ts.tsv.child[last])) { /* double rotation: first around s, then around p */
+ p->ts.tsv.child[!last] = luaS_rottree (L, p->ts.tsv.child[!last], !last);
+ g->ts.tsv.child[dir2] = luaS_rottree (L, p, last);
+ } else if (isred (s->ts.tsv.child[!last])) { /* single rotation around p */
+ g->ts.tsv.child[dir2] = luaS_rottree (L, p, last);
+ }
+ q->ts.tsv.isred = g->ts.tsv.child[dir2]->ts.tsv.isred = 1; /* q and the rotated subtree's root become red ... */
+ g->ts.tsv.child[dir2]->ts.tsv.child[0]->ts.tsv.isred = 0; /* ... and both children of that root become black */
+ g->ts.tsv.child[dir2]->ts.tsv.child[1]->ts.tsv.isred = 0;
+ }
+ }
+ }
+ }
+ }
+ if (n) { /* o was found; q is now the last node visited on the way down */
+ GCObject *t;
+ t = q->ts.tsv.child[q->ts.tsv.child[0] == NULL]; /* q's left child if it has one, otherwise its right child (may be NULL) */
+ /* move q into the position of n */
+ if (pn->ts.tsv.child[0] != n && pn->ts.tsv.child[1] != n) { /* neither child of the recorded parent is n any more */
+ GCObject *c = head.ts.tsv.child[1];
+ /* pn isn't the parent of n anymore -- find it again */
+ pn = &head;
+ while (c != n) { /* repeat the search from the root, remembering the parent */
+ pn = c;
+ if (l < c->ts.tsv.len) dir = 0;
+ else if (l > c->ts.tsv.len) dir = 1;
+ else dir = memcmp(str, getstr(&c->ts), l * sizeof(char)) > 0;
+ c = c->ts.tsv.child[dir];
+ }
+ }
+ if (n == p) {
+ /* n is the direct parent of q */
+ q->ts.tsv.child[0] = q != n->ts.tsv.child[0] ? (n->ts.tsv.child[0]) : t; /* q inherits n's other child; on the side where q itself hung it keeps t */
+ q->ts.tsv.child[1] = q != n->ts.tsv.child[1] ? (n->ts.tsv.child[1]) : t;
+ q->ts.tsv.isred = n->ts.tsv.isred; /* q takes over n's color */
+ if (pn) pn->ts.tsv.child[pn->ts.tsv.child[1] == n] = q; /* n's parent now points to q */
+ } else {
+ /* other nodes are between n and q */
+ if (n != q) { /* when n == q only the unlink below is needed */
+ q->ts.tsv.child[0] = n->ts.tsv.child[0];
+ q->ts.tsv.child[1] = n->ts.tsv.child[1];
+ q->ts.tsv.isred = n->ts.tsv.isred;
+ if (pn) pn->ts.tsv.child[pn->ts.tsv.child[1] == n] = q;
+ }
+ p->ts.tsv.child[p->ts.tsv.child[1] == q] = t; /* q's old parent gets t in q's former slot */
+ }
+ luaM_freemem(L, n, sizestring(gco2ts(n))); /* release the removed string */
+ }
+
+ *tree = head.ts.tsv.child[1]; /* rotations may have changed the root */
+ if (*tree != NULL)
+ (*tree)->ts.tsv.isred = 0; /* the root is always colored black */
+}
static void freeobj (lua_State *L, GCObject *o) {
switch (gch(o)->tt) {
case LUA_TPROTO: luaF_freeproto(L, gco2p(o)); break;
case LUA_TFUNCTION: luaF_freeclosure(L, gco2cl(o)); break;
@@ -645,11 +738,11 @@
case LUA_TTABLE: luaH_free(L, gco2t(o)); break;
case LUA_TTHREAD: luaE_freethread(L, gco2th(o)); break;
case LUA_TUSERDATA: luaM_freemem(L, o, sizeudata(gco2u(o))); break;
case LUA_TSTRING: {
G(L)->strt.nuse--;
- luaM_freemem(L, o, sizestring(gco2ts(o)));
+ removestr(L, o);
break;
}
default: lua_assert(0);
}
}
Index: src/lobject.h
==================================================================
--- src/lobject.h
+++ src/lobject.h
@@ -402,10 +402,13 @@
struct {
CommonHeader;
lu_byte reserved;
unsigned int hash;
size_t len; /* number of characters in string */
+ /* added by DHK: store the strings in a red-black tree */
+ GCObject *child[2];
+ int isred;
} tsv;
} TString;
/* get the actual string (array of bytes) from a TString */
Index: src/lstate.c
==================================================================
--- src/lstate.c
+++ src/lstate.c
@@ -247,10 +247,11 @@
g->gcrunning = 0; /* no GC while building state */
g->lastmajormem = 0;
g->strt.size = 0;
g->strt.nuse = 0;
g->strt.hash = NULL;
+ g->strt.trees = NULL; /* added by DHK */
setnilvalue(&g->l_registry);
luaZ_initbuffer(L, &g->buff);
g->panic = NULL;
g->version = lua_version(NULL);
g->gcstate = GCSpause;
Index: src/lstate.h
==================================================================
--- src/lstate.h
+++ src/lstate.h
@@ -56,10 +56,11 @@
#define KGC_GEN 2 /* generational collection */
typedef struct stringtable {
GCObject **hash;
+ GCObject **trees; /* added by DHK */
lu_int32 nuse; /* number of elements */
int size;
} stringtable;
Index: src/lstring.c
==================================================================
--- src/lstring.c
+++ src/lstring.c
@@ -15,38 +15,111 @@
#include "lmem.h"
#include "lobject.h"
#include "lstate.h"
#include "lstring.h"
+/* Red-Black Trees for strings added by David Heiko Kolf to prevent an attack
+ on hash tables described in n.runs-SA-2011.004.
+ This implementation is based on
+ http://www.eternallyconfuzzled.com/tuts/datastructures/jsw_tut_rbtree.aspx.
+ Modifications in other parts of Lua are marked using "DHK".
+ */
+
+GCObject *luaS_rottree(lua_State *L, GCObject *n, int dir) /* single rotation: n's child on the side opposite to dir moves above n and is returned as the new subtree root; the caller must relink it */
+{
+ GCObject *s = n->ts.tsv.child[!dir]; /* node that moves up; it is dereferenced below, so it must not be NULL */
+ n->ts.tsv.child[!dir] = s->ts.tsv.child[dir]; /* s's subtree on the dir side is handed over to n */
+ s->ts.tsv.child[dir] = n; /* n becomes s's child on the dir side */
+ n->ts.tsv.isred = 1; /* the old subtree root becomes red ... */
+ s->ts.tsv.isred = 0; /* ... and the new one black */
+ (void) L; /* unused */
+ return s;
+}
+static void insertstr(lua_State *L, GCObject **tree, GCObject *o) /* links string o into the red-black tree rooted at *tree (ordered by length, then bytes) and stores the resulting black root back into *tree */
+{
+ /* insert the string in a red-black tree */
+ GCObject *root = *tree;
+ GCObject *q, *p, *g, *t; /* current node, its parent, its grandparent, and the node above the grandparent */
+ GCObject head; /* dummy node; only its isred and child[] fields are initialized */
+ int dir = 0, last = 0; /* dir: direction of the latest step (p to q); last: direction of the step before it (g to p) */
+ const char *str = getstr(&o->ts);
+ size_t l = o->ts.tsv.len;
+ o->ts.tsv.child[0] = o->ts.tsv.child[1] = NULL; /* o always enters as a leaf; links left over from an earlier tree are dropped */
+ if (!root) { /* empty tree: o becomes the black root */
+ *tree = o;
+ o->ts.tsv.isred = 0;
+ return;
+ }
+ /* put a dummy object in front of the root to avoid special cases */
+ head.ts.tsv.isred = 0;
+ head.ts.tsv.child[0] = NULL;
+ head.ts.tsv.child[1] = root;
+ t = &head; q = root; g = p = NULL;
+ for (;;) {
+ if (q == NULL) { /* fell off the tree: attach o here as a red leaf */
+ p->ts.tsv.child[dir] = q = o;
+ o->ts.tsv.isred = 1;
+ } else if (isred (q->ts.tsv.child[0]) && isred (q->ts.tsv.child[1])) { /* both children red: color flip */
+ q->ts.tsv.isred = 1;
+ q->ts.tsv.child[0]->ts.tsv.isred = 0;
+ q->ts.tsv.child[1]->ts.tsv.isred = 0;
+ }
+ if (isred (q) && isred (p)) { /* two red nodes in a row: repair by rotating around g */
+ int pdir = t->ts.tsv.child[1] == g; /* which child of t is g */
+ if (q == p->ts.tsv.child[last]) { /* q continues in the same direction as p: single rotation */
+ t->ts.tsv.child[pdir] = luaS_rottree (L, g, !last);
+ } else { /* q is on the inner side: double rotation */
+ g->ts.tsv.child[last] = luaS_rottree (L, g->ts.tsv.child[last], last);
+ t->ts.tsv.child[pdir] = luaS_rottree (L, g, !last);
+ }
+ }
+ if (q == o) break; /* o is linked and any red-red violation has been repaired */
+ last = dir;
+ if (l < q->ts.tsv.len) dir = 0; /* shorter strings go left ... */
+ else if (l > q->ts.tsv.len) dir = 1; /* ... longer strings go right ... */
+ else dir = memcmp(str, getstr(&q->ts), l * sizeof(char)) > 0; /* ... equal lengths are ordered by their bytes */
+ if (g != NULL) t = g; /* shift the ancestor window one level down */
+ g = p;
+ p = q;
+ q = q->ts.tsv.child[dir];
+ }
+ *tree = head.ts.tsv.child[1]; /* rotations may have changed the root */
+ (*tree)->ts.tsv.isred = 0; /* the root is always colored black */
+}
void luaS_resize (lua_State *L, int newsize) {
int i;
stringtable *tb = &G(L)->strt;
/* cannot resize while GC is traversing strings */
luaC_runtilstate(L, ~bitmask(GCSsweepstring));
if (newsize > tb->size) {
luaM_reallocvector(L, tb->hash, tb->size, newsize, GCObject *);
for (i = tb->size; i < newsize; i++) tb->hash[i] = NULL;
+ luaM_reallocvector(L, tb->trees, tb->size, newsize, GCObject *); /* grow the array of tree roots in step with the chains. NOTE(review): if this call can raise an error, tb->hash is already resized while tb->size is unchanged -- confirm this is acceptable */
+ for (i = tb->size; i < newsize; i++) tb->trees[i] = NULL; /* new buckets start with empty trees */
}
/* rehash */
for (i=0; i<tb->size; i++) {
GCObject *p = tb->hash[i];
tb->hash[i] = NULL;
+ tb->trees[i] = NULL; /* discard the old tree of this bucket; its strings are re-inserted one by one below */
while (p) { /* for each node in the list */
GCObject *next = gch(p)->next; /* save next */
unsigned int h = lmod(gco2ts(p)->hash, newsize); /* new position */
gch(p)->next = tb->hash[h]; /* chain it */
tb->hash[h] = p;
resetoldbit(p); /* see MOVE OLD rule */
+ insertstr(L, &tb->trees[h], p); /* insertstr clears p's old child links before linking it into the tree of bucket h */
p = next;
}
}
if (newsize < tb->size) {
/* shrinking slice must be empty */
lua_assert(tb->hash[newsize] == NULL && tb->hash[tb->size - 1] == NULL);
luaM_reallocvector(L, tb->hash, tb->size, newsize, GCObject *);
+ luaM_reallocvector(L, tb->trees, tb->size, newsize, GCObject *); /* shrink the array of tree roots to the same size */
}
tb->size = newsize;
}
@@ -53,24 +126,27 @@
static TString *newlstr (lua_State *L, const char *str, size_t l,
unsigned int h) {
size_t totalsize; /* total size of TString object */
GCObject **list; /* (pointer to) list where it will be inserted */
TString *ts;
+ size_t i; /* bucket index, shared by the chain and the tree */
stringtable *tb = &G(L)->strt;
if (l+1 > (MAX_SIZET - sizeof(TString))/sizeof(char))
luaM_toobig(L);
if (tb->nuse >= cast(lu_int32, tb->size) && tb->size <= MAX_INT/2)
luaS_resize(L, tb->size*2); /* too crowded */
totalsize = sizeof(TString) + ((l + 1) * sizeof(char));
- list = &tb->hash[lmod(h, tb->size)];
+ i = lmod(h, tb->size); /* computed after the possible resize above, so it uses the current table size */
+ list = &tb->hash[i];
ts = &luaC_newobj(L, LUA_TSTRING, totalsize, list, 0)->ts;
ts->tsv.len = l;
ts->tsv.hash = h;
ts->tsv.reserved = 0;
memcpy(ts+1, str, l*sizeof(char));
((char *)(ts+1))[l] = '\0'; /* ending 0 */
tb->nuse++;
+ insertstr(L, &tb->trees[i], cast(GCObject *, ts)); /* also link the new string into its bucket's tree; done after len and the bytes are set because insertstr compares them */
return ts;
}
TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
@@ -78,20 +154,23 @@
unsigned int h = cast(unsigned int, l); /* seed */
size_t step = (l>>5)+1; /* if string is too long, don't hash all its chars */
size_t l1;
for (l1=l; l1>=step; l1-=step) /* compute hash */
h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
- for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)];
- o != NULL;
- o = gch(o)->next) {
+ for (o = G(L)->strt.trees[lmod(h, G(L)->strt.size)];
+ o != NULL; ) { /* binary tree search by DHK */
TString *ts = rawgco2ts(o);
- if (h == ts->tsv.hash &&
- ts->tsv.len == l &&
- (memcmp(str, getstr(ts), l * sizeof(char)) == 0)) {
+ int c ;
+ if (l < ts->tsv.len) c = -1;
+ else if (l > ts->tsv.len) c = 1;
+ else c = memcmp(str, getstr(ts), l * sizeof(char));
+ if (c == 0) {
if (isdead(G(L), o)) /* string is dead (but was not collected yet)? */
changewhite(o); /* resurrect it */
return ts;
+ } else {
+ o = ts->tsv.child[c > 0];
}
}
return newlstr(L, str, l, h); /* not found; create a new string */
}
Index: src/lstring.h
==================================================================
--- src/lstring.h
+++ src/lstring.h
@@ -31,7 +31,10 @@
LUAI_FUNC void luaS_resize (lua_State *L, int newsize);
LUAI_FUNC Udata *luaS_newudata (lua_State *L, size_t s, Table *e);
LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l);
LUAI_FUNC TString *luaS_new (lua_State *L, const char *str);
+/* Added by DHK: */
+#define isred(o) ((o) && (o)->ts.tsv.isred)
+LUAI_FUNC GCObject *luaS_rottree(lua_State *L, GCObject *n, int dir);
#endif
Index: src/ltable.c
==================================================================
--- src/ltable.c
+++ src/ltable.c
@@ -61,10 +61,14 @@
#define hashmod(t,n) (gnode(t, ((n) % ((sizenode(t)-1)|1))))
#define hashpointer(t,p) hashmod(t, IntPoint(p))
+/* Changed by DHK: hash the pointer, in case the strings were chosen by an
+ attacker to cause lots of hash collisions. */
+#undef hashstr
+#define hashstr hashpointer
#define dummynode (&dummynode_)
#define isdummy(n) ((n) == dummynode)
Index: src/lgc.c
==================================================================
--- src/lgc.c
+++ src/lgc.c
@@ -372,10 +372,103 @@
}
l = h->gclist;
}
}
+/* RB-Tree added by DHK. See note in lstring.c. removestr: unlink string o from the red-black tree of its hash bucket (top-down deletion), free it, and store the new root (colored black) back into the bucket. */
+static void removestr (lua_State *L, GCObject *o) {
+ TString *ts = rawgco2ts(o);
+ GCObject **tree = &G(L)->strt.trees[lmod(ts->tsv.hash, G(L)->strt.size)]; /* root slot of o's bucket */
+ GCObject head, *root = *tree, *n, *q, *p, *g, *pn; /* head: dummy node above the root; q/p/g: current node, its parent, its grandparent; n: node to delete once found; pn: parent of n */
+ int dir = 1; /* the real root hangs off head.child[1] */
+ const char *str = getstr(ts);
+ size_t l = ts->tsv.len;
+
+ if (!root) return; /* empty tree: nothing is unlinked and o is not freed on this path. NOTE(review): presumably unreachable because every string is inserted on creation -- confirm */
+ head.ts.tsv.isred = 0;
+ head.ts.tsv.child[0] = NULL;
+ head.ts.tsv.child[1] = root;
+ q = &head; g = p = n = pn = NULL;
+
+ while (q->ts.tsv.child[dir]) { /* walk down until there is no child in the search direction */
+ int last = dir; /* direction taken from p to reach the new q */
+ g = p;
+ p = q;
+ q = q->ts.tsv.child[dir];
+ if (l < q->ts.tsv.len) dir = 0; /* tree order: shorter strings to the left ... */
+ else if (l > q->ts.tsv.len) dir = 1; /* ... longer strings to the right ... */
+ else dir = memcmp(str, getstr(&q->ts), l * sizeof(char)) > 0; /* ... equal lengths by byte order; an equal string yields dir 0 */
+
+ if (q == o) { /* found the node to delete (pointer identity); keep descending */
+ n = q; pn = p;
+ }
+
+ if (!isred (q) && !isred (q->ts.tsv.child[dir])) { /* q and the next node on the path are both black: recolor/rotate before stepping down */
+ if (isred (q->ts.tsv.child[!dir])) { /* q's other child is red: rotate it above q */
+ p = p->ts.tsv.child[last] = luaS_rottree (L, q, dir);
+ } else {
+ GCObject *s = p->ts.tsv.child[!last]; /* sibling of q */
+ if (s) {
+ if (!isred (s->ts.tsv.child[0]) && !isred (s->ts.tsv.child[1])) { /* sibling has no red child: color flip only */
+ p->ts.tsv.isred = 0;
+ s->ts.tsv.isred = 1;
+ q->ts.tsv.isred = 1;
+ } else {
+ int dir2 = g->ts.tsv.child[1] == p; /* which child of g is p */
+ if (isred (s->ts.tsv.child[last])) { /* double rotation: first around s, then around p */
+ p->ts.tsv.child[!last] = luaS_rottree (L, p->ts.tsv.child[!last], !last);
+ g->ts.tsv.child[dir2] = luaS_rottree (L, p, last);
+ } else if (isred (s->ts.tsv.child[!last])) { /* single rotation around p */
+ g->ts.tsv.child[dir2] = luaS_rottree (L, p, last);
+ }
+ q->ts.tsv.isred = g->ts.tsv.child[dir2]->ts.tsv.isred = 1; /* q and the rotated subtree's root become red ... */
+ g->ts.tsv.child[dir2]->ts.tsv.child[0]->ts.tsv.isred = 0; /* ... and both children of that root become black */
+ g->ts.tsv.child[dir2]->ts.tsv.child[1]->ts.tsv.isred = 0;
+ }
+ }
+ }
+ }
+ }
+ if (n) { /* o was found; q is now the last node visited on the way down */
+ GCObject *t;
+ t = q->ts.tsv.child[q->ts.tsv.child[0] == NULL]; /* q's left child if it has one, otherwise its right child (may be NULL) */
+ /* move q into the position of n */
+ if (pn->ts.tsv.child[0] != n && pn->ts.tsv.child[1] != n) { /* neither child of the recorded parent is n any more */
+ GCObject *c = head.ts.tsv.child[1];
+ /* pn isn't the parent of n anymore -- find it again */
+ pn = &head;
+ while (c != n) { /* repeat the search from the root, remembering the parent */
+ pn = c;
+ if (l < c->ts.tsv.len) dir = 0;
+ else if (l > c->ts.tsv.len) dir = 1;
+ else dir = memcmp(str, getstr(&c->ts), l * sizeof(char)) > 0;
+ c = c->ts.tsv.child[dir];
+ }
+ }
+ if (n == p) {
+ /* n is the direct parent of q */
+ q->ts.tsv.child[0] = q != n->ts.tsv.child[0] ? (n->ts.tsv.child[0]) : t; /* q inherits n's other child; on the side where q itself hung it keeps t */
+ q->ts.tsv.child[1] = q != n->ts.tsv.child[1] ? (n->ts.tsv.child[1]) : t;
+ q->ts.tsv.isred = n->ts.tsv.isred; /* q takes over n's color */
+ if (pn) pn->ts.tsv.child[pn->ts.tsv.child[1] == n] = q; /* n's parent now points to q */
+ } else {
+ /* other nodes are between n and q */
+ if (n != q) { /* when n == q only the unlink below is needed */
+ q->ts.tsv.child[0] = n->ts.tsv.child[0];
+ q->ts.tsv.child[1] = n->ts.tsv.child[1];
+ q->ts.tsv.isred = n->ts.tsv.isred;
+ if (pn) pn->ts.tsv.child[pn->ts.tsv.child[1] == n] = q;
+ }
+ p->ts.tsv.child[p->ts.tsv.child[1] == q] = t; /* q's old parent gets t in q's former slot */
+ }
+ luaM_freemem(L, n, sizestring(gco2ts(n))); /* release the removed string */
+ }
+
+ *tree = head.ts.tsv.child[1]; /* rotations may have changed the root */
+ if (*tree != NULL)
+ (*tree)->ts.tsv.isred = 0; /* the root is always colored black */
+}
static void freeobj (lua_State *L, GCObject *o) {
switch (o->gch.tt) {
case LUA_TPROTO: luaF_freeproto(L, gco2p(o)); break;
case LUA_TFUNCTION: luaF_freeclosure(L, gco2cl(o)); break;
@@ -386,11 +479,11 @@
luaE_freethread(L, gco2th(o));
break;
}
case LUA_TSTRING: {
G(L)->strt.nuse--;
- luaM_freemem(L, o, sizestring(gco2ts(o)));
+ removestr(L, o);
break;
}
case LUA_TUSERDATA: {
luaM_freemem(L, o, sizeudata(gco2u(o)));
break;
Index: src/lobject.h
==================================================================
--- src/lobject.h
+++ src/lobject.h
@@ -201,10 +201,13 @@
struct {
CommonHeader;
lu_byte reserved;
unsigned int hash;
size_t len;
+ /* added by DHK: store the strings in a red-black tree */
+ GCObject *child[2];
+ int isred;
} tsv;
} TString;
#define getstr(ts) cast(const char *, (ts) + 1)
Index: src/lstate.c
==================================================================
--- src/lstate.c
+++ src/lstate.c
@@ -161,10 +161,11 @@
g->uvhead.u.l.next = &g->uvhead;
g->GCthreshold = 0; /* mark it as unfinished state */
g->strt.size = 0;
g->strt.nuse = 0;
g->strt.hash = NULL;
+ g->strt.trees = NULL; /* added by DHK */
setnilvalue(registry(L));
luaZ_initbuffer(L, &g->buff);
g->panic = NULL;
g->gcstate = GCSpause;
g->rootgc = obj2gco(L);
Index: src/lstate.h
==================================================================
--- src/lstate.h
+++ src/lstate.h
@@ -35,10 +35,11 @@
typedef struct stringtable {
GCObject **hash;
+ GCObject **trees; /* added by DHK */
lu_int32 nuse; /* number of elements */
int size;
} stringtable;
Index: src/lstring.c
==================================================================
--- src/lstring.c
+++ src/lstring.c
@@ -15,21 +15,92 @@
#include "lmem.h"
#include "lobject.h"
#include "lstate.h"
#include "lstring.h"
+/* Red-Black Trees for strings added by David Heiko Kolf to prevent an attack
+ on hash tables described in n.runs-SA-2011.004.
+ This implementation is based on
+ http://www.eternallyconfuzzled.com/tuts/datastructures/jsw_tut_rbtree.aspx.
+ Modifications in other parts of Lua are marked using "DHK".
+ */
+
+GCObject *luaS_rottree(lua_State *L, GCObject *n, int dir) /* single rotation: n's child on the side opposite to dir moves above n and is returned as the new subtree root; the caller must relink it */
+{
+ GCObject *s = n->ts.tsv.child[!dir]; /* node that moves up; it is dereferenced below, so it must not be NULL */
+ n->ts.tsv.child[!dir] = s->ts.tsv.child[dir]; /* s's subtree on the dir side is handed over to n */
+ s->ts.tsv.child[dir] = n; /* n becomes s's child on the dir side */
+ n->ts.tsv.isred = 1; /* the old subtree root becomes red ... */
+ s->ts.tsv.isred = 0; /* ... and the new one black */
+ (void) L; /* unused */
+ return s;
+}
+static void insertstr(lua_State *L, GCObject **tree, GCObject *o) /* links string o into the red-black tree rooted at *tree (ordered by length, then bytes) and stores the resulting black root back into *tree */
+{
+ /* insert the string in a red-black tree */
+ GCObject *root = *tree;
+ GCObject *q, *p, *g, *t; /* current node, its parent, its grandparent, and the node above the grandparent */
+ GCObject head; /* dummy node; only its isred and child[] fields are initialized */
+ int dir = 0, last = 0; /* dir: direction of the latest step (p to q); last: direction of the step before it (g to p) */
+ const char *str = getstr(&o->ts);
+ size_t l = o->ts.tsv.len;
+ o->ts.tsv.child[0] = o->ts.tsv.child[1] = NULL; /* o always enters as a leaf; links left over from an earlier tree are dropped */
+ if (!root) { /* empty tree: o becomes the black root */
+ *tree = o;
+ o->ts.tsv.isred = 0;
+ return;
+ }
+ /* put a dummy object in front of the root to avoid special cases */
+ head.ts.tsv.isred = 0;
+ head.ts.tsv.child[0] = NULL;
+ head.ts.tsv.child[1] = root;
+ t = &head; q = root; g = p = NULL;
+ for (;;) {
+ if (q == NULL) { /* fell off the tree: attach o here as a red leaf */
+ p->ts.tsv.child[dir] = q = o;
+ o->ts.tsv.isred = 1;
+ } else if (isred (q->ts.tsv.child[0]) && isred (q->ts.tsv.child[1])) { /* both children red: color flip */
+ q->ts.tsv.isred = 1;
+ q->ts.tsv.child[0]->ts.tsv.isred = 0;
+ q->ts.tsv.child[1]->ts.tsv.isred = 0;
+ }
+ if (isred (q) && isred (p)) { /* two red nodes in a row: repair by rotating around g */
+ int pdir = t->ts.tsv.child[1] == g; /* which child of t is g */
+ if (q == p->ts.tsv.child[last]) { /* q continues in the same direction as p: single rotation */
+ t->ts.tsv.child[pdir] = luaS_rottree (L, g, !last);
+ } else { /* q is on the inner side: double rotation */
+ g->ts.tsv.child[last] = luaS_rottree (L, g->ts.tsv.child[last], last);
+ t->ts.tsv.child[pdir] = luaS_rottree (L, g, !last);
+ }
+ }
+ if (q == o) break; /* o is linked and any red-red violation has been repaired */
+ last = dir;
+ if (l < q->ts.tsv.len) dir = 0; /* shorter strings go left ... */
+ else if (l > q->ts.tsv.len) dir = 1; /* ... longer strings go right ... */
+ else dir = memcmp(str, getstr(&q->ts), l * sizeof(char)) > 0; /* ... equal lengths are ordered by their bytes */
+ if (g != NULL) t = g; /* shift the ancestor window one level down */
+ g = p;
+ p = q;
+ q = q->ts.tsv.child[dir];
+ }
+ *tree = head.ts.tsv.child[1]; /* rotations may have changed the root */
+ (*tree)->ts.tsv.isred = 0; /* the root is always colored black */
+}
void luaS_resize (lua_State *L, int newsize) {
GCObject **newhash;
+ GCObject **newtrees; /* added by DHK */
stringtable *tb;
int i;
if (G(L)->gcstate == GCSsweepstring)
return; /* cannot resize during GC traverse */
newhash = luaM_newvector(L, newsize, GCObject *);
+ newtrees = luaM_newvector(L, newsize, GCObject *);
tb = &G(L)->strt;
for (i=0; i<newsize; i++) newhash[i] = NULL;
+ for (i=0; i<newsize; i++) newtrees[i] = NULL;
/* rehash */
for (i=0; i<tb->size; i++) {
GCObject *p = tb->hash[i];
while (p) { /* for each node in the list */
GCObject *next = p->gch.next; /* save next */
@@ -36,16 +107,19 @@
unsigned int h = gco2ts(p)->hash;
int h1 = lmod(h, newsize); /* new position */
lua_assert(cast_int(h%newsize) == lmod(h, newsize));
p->gch.next = newhash[h1]; /* chain it */
newhash[h1] = p;
+ insertstr(L, &newtrees[h1], p);
p = next;
}
}
luaM_freearray(L, tb->hash, tb->size, TString *);
+ luaM_freearray(L, tb->trees, tb->size, TString *);
tb->size = newsize;
tb->hash = newhash;
+ tb->trees = newtrees;
}
static TString *newlstr (lua_State *L, const char *str, size_t l,
unsigned int h) {
@@ -64,10 +138,11 @@
tb = &G(L)->strt;
h = lmod(h, tb->size);
ts->tsv.next = tb->hash[h]; /* chain new entry */
tb->hash[h] = obj2gco(ts);
tb->nuse++;
+ insertstr(L, &tb->trees[h], cast(GCObject *, ts));
if (tb->nuse > cast(lu_int32, tb->size) && tb->size <= MAX_INT/2)
luaS_resize(L, tb->size*2); /* too crowded */
return ts;
}
@@ -77,18 +152,23 @@
unsigned int h = cast(unsigned int, l); /* seed */
size_t step = (l>>5)+1; /* if string is too long, don't hash all its chars */
size_t l1;
for (l1=l; l1>=step; l1-=step) /* compute hash */
h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
- for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)];
- o != NULL;
- o = o->gch.next) {
+ for (o = G(L)->strt.trees[lmod(h, G(L)->strt.size)];
+ o != NULL; ) { /* binary tree search by DHK */
TString *ts = rawgco2ts(o);
- if (ts->tsv.len == l && (memcmp(str, getstr(ts), l) == 0)) {
+ int c ;
+ if (l < ts->tsv.len) c = -1;
+ else if (l > ts->tsv.len) c = 1;
+ else c = memcmp(str, getstr(ts), l * sizeof(char));
+ if (c == 0) {
/* string may be dead */
if (isdead(G(L), o)) changewhite(o);
return ts;
+ } else {
+ o = ts->tsv.child[c > 0];
}
}
return newlstr(L, str, l, h); /* not found */
}
Index: src/lstring.h
==================================================================
--- src/lstring.h
+++ src/lstring.h
@@ -25,7 +25,10 @@
LUAI_FUNC void luaS_resize (lua_State *L, int newsize);
LUAI_FUNC Udata *luaS_newudata (lua_State *L, size_t s, Table *e);
LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l);
+/* Added by DHK: */
+#define isred(o) ((o) && (o)->ts.tsv.isred)
+LUAI_FUNC GCObject *luaS_rottree(lua_State *L, GCObject *n, int dir);
#endif
Index: src/ltable.c
==================================================================
--- src/ltable.c
+++ src/ltable.c
@@ -60,10 +60,14 @@
#define hashmod(t,n) (gnode(t, ((n) % ((sizenode(t)-1)|1))))
#define hashpointer(t,p) hashmod(t, IntPoint(p))
+/* Changed by DHK: hash the pointer, in case the strings were chosen by an
+ attacker to cause lots of hash collisions. */
+#undef hashstr
+#define hashstr hashpointer
/*
** number of ints inside a lua_Number
*/
#define numints cast_int(sizeof(lua_Number)/sizeof(int))