lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


On Wed, Jan 04, 2012 at 05:43:36PM -0500, Sean Conner wrote:
<snip>
> good    8.36
> bad     6.32
> 
> Note:  this on a quad-core 2.8GHz Pentium system.  Also note that the "good
> hash" covers 2,000,000 distinct strings, while the "bad hash" covers only
> 20,000 (one hundred times less strings).  The "good hash" function generates
> a random, 1024 byte string (not strictly text---it's 1024 bytes of random
> data).  The "bad hash" function generates a similarly random 1024 bytes, but
> the bytes that comprise the hash (from lstring.c:80) are identical from
> string to string.  One last note:  This was coded for Lua 5.1.

I suspect your generation is buggy, because I get something very different.

% ./genkeys -n 16 | /usr/bin/time ./collide
        0.20 real         0.17 user         0.01 sys
% ./genkeys -n 16 -c | /usr/bin/time ./collide
       68.78 real        68.46 user         0.15 sys

That's just for 2^16 keys. Here's my code in two files:

1) Lua code to read and insert keys

#!/usr/bin/env lua

local key
local hash = {}

for key in function () return io.read("*l") end do
	hash[key] = true
end



2) genkeys.c generates keys. do ./genkeys -h to see options

/*
 * make genkeys CFLAGS="-Wall -pedantic"
 *
 * Should work on both Linux and *BSD.
 *
 * `./genkeys -h` to see usage.
 *
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/param.h>
#include <unistd.h>


static unsigned hash(const void *_src, size_t len) {
	const unsigned char *src = _src;
	unsigned step = (len >> 5)+1;
	unsigned h = len;
	unsigned l1;

	for (l1 = len; l1 >= step; l1-=step) {
		h = h ^ ((h<<5) + (h>>2) + src[l1-1]);
	}

	return h;
} /* hash() */


static void frepc(int ch, int count, FILE *fp) {
	while (count-- > 0)
		fputc(ch, fp);
} /* frepc() */


static void fnybc(unsigned n, int i, FILE *fp) {
	fputc("0123456789abcdef"[0x0f & (n >> (i * 4))], fp);
} /* fnybc() */


static void genkey(unsigned ctr, _Bool collide, FILE *fp) {
	int i;

	frepc('=', 20, fp);

	for (i = 7; i >= 0; i--) {
		fputc(' ', fp);
		fnybc(ctr, i, fp);
	}

	if (collide)
		fputc(' ', fp);
} /* genkey() */


struct buffer {
	FILE *fp;
	unsigned char *p, *pe, data[256];
}; /* struct buffer */

static void bufreset(struct buffer *buf) {
	rewind(buf->fp);
	buf->p = buf->data;
	buf->pe = &buf->data[sizeof buf->data];
} /* bufreset() */

#if !defined(__linux)
static int bufcopy(void *_buf, const char *src, int len) {
	struct buffer *buf = _buf;
	int lim = MIN(len, (buf->pe - buf->p));

	memcpy(buf->p, src, lim);
	buf->p += lim;

	return len;
} /* bufcopy() */
#endif /* !defined(__linux) */

static void bufinit(struct buffer *buf) {
	memset(buf, 0, sizeof *buf);
#if __linux
	buf->fp = fmemopen(buf->data, sizeof buf->data - 1, "w");
#else
	buf->fp = fwopen(buf, &bufcopy);
#endif
	setbuf(buf->fp, NULL);
} /* bufinit() */

static size_t buflen(struct buffer *buf) {
#if __linux
	return (size_t)ftell(buf->fp);
#else
	return buf->p - buf->data;
#endif
} /* buflen() */


#define USAGE \
	"%s -cn:vh\n" \
	"  -c    generate collisions\n" \
	"  -n E  generate 2^E keys\n" \
	"  -v    be verbose\n" \
	"  -h    print this usage message\n" \
	"\n" \
	"Report bugs to <william@25thandClement.com>\n"

int main(int argc, char *argv[]) {
	extern char *optarg;
	_Bool collide = 0, verbose = 0;
	struct buffer buf;
	unsigned ctr, max = 1U<<20;
	int optc;

	while (-1 != (optc = getopt(argc, argv, "cn:vh"))) {
		switch (optc) {
		case 'c':
			collide = 1;
			break;
		case 'n':
			max = 1U<<strtoul(optarg, NULL, 0);
			break;
		case 'v':
			verbose = 1;
			break;
		case 'h':
			fprintf(stdout, USAGE, argv[0]);

			return 0;
		case '?':
			fprintf(stderr, USAGE, argv[0]);

			return EXIT_FAILURE;
		} /* switch() */
	} /* while() */

	if (verbose) {
		bufinit(&buf);
		fprintf(stderr, "generating %u keys\n", max);
	}

	for (ctr = 0; ctr < max; ctr++) {
		genkey(ctr, collide, stdout);
		fputc('\n', stdout);

		if (verbose) {
			bufreset(&buf);
			genkey(ctr, collide, buf.fp);
			fprintf(stderr, "%s: %u\n", (char *)&buf.data, hash(buf.data, buflen(&buf)));
		}
	}

	return 0;
} /* main() */