[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: Struct library patch WAS: Roberto's struct library missing "size" function?
- From: Flemming Madsen <lua@...>
- Date: Thu, 15 Oct 2009 09:50:49 +0200
I recently had to do some serious binary manipulation. The 'struct' library came in very handy, but fell short in a couple of cases; these are addressed by the attached patch (which, by the way, also adds the ability to report sizes).
- c0 replaced with c<nothing>. This means you can build the format string programmatically without treating zero-length strings as a special case.
- Can handle 'long long' integers (i8 / I8)
- Can insert/specify padding anywhere in a struct ('X', e.g. when a string follows a union).
- Can report current offset in both pack and unpack ('=')
- Can mask out return values when you only want to calculate sizes or unmarshal Pascal-style strings ('(' and ')').
- Handles doubles with swapped hi/lo words (seen on an ARM platform).
- Can unpack arbitrary data given as a (userdata, length) pair (like the alien version of struct).
** Valid formats:
** > - big endian
** < - little endian
** ![num] - alignment
** x[num] - pad num bytes, default 1
** X[num] - pad to num align, default MAXALIGN
** b/B - signed/unsigned byte
** h/H - signed/unsigned short
** l/L - signed/unsigned long
** i/I[num] - signed/unsigned integer with size `num' (default is size of int)
** c[num] - sequence of `num' chars (from/to a string); when packing, num
absent means the whole string; when unpacking, num absent means
use the previous read number as the string length.
** s - zero terminated string
** f - float
** d - double
** ' ' - ignored
** '(' ')' - '(' stops assigning items; ')' resumes assigning (masked items become padding when packing)
** '=' - return current position / offset
/Flemming
--- struct.orig 2009-10-15 08:30:59.000000000 +0200
+++ struct.c 2009-10-15 09:45:31.000000000 +0200
@@ -1,37 +1,43 @@
-
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
+#include <stdbool.h>
#include "lua.h"
#include "lauxlib.h"
+#ifndef LLONG_MAX
+#define LLONG_MAX 9223372036854775807LL
+#endif
/*
** {======================================================
** Library for packing/unpacking structures.
-** $Id: struct.c,v 1.2 2008/04/18 20:06:01 roberto Exp $
+** $Id: struct.c 20405 2009-10-15 07:06:45Z fm $
** =======================================================
*/
/*
** Valid formats:
-** > - big endian
-** < - little endian
-** ![num] - alignment
-** x - pading
-** b/B - signed/unsigned byte
-** h/H - signed/unsigned short
-** l/L - signed/unsigned long
-** i/In - signed/unsigned integer with size `n' (default is size of int)
-** cn - sequence of `n' chars (from/to a string); when packing, n==0 means
- the whole string; when unpacking, n==0 means use the previous
- read number as the string length
-** s - zero-terminated string
-** f - float
-** d - doulbe
-** ' ' - ignored
+** > - big endian
+** < - little endian
+** ![num] - alignment
+** x[num] - pad num bytes, default 1
+** X[num] - pad to num align, default MAXALIGN
+** b/B - signed/unsigned byte
+** h/H - signed/unsigned short
+** l/L - signed/unsigned long
+** i/I[num] - signed/unsigned integer with size `n' (default is size of int)
+** c[num] - sequence of `num' chars (from/to a string); when packing, num
+ absent means the whole string; when unpacking, num absent means
+ use the previous read number as the string length.
+** s - zero terminated string
+** f - float
+** d - double
+** ' ' - ignored
+** '(' ')' - stop assigning items. ')' start assigning (padding when packing)
+** '=' - return current position / offset
*/
@@ -59,10 +65,18 @@
char endian;
} const native = {1};
+static union dblswap {
+ long long dummy;
+ double dbl;
+ long l[2];
+} const swaptest = {0x00000000c0000000};
+
typedef struct Header {
int endian;
int align;
+ bool noassign;
+ bool dblswap;
} Header;
@@ -79,7 +93,8 @@
}
-#define defaultoptions(h) ((h)->endian = native.endian, (h)->align = 1)
+#define defaultoptions(h) ((h)->endian = native.endian, (h)->align = 1, (h)->noassign = 0, \
+ (h)->dblswap = false)
@@ -90,9 +105,18 @@
case 'L': case 'l': return sizeof(long);
case 'f': return sizeof(float);
case 'd': return sizeof(double);
- case 'x': return 1;
- case 'c': return getnum(fmt, 1);
- case 's': case ' ': case '<': case '>': case '!': return 0;
+ case 'x': return getnum(fmt, 1);
+ case 'X': return getnum(fmt, MAXALIGN);
+ case 'c': return getnum(fmt, 0);
+ case 's':
+ case ' ':
+ case '<':
+ case '>':
+ case '(':
+ case ')':
+ case '!':
+ case '=':
+ return 0;
case 'i': case 'I': {
int sz = getnum(fmt, sizeof(int));
if (!isp2(sz))
@@ -108,7 +132,7 @@
static int gettoalign (size_t len, Header *h, int opt, size_t size) {
- if (size == 0 || opt == 'c') return 0;
+ if (size == 0 || opt == 'c' || opt == 's') return 0;
if (size > (size_t)h->align) size = h->align; /* respect max. alignment */
return (size - (len & (size - 1))) & (size - 1);
}
@@ -117,8 +141,10 @@
static void commoncases (lua_State *L, int opt, const char **fmt, Header *h) {
switch (opt) {
case ' ': return; /* ignore white spaces */
- case '>': h->endian = BIG; return;
- case '<': h->endian = LITTLE; return;
+ case '>': h->endian = BIG; (h)->dblswap = (swaptest.dbl == -2); return;
+ case '<': h->endian = LITTLE; (h)->dblswap = (swaptest.dbl == -2); return;
+ case '(': h->noassign = true; return;
+ case ')': h->noassign = false; return;
case '!': {
int a = getnum(fmt, MAXALIGN);
if (!isp2(a))
@@ -134,11 +160,11 @@
static void putinteger (lua_State *L, luaL_Buffer *b, int arg, int endian,
int size) {
lua_Number n = luaL_checknumber(L, arg);
- unsigned long value;
- if (n < (lua_Number)LONG_MAX)
- value = (long)n;
+ unsigned long long value;
+ if (n < (lua_Number)LLONG_MAX)
+ value = (long long)n;
else
- value = (unsigned long)n;
+ value = (unsigned long long)n;
if (endian == LITTLE) {
int i;
for (i = 0; i < size; i++)
@@ -168,6 +194,8 @@
luaL_Buffer b;
const char *fmt = luaL_checkstring(L, 1);
Header h;
+ int poscnt = 0;
+ int posBuf[10];
int arg = 2;
size_t totalsize = 0;
defaultoptions(&h);
@@ -179,14 +207,19 @@
int toalign = gettoalign(totalsize, &h, opt, size);
totalsize += toalign;
while (toalign-- > 0) luaL_putchar(&b, '\0');
+ if (opt == 'X')
+ size = 0;
+ if (h.noassign && size)
+ opt = 'x';
switch (opt) {
case 'b': case 'B': case 'h': case 'H':
case 'l': case 'L': case 'i': case 'I': { /* integer types */
putinteger(L, &b, arg++, h.endian, size);
break;
}
- case 'x': {
- luaL_putchar(&b, '\0');
+ case 'x': case 'X': {
+ size_t l = size;
+ while (l-- > 0) luaL_putchar(&b, '\0');
break;
}
case 'f': {
@@ -196,15 +229,21 @@
break;
}
case 'd': {
- double d = luaL_checknumber(L, arg++);
+ union dblswap d;
+ d.dbl = luaL_checknumber(L, arg++);
correctbytes((char *)&d, size, h.endian);
+ if (h.dblswap) {
+ long tmp = d.l[0];
+ d.l[0] = d.l[1];
+ d.l[1] = tmp;
+ }
luaL_addlstring(&b, (char *)&d, size);
break;
}
case 'c': case 's': {
size_t l;
const char *s = luaL_checklstring(L, arg++, &l);
- if (size == 0) size = l;
+ if (opt == 's' || (opt == 'c' && fmt[-1] == 'c')) size = l;
luaL_argcheck(L, l >= (size_t)size, arg, "string too short");
luaL_addlstring(&b, s, size);
if (opt == 's') {
@@ -213,12 +252,19 @@
}
break;
}
+ case '=': {
+ if (poscnt < sizeof(posBuf)/sizeof(posBuf[0]))
+ posBuf[poscnt++] = totalsize + 1;
+ break;
+ }
default: commoncases(L, opt, &fmt, &h);
}
totalsize += size;
}
luaL_pushresult(&b);
- return 1;
+ for (arg = 0; arg < poscnt; arg++)
+ lua_pushinteger(L, posBuf[arg]);
+ return poscnt + 1;
}
@@ -250,8 +296,24 @@
Header h;
const char *fmt = luaL_checkstring(L, 1);
size_t ld;
- const char *data = luaL_checklstring(L, 2, &ld);
- size_t pos = luaL_optinteger(L, 3, 1) - 1;
+ const char *data;
+ size_t pos;
+ lua_Number lastnum = 0;
+
+ void pushnumber(lua_Number n) {
+ lastnum = n;
+ if (h.noassign) return;
+ lua_pushnumber(L, n);
+ }
+
+ if (lua_isuserdata(L, 2)) {
+ data = (const char*)lua_touserdata(L, 2);
+ ld = (size_t)luaL_checkinteger(L, 3);
+ pos = luaL_optinteger(L, 4, 1) - 1;
+ } else {
+ data = luaL_checklstring(L, 2, &ld);
+ pos = luaL_optinteger(L, 3, 1) - 1;
+ }
defaultoptions(&h);
lua_settop(L, 2);
while (*fmt) {
@@ -259,40 +321,45 @@
size_t size = optsize(L, opt, &fmt);
pos += gettoalign(pos, &h, opt, size);
luaL_argcheck(L, pos+size <= ld, 2, "data string too short");
+ if (opt == 'X')
+ size = 0;
switch (opt) {
case 'b': case 'B': case 'h': case 'H':
case 'l': case 'L': case 'i': case 'I': { /* integer types */
int issigned = islower(opt);
lua_Number res = getinteger(data+pos, h.endian, issigned, size);
- lua_pushnumber(L, res);
+ pushnumber(res);
break;
}
- case 'x': {
+ case 'x': case 'X': {
break;
}
case 'f': {
float f;
memcpy(&f, data+pos, size);
correctbytes((char *)&f, sizeof(f), h.endian);
- lua_pushnumber(L, f);
+ pushnumber(f);
break;
}
case 'd': {
- double d;
+ union dblswap d;
memcpy(&d, data+pos, size);
correctbytes((char *)&d, sizeof(d), h.endian);
- lua_pushnumber(L, d);
+ if (h.dblswap) {
+ long tmp = d.l[0];
+ d.l[0] = d.l[1];
+ d.l[1] = tmp;
+ }
+ pushnumber(d.dbl);
break;
}
case 'c': {
- if (size == 0) {
- if (!lua_isnumber(L, -1))
- luaL_error(L, "format `c0' needs a previous size");
- size = lua_tonumber(L, -1);
- lua_pop(L, 1);
+ if (fmt[-1] == 'c') {
+ size = lastnum;
luaL_argcheck(L, pos+size <= ld, 2, "data string too short");
}
- lua_pushlstring(L, data+pos, size);
+ if (!h.noassign)
+ lua_pushlstring(L, data+pos, size);
break;
}
case 's': {
@@ -300,7 +367,12 @@
if (e == NULL)
luaL_error(L, "unfinished string in data");
size = (e - (data+pos)) + 1;
- lua_pushlstring(L, data+pos, size - 1);
+ if (!h.noassign)
+ lua_pushlstring(L, data+pos, size - 1);
+ break;
+ }
+ case '=': {
+ lua_pushinteger(L, pos + 1);
break;
}
default: commoncases(L, opt, &fmt, &h);