[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: Re: LUA fails to parse expression with big and nested lists.
- From: Andrey Dobrovolsky <ken@...>
- Date: Thu, 5 Oct 2023 20:44:19 +0300
Greetings!
I've run into a case where Lua is unable to parse big, nested tables.
Unfortunately, only 5 nesting levels are guaranteed to parse, which seems
to be too few for such a wonderful language as Lua :-)
I've made some attempts to raise the limit of guaranteed nesting levels
a little.
In fact, the LFIELDS_PER_FLUSH constant defines a fixed-size window of
register slots available for the list fields of each nesting level. I've
tried to make this window proportional to the number of free register
slots (MAXREGS - fs->freereg) instead. This approach guarantees that 29
nesting levels can be parsed (real-life cases may handle even deeper
nesting). Not a big achievement, but it looks a little better and may
satisfy many more demands for parsing structured data with plain Lua.
The main change is adding a "fields_per_flush" field to the ConsControl
structure, initializing it when constructor() is invoked, and then using
it in closelistfield() instead of the constant LFIELDS_PER_FLUSH. The
idea is that as the window of free registers shrinks, the field flush
buffer shrinks with it.
Patches for Lua-5.4.6 are attached. The basic tests pass; three of the
full tests fail, but the same three also fail with unpatched Lua.
Changes in the code size are:
readelf -s liblua.a | grep constructor
59: 000032b0 670 FUNC LOCAL DEFAULT 7 constructor
readelf -s liblua.a | grep constructor
59: 000032b0 674 FUNC LOCAL DEFAULT 7 constructor
readelf -s liblua.a | grep luaK_setlist
162: 00003110 132 FUNC GLOBAL INTERNAL 8 luaK_setlist
readelf -s liblua.a | grep luaK_setlist
162: 00003110 148 FUNC GLOBAL INTERNAL 8 luaK_setlist
gcc --version
gcc (GCC) 12.2.0
uname -a
Linux box 6.1.2-tinycore #612 SMP Mon Jan 2 16:44:20 UTC 2023 i686
GNU/Linux
Best regards!
Andrey Dobrovolsky
--- lcode.c.orig 2023-05-02 23:02:28.000000000 +0300
+++ lcode.c 2023-10-05 17:14:05.857087016 +0300
@@ -31,10 +31,6 @@
#include "lvm.h"
-/* Maximum number of registers in a Lua function (must fit in 8 bits) */
-#define MAXREGS 255
-
-
#define hasjumps(e) ((e)->t != (e)->f)
@@ -1804,8 +1800,11 @@
** 'tostore' is number of values (in registers 'base + 1',...) to add to
** table (or LUA_MULTRET to add up to stack top).
*/
-void luaK_setlist (FuncState *fs, int base, int nelems, int tostore) {
- lua_assert(tostore != 0 && tostore <= LFIELDS_PER_FLUSH);
+void luaK_setlist (FuncState *fs, ConsControl *cc, int tostore) {
+ int base = cc->t->u.info;
+ int nelems = cc->na;
+
+ lua_assert(tostore != 0 && tostore <= cc->fields_per_flush);
if (tostore == LUA_MULTRET)
tostore = 0;
if (nelems <= MAXARG_C)
--- lcode.h.orig 2023-05-02 23:02:29.000000000 +0300
+++ lcode.h 2023-10-05 17:15:22.973335908 +0300
@@ -59,6 +59,10 @@
#define luaK_jumpto(fs,t) luaK_patchlist(fs, luaK_jump(fs), t)
+/* Maximum number of registers in a Lua function (must fit in 8 bits) */
+#define MAXREGS 255
+
+
LUAI_FUNC int luaK_code (FuncState *fs, Instruction i);
LUAI_FUNC int luaK_codeABx (FuncState *fs, OpCode o, int A, unsigned int Bx);
LUAI_FUNC int luaK_codeAsBx (FuncState *fs, OpCode o, int A, int Bx);
@@ -96,7 +100,7 @@
expdesc *v2, int line);
LUAI_FUNC void luaK_settablesize (FuncState *fs, int pc,
int ra, int asize, int hsize);
-LUAI_FUNC void luaK_setlist (FuncState *fs, int base, int nelems, int tostore);
+LUAI_FUNC void luaK_setlist (FuncState *fs, ConsControl *cc, int tostore);
LUAI_FUNC void luaK_finish (FuncState *fs);
LUAI_FUNC l_noret luaK_semerror (LexState *ls, const char *msg);
--- lopcodes.h.orig 2023-05-02 23:02:29.000000000 +0300
+++ lopcodes.h 2023-10-05 17:31:34.561405697 +0300
@@ -398,8 +398,4 @@
#define opmode(mm,ot,it,t,a,m) \
(((mm) << 7) | ((ot) << 6) | ((it) << 5) | ((t) << 4) | ((a) << 3) | (m))
-
-/* number of list items to accumulate before a SETLIST instruction */
-#define LFIELDS_PER_FLUSH 50
-
#endif
--- lparser.c.orig 2023-05-02 23:02:30.000000000 +0300
+++ lparser.c 2023-10-05 19:47:08.060676082 +0300
@@ -835,15 +835,6 @@
*/
-typedef struct ConsControl {
- expdesc v; /* last list item read */
- expdesc *t; /* table descriptor */
- int nh; /* total number of 'record' elements */
- int na; /* number of array elements already stored */
- int tostore; /* number of array elements pending to be stored */
-} ConsControl;
-
-
static void recfield (LexState *ls, ConsControl *cc) {
/* recfield -> (NAME | '['exp']') = exp */
FuncState *fs = ls->fs;
@@ -869,8 +860,8 @@
if (cc->v.k == VVOID) return; /* there is no list item */
luaK_exp2nextreg(fs, &cc->v);
cc->v.k = VVOID;
- if (cc->tostore == LFIELDS_PER_FLUSH) {
- luaK_setlist(fs, cc->t->u.info, cc->na, cc->tostore); /* flush */
+ if (cc->tostore == cc->fields_per_flush) {
+ luaK_setlist(fs, cc, cc->tostore); /* flush */
cc->na += cc->tostore;
cc->tostore = 0; /* no more items pending */
}
@@ -881,13 +872,13 @@
if (cc->tostore == 0) return;
if (hasmultret(cc->v.k)) {
luaK_setmultret(fs, &cc->v);
- luaK_setlist(fs, cc->t->u.info, cc->na, LUA_MULTRET);
+ luaK_setlist(fs, cc, LUA_MULTRET);
cc->na--; /* do not count last expression (unknown number of elements) */
}
else {
if (cc->v.k != VVOID)
luaK_exp2nextreg(fs, &cc->v);
- luaK_setlist(fs, cc->t->u.info, cc->na, cc->tostore);
+ luaK_setlist(fs, cc, cc->tostore);
}
cc->na += cc->tostore;
}
@@ -922,6 +913,11 @@
}
+#define FLUSH_PART_DIVIDER 8
+
+/* 2 <= (MAXREGS + 1) */
+
+
static void constructor (LexState *ls, expdesc *t) {
/* constructor -> '{' [ field { sep field } [sep] ] '}'
sep -> ',' | ';' */
@@ -934,6 +930,7 @@
cc.t = t;
init_exp(t, VNONRELOC, fs->freereg); /* table will be at stack top */
luaK_reserveregs(fs, 1);
+ cc.fields_per_flush = ((MAXREGS - fs->freereg) / FLUSH_PART_DIVIDER) + 1;
init_exp(&cc.v, VVOID, 0); /* no value (yet) */
checknext(ls, '{');
do {
--- lparser.h.orig 2023-05-02 23:02:30.000000000 +0300
+++ lparser.h 2023-10-05 17:31:24.398127423 +0300
@@ -163,6 +163,16 @@
} FuncState;
+typedef struct ConsControl {
+ expdesc v; /* last list item read */
+ expdesc *t; /* table descriptor */
+ int nh; /* total number of 'record' elements */
+ int na; /* number of array elements already stored */
+ int tostore; /* number of array elements pending to be stored */
+ int fields_per_flush; /* number of list items to accumulate before a SETLIST instruction */
+} ConsControl;
+
+
LUAI_FUNC int luaY_nvarstack (FuncState *fs);
LUAI_FUNC LClosure *luaY_parser (lua_State *L, ZIO *z, Mbuffer *buff,
Dyndata *dyd, const char *name, int firstchar);