lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


Greetings!
I've run into a case where Lua is unable to parse large, deeply nested tables. Unfortunately, only 5 nesting levels are guaranteed to parse, which seems too few for such a wonderful language as Lua :-) I've made some attempts to raise the limit of guaranteed nesting levels a little.

In effect, the LFIELDS_PER_FLUSH constant defines a fixed-size window of register slots available to the list fields of a given nesting level. I've tried to make this window proportional to the number of free register slots (MAXREGS - fs->freereg). With this approach, 29 nesting levels are guaranteed to parse (real-life cases may handle deeper nesting). It is not a big achievement, but it looks a little better and may satisfy many more demands for parsing structured data with plain Lua.

The main change is to add a "fields_per_flush" field to the ConsControl structure, initialize it when constructor() is invoked, and then use it in closelistfield() instead of the constant LFIELDS_PER_FLUSH. The idea is that as the window of free registers shrinks, the field flush buffer shrinks along with it.

Patches for Lua-5.4.6 are attached. The basic tests pass; some of the full tests fail (3 of them), but the same ones fail with the unpatched Lua as well.

Changes in the code size are:

readelf -s liblua.a | grep constructor
    59: 000032b0   670 FUNC    LOCAL  DEFAULT    7 constructor
 readelf -s liblua.a | grep constructor
    59: 000032b0   674 FUNC    LOCAL  DEFAULT    7 constructor


readelf -s liblua.a | grep luaK_setlist
   162: 00003110   132 FUNC    GLOBAL INTERNAL    8 luaK_setlist
 readelf -s liblua.a | grep luaK_setlist
   162: 00003110   148 FUNC    GLOBAL INTERNAL    8 luaK_setlist

gcc --version
gcc (GCC) 12.2.0

 uname -a
Linux box 6.1.2-tinycore #612 SMP Mon Jan 2 16:44:20 UTC 2023 i686 GNU/Linux

Best regards!

Andrey Dobrovolsky

--- lcode.c.orig	2023-05-02 23:02:28.000000000 +0300
+++ lcode.c	2023-10-05 17:14:05.857087016 +0300
@@ -31,10 +31,6 @@
 #include "lvm.h"
 
 
-/* Maximum number of registers in a Lua function (must fit in 8 bits) */
-#define MAXREGS		255
-
-
 #define hasjumps(e)	((e)->t != (e)->f)
 
 
@@ -1804,8 +1800,11 @@
 ** 'tostore' is number of values (in registers 'base + 1',...) to add to
 ** table (or LUA_MULTRET to add up to stack top).
 */
-void luaK_setlist (FuncState *fs, int base, int nelems, int tostore) {
-  lua_assert(tostore != 0 && tostore <= LFIELDS_PER_FLUSH);
+void luaK_setlist (FuncState *fs, ConsControl *cc, int tostore) {
+  int base = cc->t->u.info;
+  int nelems = cc->na;
+
+  lua_assert(tostore != 0 && tostore <= cc->fields_per_flush);
   if (tostore == LUA_MULTRET)
     tostore = 0;
   if (nelems <= MAXARG_C)
--- lcode.h.orig	2023-05-02 23:02:29.000000000 +0300
+++ lcode.h	2023-10-05 17:15:22.973335908 +0300
@@ -59,6 +59,10 @@
 
 #define luaK_jumpto(fs,t)	luaK_patchlist(fs, luaK_jump(fs), t)
 
+/* Maximum number of registers in a Lua function (must fit in 8 bits) */
+#define MAXREGS		255
+
+
 LUAI_FUNC int luaK_code (FuncState *fs, Instruction i);
 LUAI_FUNC int luaK_codeABx (FuncState *fs, OpCode o, int A, unsigned int Bx);
 LUAI_FUNC int luaK_codeAsBx (FuncState *fs, OpCode o, int A, int Bx);
@@ -96,7 +100,7 @@
                             expdesc *v2, int line);
 LUAI_FUNC void luaK_settablesize (FuncState *fs, int pc,
                                   int ra, int asize, int hsize);
-LUAI_FUNC void luaK_setlist (FuncState *fs, int base, int nelems, int tostore);
+LUAI_FUNC void luaK_setlist (FuncState *fs, ConsControl *cc, int tostore);
 LUAI_FUNC void luaK_finish (FuncState *fs);
 LUAI_FUNC l_noret luaK_semerror (LexState *ls, const char *msg);
 
--- lopcodes.h.orig	2023-05-02 23:02:29.000000000 +0300
+++ lopcodes.h	2023-10-05 17:31:34.561405697 +0300
@@ -398,8 +398,4 @@
 #define opmode(mm,ot,it,t,a,m)  \
     (((mm) << 7) | ((ot) << 6) | ((it) << 5) | ((t) << 4) | ((a) << 3) | (m))
 
-
-/* number of list items to accumulate before a SETLIST instruction */
-#define LFIELDS_PER_FLUSH	50
-
 #endif
--- lparser.c.orig	2023-05-02 23:02:30.000000000 +0300
+++ lparser.c	2023-10-05 19:47:08.060676082 +0300
@@ -835,15 +835,6 @@
 */
 
 
-typedef struct ConsControl {
-  expdesc v;  /* last list item read */
-  expdesc *t;  /* table descriptor */
-  int nh;  /* total number of 'record' elements */
-  int na;  /* number of array elements already stored */
-  int tostore;  /* number of array elements pending to be stored */
-} ConsControl;
-
-
 static void recfield (LexState *ls, ConsControl *cc) {
   /* recfield -> (NAME | '['exp']') = exp */
   FuncState *fs = ls->fs;
@@ -869,8 +860,8 @@
   if (cc->v.k == VVOID) return;  /* there is no list item */
   luaK_exp2nextreg(fs, &cc->v);
   cc->v.k = VVOID;
-  if (cc->tostore == LFIELDS_PER_FLUSH) {
-    luaK_setlist(fs, cc->t->u.info, cc->na, cc->tostore);  /* flush */
+  if (cc->tostore == cc->fields_per_flush) {
+    luaK_setlist(fs, cc, cc->tostore);  /* flush */
     cc->na += cc->tostore;
     cc->tostore = 0;  /* no more items pending */
   }
@@ -881,13 +872,13 @@
   if (cc->tostore == 0) return;
   if (hasmultret(cc->v.k)) {
     luaK_setmultret(fs, &cc->v);
-    luaK_setlist(fs, cc->t->u.info, cc->na, LUA_MULTRET);
+    luaK_setlist(fs, cc, LUA_MULTRET);
     cc->na--;  /* do not count last expression (unknown number of elements) */
   }
   else {
     if (cc->v.k != VVOID)
       luaK_exp2nextreg(fs, &cc->v);
-    luaK_setlist(fs, cc->t->u.info, cc->na, cc->tostore);
+    luaK_setlist(fs, cc, cc->tostore);
   }
   cc->na += cc->tostore;
 }
@@ -922,6 +913,11 @@
 }
 
 
+#define FLUSH_PART_DIVIDER 8
+
+/* 2 <= (MAXREGS + 1) */
+
+
 static void constructor (LexState *ls, expdesc *t) {
   /* constructor -> '{' [ field { sep field } [sep] ] '}'
      sep -> ',' | ';' */
@@ -934,6 +930,7 @@
   cc.t = t;
   init_exp(t, VNONRELOC, fs->freereg);  /* table will be at stack top */
   luaK_reserveregs(fs, 1);
+  cc.fields_per_flush = ((MAXREGS - fs->freereg) / FLUSH_PART_DIVIDER) + 1;
   init_exp(&cc.v, VVOID, 0);  /* no value (yet) */
   checknext(ls, '{');
   do {
--- lparser.h.orig	2023-05-02 23:02:30.000000000 +0300
+++ lparser.h	2023-10-05 17:31:24.398127423 +0300
@@ -163,6 +163,16 @@
 } FuncState;
 
 
+typedef struct ConsControl {
+  expdesc v;  /* last list item read */
+  expdesc *t;  /* table descriptor */
+  int nh;  /* total number of 'record' elements */
+  int na;  /* number of array elements already stored */
+  int tostore;  /* number of array elements pending to be stored */
+  int fields_per_flush;  /* number of list items to accumulate before a SETLIST instruction */
+} ConsControl;
+
+
 LUAI_FUNC int luaY_nvarstack (FuncState *fs);
 LUAI_FUNC LClosure *luaY_parser (lua_State *L, ZIO *z, Mbuffer *buff,
                                  Dyndata *dyd, const char *name, int firstchar);