[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: Lua patterns proposed extension
- From: "Dmitriy Kryuk" <kryukdmitriy@...>
- Date: Sun, 06 Mar 2011 00:00:55 +0600
Lua patterns support an item of the form %bxy that matches a sequence of
characters in which 'x' and 'y' are balanced. This is inadequate in
cases where those characters do not only form the "bracket structure"
but can also appear "escaped" as ordinary characters.
I propose a patch that adds a %Bxyz pattern item to Lua. It stands for
"a sequence of characters with balanced 'x' and 'y' characters, where any
character immediately following a 'z' is treated as an ordinary character
and is not counted for balancing".
I have used it to parse Apache log files like this:
str = '"GET /A\\"B HTTP/1.1"'
str:match('%B""\\') -- captures the whole string
The patch follows:
diff -Naur lua-5.1.4-old/src/lstrlib.c lua-5.1.4/src/lstrlib.c
--- lua-5.1.4-old/src/lstrlib.c 2008-07-11 17:27:21.000000000 +0000
+++ lua-5.1.4/src/lstrlib.c 2011-03-04 13:50:48.000000000 +0000
@@ -298,6 +298,35 @@
}
+static const char *matchbalanceesc (MatchState *ms, const char *s, /* match a '%Bxyz' item at s */
+ const char *p) { /* p points at the three argument chars x, y, z; returns the position just past the balancing y, or NULL */
+ if (*p == 0 || *(p+1) == 0 || *(p+2) == 0 ) /* all three argument chars must precede the pattern's terminating NUL */
+ luaL_error(ms->L, "unbalanced pattern");
+ if (*s != *p) return NULL; /* subject must start with the opening char x */
+ else {
+ int b = *p; /* opening char (x) */
+ int e = *(p+1); /* closing char (y) */
+ int m = *(p+2); /* escape char (z) */
+ int cont = 1; /* nesting depth; 1 accounts for the opener matched above */
+ int esc = 0; /* nonzero when the previous char was an unescaped escape char */
+ while (++s < ms->src_end) { /* scan from the char after the opener to the end of the subject */
+ if (esc) { /* this char is escaped: it is not examined as opener, closer or escape */
+ esc=0;
+ continue;
+ }
+ if (*s == m) { /* escape is tested first, so it takes precedence if z equals x or y */
+ esc=1;
+ }
+ else if (*s == e) { /* closer is tested before opener, so when x == y the char closes */
+ if (--cont == 0) return s+1; /* balanced: return the position just past this closer */
+ }
+ else if (*s == b) cont++; /* nested opener */
+ }
+ }
+ return NULL; /* string ends out of balance */
+}
+
+
static const char *max_expand (MatchState *ms, const char *s,
const char *p, const char *ep) {
ptrdiff_t i = 0; /* counts maximum expand for item */
@@ -381,6 +410,11 @@
if (s == NULL) return NULL;
p+=4; goto init; /* else return match(ms, s, p+4); */
}
+ case 'B': { /* balanced-escaped string? */
+ s = matchbalanceesc(ms, s, p+2); /* p+2 skips "%B" and points at the three argument chars */
+ if (s == NULL) return NULL;
+ p+=5; goto init; /* skip "%Bxyz"; else return match(ms, s, p+5); */
+ }
case 'f': { /* frontier? */
const char *ep; char previous;
p += 2;