From c55661843fd906073850eb2c9224009ff92acbeb Mon Sep 17 00:00:00 2001 From: Ray Gardner Date: Thu, 29 Aug 2024 20:59:50 -0600 Subject: [PATCH] Mod getrec_f() to permit null RS="()" An empty RS string (RS="") is special and causes awk to enter multiline record mode. But a null RS (e.g. RS="()") matches an empty string at the beginning of the file, and cannot match any separator string, which caused an infinite loop of output. Other awks (except busybox awk, which has the same bug, and mawk which gives an error on that regex) treat it as not matching anything (same as RS="^$"), so the entire input becomes a single record (../cmit_ggdb). Here we adopt the same policy. Also added CHANGELOG.md and updated README.md. --- toys/pending/awk.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/toys/pending/awk.c b/toys/pending/awk.c index e94d26f1..fd7675ec 100644 --- a/toys/pending/awk.c +++ b/toys/pending/awk.c @@ -3346,6 +3346,7 @@ static int rx_findx(regex_t *rx, char *s, long len, regoff_t *start, regoff_t *e return 0; } +// get a record; return length, or 0 at EOF static ssize_t getrec_f(struct zfile *zfp) { int r = 0; @@ -3372,21 +3373,24 @@ static ssize_t getrec_f(struct zfile *zfp) } TT.rgl.recptr = zfp->recbuf + zfp->recoffs; r = rx_findx(rsrxp, TT.rgl.recptr, zfp->endoffs - zfp->recoffs, &so, &eo, 0); - // if not found, or found "near" end of buffer... + if (!r && so == eo) r = 1; // RS was empty, so fake not found if (r || zfp->recoffs + eo > (int)zfp->recbufsize - RS_LENGTH_MARGIN) { - // if at end of data, and (not found or found at end of data) + // not found, or found "near" end of buffer... 
if (zfp->endoffs < (int)zfp->recbufsize && (r || zfp->recoffs + eo == zfp->endoffs)) { + // at end of data, and (not found or found at end of data) ret = zfp->endoffs - zfp->recoffs; zfp->recoffs = zfp->endoffs; break; } if (zfp->recoffs) { + // room to move data up: move remaining data in buffer to low end memmove(zfp->recbuf, TT.rgl.recptr, zfp->endoffs - zfp->recoffs); zfp->endoffs -= zfp->recoffs; zfp->recoffs = 0; - } else zfp->recbuf = + } else zfp->recbuf = // enlarge buffer xrealloc(zfp->recbuf, (zfp->recbufsize = zfp->recbufsize * 3 / 2) + 1); + // try to read more into buffer past current data zfp->endoffs += fread(zfp->recbuf + zfp->endoffs, 1, zfp->recbufsize - zfp->endoffs, zfp->fp); zfp->recbuf[zfp->endoffs] = 0; -- 2.39.2