From 88a9e202c7bef768c49466a3878854c3ba4d0ebe Mon Sep 17 00:00:00 2001 From: Ray Gardner Date: Thu, 8 Aug 2024 17:47:59 -0600 Subject: [PATCH] Mods to make "interactive input" work with awk User @davidar (github.com/davidar, David A Roberts) notes that wak/toybox awk did not work correctly with interactive input. His example was wak '/^say/ {print $2}' Taking input from the terminal, input of 'say hi' should respond 'hi', but it did not, because the input routines do not stop at a newline; instead they attempt to fill a large buffer to accommodate the use of a regex as RS (record separator). David provided a patch using fgets() to stop reading input at a newline, but it caused the RS to be ignored. I tried checking for RS == "\n" but that was not sufficient: RS is usually a newline, and sending all input through fgets() was a big performance hit. So I also added a field 'is_tty' for each opened file (including stdin), and only use fgets() if RS is "\n" and the input file isatty(). I then found that I also had to set toybox awk to use line buffering. When using (non-toybox) wak, the system sets the output buffering to non-full-buffering if stdout is an "interactive device" (as required by POSIX). It would be nice if toybox provided an option to not set the buffering for stdout at all, say TOYFLAG_DEFAULTBUF, leaving it to the system; then toybox awk would not take the performance hit on stdout for all file operations. 
--- toys/pending/awk.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/toys/pending/awk.c b/toys/pending/awk.c index a0183fe0..f41db24c 100644 --- a/toys/pending/awk.c +++ b/toys/pending/awk.c @@ -5,7 +5,7 @@ * * See https://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html -USE_AWK(NEWTOY(awk, "F:v*f*bc", TOYFLAG_USR|TOYFLAG_BIN)) +USE_AWK(NEWTOY(awk, "F:v*f*bc", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LINEBUF)) config AWK bool "awk" @@ -86,16 +86,11 @@ GLOBALS( struct runtime_globals { struct zvalue cur_arg; - //char *filename; // UNUSED FILE *fp; // current data file int narg; // cmdline arg index int nfiles; // num of cmdline data file args processed int eof; // all cmdline files (incl. stdin) read char *recptr; - char *recbuf; - size_t recbufsize; - char *recbuf_multx; - size_t recbufsize_multx; struct zstring *zspr; // Global to receive sprintf() string value } rgl; @@ -132,7 +127,8 @@ GLOBALS( char *fn; FILE *fp; char mode; // w, a, or r - char file_or_pipe; // f or p + char file_or_pipe; // 1 if file, 0 if pipe + char is_tty; char is_std_file; char *recbuf; size_t recbufsize; @@ -3003,12 +2999,12 @@ static struct zvalue *setup_lvalue(int ref_stack_ptr, int parmbase, int *field_n return v; // order FATAL() and return to mute warning } - -static struct zfile *new_file(char *fn, FILE *fp, char mode, char file_or_pipe) +static struct zfile *new_file(char *fn, FILE *fp, char mode, char file_or_pipe, + char is_std_file) { struct zfile *f = xzalloc(sizeof(struct zfile)); *f = (struct zfile){TT.zfiles, xstrdup(fn), fp, mode, file_or_pipe, - 0, 0, 0, 0, 0, 0, 0, 0, 0}; + isatty(fileno(fp)), is_std_file, 0, 0, 0, 0, 0, 0, 0, 0}; return TT.zfiles = f; } @@ -3073,7 +3069,7 @@ static struct zfile *setup_file(char file_or_pipe, char *mode) } FILE *fp = (file_or_pipe ? 
fopen : popen)(fn, mode); if (fp) { - struct zfile *p = new_file(fn, fp, *mode, file_or_pipe); + struct zfile *p = new_file(fn, fp, *mode, file_or_pipe, 0); drop(); return p; } @@ -3290,10 +3286,12 @@ static int next_fp(void) if (TT.cfile->fp && TT.cfile->fp != stdin) fclose(TT.cfile->fp); if ((!fn && !TT.rgl.nfiles && TT.cfile->fp != stdin) || (fn && !strcmp(fn, "-"))) { TT.cfile->fp = stdin; + TT.cfile->fn = "<stdin>"; zvalue_release_zstring(&STACK[FILENAME]); STACK[FILENAME].vst = new_zstring("<stdin>", 7); } else if (fn) { if (!(TT.cfile->fp = fopen(fn, "r"))) FFATAL("can't open %s\n", fn); + TT.cfile->fn = fn; zvalue_copy(&STACK[FILENAME], &TT.rgl.cur_arg); } else { TT.rgl.eof = 1; @@ -3301,6 +3299,7 @@ static int next_fp(void) } set_num(&STACK[FNR], 0); TT.cfile->recoffs = TT.cfile->endoffs = 0; // reset record buffer + TT.cfile->is_tty = isatty(fileno(TT.cfile->fp)); return 1; } @@ -3342,8 +3341,8 @@ static int rx_findx(regex_t *rx, char *s, long len, regoff_t *start, regoff_t *e static ssize_t getrec_f(struct zfile *zfp) { - int r = 0, rs = ENSURE_STR(&STACK[RS])->vst->str[0] & 0xff; - if (!rs) return getrec_multiline(zfp); + int r = 0; + if (!ENSURE_STR(&STACK[RS])->vst->str[0]) return getrec_multiline(zfp); regex_t rsrx, *rsrxp = &rsrx; // TEMP!! 
FIXME Need to cache and avoid too-frequent rx compiles rx_zvalue_compile(&rsrxp, &STACK[RS]); @@ -3355,7 +3354,11 @@ static ssize_t getrec_f(struct zfile *zfp) #define RS_LENGTH_MARGIN (INIT_RECBUF_LEN / 8) if (!zfp->recbuf) zfp->recbuf = xmalloc((zfp->recbufsize = INIT_RECBUF_LEN) + 1); - zfp->endoffs = fread(zfp->recbuf, 1, zfp->recbufsize, zfp->fp); + if (zfp->is_tty && !memcmp(STACK[RS].vst->str, "\n", 2)) { + zfp->endoffs = 0; + if (fgets(zfp->recbuf, zfp->recbufsize, zfp->fp)) + zfp->endoffs = strlen(zfp->recbuf); + } else zfp->endoffs = fread(zfp->recbuf, 1, zfp->recbufsize, zfp->fp); zfp->recoffs = 0; zfp->recbuf[zfp->endoffs] = 0; if (!zfp->endoffs) break; @@ -4475,11 +4478,11 @@ static void run(int optind, int argc, char **argv, char *sepstring, xregcomp(&TT.rx_default, "[ \t\n]+", REG_EXTENDED); xregcomp(&TT.rx_last, "[ \t\n]+", REG_EXTENDED); xregcomp(&TT.rx_printf_fmt, printf_fmt_rx, REG_EXTENDED); - new_file("-", stdin, 'r', 'f')->is_std_file = 1; - new_file("/dev/stdin", stdin, 'r', 'f')->is_std_file = 1; - new_file("/dev/stdout", stdout, 'w', 'f')->is_std_file = 1; + new_file("-", stdin, 'r', 1, 1); + new_file("/dev/stdin", stdin, 'r', 1, 1); + new_file("/dev/stdout", stdout, 'w', 1, 1); TT.zstdout = TT.zfiles; - new_file("/dev/stderr", stderr, 'w', 'f')->is_std_file = 1; + new_file("/dev/stderr", stderr, 'w', 1, 1); seedrand(123); int status = -1, r = 0; if (TT.cgl.first_begin) r = interp(TT.cgl.first_begin, &status); -- 2.39.2