Mercurial > hg > toybox
changeset 719:053061a0b5b6
The previous wc -m didn't handle multibyte characters that crossed a buffer boundary, so take a guess at making that work. (I haven't got a test case for this. I also don't know how to handle invalid sequences so just don't count them.)
author | Rob Landley <rob@landley.net> |
---|---|
date | Wed, 28 Nov 2012 03:12:02 -0600 |
parents | 0ed3351d91eb |
children | e2eb80481afc |
files | toys/posix/wc.c |
diffstat | 1 files changed, 17 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/toys/posix/wc.c	Tue Nov 27 11:09:04 2012 +0100
+++ b/toys/posix/wc.c	Wed Nov 28 03:12:02 2012 -0600
@@ -51,24 +51,30 @@
   unsigned long word=0, lengths[]={0,0,0};
 
   for (;;) {
-    len = read(fd, toybuf, sizeof(toybuf));
-    if (len<0) {
+    i = 0;
+again:
+    len = i+read(fd, toybuf+i, sizeof(toybuf)-i);
+    if (len < i) {
       perror_msg("%s",name);
-      toys.exitval = EXIT_FAILURE;
+      toys.exitval = 1;
     }
-    if (len<1) break;
+    if (!len) break;
     for (i=0; i<len; i+=clen) {
-      wchar_t wchar;
       if (CFG_TOYBOX_I18N && (toys.optflags&FLAG_m)) {
+        wchar_t wchar = 0;
+
         clen = mbrtowc(&wchar, toybuf+i, len-i, 0);
-        if(clen==(size_t)(-1)) {
-          if(i!=len-1) {
-            clen = 1;
+        if (clen < 1) {
+          // If the problem might be buffer wrap, move and read more data
+          if (i) {
+            memmove(toybuf, toybuf+i, sizeof(toybuf)-i);
+            i = len - i;
+            goto again;
+          } else {
+            clen=1;
             continue;
-          } else break;
+          }
         }
-        if(clen==(size_t)(-2)) break;
-        if(clen==0) clen=1;
         space = iswspace(wchar);
       } else space = isspace(toybuf[i]);
```