changeset 719:053061a0b5b6

The previous wc -m didn't handle multibyte characters that crossed a buffer boundary, so take a guess at making that work. (I haven't got a test case for this. I also don't know how to handle invalid sequences so just don't count them.)
author Rob Landley <rob@landley.net>
date Wed, 28 Nov 2012 03:12:02 -0600
parents 0ed3351d91eb
children e2eb80481afc
files toys/posix/wc.c
diffstat 1 files changed, 17 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/toys/posix/wc.c	Tue Nov 27 11:09:04 2012 +0100
+++ b/toys/posix/wc.c	Wed Nov 28 03:12:02 2012 -0600
@@ -51,24 +51,30 @@
   unsigned long word=0, lengths[]={0,0,0};
 
   for (;;) {
-    len = read(fd, toybuf, sizeof(toybuf));
-    if (len<0) {
+    i = 0;
+again:
+    len = i+read(fd, toybuf+i, sizeof(toybuf)-i);
+    if (len < i) {
       perror_msg("%s",name);
-      toys.exitval = EXIT_FAILURE;
+      toys.exitval = 1;
     }
-    if (len<1) break;
+    if (!len) break;
     for (i=0; i<len; i+=clen) {
-      wchar_t wchar;
       if (CFG_TOYBOX_I18N && (toys.optflags&FLAG_m)) {
+        wchar_t wchar = 0;
+
         clen = mbrtowc(&wchar, toybuf+i, len-i, 0);
-        if(clen==(size_t)(-1)) {
-          if(i!=len-1) {
-            clen = 1;
+        if (clen < 1) {
+          // If the problem might be buffer wrap, move and read more data
+          if (i) {
+            memmove(toybuf, toybuf+i, sizeof(toybuf)-i);
+            i = len - i;
+            goto again;
+          } else {
+            clen=1;
             continue;
-          } else break;
+          }
         }
-        if(clen==(size_t)(-2)) break;
-        if(clen==0) clen=1;
         space = iswspace(wchar);
       } else space = isspace(toybuf[i]);