Mercurial > hg > toybox
comparison toys/posix/wc.c @ 719:053061a0b5b6
The previous wc -m didn't handle multibyte characters that crossed a buffer boundary, so this change takes a guess at making that work. (I haven't got a test case for this. I also don't know how to handle invalid sequences, so they are simply not counted.)
author | Rob Landley <rob@landley.net> |
---|---|
date | Wed, 28 Nov 2012 03:12:02 -0600 |
parents | a950dd960593 |
children | 9499e4cf830f |
comparison
equal
deleted
inserted
replaced
718:0ed3351d91eb | 719:053061a0b5b6 |
---|---|
49 { | 49 { |
50 int i, len, clen=1, space; | 50 int i, len, clen=1, space; |
51 unsigned long word=0, lengths[]={0,0,0}; | 51 unsigned long word=0, lengths[]={0,0,0}; |
52 | 52 |
53 for (;;) { | 53 for (;;) { |
54 len = read(fd, toybuf, sizeof(toybuf)); | 54 i = 0; |
55 if (len<0) { | 55 again: |
56 len = i+read(fd, toybuf+i, sizeof(toybuf)-i); | |
57 if (len < i) { | |
56 perror_msg("%s",name); | 58 perror_msg("%s",name); |
57 toys.exitval = EXIT_FAILURE; | 59 toys.exitval = 1; |
58 } | 60 } |
59 if (len<1) break; | 61 if (!len) break; |
60 for (i=0; i<len; i+=clen) { | 62 for (i=0; i<len; i+=clen) { |
61 wchar_t wchar; | |
62 if (CFG_TOYBOX_I18N && (toys.optflags&FLAG_m)) { | 63 if (CFG_TOYBOX_I18N && (toys.optflags&FLAG_m)) { |
64 wchar_t wchar = 0; | |
65 | |
63 clen = mbrtowc(&wchar, toybuf+i, len-i, 0); | 66 clen = mbrtowc(&wchar, toybuf+i, len-i, 0); |
64 if(clen==(size_t)(-1)) { | 67 if (clen < 1) { |
65 if(i!=len-1) { | 68 // If the problem might be buffer wrap, move and read more data |
66 clen = 1; | 69 if (i) { |
70 memmove(toybuf, toybuf+i, sizeof(toybuf)-i); | |
71 i = len - i; | |
72 goto again; | |
73 } else { | |
74 clen=1; | |
67 continue; | 75 continue; |
68 } else break; | 76 } |
69 } | 77 } |
70 if(clen==(size_t)(-2)) break; | |
71 if(clen==0) clen=1; | |
72 space = iswspace(wchar); | 78 space = iswspace(wchar); |
73 } else space = isspace(toybuf[i]); | 79 } else space = isspace(toybuf[i]); |
74 | 80 |
75 if (toybuf[i]==10) lengths[0]++; | 81 if (toybuf[i]==10) lengths[0]++; |
76 if (space) word=0; | 82 if (space) word=0; |