diff --git a/README.md b/README.md index 2e0219e..f11ecde 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ ## Comparison with wc. -The GNU utils version ([github](https://github.com/coreutils/coreutils/tree/master/src/wc.c), [savannah](http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=blob;f=src/wc.c;hb=HEAD)) is a bit over 1K lines of C. It does many things and checks many possible failure modes. +The GNU utils version ([github](https://github.com/coreutils/coreutils/tree/master/src/wc.c), [savannah](http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=blob;f=src/wc.c;hb=HEAD)) is a bit over 1K lines of C. It does many things and checks many possible failure modes. As far as I can tell, it decides whether to read from stdin by way of a heavily wrapped `fstat` call. The busybox version ([git.busybox.net](https://git.busybox.net/busybox/tree/coreutils/wc.c)) of wc is much shorter, at 257 lines, while striving to be [POSIX-compliant](https://pubs.opengroup.org/onlinepubs/9699919799/), meaning it has flags. diff --git a/ww b/ww index 2ad1347..58c8de8 100755 Binary files a/ww and b/ww differ diff --git a/ww.c b/ww.c index 2d969a1..b966769 100644 --- a/ww.c +++ b/ww.c @@ -4,25 +4,19 @@ int wc(FILE* fp) { char c[1]; - int seen_word = 0, seen_sep_after_word = 0, num_words = 0; + int word = 0, num_words = 0; int fn = fileno(fp); while (read(fn, c, sizeof(c)) > 0) { - if (*c == '\n' || *c == ' ' || *c == '\t') { - if (seen_word) { - seen_sep_after_word = 1; - } + if (*c != ' ' && *c != '\n' && *c != '\t') { + word = 1; } else { - seen_word = 1; - } - // exercise: what happens if you only track seen_sep, - // instead of seen_sep_after_word? - // test with: $ echo " hello world" | ./wc - if (seen_word && seen_sep_after_word) { - num_words++; - seen_sep_after_word = seen_word = 0; + if (word) { + num_words++; + word = 0; + } } } - num_words+=seen_word; + num_words+=word; printf("%i\n", num_words); return 0; }