From b987143eb6c6c61a63323e4aec17fe1cc1465576 Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Mon, 11 Sep 2023 16:34:53 +0200 Subject: [PATCH] add historical versions --- historical/busybox-wc.c | 257 +++++++++ historical/gnu-wc.c | 1034 +++++++++++++++++++++++++++++++++++++ historical/plan9-wc.c | 118 +++++ historical/plan9port-wc.c | 352 +++++++++++++ historical/unix-v7-wc.c | 86 +++ 5 files changed, 1847 insertions(+) create mode 100644 historical/busybox-wc.c create mode 100644 historical/gnu-wc.c create mode 100644 historical/plan9-wc.c create mode 100644 historical/plan9port-wc.c create mode 100644 historical/unix-v7-wc.c diff --git a/historical/busybox-wc.c b/historical/busybox-wc.c new file mode 100644 index 0000000..de75aba --- /dev/null +++ b/historical/busybox-wc.c @@ -0,0 +1,257 @@ +/* vi: set sw=4 ts=4: */ +/* + * wc implementation for busybox + * + * Copyright (C) 2003 Manuel Novoa III + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ +/* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org) + * + * Rewritten to fix a number of problems and do some size optimizations. + * Problems in the previous busybox implementation (besides bloat) included: + * 1) broken 'wc -c' optimization (read note below) + * 2) broken handling of '-' args + * 3) no checking of ferror on EOF returns + * 4) isprint() wasn't considered when word counting. + * + * NOTES: + * + * The previous busybox wc attempted an optimization using stat for the + * case of counting chars only. I omitted that because it was broken. + * It didn't take into account the possibility of input coming from a + * pipe, or input from a file with file pointer not at the beginning. + * + * To implement such a speed optimization correctly, not only do you + * need the size, but also the file position. Note also that the + * file position may be past the end of file. 
Consider the example + * (adapted from example in gnu wc.c) + * + * echo hello > /tmp/testfile && + * (dd ibs=1k skip=1 count=0 &> /dev/null; wc -c) < /tmp/testfile + * + * for which 'wc -c' should output '0'. + */ +//config:config WC +//config: bool "wc (4.7 kb)" +//config: default y +//config: help +//config: wc is used to print the number of bytes, words, and lines, +//config: in specified files. +//config: +//config:config FEATURE_WC_LARGE +//config: bool "Support very large counts" +//config: default y +//config: depends on WC +//config: help +//config: Use "unsigned long long" for counter variables. + +//applet:IF_WC(APPLET(wc, BB_DIR_USR_BIN, BB_SUID_DROP)) + +//kbuild:lib-$(CONFIG_WC) += wc.o + +/* BB_AUDIT SUSv3 compliant. */ +/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */ + +#include "libbb.h" +#include "unicode.h" + +#if !ENABLE_LOCALE_SUPPORT +# undef isprint +# undef isspace +# define isprint(c) ((unsigned)((c) - 0x20) <= (0x7e - 0x20)) +# define isspace(c) ((c) == ' ') +#endif + +#if ENABLE_FEATURE_WC_LARGE +# define COUNT_T unsigned long long +# define COUNT_FMT "llu" +#else +# define COUNT_T unsigned +# define COUNT_FMT "u" +#endif + +/* We support -m even when UNICODE_SUPPORT is off, + * we just don't advertise it in help text, + * since it is the same as -c in this case. + */ + +//usage:#define wc_trivial_usage +//usage: "[-c"IF_UNICODE_SUPPORT("m")"lwL] [FILE]..." 
+//usage: +//usage:#define wc_full_usage "\n\n" +//usage: "Count lines, words, and bytes for FILEs (or stdin)\n" +//usage: "\n -c Count bytes" +//usage: IF_UNICODE_SUPPORT( +//usage: "\n -m Count characters" +//usage: ) +//usage: "\n -l Count newlines" +//usage: "\n -w Count words" +//usage: "\n -L Print longest line length" +//usage: +//usage:#define wc_example_usage +//usage: "$ wc /etc/passwd\n" +//usage: " 31 46 1365 /etc/passwd\n" + +/* Order is important if we want to be compatible with + * column order in "wc -cmlwL" output: + */ +enum { + WC_LINES = 0, /* -l */ + WC_WORDS = 1, /* -w */ + WC_UNICHARS = 2, /* -m */ + WC_BYTES = 3, /* -c */ + WC_LENGTH = 4, /* -L */ + NUM_WCS = 5, +}; + +int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int wc_main(int argc UNUSED_PARAM, char **argv) +{ + const char *arg; + const char *start_fmt = " %9"COUNT_FMT + 1; + const char *fname_fmt = " %s\n"; + COUNT_T *pcounts; + COUNT_T counts[NUM_WCS]; + COUNT_T totals[NUM_WCS]; + int num_files; + smallint status = EXIT_SUCCESS; + unsigned print_type; + + init_unicode(); + + print_type = getopt32(argv, "lwmcL"); + + if (print_type == 0) { + print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_BYTES); + } + + argv += optind; + if (!argv[0]) { + *--argv = (char *) bb_msg_standard_input; + fname_fmt = "\n"; + } + if (!argv[1]) { /* zero or one filename? */ + if (!((print_type-1) & print_type)) /* exactly one option? */ + start_fmt = "%"COUNT_FMT; + } + + memset(totals, 0, sizeof(totals)); + + pcounts = counts; + + num_files = 0; + while ((arg = *argv++) != NULL) { + FILE *fp; + const char *s; + unsigned u; + unsigned linepos; + smallint in_word; + + ++num_files; + fp = fopen_or_warn_stdin(arg); + if (!fp) { + status = EXIT_FAILURE; + continue; + } + + memset(counts, 0, sizeof(counts)); + linepos = 0; + in_word = 0; + + while (1) { + int c; + /* Our -w doesn't match GNU wc exactly... 
oh well */ + + c = getc(fp); + if (c == EOF) { + if (ferror(fp)) { + bb_simple_perror_msg(arg); + status = EXIT_FAILURE; + } + goto DO_EOF; /* Treat an EOF as '\r'. */ + } + + /* Cater for -c and -m */ + ++counts[WC_BYTES]; + if (unicode_status != UNICODE_ON /* every byte is a new char */ + || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */ + ) { + ++counts[WC_UNICHARS]; + } + + if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */ + ++linepos; + if (!isspace(c)) { + in_word = 1; + continue; + } + } else if ((unsigned)(c - 9) <= 4) { + /* \t 9 + * \n 10 + * \v 11 + * \f 12 + * \r 13 + */ + if (c == '\t') { + linepos = (linepos | 7) + 1; + } else { /* '\n', '\r', '\f', or '\v' */ + DO_EOF: + if (linepos > counts[WC_LENGTH]) { + counts[WC_LENGTH] = linepos; + } + if (c == '\n') { + ++counts[WC_LINES]; + } + if (c != '\v') { + linepos = 0; + } + } + } else { + continue; + } + + counts[WC_WORDS] += in_word; + in_word = 0; + if (c == EOF) { + break; + } + } + + fclose_if_not_stdin(fp); + + if (totals[WC_LENGTH] < counts[WC_LENGTH]) { + totals[WC_LENGTH] = counts[WC_LENGTH]; + } + totals[WC_LENGTH] -= counts[WC_LENGTH]; + + OUTPUT: + /* coreutils wc tries hard to print pretty columns + * (saves results for all files, finds max col len etc...) + * we won't try that hard, it will bloat us too much */ + s = start_fmt; + u = 0; + do { + if (print_type & (1 << u)) { + printf(s, pcounts[u]); + s = " %9"COUNT_FMT; /* Ok... restore the leading space. */ + } + totals[u] += pcounts[u]; + } while (++u < NUM_WCS); + printf(fname_fmt, arg); + } + + /* If more than one file was processed, we want the totals. To save some + * space, we set the pcounts ptr to the totals array. This has the side + * effect of trashing the totals array after outputting it, but that's + * irrelevant since we no longer need it. */ + if (num_files > 1) { + num_files = 0; /* Make sure we don't get here again. 
*/ + arg = "total"; + pcounts = totals; + --argv; + goto OUTPUT; + } + + fflush_stdout_and_exit(status); +} diff --git a/historical/gnu-wc.c b/historical/gnu-wc.c new file mode 100644 index 0000000..6b802f5 --- /dev/null +++ b/historical/gnu-wc.c @@ -0,0 +1,1034 @@ +/* wc - print the number of lines, words, and bytes in files + Copyright (C) 1985-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Paul Rubin, phr@ocf.berkeley.edu + and David MacKenzie, djm@gnu.ai.mit.edu. */ + +#include <config.h> + +#include <stdckdint.h> +#include <stdio.h> +#include <getopt.h> +#include <sys/types.h> +#include <wchar.h> +#include <wctype.h> + +#include "system.h" +#include "assure.h" +#include "argmatch.h" +#include "argv-iter.h" +#include "fadvise.h" +#include "mbchar.h" +#include "physmem.h" +#include "readtokens0.h" +#include "safe-read.h" +#include "stat-size.h" +#include "xbinary-io.h" + +#if !defined iswspace && !HAVE_ISWSPACE +# define iswspace(wc) \ + ((wc) == to_uchar (wc) && isspace (to_uchar (wc))) +#endif + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "wc" + +#define AUTHORS \ + proper_name ("Paul Rubin"), \ + proper_name ("David MacKenzie") + +/* Size of atomic reads. 
*/ +#define BUFFER_SIZE (16 * 1024) + +#ifdef USE_AVX2_WC_LINECOUNT +/* From wc_avx2.c */ +extern bool +wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out, + uintmax_t *bytes_out); +#endif + +static bool debug; + +/* Cumulative number of lines, words, chars and bytes in all files so far. + max_line_length is the maximum over all files processed so far. */ +static uintmax_t total_lines; +static uintmax_t total_words; +static uintmax_t total_chars; +static uintmax_t total_bytes; +static uintmax_t total_lines_overflow; +static uintmax_t total_words_overflow; +static uintmax_t total_chars_overflow; +static uintmax_t total_bytes_overflow; +static uintmax_t max_line_length; + +/* Which counts to print. */ +static bool print_lines, print_words, print_chars, print_bytes; +static bool print_linelength; + +/* The print width of each count. */ +static int number_width; + +/* True if we have ever read the standard input. */ +static bool have_read_stdin; + +/* Used to determine if file size can be determined without reading. */ +static size_t page_size; + +/* Enable to _not_ treat non breaking space as a word separator. */ +static bool posixly_correct; + +/* The result of calling fstat or stat on a file descriptor or file. */ +struct fstatus +{ + /* If positive, fstat or stat has not been called yet. Otherwise, + this is the value returned from fstat or stat. */ + int failed; + + /* If FAILED is zero, this is the file's status. */ + struct stat st; +}; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. 
*/ +enum +{ + DEBUG_PROGRAM_OPTION = CHAR_MAX + 1, + FILES0_FROM_OPTION, + TOTAL_OPTION, +}; + +static struct option const longopts[] = +{ + {"bytes", no_argument, nullptr, 'c'}, + {"chars", no_argument, nullptr, 'm'}, + {"lines", no_argument, nullptr, 'l'}, + {"words", no_argument, nullptr, 'w'}, + {"debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION}, + {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION}, + {"max-line-length", no_argument, nullptr, 'L'}, + {"total", required_argument, nullptr, TOTAL_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +enum total_type + { + total_auto, /* 0: default or --total=auto */ + total_always, /* 1: --total=always */ + total_only, /* 2: --total=only */ + total_never /* 3: --total=never */ + }; +static char const *const total_args[] = +{ + "auto", "always", "only", "never", nullptr +}; +static enum total_type const total_types[] = +{ + total_auto, total_always, total_only, total_never +}; +ARGMATCH_VERIFY (total_args, total_types); +static enum total_type total_mode = total_auto; + +#ifdef USE_AVX2_WC_LINECOUNT +static bool +avx2_supported (void) +{ + bool avx_enabled = 0 < __builtin_cpu_supports ("avx2"); + + if (debug) + error (0, 0, (avx_enabled + ? _("using avx2 hardware support") + : _("avx2 support not detected"))); + + return avx_enabled; +} +#endif + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ + or: %s [OPTION]... --files0-from=F\n\ +"), + program_name, program_name); + fputs (_("\ +Print newline, word, and byte counts for each FILE, and a total line if\n\ +more than one FILE is specified. 
A word is a non-zero-length sequence of\n\ +printable characters delimited by white space.\n\ +"), stdout); + + emit_stdin_note (); + + fputs (_("\ +\n\ +The options below may be used to select which counts are printed, always in\n\ +the following order: newline, word, character, byte, maximum line length.\n\ + -c, --bytes print the byte counts\n\ + -m, --chars print the character counts\n\ + -l, --lines print the newline counts\n\ +"), stdout); + fputs (_("\ + --files0-from=F read input from the files specified by\n\ + NUL-terminated names in file F;\n\ + If F is - then read names from standard input\n\ + -L, --max-line-length print the maximum display width\n\ + -w, --words print the word counts\n\ +"), stdout); + fputs (_("\ + --total=WHEN when to print a line with total counts;\n\ + WHEN can be: auto, always, only, never\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Return non zero if a non breaking space. */ +ATTRIBUTE_PURE +static int +iswnbspace (wint_t wc) +{ + return ! posixly_correct + && (wc == 0x00A0 || wc == 0x2007 + || wc == 0x202F || wc == 0x2060); +} + +static int +isnbspace (int c) +{ + return iswnbspace (btowc (c)); +} + +/* FILE is the name of the file (or null for standard input) + associated with the specified counters. 
*/ +static void +write_counts (uintmax_t lines, + uintmax_t words, + uintmax_t chars, + uintmax_t bytes, + uintmax_t linelength, + char const *file) +{ + static char const format_sp_int[] = " %*s"; + char const *format_int = format_sp_int + 1; + char buf[INT_BUFSIZE_BOUND (uintmax_t)]; + + if (print_lines) + { + printf (format_int, number_width, umaxtostr (lines, buf)); + format_int = format_sp_int; + } + if (print_words) + { + printf (format_int, number_width, umaxtostr (words, buf)); + format_int = format_sp_int; + } + if (print_chars) + { + printf (format_int, number_width, umaxtostr (chars, buf)); + format_int = format_sp_int; + } + if (print_bytes) + { + printf (format_int, number_width, umaxtostr (bytes, buf)); + format_int = format_sp_int; + } + if (print_linelength) + { + printf (format_int, number_width, umaxtostr (linelength, buf)); + } + if (file) + printf (" %s", strchr (file, '\n') ? quotef (file) : file); + putchar ('\n'); +} + +static bool +wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out) +{ + size_t bytes_read; + uintmax_t lines, bytes; + char buf[BUFFER_SIZE + 1]; + bool long_lines = false; + + if (!lines_out || !bytes_out) + { + return false; + } + + lines = bytes = 0; + + while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) + { + + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (file)); + return false; + } + + bytes += bytes_read; + + char *p = buf; + char *end = buf + bytes_read; + uintmax_t plines = lines; + + if (! long_lines) + { + /* Avoid function call overhead for shorter lines. */ + while (p != end) + lines += *p++ == '\n'; + } + else + { + /* rawmemchr is more efficient with longer lines. */ + *end = '\n'; + while ((p = rawmemchr (p, '\n')) < end) + { + ++p; + ++lines; + } + } + + /* If the average line length in the block is >= 15, then use + memchr for the next block, where system specific optimizations + may outweigh function call overhead. 
+ FIXME: This line length was determined in 2015, on both + x86_64 and ppc64, but it's worth re-evaluating in future with + newer compilers, CPUs, or memchr() implementations etc. */ + if (lines - plines <= bytes_read / 15) + long_lines = true; + else + long_lines = false; + } + + *bytes_out = bytes; + *lines_out = lines; + + return true; +} + +/* Count words. FILE_X is the name of the file (or null for standard + input) that is open on descriptor FD. *FSTATUS is its status. + CURRENT_POS is the current file offset if known, negative if unknown. + Return true if successful. */ +static bool +wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos) +{ + bool ok = true; + char buf[BUFFER_SIZE + 1]; + size_t bytes_read; + uintmax_t lines, words, chars, bytes, linelength; + bool count_bytes, count_chars, count_complicated; + char const *file = file_x ? file_x : _("standard input"); + + lines = words = chars = bytes = linelength = 0; + + /* If in the current locale, chars are equivalent to bytes, we prefer + counting bytes, because that's easier. */ +#if MB_LEN_MAX > 1 + if (MB_CUR_MAX > 1) + { + count_bytes = print_bytes; + count_chars = print_chars; + } + else +#endif + { + count_bytes = print_bytes || print_chars; + count_chars = false; + } + count_complicated = print_words || print_linelength; + + /* Advise the kernel of our access pattern only if we will read(). */ + if (!count_bytes || count_chars || print_lines || count_complicated) + fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL); + + /* When counting only bytes, save some line- and word-counting + overhead. If FD is a 'regular' Unix file, using lseek is enough + to get its 'size' in bytes. Otherwise, read blocks of BUFFER_SIZE + bytes at a time until EOF. Note that the 'size' (number of bytes) + that wc reports is smaller than stats.st_size when the file is not + positioned at its beginning. That's why the lseek calls below are + necessary. 
For example the command + '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group' + should make wc report '0' bytes. */ + + if (count_bytes && !count_chars && !print_lines && !count_complicated) + { + bool skip_read = false; + + if (0 < fstatus->failed) + fstatus->failed = fstat (fd, &fstatus->st); + + /* For sized files, seek to one st_blksize before EOF rather than to EOF. + This works better for files in proc-like file systems where + the size is only approximate. */ + if (! fstatus->failed && usable_st_size (&fstatus->st) + && 0 <= fstatus->st.st_size) + { + off_t end_pos = fstatus->st.st_size; + if (current_pos < 0) + current_pos = lseek (fd, 0, SEEK_CUR); + + if (end_pos % page_size) + { + /* We only need special handling of /proc and /sys files etc. + when they're a multiple of PAGE_SIZE. In the common case + for files with st_size not a multiple of PAGE_SIZE, + it's more efficient and accurate to use st_size. + + Be careful here. The current position may actually be + beyond the end of the file. As in the example above. */ + + bytes = end_pos < current_pos ? 0 : end_pos - current_pos; + if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR)) + skip_read = true; + else + bytes = 0; + } + else + { + off_t hi_pos = (end_pos + - end_pos % (STP_BLKSIZE (&fstatus->st) + 1)); + if (0 <= current_pos && current_pos < hi_pos + && 0 <= lseek (fd, hi_pos, SEEK_CUR)) + bytes = hi_pos - current_pos; + } + } + + if (! skip_read) + { + fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL); + while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) + { + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (file)); + ok = false; + break; + } + bytes += bytes_read; + } + } + } + else if (!count_chars && !count_complicated) + { +#ifdef USE_AVX2_WC_LINECOUNT + static bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *); + if (!wc_lines_p) + wc_lines_p = avx2_supported () ? 
wc_lines_avx2 : wc_lines; +#else + bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *) + = wc_lines; +#endif + + /* Use a separate loop when counting only lines or lines and bytes -- + but not chars or words. */ + ok = wc_lines_p (file, fd, &lines, &bytes); + } +#if MB_LEN_MAX > 1 +# define SUPPORT_OLD_MBRTOWC 1 + else if (MB_CUR_MAX > 1) + { + bool in_word = false; + uintmax_t linepos = 0; + mbstate_t state = {0}; + bool in_shift = false; +# if SUPPORT_OLD_MBRTOWC + /* Back-up the state before each multibyte character conversion and + move the last incomplete character of the buffer to the front + of the buffer. This is needed because we don't know whether + the 'mbrtowc' function updates the state when it returns -2, -- + this is the ISO C 99 and glibc-2.2 behavior - or not - amended + ANSI C, glibc-2.1 and Solaris 5.7 behavior. We don't have an + autoconf test for this, yet. */ + size_t prev = 0; /* number of bytes carried over from previous round */ +# else + const size_t prev = 0; +# endif + + while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0) + { + char const *p; +# if SUPPORT_OLD_MBRTOWC + mbstate_t backup_state; +# endif + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (file)); + ok = false; + break; + } + + bytes += bytes_read; + p = buf; + bytes_read += prev; + do + { + wchar_t wide_char; + size_t n; + bool wide = true; + + if (!in_shift && is_basic (*p)) + { + /* Handle most ASCII characters quickly, without calling + mbrtowc(). */ + n = 1; + wide_char = *p; + wide = false; + } + else + { + in_shift = true; +# if SUPPORT_OLD_MBRTOWC + backup_state = state; +# endif + n = mbrtowc (&wide_char, p, bytes_read, &state); + if (n == (size_t) -2) + { +# if SUPPORT_OLD_MBRTOWC + state = backup_state; +# endif + break; + } + if (n == (size_t) -1) + { + /* Remember that we read a byte, but don't complain + about the error. 
Because of the decoding error, + this is considered to be a byte but not a + character (that is, chars is not incremented). */ + p++; + bytes_read--; + continue; + } + if (mbsinit (&state)) + in_shift = false; + if (n == 0) + { + wide_char = 0; + n = 1; + } + } + + switch (wide_char) + { + case '\n': + lines++; + FALLTHROUGH; + case '\r': + case '\f': + if (linepos > linelength) + linelength = linepos; + linepos = 0; + goto mb_word_separator; + case '\t': + linepos += 8 - (linepos % 8); + goto mb_word_separator; + case ' ': + linepos++; + FALLTHROUGH; + case '\v': + mb_word_separator: + words += in_word; + in_word = false; + break; + default: + if (wide && iswprint (wide_char)) + { + /* wcwidth can be expensive on OSX for example, + so avoid if not needed. */ + if (print_linelength) + { + int width = wcwidth (wide_char); + if (width > 0) + linepos += width; + } + if (iswspace (wide_char) || iswnbspace (wide_char)) + goto mb_word_separator; + in_word = true; + } + else if (!wide && isprint (to_uchar (*p))) + { + linepos++; + if (isspace (to_uchar (*p))) + goto mb_word_separator; + in_word = true; + } + break; + } + + p += n; + bytes_read -= n; + chars++; + } + while (bytes_read > 0); + +# if SUPPORT_OLD_MBRTOWC + if (bytes_read > 0) + { + if (bytes_read == BUFFER_SIZE) + { + /* Encountered a very long redundant shift sequence. 
*/ + p++; + bytes_read--; + } + memmove (buf, p, bytes_read); + } + prev = bytes_read; +# endif + } + if (linepos > linelength) + linelength = linepos; + words += in_word; + } +#endif + else + { + bool in_word = false; + uintmax_t linepos = 0; + + while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) + { + char const *p = buf; + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (file)); + ok = false; + break; + } + + bytes += bytes_read; + do + { + switch (*p++) + { + case '\n': + lines++; + FALLTHROUGH; + case '\r': + case '\f': + if (linepos > linelength) + linelength = linepos; + linepos = 0; + goto word_separator; + case '\t': + linepos += 8 - (linepos % 8); + goto word_separator; + case ' ': + linepos++; + FALLTHROUGH; + case '\v': + word_separator: + words += in_word; + in_word = false; + break; + default: + if (isprint (to_uchar (p[-1]))) + { + linepos++; + if (isspace (to_uchar (p[-1])) + || isnbspace (to_uchar (p[-1]))) + goto word_separator; + in_word = true; + } + break; + } + } + while (--bytes_read); + } + if (linepos > linelength) + linelength = linepos; + words += in_word; + } + + if (count_chars < print_chars) + chars = bytes; + + if (total_mode != total_only) + write_counts (lines, words, chars, bytes, linelength, file_x); + + if (ckd_add (&total_lines, total_lines, lines)) + total_lines_overflow = true; + if (ckd_add (&total_words, total_words, words)) + total_words_overflow = true; + if (ckd_add (&total_chars, total_chars, chars)) + total_chars_overflow = true; + if (ckd_add (&total_bytes, total_bytes, bytes)) + total_bytes_overflow = true; + + if (linelength > max_line_length) + max_line_length = linelength; + + return ok; +} + +static bool +wc_file (char const *file, struct fstatus *fstatus) +{ + if (! 
file || STREQ (file, "-")) + { + have_read_stdin = true; + xset_binary_mode (STDIN_FILENO, O_BINARY); + return wc (STDIN_FILENO, file, fstatus, -1); + } + else + { + int fd = open (file, O_RDONLY | O_BINARY); + if (fd == -1) + { + error (0, errno, "%s", quotef (file)); + return false; + } + else + { + bool ok = wc (fd, file, fstatus, 0); + if (close (fd) != 0) + { + error (0, errno, "%s", quotef (file)); + return false; + } + return ok; + } + } +} + +/* Return the file status for the NFILES files addressed by FILE. + Optimize the case where only one number is printed, for just one + file; in that case we can use a print width of 1, so we don't need + to stat the file. Handle the case of (nfiles == 0) in the same way; + that happens when we don't know how long the list of file names will be. */ + +static struct fstatus * +get_input_fstatus (size_t nfiles, char *const *file) +{ + struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus); + + if (nfiles == 0 + || (nfiles == 1 + && ((print_lines + print_words + print_chars + + print_bytes + print_linelength) + == 1))) + fstatus[0].failed = 1; + else + { + for (size_t i = 0; i < nfiles; i++) + fstatus[i].failed = (! file[i] || STREQ (file[i], "-") + ? fstat (STDIN_FILENO, &fstatus[i].st) + : stat (file[i], &fstatus[i].st)); + } + + return fstatus; +} + +/* Return a print width suitable for the NFILES files whose status is + recorded in FSTATUS. Optimize the same special case that + get_input_fstatus optimizes. */ + +ATTRIBUTE_PURE +static int +compute_number_width (size_t nfiles, struct fstatus const *fstatus) +{ + int width = 1; + + if (0 < nfiles && fstatus[0].failed <= 0) + { + int minimum_width = 1; + uintmax_t regular_total = 0; + + for (size_t i = 0; i < nfiles; i++) + if (! 
fstatus[i].failed) + { + if (S_ISREG (fstatus[i].st.st_mode)) + regular_total += fstatus[i].st.st_size; + else + minimum_width = 7; + } + + for (; 10 <= regular_total; regular_total /= 10) + width++; + if (width < minimum_width) + width = minimum_width; + } + + return width; +} + + +int +main (int argc, char **argv) +{ + bool ok; + int optc; + size_t nfiles; + char **files; + char *files_from = nullptr; + struct fstatus *fstatus; + struct Tokens tok; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + page_size = getpagesize (); + /* Line buffer stdout to ensure lines are written atomically and immediately + so that processes running in parallel do not intersperse their output. */ + setvbuf (stdout, nullptr, _IOLBF, 0); + + posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr); + + print_lines = print_words = print_chars = print_bytes = false; + print_linelength = false; + total_lines = total_words = total_chars = total_bytes = max_line_length = 0; + + while ((optc = getopt_long (argc, argv, "clLmw", longopts, nullptr)) != -1) + switch (optc) + { + case 'c': + print_bytes = true; + break; + + case 'm': + print_chars = true; + break; + + case 'l': + print_lines = true; + break; + + case 'w': + print_words = true; + break; + + case 'L': + print_linelength = true; + break; + + case DEBUG_PROGRAM_OPTION: + debug = true; + break; + + case FILES0_FROM_OPTION: + files_from = optarg; + break; + + case TOTAL_OPTION: + total_mode = XARGMATCH ("--total", optarg, total_args, total_types); + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + + if (! 
(print_lines || print_words || print_chars || print_bytes + || print_linelength)) + print_lines = print_words = print_bytes = true; + + bool read_tokens = false; + struct argv_iterator *ai; + if (files_from) + { + FILE *stream; + + /* When using --files0-from=F, you may not specify any files + on the command-line. */ + if (optind < argc) + { + error (0, 0, _("extra operand %s"), quoteaf (argv[optind])); + fprintf (stderr, "%s\n", + _("file operands cannot be combined with --files0-from")); + usage (EXIT_FAILURE); + } + + if (STREQ (files_from, "-")) + stream = stdin; + else + { + stream = fopen (files_from, "r"); + if (stream == nullptr) + error (EXIT_FAILURE, errno, _("cannot open %s for reading"), + quoteaf (files_from)); + } + + /* Read the file list into RAM if we can detect its size and that + size is reasonable. Otherwise, we'll read a name at a time. */ + struct stat st; + if (fstat (fileno (stream), &st) == 0 + && S_ISREG (st.st_mode) + && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2)) + { + read_tokens = true; + readtokens0_init (&tok); + if (! readtokens0 (stream, &tok) || fclose (stream) != 0) + error (EXIT_FAILURE, 0, _("cannot read file names from %s"), + quoteaf (files_from)); + files = tok.tok; + nfiles = tok.n_tok; + ai = argv_iter_init_argv (files); + } + else + { + files = nullptr; + nfiles = 0; + ai = argv_iter_init_stream (stream); + } + } + else + { + static char *stdin_only[] = { nullptr }; + files = (optind < argc ? argv + optind : stdin_only); + nfiles = (optind < argc ? argc - optind : 1); + ai = argv_iter_init_argv (files); + } + + if (!ai) + xalloc_die (); + + fstatus = get_input_fstatus (nfiles, files); + if (total_mode == total_only) + number_width = 1; /* No extra padding, since no alignment requirement. 
*/ + else + number_width = compute_number_width (nfiles, fstatus); + + ok = true; + for (int i = 0; /* */; i++) + { + bool skip_file = false; + enum argv_iter_err ai_err; + char *file_name = argv_iter (ai, &ai_err); + if (!file_name) + { + switch (ai_err) + { + case AI_ERR_EOF: + goto argv_iter_done; + case AI_ERR_READ: + error (0, errno, _("%s: read error"), + quotef (files_from)); + ok = false; + goto argv_iter_done; + case AI_ERR_MEM: + xalloc_die (); + default: + affirm (!"unexpected error code from argv_iter"); + } + } + if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-")) + { + /* Give a better diagnostic in an unusual case: + printf - | wc --files0-from=- */ + error (0, 0, _("when reading file names from stdin, " + "no file name of %s allowed"), + quoteaf (file_name)); + skip_file = true; + } + + if (!file_name[0]) + { + /* Diagnose a zero-length file name. When it's one + among many, knowing the record number may help. + FIXME: currently print the record number only with + --files0-from=FILE. Maybe do it for argv, too? */ + if (files_from == nullptr) + error (0, 0, "%s", _("invalid zero-length file name")); + else + { + /* Using the standard 'filename:line-number:' prefix here is + not totally appropriate, since NUL is the separator, not NL, + but it might be better than nothing. */ + unsigned long int file_number = argv_iter_n_args (ai); + error (0, 0, "%s:%lu: %s", quotef (files_from), + file_number, _("invalid zero-length file name")); + } + skip_file = true; + } + + if (skip_file) + ok = false; + else + ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]); + + if (! nfiles) + fstatus[0].failed = 1; + } + argv_iter_done: + + /* No arguments on the command line is fine. That means read from stdin. + However, no arguments on the --files0-from input stream is not an error; + it just means don't read anything. 
*/ + if (ok && !files_from && argv_iter_n_args (ai) == 0) + ok &= wc_file (nullptr, &fstatus[0]); + + if (read_tokens) + readtokens0_free (&tok); + + if (total_mode != total_never + && (total_mode != total_auto || 1 < argv_iter_n_args (ai))) + { + if (total_lines_overflow) + { + total_lines = UINTMAX_MAX; + error (0, EOVERFLOW, _("total lines")); + ok = false; + } + if (total_words_overflow) + { + total_words = UINTMAX_MAX; + error (0, EOVERFLOW, _("total words")); + ok = false; + } + if (total_chars_overflow) + { + total_chars = UINTMAX_MAX; + error (0, EOVERFLOW, _("total characters")); + ok = false; + } + if (total_bytes_overflow) + { + total_bytes = UINTMAX_MAX; + error (0, EOVERFLOW, _("total bytes")); + ok = false; + } + + write_counts (total_lines, total_words, total_chars, total_bytes, + max_line_length, + total_mode != total_only ? _("total") : nullptr); + } + + argv_iter_free (ai); + + free (fstatus); + + if (have_read_stdin && close (STDIN_FILENO) != 0) + error (EXIT_FAILURE, errno, "-"); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/historical/plan9-wc.c b/historical/plan9-wc.c new file mode 100644 index 0000000..e381751 --- /dev/null +++ b/historical/plan9-wc.c @@ -0,0 +1,118 @@ +/* + * Count bytes within runes, if it fits in a uvlong, and other things. 
+ */
+#include
+#include
+#include
+
+/*
+ * p*: output-selection flags (incremented by the option switch in main),
+ * n*: counts for the input most recently passed to wc(),
+ * tn*: running totals over all inputs.
+ */
+static int pline, pword, prune, pbadr, pchar;
+static uvlong nline, nword, nrune, nbadr, nchar;
+static uvlong tnline, tnword, tnrune, tnbadr, tnchar;
+
+enum{Space, Word};	/* values of `where' in wc(): between words / inside a word */
+/*
+ * Count one input.  Runes are read from bin until Bgetrune returns a
+ * negative value.  nline, nword, nrune and nbadr are reset and recomputed;
+ * nchar is set from Boffset(bin) after the loop (presumably the number of
+ * bytes consumed -- confirm against bio(2)).  All five counts are then
+ * added to the tn* totals.
+ */
+static void
+wc(Biobuf *bin)
+{
+	int where;
+	long r;
+
+	nline = 0;
+	nword = 0;
+	nrune = 0;
+	nbadr = 0;
+	where = Space;
+	while ((long)(r = Bgetrune(bin)) >= 0) {
+		nrune++;
+		if(r == Runeerror) {	/* already counted as a rune; also a bad rune */
+			nbadr++;
+			continue;	/* a bad rune neither starts nor ends a word */
+		}
+		if(r == '\n')
+			nline++;
+		if(where == Word){
+			if(isspacerune(r))	/* white space ends the current word */
+				where = Space;
+		}else
+			if(isspacerune(r) == 0){	/* first non-space rune starts a word */
+				where = Word;
+				nword++;
+			}
+	}
+	nchar = Boffset(bin);
+	tnline += nline;
+	tnword += nword;
+	tnrune += nrune;
+	tnbadr += nbadr;
+	tnchar += nchar;
+}
+/*
+ * Print one output line: each count whose p* flag is set, formatted as
+ * " %7llud", followed by " fname" when fname is not nil.  Every seprint
+ * call is given e = line + sizeof line as its limit.  The line is printed
+ * from line+1, which skips the leading space of the first field.
+ */
+static void
+report(uvlong nline, uvlong nword, uvlong nrune, uvlong nbadr, uvlong nchar, char *fname)
+{
+	char line[1024], *s, *e;
+
+	s = line;
+	e = line + sizeof line;
+	line[0] = 0;
+	if(pline)
+		s = seprint(s, e, " %7llud", nline);
+	if(pword)
+		s = seprint(s, e, " %7llud", nword);
+	if(prune)
+		s = seprint(s, e, " %7llud", nrune);
+	if(pbadr)
+		s = seprint(s, e, " %7llud", nbadr);
+	if(pchar)
+		s = seprint(s, e, " %7llud", nchar);
+	if(fname != nil)
+		seprint(s, e, " %s", fname);
+	print("%s\n", line+1);
+}
+
+void
+main(int argc, char *argv[])
+{
+	char *sts;
+	Biobuf sin, *bin;
+	int i;
+
+	sts = nil;
+	ARGBEGIN {
+	case 'l': pline++; break;
+	case 'w': pword++; break;
+	case 'r': prune++; break;
+	case 'b': pbadr++; break;
+	case 'c': pchar++; break;
+	default:
+		fprint(2, "Usage: %s [-lwrbc] [file ...]\n", argv0);
+		exits("usage");
+	} ARGEND
+	if(pline+pword+prune+pbadr+pchar == 0){
+		pline = 1;
+		pword = 1;
+		pchar = 1;
+	}
+	if(argc == 0){
+		Binit(&sin, 0, OREAD);
+		wc(&sin);
+		report(nline, nword, nrune, nbadr, nchar, nil);
+		Bterm(&sin);
+	}else{
+		for(i = 0; i < argc; i++){
+			bin = Bopen(argv[i], OREAD);
+			if(bin == nil){
+				perror(argv[i]);
+				sts = "can't open";
+				continue;
+			}
+			wc(bin); 
+ report(nline, nword, nrune, nbadr, nchar, argv[i]); + Bterm(bin); + } + if(argc>1) + report(tnline, tnword, tnrune, tnbadr, tnchar, "total"); + } + exits(sts); +} diff --git a/historical/plan9port-wc.c b/historical/plan9port-wc.c new file mode 100644 index 0000000..6007759 --- /dev/null +++ b/historical/plan9port-wc.c @@ -0,0 +1,352 @@ +/* + * wc -- count things in utf-encoded text files + * Bugs: + * The only white space characters recognized are ' ', '\t' and '\n', even though + * ISO 10646 has many more blanks scattered through it. + * Should count characters that cannot occur in any rune (hex f0-ff) separately. + * Should count non-canonical runes (e.g. hex c1,80 instead of hex 40). + */ +#include +#include +#define NBUF (8*1024) +uvlong nline, tnline, pline; +uvlong nword, tnword, pword; +uvlong nrune, tnrune, prune; +uvlong nbadr, tnbadr, pbadr; +uvlong nchar, tnchar, pchar; +void count(int, char *); +void report(uvlong, uvlong, uvlong, uvlong, uvlong, char *); +void +main(int argc, char *argv[]) +{ + char *status=""; + int i, f; + ARGBEGIN { + case 'l': pline++; break; + case 'w': pword++; break; + case 'r': prune++; break; + case 'b': pbadr++; break; + case 'c': pchar++; break; + default: + fprint(2, "Usage: %s [-lwrbc] [file ...]\n", argv0); + exits("usage"); + } ARGEND + if(pline+pword+prune+pbadr+pchar == 0) { + pline = 1; + pword = 1; + pchar = 1; + } + if(argc==0) + count(0, 0); + else{ + for(i=0;i1) + report(tnline, tnword, tnrune, tnbadr, tnchar, "total"); + } + exits(status); +} +void +report(uvlong nline, uvlong nword, uvlong nrune, uvlong nbadr, uvlong nchar, char *fname) +{ + char line[1024], word[128]; + line[0] = '\0'; + if(pline){ + sprint(word, " %7llud", nline); + strcat(line, word); + } + if(pword){ + sprint(word, " %7llud", nword); + strcat(line, word); + } + if(prune){ + sprint(word, " %7llud", nrune); + strcat(line, word); + } + if(pbadr){ + sprint(word, " %7llud", nbadr); + strcat(line, word); + } + if(pchar){ + sprint(word, " 
%7llud", nchar); + strcat(line, word); + } + if(fname){ + sprint(word, " %s", fname); + strcat(line, word); + } + print("%s\n", line+1); +} +/* + * How it works. Start in statesp. Each time we read a character, + * increment various counts, and do state transitions according to the + * following table. If we're not in statesp or statewd when done, the + * file ends with a partial rune. + * | character + * state |09,20| 0a |00-7f|80-bf|c0-df|e0-ef|f0-f7|f8-ff + * -------+-----+-----+-----+-----+-----+-----+-----+----- + * statesp|ASP |ASPN |AWDW |AWDWX|AC2W |AC3W |AC4W |AWDWX + * statewd|ASP |ASPN |AWD |AWDX |AC2 |AC3 |AC4 |AWDX + * statec2|ASPX |ASPNX|AWDX |AWDR |AC2X |AC3X |AC4X |AWDX + * statec3|ASPX |ASPNX|AWDX |AC2R |AC2X |AC3X |AC4X |AWDX + * statec4|ASPX |ASPNX|AWDX |AC3R |AC2X |AC3X |AC4X |AWDX f4 8f bf bf + */ +enum{ /* actions */ + AC2, /* enter statec2 */ + AC2R, /* enter statec2, don't count a rune */ + AC2W, /* enter statec2, count a word */ + AC2X, /* enter statec2, count a bad rune */ + AC3, /* enter statec3 */ + AC3R, /* enter statec3, don't count a rune */ + AC3W, /* enter statec3, count a word */ + AC3X, /* enter statec3, count a bad rune */ + AC4, /* enter statec4 */ + AC4W, /* enter statec4, count a word */ + AC4X, /* enter statec4, count a bad rune */ + ASP, /* enter statesp */ + ASPN, /* enter statesp, count a newline */ + ASPNX, /* enter statesp, count a newline, count a bad rune */ + ASPX, /* enter statesp, count a bad rune */ + AWD, /* enter statewd */ + AWDR, /* enter statewd, don't count a rune */ + AWDW, /* enter statewd, count a word */ + AWDWX, /* enter statewd, count a word, count a bad rune */ + AWDX, /* enter statewd, count a bad rune */ +}; +uchar statesp[256]={ /* looking for the start of a word */ +AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 00-07 */ +AWDW, ASP, ASPN, AWDW, AWDW, AWDW, AWDW, AWDW, /* 08-0f */ +AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 10-17 */ +AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 
18-1f */
+ASP, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 20-27 */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 28-2f */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 30-37 */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 38-3f */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 40-47 */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 48-4f */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 50-57 */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 58-5f */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 60-67 */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 68-6f */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 70-77 */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 78-7f */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* 80-87 */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* 88-8f */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* 90-97 */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* 98-9f */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* a0-a7 */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* a8-af */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* b0-b7 */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* b8-bf */
+AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, /* c0-c7 */
+AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, /* c8-cf */
+AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, /* d0-d7 */
+AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, /* d8-df */
+AC3W, AC3W, AC3W, AC3W, AC3W, AC3W, AC3W, AC3W, /* e0-e7 */
+AC3W, AC3W, AC3W, AC3W, AC3W, AC3W, AC3W, AC3W, /* e8-ef */
+AC4W, AC4W, AC4W, AC4W, AC4W, AC4W, AC4W, AC4W, /* f0-f7 */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* f8-ff */
+};
+uchar statewd[256]={	/* looking for the next character in a word:
+			 * 09 and 20 go back to statesp, 0a does so and counts a newline,
+			 * c0-f7 begin a multibyte rune, 80-bf and f8-ff count a bad rune;
+			 * no entry counts a word, since the word was counted on entry */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 00-07 */
+AWD, ASP, ASPN, AWD, AWD, AWD, AWD, AWD, /* 08-0f */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 10-17 */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 18-1f */
+ASP, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 20-27 */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 28-2f */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 30-37 */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 38-3f */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 40-47 */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 48-4f */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 50-57 */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 58-5f */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 60-67 */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 68-6f */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 70-77 */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 78-7f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 80-87 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 88-8f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 90-97 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 98-9f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* a0-a7 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* a8-af */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* b0-b7 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* b8-bf */
+AC2, AC2, AC2, AC2, AC2, AC2, AC2, AC2, /* c0-c7 */
+AC2, AC2, AC2, AC2, AC2, AC2, AC2, AC2, /* c8-cf */
+AC2, AC2, AC2, AC2, AC2, AC2, AC2, AC2, /* d0-d7 */
+AC2, AC2, AC2, AC2, AC2, AC2, AC2, AC2, /* d8-df */
+AC3, AC3, AC3, AC3, AC3, AC3, AC3, AC3, /* e0-e7 */
+AC3, AC3, AC3, AC3, AC3, AC3, AC3, AC3, /* e8-ef */
+AC4, AC4, AC4, AC4, AC4, AC4, AC4, AC4, /* f0-f7 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* f8-ff */
+};
+uchar statec2[256]={	/* looking for 10xxxxxx to complete a rune:
+			 * 80-bf completes it (AWDR: back to statewd, byte not counted as a rune);
+			 * every other byte counts a bad rune for the unfinished sequence */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 00-07 */
+AWDX, ASPX, ASPNX,AWDX, AWDX, AWDX, AWDX, AWDX, /* 08-0f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 10-17 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 18-1f */
+ASPX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 20-27 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 28-2f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 30-37 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 38-3f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 40-47 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 48-4f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 50-57 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 58-5f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 60-67 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 68-6f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 70-77 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 78-7f */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* 80-87 */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* 88-8f */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* 90-97 */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* 98-9f */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* a0-a7 */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* a8-af */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* b0-b7 */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* b8-bf */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* c0-c7 */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* c8-cf */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* d0-d7 */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* d8-df */
+AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, /* e0-e7 */
+AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, /* e8-ef */
+AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, /* f0-f7 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* f8-ff */
+};
+uchar statec3[256]={	/* looking for 10xxxxxx,10xxxxxx to complete a rune:
+			 * 80-bf moves on to statec2 without counting a rune (AC2R) */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 00-07 */
+AWDX, ASPX, ASPNX,AWDX, AWDX, AWDX, AWDX, AWDX, /* 08-0f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 10-17 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 18-1f */
+ASPX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 20-27 */
+AWDX, AWDX, AWDX, AWDX, 
AWDX, AWDX, AWDX, AWDX, /* 28-2f */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 30-37 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 38-3f */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 40-47 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 48-4f */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 50-57 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 58-5f */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 60-67 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 68-6f */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 70-77 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 78-7f */ +AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* 80-87 */ +AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* 88-8f */ +AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* 90-97 */ +AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* 98-9f */ +AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* a0-a7 */ +AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* a8-af */ +AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* b0-b7 */ +AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* b8-bf */ +AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* c0-c7 */ +AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* c8-cf */ +AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* d0-d7 */ +AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* d8-df */ +AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, /* e0-e7 */ +AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, /* e8-ef */ +AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, /* f0-f7 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* f8-ff */ +}; +uchar statec4[256]={ /* looking for 10xxxxxx,10xxxxxx,10xxxxxx to complete a rune */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 00-07 */ +AWDX, ASPX, ASPNX,AWDX, AWDX, AWDX, AWDX, AWDX, /* 08-0f */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 10-17 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 18-1f */ +ASPX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, 
AWDX, /* 20-27 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 28-2f */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 30-37 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 38-3f */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 40-47 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 48-4f */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 50-57 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 58-5f */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 60-67 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 68-6f */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 70-77 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 78-7f */ +AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, /* 80-87 */ +AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, /* 88-8f */ +AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, /* 90-97 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 98-9f */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* a0-a7 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* a8-af */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* b0-b7 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* b8-bf */ +AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* c0-c7 */ +AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* c8-cf */ +AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* d0-d7 */ +AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* d8-df */ +AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, /* e0-e7 */ +AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, /* e8-ef */ +AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, /* f0-f7 */ +AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* f8-ff */ +}; +void +count(int f, char *name) +{ + int n; + uchar buf[NBUF]; + uchar *bufp, *ebuf; + uchar *state=statesp; + + nline = 0; + nword = 0; + nrune = 0; + nbadr = 0; + nchar = 0; + + for(;;){ + n=read(f, buf, NBUF); + if(n<=0) + break; + nchar+=n; + nrune+=n; /* might be too large, gets decreased later */ + bufp=buf; + ebuf=buf+n; 
+ do{ + switch(state[*bufp]){ + case AC2: state=statec2; break; + case AC2R: state=statec2; --nrune; break; + case AC2W: state=statec2; nword++; break; + case AC2X: state=statec2; nbadr++; break; + case AC3: state=statec3; break; + case AC3R: state=statec3; --nrune; break; + case AC3W: state=statec3; nword++; break; + case AC3X: state=statec3; nbadr++; break; + case AC4: state=statec4; break; + case AC4W: state=statec4; nword++; break; + case AC4X: state=statec4; nbadr++; break; + case ASP: state=statesp; break; + case ASPN: state=statesp; nline++; break; + case ASPNX: state=statesp; nline++; nbadr++; break; + case ASPX: state=statesp; nbadr++; break; + case AWD: state=statewd; break; + case AWDR: state=statewd; --nrune; break; + case AWDW: state=statewd; nword++; break; + case AWDWX: state=statewd; nword++; nbadr++; break; + case AWDX: state=statewd; nbadr++; break; + } + }while(++bufp!=ebuf); + } + if(state!=statesp && state!=statewd) + nbadr++; + if(n<0) + fprint(2, "%s: %r\n", name); + report(nline, nword, nrune, nbadr, nchar, name); +} diff --git a/historical/unix-v7-wc.c b/historical/unix-v7-wc.c new file mode 100644 index 0000000..3a20b27 --- /dev/null +++ b/historical/unix-v7-wc.c @@ -0,0 +1,86 @@ +/* wc line and word count */ + +#include + +main(argc, argv) +char **argv; +{ + int i, token; + register FILE *fp; + long linect, wordct, charct; + long tlinect=0, twordct=0, tcharct=0; + char *wd; + register int c; + + wd = "lwc"; + if(argc > 1 && *argv[1] == '-') { + wd = ++argv[1]; + argc--; + argv++; + } + + i = 1; + fp = stdin; + do { + if(argc>1 && (fp=fopen(argv[i], "r")) == NULL) { + fprintf(stderr, "wc: can't open %s\n", argv[i]); + continue; + } + linect = 0; + wordct = 0; + charct = 0; + token = 0; + for(;;) { + c = getc(fp); + if (c == EOF) + break; + charct++; + if(' '1) { + printf(" %s\n", argv[i]); + } else + printf("\n"); + fclose(fp); + tlinect += linect; + twordct += wordct; + tcharct += charct; + } while(++i 2) { + wcp(wd, tcharct, twordct, 
tlinect);
+		printf(" total\n");
+	}
+	exit(0);
+}
+/*
+ * Print the counts selected by the option letters in wd, in the order in
+ * which the letters appear: 'l' prints linect, 'w' prints wordct and
+ * 'c' prints charct, each as a 7-wide decimal field.  Any other letter
+ * is skipped.  No newline is written; the callers print the file name
+ * or "total" and the newline themselves.
+ * Old-style definition: no return type is declared and no value is returned.
+ */
+wcp(wd, charct, wordct, linect)
+register char *wd;
+long charct; long wordct; long linect;
+{
+	while (*wd) switch (*wd++) {
+	case 'l':
+		printf("%7ld", linect);
+		break;
+
+	case 'w':
+		printf("%7ld ", wordct);	/* the only field followed by a space */
+		break;
+
+	case 'c':
+		printf("%7ld", charct);
+		break;
+	}
+}