annotate toys/uniq.c @ 567:f1629eb63806

Refactor uniq.c a bit.
author Rob Landley <rob@landley.net>
date Sat, 14 Apr 2012 22:34:34 -0500
parents a095c02dc431
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
546
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
1 /* vi: set sw=4 ts=4:
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
2 *
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
3 * uniq.c - report or filter out repeated lines in a file
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
4 *
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
5 * Copyright 2012 Georgi Chorbadzhiyski <georgi@unixsol.org>
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
6 *
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
7 * See http://www.opengroup.org/onlinepubs/009695399/utilities/uniq.html
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
8
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
9 USE_UNIQ(NEWTOY(uniq, "f#s#w#zicdu", TOYFLAG_BIN))
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
10
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
11 config UNIQ
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
12 bool "uniq"
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
13 default y
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
14 help
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
15 usage: uniq [-cduiz] [-w maxchars] [-f fields] [-s char] [input_file [output_file]]
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
16
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
17 Report or filter out repeated lines in a file
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
18
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
19 -c show counts before each line
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
20 -d show only lines that are repeated
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
21 -u show only lines that are unique
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
22 -i ignore case when comparing lines
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
23 -z lines end with \0 not \n
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
24 -w compare maximum X chars per line
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
25 -f ignore first X fields
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
26 -s ignore first X chars
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
27 */
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
28
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
29 #include "toys.h"
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
30
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
31 DEFINE_GLOBALS(
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
32 long maxchars;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
33 long nchars;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
34 long nfields;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
35 long repeats;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
36 )
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
37
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
38 #define TT this.uniq
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
39
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
40 #define FLAG_z 16
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
41 #define FLAG_i 8
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
42 #define FLAG_c 4
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
43 #define FLAG_d 2
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
44 #define FLAG_u 1
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
45
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
46 static char *skip(char *str)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
47 {
567
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
48 long nchars = TT.nchars, nfields;
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
49
546
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
50 // Skip fields first
567
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
51 for (nfields = TT.nfields; nfields; str++) {
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
52 while (*str && isspace(*str)) str++;
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
53 while (*str && !isspace(*str)) str++;
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
54 nfields--;
546
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
55 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
56 // Skip chars
567
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
57 while (*str && nchars--) str++;
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
58
546
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
59 return str;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
60 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
61
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
62 static void print_line(FILE *f, char *line)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
63 {
567
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
64 if (toys.optflags & (TT.repeats ? FLAG_u : FLAG_d)) return;
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
65 if (toys.optflags & FLAG_c) fprintf(f, "%7lu ", TT.repeats + 1);
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
66 fputs(line, f);
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
67 if (toys.optflags & FLAG_z) fputc(0, f);
546
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
68 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
69
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
70 void uniq_main(void)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
71 {
567
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
72 FILE *infile = stdin, *outfile = stdout;
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
73 char *thisline = NULL, *prevline = NULL, *tmpline, eol = '\n';
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
74 size_t thissize, prevsize = 0, tmpsize;
546
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
75
567
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
76 if (toys.optc >= 1) infile = xfopen(toys.optargs[0], "r");
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
77 if (toys.optc >= 2) outfile = xfopen(toys.optargs[1], "w");
546
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
78
567
f1629eb63806 Refactor uniq.c a bit.
Rob Landley <rob@landley.net>
parents: 546
diff changeset
79 if (toys.optflags & FLAG_z) eol = 0;
546
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
80
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
81 // If first line can't be read
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
82 if (getdelim(&prevline, &prevsize, eol, infile) < 0)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
83 return;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
84
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
85 while (getdelim(&thisline, &thissize, eol, infile) > 0) {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
86 int diff;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
87 char *t1, *t2;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
88
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
89 // If requested get the chosen fields + character offsets.
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
90 if (TT.nfields || TT.nchars) {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
91 t1 = skip(thisline);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
92 t2 = skip(prevline);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
93 } else {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
94 t1 = thisline;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
95 t2 = prevline;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
96 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
97
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
98 if (TT.maxchars == 0) {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
99 diff = !(toys.optflags & FLAG_i)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
100 ? strcmp(t1, t2)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
101 : strcasecmp(t1, t2);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
102 } else {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
103 diff = !(toys.optflags & FLAG_i)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
104 ? strncmp(t1, t2, TT.maxchars)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
105 : strncasecmp(t1, t2, TT.maxchars);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
106 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
107
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
108 if (diff == 0) { // same
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
109 TT.repeats++;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
110 } else {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
111 print_line(outfile, prevline);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
112
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
113 TT.repeats = 0;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
114
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
115 tmpline = prevline;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
116 prevline = thisline;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
117 thisline = tmpline;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
118
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
119 tmpsize = prevsize;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
120 prevsize = thissize;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
121 thissize = tmpsize;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
122 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
123 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
124
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
125 print_line(outfile, prevline);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
126
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
127 if (CFG_TOYBOX_FREE) {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
128 free(prevline);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
129 free(thisline);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
130 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
131 }