annotate toys/uniq.c @ 546:a095c02dc431

Implement uniq.
author Georgi Chorbadzhiyski <gf@unixsol.org>
date Wed, 14 Mar 2012 22:04:06 -0500
parents
children f1629eb63806
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
546
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
1 /* vi: set sw=4 ts=4:
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
2 *
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
3 * uniq.c - report or filter out repeated lines in a file
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
4 *
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
5 * Copyright 2012 Georgi Chorbadzhiyski <georgi@unixsol.org>
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
6 *
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
7 * See http://www.opengroup.org/onlinepubs/009695399/utilities/uniq.html
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
8
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
9 USE_UNIQ(NEWTOY(uniq, "f#s#w#zicdu", TOYFLAG_BIN))
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
10
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
11 config UNIQ
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
12 bool "uniq"
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
13 default y
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
14 help
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
15 usage: uniq [-cduiz] [-w maxchars] [-f fields] [-s char] [input_file [output_file]]
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
16
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
17 Report or filter out repeated lines in a file
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
18
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
19 -c show counts before each line
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
20 -d show only lines that are repeated
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
21 -u show only lines that are unique
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
22 -i ignore case when comparing lines
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
23 -z lines end with \0 not \n
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
24 -w compare maximum X chars per line
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
25 -f ignore first X fields
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
26 -s ignore first X chars
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
27 */
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
28
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
29 #include "toys.h"
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
30
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
31 DEFINE_GLOBALS(
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
32 long maxchars;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
33 long nchars;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
34 long nfields;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
35 long repeats;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
36 )
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
37
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
38 #define TT this.uniq
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
39
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
// Bit masks tested against toys.optflags, one per boolean option.
#define FLAG_u (1 << 0)  // -u: show only lines that are unique
#define FLAG_d (1 << 1)  // -d: show only lines that are repeated
#define FLAG_c (1 << 2)  // -c: show counts before each line
#define FLAG_i (1 << 3)  // -i: ignore case when comparing
#define FLAG_z (1 << 4)  // -z: lines end with \0 instead of \n
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
45
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
46 static char *skip(char *str)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
47 {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
48 int field = 0;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
49 long nchars = TT.nchars;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
50 long nfields = TT.nfields;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
51 // Skip fields first
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
52 while (nfields && *str) {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
53 if (isspace((unsigned char)*str)) {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
54 if (field) {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
55 field = 0;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
56 nfields--;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
57 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
58 } else if (!field) {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
59 field = 1;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
60 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
61 str++;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
62 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
63 // Skip chars
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
64 while (nchars-- && *str)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
65 str++;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
66 return str;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
67 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
68
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
69 static void print_line(FILE *f, char *line)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
70 {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
71 if (TT.repeats == 0 && (toys.optflags & FLAG_d))
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
72 return;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
73 if (TT.repeats > 0 && (toys.optflags & FLAG_u))
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
74 return;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
75 if ((toys.optflags & FLAG_c)) {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
76 fprintf(f, "%7lu %s", TT.repeats + 1, line);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
77 } else {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
78 fprintf(f, "%s", line);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
79 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
80 if (toys.optflags & FLAG_z)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
81 fprintf(f, "%c", '\0');
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
82 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
83
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
84 void uniq_main(void)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
85 {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
86 FILE *infile = stdin;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
87 FILE *outfile = stdout;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
88 char *thisline = NULL;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
89 char *prevline = NULL;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
90 size_t thissize, prevsize = 0;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
91 char *tmpline;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
92 char eol = '\n';
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
93 size_t tmpsize;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
94
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
95 if (toys.optc >= 1)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
96 infile = xfopen(toys.optargs[0], "r");
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
97
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
98 if (toys.optc >= 2)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
99 outfile = xfopen(toys.optargs[1], "w");
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
100
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
101 if (toys.optflags & FLAG_z)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
102 eol = '\0';
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
103
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
104 // If first line can't be read
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
105 if (getdelim(&prevline, &prevsize, eol, infile) < 0)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
106 return;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
107
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
108 while (getdelim(&thisline, &thissize, eol, infile) > 0) {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
109 int diff;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
110 char *t1, *t2;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
111
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
112 // If requested get the chosen fields + character offsets.
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
113 if (TT.nfields || TT.nchars) {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
114 t1 = skip(thisline);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
115 t2 = skip(prevline);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
116 } else {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
117 t1 = thisline;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
118 t2 = prevline;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
119 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
120
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
121 if (TT.maxchars == 0) {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
122 diff = !(toys.optflags & FLAG_i)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
123 ? strcmp(t1, t2)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
124 : strcasecmp(t1, t2);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
125 } else {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
126 diff = !(toys.optflags & FLAG_i)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
127 ? strncmp(t1, t2, TT.maxchars)
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
128 : strncasecmp(t1, t2, TT.maxchars);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
129 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
130
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
131 if (diff == 0) { // same
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
132 TT.repeats++;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
133 } else {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
134 print_line(outfile, prevline);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
135
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
136 TT.repeats = 0;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
137
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
138 tmpline = prevline;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
139 prevline = thisline;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
140 thisline = tmpline;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
141
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
142 tmpsize = prevsize;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
143 prevsize = thissize;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
144 thissize = tmpsize;
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
145 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
146 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
147
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
148 print_line(outfile, prevline);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
149
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
150 if (CFG_TOYBOX_FREE) {
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
151 free(prevline);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
152 free(thisline);
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
153 }
a095c02dc431 Implement uniq.
Georgi Chorbadzhiyski <gf@unixsol.org>
parents:
diff changeset
154 }