546
|
1 /* vi: set sw=4 ts=4:
|
|
2 *
|
|
3 * uniq.c - report or filter out repeated lines in a file
|
|
4 *
|
|
5 * Copyright 2012 Georgi Chorbadzhiyski <georgi@unixsol.org>
|
|
6 *
|
|
7 * See http://www.opengroup.org/onlinepubs/009695399/utilities/uniq.html
|
|
8
|
|
9 USE_UNIQ(NEWTOY(uniq, "f#s#w#zicdu", TOYFLAG_BIN))
|
|
10
|
|
11 config UNIQ
|
|
12 bool "uniq"
|
|
13 default y
|
|
14 help
|
|
15 usage: uniq [-cduiz] [-w maxchars] [-f fields] [-s char] [input_file [output_file]]
|
|
16
|
|
17 Report or filter out repeated lines in a file
|
|
18
|
|
19 -c show counts before each line
|
|
20 -d show only lines that are repeated
|
|
21 -u show only lines that are unique
|
|
22 -i ignore case when comparing lines
|
|
23 -z lines end with \0 not \n
|
|
24 -w compare maximum X chars per line
|
|
25 -f ignore first X fields
|
|
26 -s ignore first X chars
|
|
27 */
|
|
28
|
|
29 #include "toys.h"
|
|
30
|
|
// State for this command, referenced in the code below through the TT macro.
//   maxchars  limit on the number of characters compared per line; 0 means
//             whole lines are compared
//   nchars    number of characters skip() steps over after the fields
//   nfields   number of whitespace-delimited fields skip() steps over first
//   repeats   how many additional copies of the current line have been seen
// NOTE(review): presumably the option parser stores the arguments of -w, -s
// and -f into the first three members by position, so their order should not
// be changed -- confirm against the option string before reordering.
DEFINE_GLOBALS(
	long maxchars;
	long nchars;
	long nfields;
	long repeats;
)
|
|
37
|
|
// Shorthand used throughout this file for the command's global state.
#define TT this.uniq

// Bits tested against toys.optflags, one for each boolean option.
#define FLAG_u (1<<0)
#define FLAG_d (1<<1)
#define FLAG_c (1<<2)
#define FLAG_i (1<<3)
#define FLAG_z (1<<4)
|
|
45
|
|
46 static char *skip(char *str)
|
|
47 {
|
567
|
48 long nchars = TT.nchars, nfields;
|
|
49
|
546
|
50 // Skip fields first
|
567
|
51 for (nfields = TT.nfields; nfields; str++) {
|
|
52 while (*str && isspace(*str)) str++;
|
|
53 while (*str && !isspace(*str)) str++;
|
|
54 nfields--;
|
546
|
55 }
|
|
56 // Skip chars
|
567
|
57 while (*str && nchars--) str++;
|
|
58
|
546
|
59 return str;
|
|
60 }
|
|
61
|
|
62 static void print_line(FILE *f, char *line)
|
|
63 {
|
567
|
64 if (toys.optflags & (TT.repeats ? FLAG_u : FLAG_d)) return;
|
|
65 if (toys.optflags & FLAG_c) fprintf(f, "%7lu ", TT.repeats + 1);
|
|
66 fputs(line, f);
|
|
67 if (toys.optflags & FLAG_z) fputc(0, f);
|
546
|
68 }
|
|
69
|
|
70 void uniq_main(void)
|
|
71 {
|
567
|
72 FILE *infile = stdin, *outfile = stdout;
|
|
73 char *thisline = NULL, *prevline = NULL, *tmpline, eol = '\n';
|
|
74 size_t thissize, prevsize = 0, tmpsize;
|
546
|
75
|
567
|
76 if (toys.optc >= 1) infile = xfopen(toys.optargs[0], "r");
|
|
77 if (toys.optc >= 2) outfile = xfopen(toys.optargs[1], "w");
|
546
|
78
|
567
|
79 if (toys.optflags & FLAG_z) eol = 0;
|
546
|
80
|
|
81 // If first line can't be read
|
|
82 if (getdelim(&prevline, &prevsize, eol, infile) < 0)
|
|
83 return;
|
|
84
|
|
85 while (getdelim(&thisline, &thissize, eol, infile) > 0) {
|
|
86 int diff;
|
|
87 char *t1, *t2;
|
|
88
|
|
89 // If requested get the chosen fields + character offsets.
|
|
90 if (TT.nfields || TT.nchars) {
|
|
91 t1 = skip(thisline);
|
|
92 t2 = skip(prevline);
|
|
93 } else {
|
|
94 t1 = thisline;
|
|
95 t2 = prevline;
|
|
96 }
|
|
97
|
|
98 if (TT.maxchars == 0) {
|
|
99 diff = !(toys.optflags & FLAG_i)
|
|
100 ? strcmp(t1, t2)
|
|
101 : strcasecmp(t1, t2);
|
|
102 } else {
|
|
103 diff = !(toys.optflags & FLAG_i)
|
|
104 ? strncmp(t1, t2, TT.maxchars)
|
|
105 : strncasecmp(t1, t2, TT.maxchars);
|
|
106 }
|
|
107
|
|
108 if (diff == 0) { // same
|
|
109 TT.repeats++;
|
|
110 } else {
|
|
111 print_line(outfile, prevline);
|
|
112
|
|
113 TT.repeats = 0;
|
|
114
|
|
115 tmpline = prevline;
|
|
116 prevline = thisline;
|
|
117 thisline = tmpline;
|
|
118
|
|
119 tmpsize = prevsize;
|
|
120 prevsize = thissize;
|
|
121 thissize = tmpsize;
|
|
122 }
|
|
123 }
|
|
124
|
|
125 print_line(outfile, prevline);
|
|
126
|
|
127 if (CFG_TOYBOX_FREE) {
|
|
128 free(prevline);
|
|
129 free(thisline);
|
|
130 }
|
|
131 }
|