changeset 706:35a9f9c5b53f

Commit 698 adding cut should ahve included the actual cut.c file. (Oops.)
author Rob Landley <rob@landley.net>
date Tue, 20 Nov 2012 01:00:17 -0600
parents 3e81cd0bad4b
children 977e19296b3f
files toys/posix/cut.c
diffstat 1 files changed, 328 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toys/posix/cut.c	Tue Nov 20 01:00:17 2012 -0600
@@ -0,0 +1,328 @@
+/* cut.c - Cut from a file.
+ *
+ * Copyright 2012 Ranjan Kumar <ranjankumar.bth@gmail.com>, Kyungwan Han <asura321@gamil.com>
+ *
+ * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cut.html 
+ *
+USE_CUT(NEWTOY(cut, "b:c:f:d:sn", TOYFLAG_BIN))
+
+config CUT
+  bool "cut"
+  default y
+  help
+    usage: cut OPTION... [FILE]...
+    Print selected parts of lines from each FILE to standard output.
+      -b LIST    select only these bytes from LIST.
+      -c LIST    select only these characters from LIST.
+      -f LIST    select only these fields.
+      -d DELIM  use DELIM instead of TAB for field delimiter.
+      -s    do not print lines not containing delimiters.
+      -n    don't split multibyte characters (Ignored).
+*/
+#define FOR_cut
+#include "toys.h"
+
+typedef struct _slist {
+  int start_position;
+  int end_position;
+  struct _slist *next;
+}SLIST;
+
+GLOBALS(
+  char *delim;
+  char *flist;
+  char *clist;
+  char *blist;
+  struct _slist *slist_head;
+  unsigned nelem;
+)
+
+#define BEGINOFLINE  0
+#define ENDOFLINE INT_MAX
+#define NORANGE -1
+
+static int get_user_options(void);
+void (*do_cut)(int);
+static void do_fcut(int fd);
+static void do_bccut(int fd);
+static void free_list(void);
+
+/*
+ * add items in the slist.
+ */
+static void add_to_list(int start, int end)
+{
+  SLIST *current, *head_ref, *temp1_node;
+
+  head_ref = TT.slist_head;
+  temp1_node = (SLIST *)xzalloc(sizeof(SLIST));
+  temp1_node->start_position = start;
+  temp1_node->end_position = end;
+  temp1_node->next = NULL;
+
+  /* Special case for the head end */
+  if (head_ref == NULL || (head_ref)->start_position >= start) { 
+      temp1_node->next = head_ref; 
+      head_ref = temp1_node;
+  }  
+  else { 
+    /* Locate the node before the point of insertion */   
+    current = head_ref;   
+    while (current->next!=NULL && current->next->start_position < temp1_node->start_position)
+        current = current->next;
+    temp1_node->next = current->next;   
+    current->next = temp1_node;
+  }
+  TT.slist_head = head_ref;
+  return;
+}
+
+/*
+ * parse list and add to slist.
+ */
+static void parse_list(char *list)
+{
+  char *ctoken, *dtoken;
+  int  start = 0, end = 0;
+  while((ctoken = strsep(&list, ",")) != NULL) {
+    if(!ctoken[0]) continue;
+
+    //Get start position.
+    dtoken = strsep(&ctoken, "-");
+    if(!dtoken[0]) start = BEGINOFLINE;
+    else {
+      start = get_int_value(dtoken, 0, INT_MAX);
+      start = (start?(start-1):start);
+    }
+    //Get end position.
+    if(ctoken == NULL) end = NORANGE; //case e.g. 1,2,3
+    else if(!ctoken[0]) end = ENDOFLINE; //case e.g. N-
+    else {//case e.g. N-M
+      end = get_int_value(ctoken, 0, INT_MAX);
+      end = (end?end:ENDOFLINE);
+            end--;
+      if(end == start) end = NORANGE;
+    }
+    add_to_list(start, end);
+    TT.nelem++;
+  }
+  //if list is missing in command line.
+  if(TT.nelem == 0) error_exit("missing positions list:");
+  return;
+}
+
+/*
+ * retrive data from the file/s.
+ */
+static void get_data(void)
+{
+  char **argv = toys.optargs; //file name.
+  toys.exitval = EXIT_SUCCESS;
+
+  if(!*argv) do_cut(0); //for stdin
+  else {
+    for(; *argv; ++argv) {
+      if(strcmp(*argv, "-") == 0) do_cut(0); //for stdin
+      else {
+        int fd = open(*argv, O_RDONLY, 0);
+        if(fd < 0) {//if file not present then continue with other files.
+          perror_msg(*argv);
+          toys.exitval = EXIT_FAILURE;
+          continue;
+        }
+        do_cut(fd);
+        xclose(fd);
+      }
+    }
+  }
+  return;
+}
+
+/*
+ * cut main function.
+*/
+void cut_main(void)
+{
+  char delimiter = '\t'; //default delimiter.
+  int  num_of_options = 0;
+  char *list;
+
+  TT.nelem = 0;
+  TT.slist_head = NULL;
+  //verify the number of options provided by user.
+  num_of_options = get_user_options();
+  if(num_of_options == 0) error_exit("specify a list of bytes, characters, or fields");
+  else if(num_of_options > 1) error_exit("only one type of list may be specified");
+
+  //Get list and assign the function.
+  if(toys.optflags & FLAG_f) {
+    list = TT.flist;
+    do_cut = do_fcut;
+  }
+  else if(toys.optflags & FLAG_c) {
+    list = TT.clist;
+    do_cut = do_bccut;
+  }
+  else {
+    list = TT.blist;
+    do_cut = do_bccut;
+  }
+
+  if(toys.optflags & FLAG_d) {
+    //delimiter must be 1 char.
+    if(TT.delim[0] && TT.delim[1]) perror_exit("the delimiter must be a single character");
+    delimiter = TT.delim[0];
+  }
+
+  if(!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) {
+    TT.delim = (char *)xzalloc(2);
+    TT.delim[0] = delimiter;
+  }
+  
+  //when field is not specified, cutting has some special handling.
+  if(!(toys.optflags & FLAG_f)) {
+    if(toys.optflags & FLAG_s) perror_exit("suppressing non-delimited lines operating on fields");
+    if(delimiter != '\t') perror_exit("an input delimiter may be specified only when operating on fields");
+  }
+
+  parse_list(list);
+  get_data();
+  if(!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) {
+    free(TT.delim);
+    TT.delim = NULL;
+  }
+  free_list();
+  return;
+}
+
+/*
+ * perform cut operation on the given delimiter.
+ */
+static void do_fcut(int fd)
+{
+  char *buff;
+  char *delimiter = TT.delim;
+  while((buff = get_line(fd)) != NULL) {
+    //does line have any delimiter?.
+    if(strrchr(buff, (int)delimiter[0]) == NULL) {
+      //if not then print whole line and move to next line.
+      if(!(toys.optflags & FLAG_s)) xputs(buff);
+      continue;
+    }
+
+    unsigned cpos = 0;
+    int start, ndelimiters = -1;
+    int  nprinted_fields = 0;
+    char *pfield = xzalloc(strlen(buff) + 1);
+    SLIST *temp_node = TT.slist_head;
+
+    if(temp_node != NULL) {
+      //process list on each line.
+      while(cpos < TT.nelem && buff) {
+        if(!temp_node) break;
+        start = temp_node->start_position;
+        do {
+          char *field = NULL;
+          //count number of delimeters per line.
+          while(buff) {
+            if(ndelimiters < start) {
+              ndelimiters++;
+              field = strsep(&buff, delimiter);
+            }
+            else break;
+          }
+          //print field (if not yet printed).
+          if(!pfield[ndelimiters]) {
+            if(ndelimiters == start) {
+              //put delimiter.
+              if(nprinted_fields++ > 0) xputc(delimiter[0]);
+              if(field) fputs(field, stdout);
+              //make sure this field won't print again.
+              pfield[ndelimiters] = (char) 0x23; //put some char at this position.
+            }
+          }
+          start++;
+          if((temp_node->end_position == NORANGE) || (!buff)) break;          
+        }while(start <= temp_node->end_position);
+        temp_node = temp_node->next;
+        cpos++;
+      }
+    }
+    xputc('\n');
+    free(pfield);
+    pfield = NULL;
+  }//End of while loop.
+  return;
+}
+
+/*
+ * perform cut operation char or byte.
+ */
+static void do_bccut(int fd)
+{
+  char *buff;
+  while((buff = get_line(fd)) != NULL) {
+    unsigned cpos = 0;
+    int buffln = strlen(buff);
+    char *pfield = xzalloc(buffln + 1);
+    SLIST *temp_node = TT.slist_head;
+    if(temp_node != NULL) {
+      while(cpos < TT.nelem) {
+        int start;
+        if(!temp_node) break;
+        start = temp_node->start_position;
+        while(start < buffln) {
+          //to avoid duplicate field printing.
+          if(pfield[start]) {
+              if(++start <= temp_node->end_position) continue;
+              temp_node = temp_node->next;
+              break;
+          }
+          else {
+            //make sure this field won't print again.
+            pfield[start] = (char) 0x23; //put some char at this position.
+            xputc(buff[start]);
+          }
+          if(++start > temp_node->end_position) {
+            temp_node = temp_node->next;
+            break;
+          }
+        }
+        cpos++;
+      }
+      xputc('\n');
+    }
+    free(pfield);
+    pfield = NULL;
+  }
+  return;
+}
+
+/* 
+ * used to verify "c", "b" and "f" options are present in command line.
+ * As cut command can only except any of them at a time.
+*/
+static int get_user_options(void)
+{
+  int i = 3, flag = 0;
+  for(; i<6; i++) {//verify 3rd, 4th and 5th bit is set.
+    int mask = 1;
+    mask = mask << i;
+    if(toys.optflags & mask) flag++;
+  }
+  return flag;
+}
+
+/*
+ * free the slist.
+*/
+static void free_list(void)
+{
+  SLIST *temp;
+  while(TT.slist_head != NULL) {
+    temp = TT.slist_head->next;
+    free(TT.slist_head);
+    TT.slist_head = temp;
+  }
+  return;
+}