| /* cut.c - Cut from a file. |
| * |
| * Copyright 2012 Ranjan Kumar <ranjankumar.bth@gmail.com> |
| * Copyright 2012 Kyungwan Han <asura321@gmail.com> |
| * |
| * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cut.html |
| * |
| * TODO: cleanup |
| |
| USE_CUT(NEWTOY(cut, "b:|c:|f:|d:sn[!cbf]", TOYFLAG_USR|TOYFLAG_BIN)) |
| |
| config CUT |
| bool "cut" |
| default y |
| help |
| usage: cut OPTION... [FILE]... |
| |
| Print selected parts of lines from each FILE to standard output. |
| |
| -b LIST select only these bytes from LIST. |
| -c LIST select only these characters from LIST. |
| -f LIST select only these fields. |
| -d DELIM use DELIM instead of TAB for field delimiter. |
| -s do not print lines not containing delimiters. |
| -n don't split multibyte characters (Ignored). |
| */ |
| #define FOR_cut |
| #include "toys.h" |
| |
| GLOBALS( |
| char *delim; |
| char *flist; |
| char *clist; |
| char *blist; |
| |
| void *slist_head; |
| unsigned nelem; |
| void (*do_cut)(int fd); |
| ) |
| |
// One selected position or range from the -b/-c/-f LIST.
struct slist {
  // Link to the next entry; the list is kept sorted by ascending start.
  // NOTE(review): kept as the first member, presumably required by
  // llist_traverse() -- confirm before moving it.
  struct slist *next;
  // Zero-based first selected position.
  int start;
  // Zero-based last selected position; -1 marks a single position.
  int end;
};
| |
| static void add_to_list(int start, int end) |
| { |
| struct slist *current, *head_ref, *temp1_node; |
| |
| head_ref = TT.slist_head; |
| temp1_node = xzalloc(sizeof(struct slist)); |
| temp1_node->start = start; |
| temp1_node->end = end; |
| |
| /* Special case for the head end */ |
| if (!head_ref || head_ref->start >= start) { |
| temp1_node->next = head_ref; |
| head_ref = temp1_node; |
| } else { |
| /* Locate the node before the point of insertion */ |
| current = head_ref; |
| while (current->next && current->next->start < temp1_node->start) |
| current = current->next; |
| temp1_node->next = current->next; |
| current->next = temp1_node; |
| } |
| TT.slist_head = head_ref; |
| } |
| |
| // parse list and add to slist. |
| static void parse_list(char *list) |
| { |
| for (;;) { |
| char *ctoken = strsep(&list, ","), *dtoken; |
| int start = 0, end = INT_MAX; |
| |
| if (!ctoken) break; |
| if (!*ctoken) continue; |
| |
| // Get start position. |
| if (*(dtoken = strsep(&ctoken, "-"))) { |
| start = atolx_range(dtoken, 0, INT_MAX); |
| start = (start?(start-1):start); |
| } |
| |
| // Get end position. |
| if (!ctoken) end = -1; //case e.g. 1,2,3 |
| else if (*ctoken) {//case e.g. N-M |
| end = atolx_range(ctoken, 0, INT_MAX); |
| if (!end) end = INT_MAX; |
| end--; |
| if(end == start) end = -1; |
| } |
| add_to_list(start, end); |
| TT.nelem++; |
| } |
| // if list is missing in command line. |
| if (!TT.nelem) error_exit("missing positions list"); |
| } |
| |
| /* |
| * retrive data from the file/s. |
| */ |
| static void get_data(void) |
| { |
| char **argv = toys.optargs; //file name. |
| toys.exitval = EXIT_SUCCESS; |
| |
| if(!*argv) TT.do_cut(0); //for stdin |
| else { |
| for(; *argv; ++argv) { |
| if(strcmp(*argv, "-") == 0) TT.do_cut(0); //for stdin |
| else { |
| int fd = open(*argv, O_RDONLY, 0); |
| if (fd < 0) {//if file not present then continue with other files. |
| perror_msg_raw(*argv); |
| continue; |
| } |
| TT.do_cut(fd); |
| xclose(fd); |
| } |
| } |
| } |
| } |
| |
| // perform cut operation on the given delimiter. |
| static void do_fcut(int fd) |
| { |
| char *buff, *pfield = 0, *delimiter = TT.delim; |
| |
| for (;;) { |
| unsigned cpos = 0; |
| int start, ndelimiters = -1; |
| int nprinted_fields = 0; |
| struct slist *temp_node = TT.slist_head; |
| |
| free(pfield); |
| pfield = 0; |
| |
| if (!(buff = get_line(fd))) break; |
| |
| //does line have any delimiter?. |
| if (strrchr(buff, (int)delimiter[0]) == NULL) { |
| //if not then print whole line and move to next line. |
| if (!(toys.optflags & FLAG_s)) xputs(buff); |
| continue; |
| } |
| |
| pfield = xzalloc(strlen(buff) + 1); |
| |
| if (temp_node) { |
| //process list on each line. |
| while (cpos < TT.nelem && buff) { |
| if (!temp_node) break; |
| start = temp_node->start; |
| do { |
| char *field = 0; |
| |
| //count number of delimeters per line. |
| while (buff) { |
| if (ndelimiters < start) { |
| ndelimiters++; |
| field = strsep(&buff, delimiter); |
| } else break; |
| } |
| //print field (if not yet printed). |
| if (!pfield[ndelimiters]) { |
| if (ndelimiters == start) { |
| //put delimiter. |
| if (nprinted_fields++ > 0) xputc(delimiter[0]); |
| if (field) fputs(field, stdout); |
| //make sure this field won't print again. |
| pfield[ndelimiters] = (char) 0x23; //put some char at this position. |
| } |
| } |
| start++; |
| if ((temp_node->end < 0) || !buff) break; |
| } while(start <= temp_node->end); |
| temp_node = temp_node->next; |
| cpos++; |
| } |
| } |
| xputc('\n'); |
| } |
| } |
| |
| // perform cut operation char or byte. |
| static void do_bccut(int fd) |
| { |
| char *buff; |
| |
| while ((buff = get_line(fd)) != NULL) { |
| unsigned cpos = 0; |
| int buffln = strlen(buff); |
| char *pfield = xzalloc(buffln + 1); |
| struct slist *temp_node = TT.slist_head; |
| |
| if (temp_node != NULL) { |
| while (cpos < TT.nelem) { |
| int start; |
| |
| if (!temp_node) break; |
| start = temp_node->start; |
| while (start < buffln) { |
| //to avoid duplicate field printing. |
| if (pfield[start]) { |
| if (++start <= temp_node->end) continue; |
| temp_node = temp_node->next; |
| break; |
| } else { |
| //make sure this field won't print again. |
| pfield[start] = (char) 0x23; //put some char at this position. |
| xputc(buff[start]); |
| } |
| if (++start > temp_node->end) { |
| temp_node = temp_node->next; |
| break; |
| } |
| } |
| cpos++; |
| } |
| xputc('\n'); |
| } |
| free(pfield); |
| pfield = NULL; |
| } |
| } |
| |
| void cut_main(void) |
| { |
| char delimiter = '\t'; //default delimiter. |
| char *list; |
| |
| TT.nelem = 0; |
| TT.slist_head = NULL; |
| |
| //Get list and assign the function. |
| if (toys.optflags & FLAG_f) { |
| list = TT.flist; |
| TT.do_cut = do_fcut; |
| } else if (toys.optflags & FLAG_c) { |
| list = TT.clist; |
| TT.do_cut = do_bccut; |
| } else { |
| list = TT.blist; |
| TT.do_cut = do_bccut; |
| } |
| |
| if (toys.optflags & FLAG_d) { |
| //delimiter must be 1 char. |
| if(TT.delim[0] && TT.delim[1]) |
| perror_exit("the delimiter must be a single character"); |
| delimiter = TT.delim[0]; |
| } |
| |
| if(!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) { |
| TT.delim = xzalloc(2); |
| TT.delim[0] = delimiter; |
| } |
| |
| //when field is not specified, cutting has some special handling. |
| if (!(toys.optflags & FLAG_f)) { |
| if (toys.optflags & FLAG_s) |
| perror_exit("suppressing non-delimited lines operating on fields"); |
| if (delimiter != '\t') |
| perror_exit("an input delimiter may be specified only when operating on fields"); |
| } |
| |
| parse_list(list); |
| get_data(); |
| if (!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) { |
| free(TT.delim); |
| TT.delim = NULL; |
| } |
| llist_traverse(TT.slist_head, free); |
| } |