blob: af4a1d9a8aefcae2213668be3b15452bfdd03d6f [file] [log] [blame]
/* cut.c - Cut from a file.
*
* Copyright 2012 Ranjan Kumar <ranjankumar.bth@gmail.com>, Kyungwan Han <asura321@gamil.com>
*
* http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cut.html
*
USE_CUT(NEWTOY(cut, "b:c:f:d:sn", TOYFLAG_BIN))
config CUT
bool "cut"
default y
help
usage: cut OPTION... [FILE]...
Print selected parts of lines from each FILE to standard output.
-b LIST select only these bytes from LIST.
-c LIST select only these characters from LIST.
-f LIST select only these fields.
-d DELIM use DELIM instead of TAB for field delimiter.
-s do not print lines not containing delimiters.
-n don't split multibyte characters (Ignored).
*/
#define FOR_cut
#include "toys.h"
typedef struct _slist {
int start_position;
int end_position;
struct _slist *next;
}SLIST;
GLOBALS(
char *delim;
char *flist;
char *clist;
char *blist;
struct _slist *slist_head;
unsigned nelem;
)
#define BEGINOFLINE 0
#define ENDOFLINE INT_MAX
#define NORANGE -1
static int get_user_options(void);
void (*do_cut)(int);
static void do_fcut(int fd);
static void do_bccut(int fd);
static void free_list(void);
/*
* add items in the slist.
*/
static void add_to_list(int start, int end)
{
SLIST *current, *head_ref, *temp1_node;
head_ref = TT.slist_head;
temp1_node = (SLIST *)xzalloc(sizeof(SLIST));
temp1_node->start_position = start;
temp1_node->end_position = end;
temp1_node->next = NULL;
/* Special case for the head end */
if (head_ref == NULL || (head_ref)->start_position >= start) {
temp1_node->next = head_ref;
head_ref = temp1_node;
}
else {
/* Locate the node before the point of insertion */
current = head_ref;
while (current->next!=NULL && current->next->start_position < temp1_node->start_position)
current = current->next;
temp1_node->next = current->next;
current->next = temp1_node;
}
TT.slist_head = head_ref;
return;
}
/*
* parse list and add to slist.
*/
static void parse_list(char *list)
{
char *ctoken, *dtoken;
int start = 0, end = 0;
while((ctoken = strsep(&list, ",")) != NULL) {
if(!ctoken[0]) continue;
//Get start position.
dtoken = strsep(&ctoken, "-");
if(!dtoken[0]) start = BEGINOFLINE;
else {
start = get_int_value(dtoken, 0, INT_MAX);
start = (start?(start-1):start);
}
//Get end position.
if(ctoken == NULL) end = NORANGE; //case e.g. 1,2,3
else if(!ctoken[0]) end = ENDOFLINE; //case e.g. N-
else {//case e.g. N-M
end = get_int_value(ctoken, 0, INT_MAX);
end = (end?end:ENDOFLINE);
end--;
if(end == start) end = NORANGE;
}
add_to_list(start, end);
TT.nelem++;
}
//if list is missing in command line.
if(TT.nelem == 0) error_exit("missing positions list:");
return;
}
/*
* retrive data from the file/s.
*/
static void get_data(void)
{
char **argv = toys.optargs; //file name.
toys.exitval = EXIT_SUCCESS;
if(!*argv) do_cut(0); //for stdin
else {
for(; *argv; ++argv) {
if(strcmp(*argv, "-") == 0) do_cut(0); //for stdin
else {
int fd = open(*argv, O_RDONLY, 0);
if(fd < 0) {//if file not present then continue with other files.
perror_msg(*argv);
toys.exitval = EXIT_FAILURE;
continue;
}
do_cut(fd);
xclose(fd);
}
}
}
return;
}
/*
* cut main function.
*/
void cut_main(void)
{
char delimiter = '\t'; //default delimiter.
int num_of_options = 0;
char *list;
TT.nelem = 0;
TT.slist_head = NULL;
//verify the number of options provided by user.
num_of_options = get_user_options();
if(num_of_options == 0) error_exit("specify a list of bytes, characters, or fields");
else if(num_of_options > 1) error_exit("only one type of list may be specified");
//Get list and assign the function.
if(toys.optflags & FLAG_f) {
list = TT.flist;
do_cut = do_fcut;
}
else if(toys.optflags & FLAG_c) {
list = TT.clist;
do_cut = do_bccut;
}
else {
list = TT.blist;
do_cut = do_bccut;
}
if(toys.optflags & FLAG_d) {
//delimiter must be 1 char.
if(TT.delim[0] && TT.delim[1]) perror_exit("the delimiter must be a single character");
delimiter = TT.delim[0];
}
if(!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) {
TT.delim = (char *)xzalloc(2);
TT.delim[0] = delimiter;
}
//when field is not specified, cutting has some special handling.
if(!(toys.optflags & FLAG_f)) {
if(toys.optflags & FLAG_s) perror_exit("suppressing non-delimited lines operating on fields");
if(delimiter != '\t') perror_exit("an input delimiter may be specified only when operating on fields");
}
parse_list(list);
get_data();
if(!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) {
free(TT.delim);
TT.delim = NULL;
}
free_list();
return;
}
/*
* perform cut operation on the given delimiter.
*/
static void do_fcut(int fd)
{
char *buff;
char *delimiter = TT.delim;
while((buff = get_line(fd)) != NULL) {
//does line have any delimiter?.
if(strrchr(buff, (int)delimiter[0]) == NULL) {
//if not then print whole line and move to next line.
if(!(toys.optflags & FLAG_s)) xputs(buff);
continue;
}
unsigned cpos = 0;
int start, ndelimiters = -1;
int nprinted_fields = 0;
char *pfield = xzalloc(strlen(buff) + 1);
SLIST *temp_node = TT.slist_head;
if(temp_node != NULL) {
//process list on each line.
while(cpos < TT.nelem && buff) {
if(!temp_node) break;
start = temp_node->start_position;
do {
char *field = NULL;
//count number of delimeters per line.
while(buff) {
if(ndelimiters < start) {
ndelimiters++;
field = strsep(&buff, delimiter);
}
else break;
}
//print field (if not yet printed).
if(!pfield[ndelimiters]) {
if(ndelimiters == start) {
//put delimiter.
if(nprinted_fields++ > 0) xputc(delimiter[0]);
if(field) fputs(field, stdout);
//make sure this field won't print again.
pfield[ndelimiters] = (char) 0x23; //put some char at this position.
}
}
start++;
if((temp_node->end_position == NORANGE) || (!buff)) break;
}while(start <= temp_node->end_position);
temp_node = temp_node->next;
cpos++;
}
}
xputc('\n');
free(pfield);
pfield = NULL;
}//End of while loop.
return;
}
/*
* perform cut operation char or byte.
*/
static void do_bccut(int fd)
{
char *buff;
while((buff = get_line(fd)) != NULL) {
unsigned cpos = 0;
int buffln = strlen(buff);
char *pfield = xzalloc(buffln + 1);
SLIST *temp_node = TT.slist_head;
if(temp_node != NULL) {
while(cpos < TT.nelem) {
int start;
if(!temp_node) break;
start = temp_node->start_position;
while(start < buffln) {
//to avoid duplicate field printing.
if(pfield[start]) {
if(++start <= temp_node->end_position) continue;
temp_node = temp_node->next;
break;
}
else {
//make sure this field won't print again.
pfield[start] = (char) 0x23; //put some char at this position.
xputc(buff[start]);
}
if(++start > temp_node->end_position) {
temp_node = temp_node->next;
break;
}
}
cpos++;
}
xputc('\n');
}
free(pfield);
pfield = NULL;
}
return;
}
/*
* used to verify "c", "b" and "f" options are present in command line.
* As cut command can only except any of them at a time.
*/
static int get_user_options(void)
{
int i = 3, flag = 0;
for(; i<6; i++) {//verify 3rd, 4th and 5th bit is set.
int mask = 1;
mask = mask << i;
if(toys.optflags & mask) flag++;
}
return flag;
}
/*
* free the slist.
*/
static void free_list(void)
{
SLIST *temp;
while(TT.slist_head != NULL) {
temp = TT.slist_head->next;
free(TT.slist_head);
TT.slist_head = temp;
}
return;
}