blob: 2b0f5c08d8ad46762fa67863bec484adb734e0d6 [file] [log] [blame]
/* sort.c - put input lines into order
*
* Copyright 2004, 2008 Rob Landley <rob@landley.net>
*
* See http://opengroup.org/onlinepubs/007904975/utilities/sort.html
*
* Deviations from POSIX: Lots.
* We invented -x
USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")USE_SORT_BIG("S:T:m" "o:k*t:xbMcszdfi") "run", TOYFLAG_USR|TOYFLAG_BIN))
config SORT
bool "sort"
default y
help
usage: sort [-run] [FILE...]
Sort all lines of text from input files (or stdin) to stdout.
-r reverse
-u unique lines only
-n numeric order (instead of alphabetical)
config SORT_BIG
bool "SuSv3 options (Support -ktcsbdfiozM)"
default y
depends on SORT
help
usage: sort [-bcdfiMsz] [-k#[,#[x]] [-t X]] [-o FILE]
-b ignore leading blanks (or trailing blanks in second part of key)
-c check whether input is sorted
-d dictionary order (use alphanumeric and whitespace chars only)
-f force uppercase (case insensitive sort)
-i ignore nonprinting characters
-M month sort (jan, feb, etc).
-x Hexadecimal numerical sort
-s skip fallback sort (only sort with keys)
-z zero (null) terminated lines
-k sort by "key" (see below)
-t use a key separator other than whitespace
-o output to FILE instead of stdout
Sorting by key looks at a subset of the words on each line. -k2
uses the second word to the end of the line, -k2,2 looks at only
the second word, -k2,4 looks from the start of the second to the end
of the fourth word. Specifying multiple keys uses the later keys as
tie breakers, in order. A type specifier appended to a sort key
(such as -k2,2n) applies only to sorting that key.
config SORT_FLOAT
bool
default y
depends on SORT_BIG && TOYBOX_FLOAT
help
usage: sort [-g]
-g general numeric sort (double precision with nan and inf)
*/
#define FOR_sort
#include "toys.h"
GLOBALS(
char *key_separator;
struct arg_list *raw_keys;
char *outfile;
char *ignore1, ignore2; // GNU compatability NOPs for -S and -T.
void *key_list;
int linecount;
char **lines;
)
// The sort types are n, g, M, and x.
// u, c, s, and z apply to top level only, not to keys.
// b at top level implies bb.
// The remaining options can be applied to search keys.
// Unsigned constant: shifting a signed 1 into bit 31 is undefined behavior.
#define FLAG_bb (1U<<31) // Ignore trailing blanks
// One sort key. Keys are chained into a singly linked list through next_key.
struct sort_key {
  // Next key in the list, or NULL for the last one.
  struct sort_key *next_key;

  // [0] start word, [1] start char, [2] end word, [3] end char.
  unsigned int range[4];

  // FLAG_* bits that apply to this key only.
  int flags;
};
// Return the part of str that key selects, filtered according to flags.
//
// str:   NUL terminated line being compared.
// key:   field/char range to extract; range[] holds start word, start char,
//        end word, end char, with 0 meaning "not given".
// flags: FLAG_b / FLAG_bb strip blanks from the start / end of the key,
//        FLAG_d keeps only alphanumeric and whitespace characters,
//        FLAG_i keeps only printable characters.
//
// Returns str itself when the key is the whole line and no filtering is
// requested; otherwise a copy made with xstrndup() which the caller must
// free(). The caller tells the two apart by comparing the result with str.
//
// NOTE(review): with a -t separator the field scan stops one past the
// separator, so a key that ends before the end of the line includes the
// separator that terminates its last field. Confirm whether that is intended.
static char *get_key_data(char *str, struct sort_key *key, int flags)
{
  int start=0, end=0, len, i, j;

  // Special case whole string, so we don't have to make a copy
  if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
    && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str;

  // Find start of key on first pass, end on second pass
  len = strlen(str);
  for (j=0; j<2; j++) {
    if (!key->range[2*j]) end=len;

    // Loop through fields
    else {
      end=0;
      for (i=1; i < key->range[2*j]+j; i++) {

        // Skip leading blanks. (The unsigned char casts keep the ctype calls
        // defined for bytes >= 0x80 when plain char is signed.)
        if (str[end] && !TT.key_separator)
          while (isspace((unsigned char)str[end])) end++;

        // Skip body of key
        for (; str[end]; end++) {
          if (TT.key_separator) {
            if (str[end]==*TT.key_separator) {
              end++;
              break;
            }
          } else if (isspace((unsigned char)str[end])) break;
        }
      }
    }
    if (!j) start=end;
  }

  // Key with explicit separator starts after the separator. The str[start]
  // test keeps an empty separator string from matching the terminating NUL
  // and pushing start past the end of the line.
  if (TT.key_separator && str[start] && str[start]==*TT.key_separator)
    start++;

  // Strip leading and trailing whitespace if necessary
  if (flags&FLAG_b) while (isspace((unsigned char)str[start])) start++;
  if (flags&FLAG_bb)
    while (end>start && isspace((unsigned char)str[end-1])) end--;

  // Handle offsets on start and end. Compare the offset with the room that is
  // left instead of adding first, so a huge offset cannot wrap the int index
  // (which could leave start negative and read before the string).
  if (key->range[3]) {
    unsigned skip = key->range[3]-1;

    end = (skip > (unsigned)(len-end)) ? len : end+(int)skip;
  }
  if (key->range[1]) {
    unsigned skip = key->range[1]-1;

    start = (skip > (unsigned)(len-start)) ? len : start+(int)skip;
  }

  // Make the copy
  if (end<start) end=start;
  str = xstrndup(str+start, end-start);

  // Handle -d: squeeze out everything but alphanumerics and whitespace
  if (flags&FLAG_d) {
    for (start = end = 0; str[end]; end++)
      if (isspace((unsigned char)str[end]) || isalnum((unsigned char)str[end]))
        str[start++] = str[end];
    str[start] = 0;
  }

  // Handle -i: squeeze out nonprinting characters
  if (flags&FLAG_i) {
    for (start = end = 0; str[end]; end++)
      if (isprint((unsigned char)str[end])) str[start++] = str[end];
    str[start] = 0;
  }

  return str;
}
// Allocate a zeroed sort_key, hook it onto the tail of TT.key_list, and
// return it.
static struct sort_key *add_key(void)
{
  // TT.key_list is a void *, so go through a void ** to reach it as a
  // struct sort_key * slot.
  void **list_head = &TT.key_list;
  struct sort_key **tail = (struct sort_key **)list_head;
  struct sort_key *fresh = xzalloc(sizeof(struct sort_key));

  // Walk to the first empty link.
  for (; *tail; tail = &(*tail)->next_key);
  *tail = fresh;

  return fresh;
}
// Perform actual comparison of two key strings.
//
// flags: FLAG_g, FLAG_M or FLAG_x alone selects that sort type; any other
//        combination of type flags is handled as -n. With no type flag set
//        this is a string compare (case insensitive when FLAG_f is set).
// x, y:  NUL terminated strings to compare.
//
// Returns <0, 0 or >0 as x sorts before, equal to, or after y. Only the sign
// is meaningful. Reversal (-r) is not applied here.
static int compare_values(int flags, char *x, char *y)
{
  int ff = flags & (FLAG_n|FLAG_g|FLAG_M|FLAG_x);

  // Ascii sort
  if (!ff) return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y);

  if (CFG_SORT_FLOAT && ff == FLAG_g) {
    char *xx,*yy;
    double dx = strtod(x,&xx), dy = strtod(y,&yy);
    int xinf, yinf;

    // not numbers < NaN < -infinity < numbers < +infinity

    if (x==xx) return y==yy ? 0 : -1;
    if (y==yy) return 1;

    // Check for isnan
    if (dx!=dx) return (dy!=dy) ? 0 : -1;
    if (dy!=dy) return 1;

    // Check for infinity.  (Could underflow, but avoids needing libm.)
    xinf = (1.0/dx == 0.0);
    yinf = (1.0/dy == 0.0);
    if (xinf) {
      if(dx<0) return (yinf && dy<0) ? 0 : -1;
      return (yinf && dy>0) ? 0 : 1;
    }
    if (yinf) return dy<0 ? 1 : -1;

    return dx>dy ? 1 : (dx<dy ? -1 : 0);
  } else if (CFG_SORT_BIG && ff == FLAG_M) {
    // Each side gets its own struct tm, and tm_mon is only read once both
    // parses are known to have succeeded, so no uninitialized field is read.
    struct tm xtm, ytm;
    char *xx = strptime(x,"%b",&xtm), *yy = strptime(y,"%b",&ytm);

    // Strings that aren't month names sort before ones that are.
    if (!xx) return !yy ? 0 : -1;
    if (!yy) return 1;

    return xtm.tm_mon - ytm.tm_mon;
  } else if (CFG_SORT_BIG && ff == FLAG_x) {
    // Compare rather than subtract: a long difference squeezed into an int
    // can come out with the wrong sign (or as zero).
    long lx = strtol(x, NULL, 16), ly = strtol(y, NULL, 16);

    return lx>ly ? 1 : (lx<ly ? -1 : 0);
  // Everything else is treated as -n
  } else {
    // Full floating point version of -n
    if (CFG_SORT_FLOAT) {
      double dx = atof(x), dy = atof(y);

      return dx>dy ? 1 : (dx<dy ? -1 : 0);
    // Integer version of -n for tiny systems. Compare rather than subtract
    // so values of opposite sign can't overflow the int result.
    } else {
      int ix = atoi(x), iy = atoi(y);

      return ix>iy ? 1 : (ix<iy ? -1 : 0);
    }
  }
}
// qsort() comparison callback: xarg and yarg each point at a char * line.
// Walks TT.key_list comparing key by key until one differs, then (unless -s)
// falls back to comparing the whole lines. Returns <0, 0 or >0.
static int compare_keys(const void *xarg, const void *yarg)
{
  char *left = *(char **)xarg, *right = *(char **)yarg;
  int flags = toys.optflags, result = 0;

  if (!CFG_SORT_BIG) result = compare_values(flags, left, right);
  else {
    struct sort_key *key = (struct sort_key *)TT.key_list;

    while (key && !result) {
      char *lkey, *rkey;

      // A key without flags of its own inherits the global ones.
      flags = key->flags;
      if (!flags) flags = toys.optflags;

      // Chop out and modify key chunks, handling -dfib
      lkey = get_key_data(left, key, flags);
      rkey = get_key_data(right, key, flags);
      result = compare_values(flags, lkey, rkey);

      // get_key_data() hands back the line itself when no copy was needed.
      if (lkey != left) free(lkey);
      if (rkey != right) free(rkey);

      key = key->next_key;
    }
  }

  // Keys tied: fall back to a plain strcmp() of the full lines (no -f
  // handling), using the global flags, so that ties still get a definite
  // order. -s disables this.
  if (!result) {
    if (CFG_SORT_BIG && (toys.optflags&FLAG_s)) return 0;
    flags = toys.optflags;
    result = strcmp(left, right);
  }

  // -r comes from whichever set of flags produced the result.
  return (flags&FLAG_r) ? -result : result;
}
// Callback from loopfiles: read every line of fd.
//
// Normally each line is appended to TT.lines[], which grows 64 slots at a
// time. With -c nothing is accumulated: the previous line is stashed directly
// in the TT.lines pointer and each new line is checked against it right away.
static void sort_read(int fd, char *name)
{
  for (;;) {
    char *line;

    // -z reads raw (NUL terminated) records instead of text lines.
    if (CFG_SORT_BIG && (toys.optflags&FLAG_z)) line = get_rawline(fd, NULL, 0);
    else line = get_line(fd);
    if (!line) return;

    if (!(CFG_SORT_BIG && (toys.optflags&FLAG_c))) {
      // Out of room each time the count reaches a multiple of 64.
      if (!(TT.linecount&63))
        TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
      TT.lines[TT.linecount++] = line;
      continue;
    }

    // -c: compare against the previous line (if there is one). &TT.lines is
    // passed as "pointer to a line" because TT.lines holds the line itself.
    if (TT.lines) {
      // With -u an equal neighbor is also an error, so the limit drops to -1.
      int limit = (toys.optflags&FLAG_u) ? -1 : 0;

      if (compare_keys((void *)&TT.lines, &line) > limit)
        error_exit("%s: Check line %d\n", name, TT.linecount);
    }
    free(TT.lines);
    TT.lines = (char **)line;
    TT.linecount++;
  }
}
// Command entry point: parse the -k key specs, read all input, then either
// stop (-c, which was checked while reading) or sort, drop duplicates (-u)
// and write the result to stdout or the -o file.
void sort_main(void)
{
  int idx, fd = 1;

  // Parse -k sort keys.
  if (CFG_SORT_BIG && TT.raw_keys) {
    struct arg_list *arg;

    for (arg = TT.raw_keys; arg; arg = arg->next) {
      struct sort_key *key = add_key();
      char *temp;
      int flag;

      // idx is 0 while parsing the start of the range, 1 after the comma.
      idx = 0;
      temp = arg->arg;
      while (*temp) {
        // Start of range
        key->range[2*idx] = (unsigned)strtol(temp, &temp, 10);
        if (*temp=='.')
          key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10);

        // Handle flags appended to a key type.
        for (;*temp;temp++) {
          char *temp2, *optlist;

          // Note that a second comma becomes an "Unknown key" error.
          if (*temp==',' && !idx++) {
            temp++;
            break;
          }

          // Which flag is this? The bit number is the character's distance
          // from the end of the option string. Only compute it when the
          // character was actually found: subtracting a NULL pointer and
          // shifting by the result is undefined.
          optlist = toys.which->options;
          temp2 = strchr(optlist, *temp);
          flag = temp2 ? (1<<(optlist-temp2+strlen(optlist)-1)) : 0;

          // Was it a flag that can apply to a key?
          if (!temp2 || flag>FLAG_b
            || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z)))
          {
            error_exit("Unknown key option.");
          }
          // b after , means strip _trailing_ space, not leading.
          if (idx && flag==FLAG_b) flag = FLAG_bb;
          key->flags |= flag;
        }
      }
    }
  }

  // global b flag strips both leading and trailing spaces
  if (toys.optflags&FLAG_b) toys.optflags |= FLAG_bb;

  // If no keys, perform alphabetic sort over the whole line.
  if (CFG_SORT_BIG && !TT.key_list) add_key()->range[0] = 1;

  // Open input files and read data, populating TT.lines[TT.linecount]
  loopfiles(toys.optargs, sort_read);

  // The compare (-c) logic was handled in sort_read(),
  // so if we got here, we're done.
  if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) goto exit_now;

  // Open output file if necessary. This waits until all input has been read:
  // POSIX allows -o to name one of the input files, and O_TRUNC would empty
  // it before it was read.
  if (CFG_SORT_BIG && TT.outfile)
    fd = xcreate(TT.outfile, O_CREAT|O_TRUNC|O_WRONLY, 0666);

  // Perform the actual sort
  qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);

  // handle unique (-u): keep the first line of each run of equal lines,
  // compacting the survivors toward the front of the array.
  if (toys.optflags&FLAG_u) {
    int jdx;

    for (jdx=0, idx=1; idx<TT.linecount; idx++) {
      if (!compare_keys(&TT.lines[jdx], &TT.lines[idx]))
        free(TT.lines[idx]);
      else TT.lines[++jdx] = TT.lines[idx];
    }
    if (TT.linecount) TT.linecount = jdx+1;
  }

  // Output result
  for (idx = 0; idx<TT.linecount; idx++) {
    char *s = TT.lines[idx];
    unsigned i = strlen(s);

    // Overwrite the NUL terminator with a newline, unless -z wants the NUL
    // itself written as the terminator.
    if (!(toys.optflags&FLAG_z)) s[i] = '\n';
    xwrite(fd, s, i+1);
    if (CFG_TOYBOX_FREE) free(s);
  }

exit_now:
  if (CFG_TOYBOX_FREE) {
    if (fd != 1) close(fd);
    free(TT.lines);
  }
}