| /* sort.c - put input lines into order |
| * |
| * Copyright 2004, 2008 Rob Landley <rob@landley.net> |
| * |
| * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html |
| * |
| * Deviations from POSIX: Lots. |
| * We invented -x |
| |
| USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")"S:T:m" "o:k*t:" "xVbMcszdfirun", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2))) |
| |
| config SORT |
| bool "sort" |
| default y |
| help |
| usage: sort [-runbcdfiMsz] [FILE...] [-k#[,#[x]] [-t X]] [-o FILE] |
| |
| Sort all lines of text from input files (or stdin) to stdout. |
| |
| -r Reverse |
| -u Unique lines only |
| -n Numeric order (instead of alphabetical) |
| -b Ignore leading blanks (or trailing blanks in second part of key) |
| -c Check whether input is sorted |
| -d Dictionary order (use alphanumeric and whitespace chars only) |
| -f Force uppercase (case insensitive sort) |
| -i Ignore nonprinting characters |
| -M Month sort (jan, feb, etc) |
| -x Hexadecimal numerical sort |
| -s Skip fallback sort (only sort with keys) |
| -z Zero (null) terminated lines |
| -k Sort by "key" (see below) |
| -t Use a key separator other than whitespace |
| -o Output to FILE instead of stdout |
| -V Version numbers (name-1.234-rc6.5b.tgz) |
| |
| Sorting by key looks at a subset of the words on each line. -k2 uses the |
| second word to the end of the line, -k2,2 looks at only the second word, |
| -k2,4 looks from the start of the second to the end of the fourth word. |
| -k2.4,5 starts from the fourth character of the second word, to the end |
| of the fifth word. Specifying multiple keys uses the later keys as tie |
| breakers, in order. A type specifier appended to a sort key (such as -2,2n) |
| applies only to sorting that key. |
| |
| config SORT_FLOAT |
| bool |
| default y |
| depends on TOYBOX_FLOAT |
| help |
| usage: sort [-g] |
| |
| -g General numeric sort (double precision with nan and inf) |
| */ |
| |
| #define FOR_sort |
| #include "toys.h" |
| |
| GLOBALS( |
| char *t; |
| struct arg_list *k; |
| char *o, *T, S; |
| |
| void *key_list; |
| int linecount; |
| char **lines, *name; |
| ) |
| |
| // The sort types are n, g, and M. |
| // u, c, s, and z apply to top level only, not to keys. |
| // b at top level implies bb. |
| // The remaining options can be applied to search keys. |
| |
| #define FLAG_bb (1<<31) // Ignore trailing blanks |
| |
| struct sort_key |
| { |
| struct sort_key *next_key; // linked list |
| unsigned range[4]; // start word, start char, end word, end char |
| int flags; |
| }; |
| |
| // Copy of the part of this string corresponding to a key/flags. |
| |
| static char *get_key_data(char *str, struct sort_key *key, int flags) |
| { |
| int start = 0, end, len, i, j; |
| |
| // Special case whole string, so we don't have to make a copy |
| |
| if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3] |
| && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str; |
| |
| // Find start of key on first pass, end on second pass |
| |
| len = strlen(str); |
| for (j=0; j<2; j++) { |
| if (!key->range[2*j]) end=len; |
| |
| // Loop through fields |
| else { |
| end = 0; |
| for (i = 1; i < key->range[2*j]+j; i++) { |
| |
| // Skip leading blanks |
| if (str[end] && !TT.t) while (isspace(str[end])) end++; |
| |
| // Skip body of key |
| for (; str[end]; end++) { |
| if (TT.t) { |
| if (str[end]==*TT.t) { |
| end++; |
| break; |
| } |
| } else if (isspace(str[end])) break; |
| } |
| } |
| } |
| if (!j) start=end; |
| } |
| |
| // Key with explicit separator starts after the separator |
| if (TT.t && str[start]==*TT.t) start++; |
| |
| // Strip leading and trailing whitespace if necessary |
| if ((flags&FLAG_b) || (!TT.t && !key->range[3])) |
| while (isspace(str[start])) start++; |
| if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--; |
| |
| // Handle offsets on start and end |
| if (key->range[3]) { |
| end += key->range[3]-1; |
| if (end>len) end=len; |
| } |
| if (key->range[1]) { |
| start += key->range[1]-1; |
| if (start>len) start=len; |
| } |
| |
| // Make the copy |
| if (end<start) end = start; |
| str = xstrndup(str+start, end-start); |
| |
| // Handle -d |
| if (flags&FLAG_d) { |
| for (start = end = 0; str[end]; end++) |
| if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end]; |
| str[start] = 0; |
| } |
| |
| // Handle -i |
| if (flags&FLAG_i) { |
| for (start = end = 0; str[end]; end++) |
| if (isprint(str[end])) str[start++] = str[end]; |
| str[start] = 0; |
| } |
| |
| return str; |
| } |
| |
| // append a sort_key to key_list. |
| |
| static struct sort_key *add_key(void) |
| { |
| void **stupid_compiler = &TT.key_list; |
| struct sort_key **pkey = (struct sort_key **)stupid_compiler; |
| |
| while (*pkey) pkey = &((*pkey)->next_key); |
| return *pkey = xzalloc(sizeof(struct sort_key)); |
| } |
| |
| // Perform actual comparison |
| static int compare_values(int flags, char *x, char *y) |
| { |
| if (CFG_SORT_FLOAT && (flags & FLAG_g)) { |
| char *xx,*yy; |
| double dx = strtod(x,&xx), dy = strtod(y,&yy); |
| int xinf, yinf; |
| |
| // not numbers < NaN < -infinity < numbers < +infinity |
| |
| if (x==xx) return y==yy ? 0 : -1; |
| if (y==yy) return 1; |
| |
| // Check for isnan |
| if (dx!=dx) return (dy!=dy) ? 0 : -1; |
| if (dy!=dy) return 1; |
| |
| // Check for infinity. (Could underflow, but avoids needing libm.) |
| xinf = (1.0/dx == 0.0); |
| yinf = (1.0/dy == 0.0); |
| if (xinf) { |
| if(dx<0) return (yinf && dy<0) ? 0 : -1; |
| return (yinf && dy>0) ? 0 : 1; |
| } |
| if (yinf) return dy<0 ? 1 : -1; |
| |
| return dx>dy ? 1 : (dx<dy ? -1 : 0); |
| } else if (flags & FLAG_M) { |
| struct tm thyme; |
| int dx; |
| char *xx,*yy; |
| |
| xx = strptime(x,"%b",&thyme); |
| dx = thyme.tm_mon; |
| yy = strptime(y,"%b",&thyme); |
| if (!xx) return !yy ? 0 : -1; |
| else if (!yy) return 1; |
| else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon; |
| |
| } else if (flags & FLAG_x) return strtol(x, NULL, 16)-strtol(y, NULL, 16); |
| else if (flags & FLAG_V) { |
| while (*x && *y) { |
| while (*x && *x == *y) x++, y++; |
| if (isdigit(*x) && isdigit(*y)) { |
| long long xx = strtoll(x, &x, 10), yy = strtoll(y, &y, 10); |
| |
| if (xx<yy) return -1; |
| if (xx>yy) return 1; |
| } else { |
| char xx = *x ? *x : x[-1], yy = *y ? *y : y[-1]; |
| |
| // -rc/-pre hack so abc-123 > abc-123-rc1 (other way already - < 0-9) |
| if (xx != yy) { |
| if (xx<yy && !strstart(&y, "-rc") && !strstart(&y, "-pre")) return -1; |
| else return 1; |
| } |
| } |
| } |
| return *x ? !!*y : -1; |
| } else if (flags & FLAG_n) { |
| // Full floating point version of -n |
| if (CFG_SORT_FLOAT) { |
| double dx = atof(x), dy = atof(y); |
| |
| return dx>dy ? 1 : (dx<dy ? -1 : 0); |
| // Integer version of -n for tiny systems |
| } else return atoi(x)-atoi(y); |
| |
| // Ascii sort |
| } else return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y); |
| } |
| |
| // Callback from qsort(): Iterate through key_list and perform comparisons. |
| static int compare_keys(const void *xarg, const void *yarg) |
| { |
| int flags = toys.optflags, retval = 0; |
| char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg; |
| struct sort_key *key; |
| |
| for (key=(struct sort_key *)TT.key_list; !retval && key; key = key->next_key){ |
| flags = key->flags ? key->flags : toys.optflags; |
| |
| // Chop out and modify key chunks, handling -dfib |
| |
| x = get_key_data(xx, key, flags); |
| y = get_key_data(yy, key, flags); |
| |
| retval = compare_values(flags, x, y); |
| |
| // Free the copies get_key_data() made. |
| |
| if (x != xx) free(x); |
| if (y != yy) free(y); |
| |
| if (retval) break; |
| } |
| |
| // Perform fallback sort if necessary (always case insensitive, no -f, |
| // the point is to get a stable order even for -f sorts) |
| if (!retval && !FLAG(s)) { |
| flags = toys.optflags; |
| retval = strcmp(xx, yy); |
| } |
| |
| return retval * ((flags&FLAG_r) ? -1 : 1); |
| } |
| |
| // Read each line from file, appending to a big array. |
| static void sort_lines(char **pline, long len) |
| { |
| char *line; |
| |
| if (!pline) return; |
| line = *pline; |
| if (!FLAG(z) && len && line[len-1]=='\n') line[--len] = 0; |
| *pline = 0; |
| |
| // handle -c here so we don't allocate more memory than necessary. |
| if (FLAG(c)) { |
| int j = FLAG(u) ? -1 : 0; |
| |
| if (TT.lines && compare_keys((void *)&TT.lines, &line)>j) |
| error_exit("%s: Check line %d\n", TT.name, TT.linecount); |
| free(TT.lines); |
| TT.lines = (void *)line; |
| } else { |
| if (!(TT.linecount&63)) |
| TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64)); |
| TT.lines[TT.linecount] = line; |
| } |
| TT.linecount++; |
| } |
| |
| // Callback from loopfiles to handle input files. |
| static void sort_read(int fd, char *name) |
| { |
| TT.name = name; |
| do_lines(fd, FLAG(z) ? '\0' : '\n', sort_lines); |
| } |
| |
| void sort_main(void) |
| { |
| int idx, fd = 1; |
| |
| // Parse -k sort keys. |
| if (TT.k) { |
| struct arg_list *arg; |
| |
| for (arg = TT.k; arg; arg = arg->next) { |
| struct sort_key *key = add_key(); |
| char *temp; |
| int flag; |
| |
| idx = 0; |
| temp = arg->arg; |
| while (*temp) { |
| // Start of range |
| key->range[2*idx] = (unsigned)strtol(temp, &temp, 10); |
| if (*temp=='.') |
| key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10); |
| |
| // Handle flags appended to a key type. |
| for (;*temp;temp++) { |
| char *temp2, *optlist; |
| |
| // Note that a second comma becomes an "Unknown key" error. |
| |
| if (*temp==',' && !idx++) { |
| temp++; |
| break; |
| } |
| |
| // Which flag is this? |
| |
| optlist = toys.which->options; |
| temp2 = strchr(optlist, *temp); |
| flag = (1<<(optlist-temp2+strlen(optlist)-1)); |
| |
| // Was it a flag that can apply to a key? |
| |
| if (!temp2 || flag>FLAG_x |
| || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z))) |
| { |
| toys.exitval = 2; |
| error_exit("Unknown key option."); |
| } |
| // b after , means strip _trailing_ space, not leading. |
| if (idx && flag==FLAG_b) flag = FLAG_bb; |
| key->flags |= flag; |
| } |
| } |
| } |
| } |
| |
| // global b flag strips both leading and trailing spaces |
| if (FLAG(b)) toys.optflags |= FLAG_bb; |
| |
| // If no keys, perform alphabetic sort over the whole line. |
| if (!TT.key_list) add_key()->range[0] = 1; |
| |
| // Open input files and read data, populating TT.lines[TT.linecount] |
| loopfiles(toys.optargs, sort_read); |
| |
| // The compare (-c) logic was handled in sort_read(), |
| // so if we got here, we're done. |
| if (FLAG(c)) goto exit_now; |
| |
| // Perform the actual sort |
| qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys); |
| |
| // handle unique (-u) |
| if (FLAG(u)) { |
| int jdx; |
| |
| for (jdx=0, idx=1; idx<TT.linecount; idx++) { |
| if (!compare_keys(&TT.lines[jdx], &TT.lines[idx])) |
| free(TT.lines[idx]); |
| else TT.lines[++jdx] = TT.lines[idx]; |
| } |
| if (TT.linecount) TT.linecount = jdx+1; |
| } |
| |
| // Open output file if necessary. We can't do this until we've finished |
| // reading in case the output file is one of the input files. |
| if (TT.o) fd = xcreate(TT.o, O_CREAT|O_TRUNC|O_WRONLY, 0666); |
| |
| // Output result |
| for (idx = 0; idx<TT.linecount; idx++) { |
| char *s = TT.lines[idx]; |
| unsigned i = strlen(s); |
| |
| if (!FLAG(z)) s[i] = '\n'; |
| xwrite(fd, s, i+1); |
| if (CFG_TOYBOX_FREE) free(s); |
| } |
| |
| exit_now: |
| if (CFG_TOYBOX_FREE) { |
| if (fd != 1) close(fd); |
| free(TT.lines); |
| } |
| } |