/*
* Author: Dmitri Tikhonov <dxt22@njit.edu>
* Date: April 16, 2006
* mysort.c
* CIS 610-102
*
* This program implements MSD radix sort to sort a set of arbitrary length
* strings.
*
* Note: getline source is stolen. This is a library on Linux. I will not
* re-implement stuff just for the sake of it.
*/
#define THISISLINUX 0
#define NBUCKETS 256
#define CHARVAL(x) (x)
#define STRVAL(str, index) (str.length <= index ? 0 : CHARVAL(str.string[index]))
#if THISISLINUX
/* This gives us access to getline(3) to make our life easier */
#define _GNU_SOURCE
#endif
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
struct str {
unsigned char *string;
size_t length;
};
typedef struct str str_t;
struct bkt {
str_t str;
struct bkt *next;
};
typedef struct bkt bkt_t;
struct buckets {
bkt_t *bucket[NBUCKETS];
};
typedef struct buckets buckets_t;
#define INIT_BUCKETS(x) do { \
int i; \
for (i = 0; i < NBUCKETS; ++i) x[i] = NULL; \
} while (0)
struct strset {
str_t *strings;
int nstrings; /* Number of pointers used */
int nalloc; /* Number of pointers allocated */
};
typedef struct strset strset_t;
#if !THISISLINUX
/* OK, this is stolen from a google group somewhere. It's not my fault
* Sun boxes don't come with GNU getline libraries */
static ssize_t
getline (char **lineptr, size_t *n, FILE *stream)
{
#define EXPAND_CHUNK 16
int n_read = 0;
char *line = *lineptr;
if (lineptr == NULL) return -1;
if (n == NULL) return -1;
if (stream == NULL) return -1;
if (!(*lineptr != NULL || *n == 0)) return -1;
#ifdef HAVE_FLOCKFILE
flockfile (stream);
#endif
while (1)
{
int c;
#ifdef HAVE_FLOCKFILE
c = getc_unlocked (stream);
#else
c = getc (stream);
#endif
if (c == EOF)
{
if (n_read > 0)
line[n_read] = '\0';
break;
}
if (n_read + 2 >= *n)
{
size_t new_size;
if (*n == 0)
new_size = 16;
else
new_size = *n * 2;
if (*n >= new_size) /* Overflowed size_t */
line = NULL;
else
line = *lineptr ? realloc (*lineptr, new_size) : malloc (new_size);
if (line)
{
*lineptr = line;
*n = new_size;
}
else
{
if (*n > 0)
{
(*lineptr)[*n - 1] = '\0';
n_read = *n - 2;
}
break;
}
}
line[n_read] = c;
n_read++;
if (c == '\n')
{
line[n_read] = '\0';
break;
}
}
#ifdef HAVE_FLOCKFILE
funlockfile (stream);
#endif
return n_read - 1;
}
#endif
/*
* Read strings from the stream and populate strset.
* Returns the number of strings read.
*/
int
read_strings (FILE *stream, strset_t *strset)
{
int nstr = 0; /* Return value */
size_t len;
ssize_t nread;
char *string;
for (string = NULL, len = 0;
-1 != (nread = getline(&string, &len, stream));
string = NULL, len = 0)
{
if ('\n' == string[nread - THISISLINUX]) {
/* We don't care for newlines */
string[nread - THISISLINUX] = '\0';
--len;
}
#if 0
if (len <= 0) { /* Don't care for empty strings, either */
free(string);
continue;
}
#endif
if (strset->nstrings == strset->nalloc) { /* Increase buffer */
strset->nalloc *= 2;
strset->strings = realloc(strset->strings,
strset->nalloc * sizeof(str_t));
}
strset->strings[strset->nstrings].string = (unsigned char *)string;
strset->strings[strset->nstrings].length = len;
++strset->nstrings;
++nstr;
}
return nstr;
}
/*
* Sort string set.
*
* Arguments:
* set pointer to the data structure holding all our strings
* start starting index
* end ending index (inclusive)
* idx which character of the string to use for radix sort. Since this
* is MSD, we start with 0, then progress to 1, 2, etc.
*/
void
sort_strset (strset_t *set, int start, int end, int idx)
{
buckets_t buckets;
int i, j;
#if DEBUG
fprintf(stderr, "INDEX: %d; start: %d; end: %d\n", idx, start, end);
#endif
bzero((void *) &buckets, sizeof(buckets));
for (i = start; i <= end; ++i) {
bkt_t *bkt = malloc(sizeof(bkt_t));
int index = STRVAL(set->strings[i], idx);
#if DEBUG
fprintf(stderr, "putting string %s into bucket %d\n",
set->strings[i].string, index);
#endif
bkt->str = set->strings[i];
bkt->next = buckets.bucket[index];
buckets.bucket[index] = bkt;
}
/*
* For each bucket, stick its contents into their places in strset and,
* if needed, immediately recurse to sort this subset.
*/
for (i = 0, j = 0; i < NBUCKETS; ++i) {
bkt_t *bkt, *last;
int nextround = j;
for (bkt = buckets.bucket[i], last = NULL; bkt;
last = bkt, bkt = bkt->next, ++j)
{
#if DEBUG
fprintf(stderr, "bucket: %d; string: %s\n", i, bkt->str.string);
#endif
set->strings[start + j] = bkt->str;
if (last)
free(last);
}
if (last)
free(last);
#if DEBUG
fprintf(stderr, "nextround: %d, j: %d\n", nextround, j);
#endif
/*
* This is the most important part of the program -- the recursive
* call. And the most important part of the call is when we
* actually call it, which is decided by the conditional like this:
* - if 'i' is zero, it means that strings in this bucket have
* ended (the index went past their length). We do not want to
* recurse for these, as the result will be the same.
* - we only want to sort this subset if there is more than one
* element in it; that's the second part of the conditional.
*/
if (i && nextround < j - 1)
sort_strset(set, start + nextround, start + j - 1, idx + 1);
}
#if DEBUG
fprintf(stderr, "- RETURN\n");
#endif
return;
}
int
compare_str (const void *_a, const void *_b)
{
const str_t *a, *b;
int r;
a = (const str_t *)_a;
b = (const str_t *)_b;
if ((r = strcmp((char *) a->string, (char *) b->string)) > 0)
return 1;
else if (r < 0)
return -1;
else
return 0;
}
void
_usage (const char *prog)
{
fprintf(stderr, "Usage: %s [-f file] [-qh]\n", prog);
fprintf(stderr, " -q means use quicksort instead of radix sort\n");
return;
}
int
main (int argc, char **argv)
{
int opt, nstr, i, quicksort = 0;
strset_t strset;
FILE *stream = stdin;
while (-1 != (opt = getopt(argc, argv, "qhf:"))) {
switch (opt) {
case 'h':
_usage(*argv);
exit(0);
break;
case 'f':
if (strcmp(optarg, "-")) {
stream = fopen(optarg, "r");
if (!stream) {
perror("Coould not open file for reading");
exit(1);
}
}
break;
case 'q':
quicksort = 1;
break;
case '?':
_usage(*argv);
exit(2);
}
}
/* Initialize strset */
strset.strings = malloc(16 * sizeof(str_t));
strset.nstrings = 0;
strset.nalloc = 16;
nstr = read_strings(stream, &strset);
if (quicksort)
qsort(strset.strings, nstr, sizeof(str_t), compare_str);
else
sort_strset(&strset, 0, nstr - 1, 0);
for (i = 0; i < nstr; ++i) {
printf("%s\n", strset.strings[i].string);
}
exit(0);
}
syntax highlighted by Code2HTML, v. 0.9.1