/*
 * Author: Dmitri Tikhonov <dxt22@njit.edu>
 * Date: April 16, 2006
 * mysort.c
 * CIS 610-102
 *
 * This program implements MSD radix sort to sort a set of arbitrary length
 * strings.
 *
 * Note: getline source is stolen.  This is a library on Linux.  I will not
 * re-implement stuff just for the sake of it.
 */

#define THISISLINUX 0

#define NBUCKETS 256
#define CHARVAL(x) (x)

#define STRVAL(str, index) (str.length <= index ? 0 : CHARVAL(str.string[index]))

#if THISISLINUX
/* This gives us access to getline(3) to make our life easier */
#define _GNU_SOURCE
#endif

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

struct str {
    unsigned char *string;
    size_t length;
};
typedef struct str str_t;

struct bkt {
    str_t str;
    struct bkt *next;
};
typedef struct bkt bkt_t;

struct buckets {
    bkt_t *bucket[NBUCKETS];
};
typedef struct buckets buckets_t;

#define INIT_BUCKETS(x) do { \
    int i; \
    for (i = 0; i < NBUCKETS; ++i) x[i] = NULL; \
} while (0)

struct strset {
    str_t *strings;
    int nstrings;   /* Number of pointers used */
    int nalloc;     /* Number of pointers allocated */
};
typedef struct strset strset_t;

#if !THISISLINUX
/* OK, this is stolen from a google group somewhere.  It's not my fault
 * Sun boxes don't come with GNU getline libraries */
static ssize_t
getline (char **lineptr, size_t *n, FILE *stream)
{
#define EXPAND_CHUNK 16

  int n_read = 0;
  char *line = *lineptr;

  if (lineptr == NULL) return -1;
  if (n == NULL) return -1;
  if (stream == NULL) return  -1;
  if (!(*lineptr != NULL || *n == 0)) return -1;
  
#ifdef HAVE_FLOCKFILE
  flockfile (stream);
#endif  
  
  while (1)
    {
      int c;
      
#ifdef HAVE_FLOCKFILE
      c = getc_unlocked (stream);
#else
      c = getc (stream);
#endif      

      if (c == EOF)
        {
          if (n_read > 0)
           line[n_read] = '\0';
          break;
        }

      if (n_read + 2 >= *n)
        {
         size_t new_size;

         if (*n == 0)
           new_size = 16;
         else
           new_size = *n * 2;

         if (*n >= new_size)    /* Overflowed size_t */
           line = NULL;
         else
           line = *lineptr ? realloc (*lineptr, new_size) : malloc (new_size);

         if (line)
           {
             *lineptr = line;
             *n = new_size;
           }
         else
           {
             if (*n > 0)
               {
                 (*lineptr)[*n - 1] = '\0';
                 n_read = *n - 2;
               }
             break;
           }
        }

      line[n_read] = c;
      n_read++;

      if (c == '\n')
        {
          line[n_read] = '\0';
          break;
        }
    }

#ifdef HAVE_FLOCKFILE
  funlockfile (stream);
#endif

  return n_read - 1;
}
#endif

/*
 * Read strings from the stream and populate strset.
 * Returns the number of strings read.
 */
int
read_strings (FILE *stream, strset_t *strset)
{
    int nstr = 0;   /* Return value */
    size_t len;
    ssize_t nread;
    char *string;

    for (string = NULL, len = 0;
         -1 != (nread = getline(&string, &len, stream));
         string = NULL, len = 0)
    {
        if ('\n' == string[nread - THISISLINUX]) {
            /* We don't care for newlines */
            string[nread - THISISLINUX] = '\0';
            --len;
        }

#if 0
        if (len <= 0) { /* Don't care for empty strings, either */
            free(string);
            continue;
        }
#endif

        if (strset->nstrings == strset->nalloc) {   /* Increase buffer */
            strset->nalloc *= 2;
            strset->strings = realloc(strset->strings,
                    strset->nalloc * sizeof(str_t));
        }

        strset->strings[strset->nstrings].string = (unsigned char *)string;
        strset->strings[strset->nstrings].length = len;

        ++strset->nstrings;
        ++nstr;
    }

    return nstr;
}

/*
 * Sort string set.
 *
 * Arguments:
 *  set     pointer to the data structure holding all our strings
 *  start   starting index
 *  end     ending index (inclusive)
 *  idx     which character of the string to use for radix sort.  Since this
 *          is MSD, we start with 0, then progress to 1, 2, etc.
 */
void
sort_strset (strset_t *set, int start, int end, int idx)
{
    buckets_t buckets;
    int i, j;

#if DEBUG
    fprintf(stderr, "INDEX: %d; start: %d; end: %d\n", idx, start, end);
#endif
    bzero((void *) &buckets, sizeof(buckets));

    for (i = start; i <= end; ++i) {
        bkt_t *bkt = malloc(sizeof(bkt_t));
        int index = STRVAL(set->strings[i], idx);

#if DEBUG
        fprintf(stderr, "putting string %s into bucket %d\n",
            set->strings[i].string, index);
#endif

        bkt->str = set->strings[i];
        bkt->next = buckets.bucket[index];
        buckets.bucket[index] = bkt;
    }

    /*
     * For each bucket, stick its contents into their places in strset and,
     * if needed, immediately recurse to sort this subset.
     */
    for (i = 0, j = 0; i < NBUCKETS; ++i) {
        bkt_t *bkt, *last;
        int nextround = j;
        for (bkt = buckets.bucket[i], last = NULL; bkt;
             last = bkt, bkt = bkt->next, ++j)
        {
#if DEBUG
            fprintf(stderr, "bucket: %d; string: %s\n", i, bkt->str.string);
#endif
            set->strings[start + j] = bkt->str;
            if (last)
                free(last);
        }
        if (last)
            free(last);
#if DEBUG
        fprintf(stderr, "nextround: %d, j: %d\n", nextround, j);
#endif
        /*
         * This is the most important part of the program -- the recursive
         * call.  And the most important part of the call is when we
         * actually call it, which is decided by the conditional like this:
         * - if 'i' is zero, it means that strings in this bucket have
         *   ended (the index went past their length).  We do not want to
         *   recurse for these, as the result will be the same.
         * - we only want to sort this subset if there is more than one
         *   element in it; that's the second part of the conditional.
         */
        if (i && nextround < j - 1)
            sort_strset(set, start + nextround, start + j - 1, idx + 1);
    }
    
#if DEBUG
    fprintf(stderr, "- RETURN\n");
#endif
    return;
}

int
compare_str (const void *_a, const void *_b)
{
    const str_t *a, *b;
    int r;

    a = (const str_t *)_a;
    b = (const str_t *)_b;

    if ((r = strcmp((char *) a->string, (char *) b->string)) > 0)
        return 1;
    else if (r < 0)
        return -1;
    else
        return 0;
}


void
_usage (const char *prog)
{
    fprintf(stderr, "Usage: %s [-f file] [-qh]\n", prog);
    fprintf(stderr, "  -q means use quicksort instead of radix sort\n");
    return;
}

int
main (int argc, char **argv)
{
    int opt, nstr, i, quicksort = 0;
    strset_t strset;
    FILE *stream = stdin;

    while (-1 != (opt = getopt(argc, argv, "qhf:"))) {
        switch (opt) {
            case 'h':
                _usage(*argv);
                exit(0);
                break;
            case 'f':
                if (strcmp(optarg, "-")) {
                    stream = fopen(optarg, "r");
                    if (!stream) {
                        perror("Coould not open file for reading");
                        exit(1);
                    }
                }
                break;
            case 'q':
                quicksort = 1;
                break;
            case '?':
                _usage(*argv);
                exit(2);
        }
    }

    /* Initialize strset */
    strset.strings = malloc(16 * sizeof(str_t));
    strset.nstrings = 0;
    strset.nalloc = 16;

    nstr = read_strings(stream, &strset);

    if (quicksort)
        qsort(strset.strings, nstr, sizeof(str_t), compare_str);
    else
        sort_strset(&strset, 0, nstr - 1, 0);

    for (i = 0; i < nstr; ++i) {
        printf("%s\n", strset.strings[i].string);
    }

    exit(0);
}

