/* relational.c:
 *
 ****************************************************************
 * Copyright (C) 2003 Tom Lord
 *
 * See the file "COPYING" for further information about
 * the copyright and warranty status of this work.
 */


#include "hackerlab/os/stdarg.h"
#include "hackerlab/vu/safe.h"
#include "hackerlab/vu/safe-vu-utils-vfdbuf.h"
#include "hackerlab/arrays/ar.h"
#include "hackerlab/sort/qsort.h"
#include "hackerlab/char/char-class.h"
#include "hackerlab/char/str.h"
#include "tla/libawk/relational.h"


/* __STDC__ prototypes for static functions */
/* The three functions below are the comparison callbacks handed to
 * quicksort by the rel_sort_table_by_* entry points.  Each is given
 * pointers to two table elements (rel_record values) and, as the
 * third argument, the address of the sort spec built by its caller.
 */
static int rec_cmp_by_field (void * va, void * vb, void * vdata);
static int rec_cmp_by_field_fn (void * va, void * vb, void * vdata);
static int rec_cmp_by_fields (void * va, void * vb, void * vdata);




/* Read one line from FD and split it into N_FIELDS whitespace
 * separated fields.
 *
 * Returns 0 when safe_next_line yields no line, otherwise a freshly
 * built record of N_FIELDS saved strings.
 *
 * A field is considered ill formed when it is empty or when the line
 * runs out before any whitespace follows it.  If ERR_NAME is non-0
 * that condition prints a two line diagnostic (naming ERR_NAME and
 * ERR_SRC) on descriptor 2 and exits with status 2.  If ERR_NAME is 0
 * no diagnostic is made and the field is stored as scanned (possibly
 * as an empty string).
 *
 * NOTE(review): the line storage is not released here; presumably it
 * belongs to the buffering behind safe_next_line -- confirm.
 */
rel_record
rel_read_record (int fd, int n_fields, char * err_name, char * err_src)
{
  t_uchar * field_start;
  t_uchar * scan;
  long remaining;
  rel_record rec = 0;
  int n;

  safe_next_line (&field_start, &remaining, fd);
  if (field_start == 0)
    return 0;

  ar_setsize ((void **)&rec, 0, n_fields, sizeof (rel_field));

  scan = field_start;
  n = 0;
  while (n < n_fields)
    {
      int malformed;

      /* advance over the field's non-blank characters */
      for (; remaining && !char_is_space (*scan); ++scan)
        --remaining;

      malformed = ((scan == field_start) || !remaining);
      if (malformed && err_name)
        {
          safe_printfmt (2, "%s: ill formated input\n", err_name);
          safe_printfmt (2, "   input source: %s\n", err_src);
          exit (2);
        }

      rec[n] = str_save_n (0, field_start, scan - field_start);

      /* advance over the separator that follows the field */
      for (; remaining && char_is_space (*scan); ++scan)
        --remaining;

      field_start = scan;
      ++n;
    }

  return rec;
}


/* Build a table by calling rel_read_record on FD until it returns 0.
 *
 * N_FIELDS, ERR_NAME and ERR_SRC are passed through unchanged to
 * rel_read_record.  Returns 0 if no record could be read at all.
 */
rel_table
rel_read_table (int fd, int n_fields, char * err_name, char * err_src)
{
  rel_table table = 0;
  rel_record next;

  for (next = rel_read_record (fd, n_fields, err_name, err_src);
       next;
       next = rel_read_record (fd, n_fields, err_name, err_src))
    {
      rel_record * slot;

      slot = (rel_record *)ar_push ((void **)&table, 0, sizeof (rel_record));
      *slot = next;
    }

  return table;
}


/* Write the fields of REC to FD on one line, separated by tabs and
 * followed by a newline.  A record with no fields prints nothing,
 * not even the newline.
 */
void
rel_print_record (int fd, rel_record rec)
{
  int n_fields;
  int i;

  n_fields = ar_size ((void *)rec, 0, sizeof (rel_field));
  if (n_fields <= 0)
    return;

  for (i = 0; i < n_fields; ++i)
    {
      char * separator;

      separator = ((i == 0) ? "" : "\t");
      safe_printfmt (fd, "%s%s", separator, rec[i]);
    }

  safe_printfmt (fd, "\n");
}

/* Print every record of FILE to FD, in order, using
 * rel_print_record (tab separated fields).
 */
void
rel_print_table (int fd, rel_table file)
{
  int n_records;
  int i;

  n_records = ar_size ((void *)file, 0, sizeof (rel_record));

  i = 0;
  while (i < n_records)
    {
      rel_print_record (fd, file[i]);
      ++i;
    }
}


/* Like rel_print_record, but the fields are separated by a single
 * space instead of a tab.  A record with no fields prints nothing.
 */
void
rel_print_record_sp (int fd, rel_record rec)
{
  int n_fields;
  int i;

  n_fields = ar_size ((void *)rec, 0, sizeof (rel_field));
  if (n_fields <= 0)
    return;

  i = 0;
  do
    {
      char * separator;

      separator = (i ? " " : "");
      safe_printfmt (fd, "%s%s", separator, rec[i]);
      ++i;
    }
  while (i < n_fields);

  safe_printfmt (fd, "\n");
}

/* Print every record of FILE to FD, in order, using
 * rel_print_record_sp (space separated fields).
 */
void
rel_print_table_sp (int fd, rel_table file)
{
  int n_records;
  int i;

  n_records = ar_size ((void *)file, 0, sizeof (rel_record));

  i = 0;
  while (i < n_records)
    {
      rel_print_record_sp (fd, file[i]);
      ++i;
    }
}


/* Split the 0-terminated STRING at runs of whitespace.
 *
 * Returns a table holding one single-field record per word, in
 * order.  Returns 0 if STRING is 0 or contains no word at all.
 */
rel_table
rel_ws_split (t_uchar * string)
{
  rel_table answer = 0;
  t_uchar * start;
  t_uchar * end;

  if (!string)
    return 0;

  start = string;

  while (1)
    {
      /* skip the whitespace in front of the next word */
      while (char_is_space (*start))
        ++start;

      if (!*start)
        return answer;

      end = start;

      while (*end && !char_is_space (*end))
        ++end;

      /* rel_add_records fetches its terminator with va_arg as a
       * pointer, so the terminator must be passed as a null pointer.
       * A bare int `0' has the wrong width wherever int is narrower
       * than a pointer.
       */
      rel_add_records (&answer, rel_singleton_record_n (start, end - start), (void *)0);

      start = end;
    }
}

/* Split the 0-terminated STRING into lines at each '\n'.
 *
 * Returns a table holding one single-field record per line, without
 * the newline.  An empty line yields a record holding an empty
 * string; a newline at the very end of STRING does not produce an
 * extra record.  Returns 0 if STRING is 0 or empty.
 */
rel_table
rel_nl_split (t_uchar * string)
{
  rel_table answer = 0;
  t_uchar * start;
  t_uchar * end;

  if (!string)
    return 0;

  start = string;

  while (1)
    {
      if (!*start)
        return answer;

      end = start;

      while (*end && (*end != '\n'))
        ++end;

      /* rel_add_records fetches its terminator with va_arg as a
       * pointer, so the terminator must be passed as a null pointer.
       * A bare int `0' has the wrong width wherever int is narrower
       * than a pointer.
       */
      rel_add_records (&answer, rel_singleton_record_n (start, end - start), (void *)0);

      /* step over the newline, if that is what ended the line */
      if (*end)
        start = end + 1;
      else
        start = end;
    }
}


/* Reverse the order of the records of T in place by swapping
 * elements pairwise from both ends toward the middle.
 */
void
rel_reverse_table (rel_table t)
{
  int lo;
  int hi;

  for (lo = 0, hi = rel_n_records (t) - 1; lo < hi; ++lo, --hi)
    {
      rel_record held;

      held = t[lo];
      t[lo] = t[hi];
      t[hi] = held;
    }
}



/* Return the number of fields in R, as reported by ar_size for
 * elements of type rel_field.
 */
int
rel_n_fields (rel_record r)
{
  int count;

  count = ar_size ((void *)r, 0, sizeof (rel_field));
  return count;
}

/* Return the number of records in R, as reported by ar_size for
 * elements of type rel_record.
 */
int
rel_n_records (rel_table r)
{
  int count;

  count = ar_size ((void *)r, 0, sizeof (rel_record));
  return count;
}


/* Return a new record with as many fields as R, each field being a
 * str_save copy of the corresponding field of R.
 */
rel_record
rel_copy_record (rel_record r)
{
  rel_record copy = 0;
  int n_fields;
  int i;

  n_fields = rel_n_fields (r);
  ar_setsize ((void **)&copy, 0, n_fields, sizeof (rel_field));

  i = 0;
  while (i < n_fields)
    {
      copy[i] = str_save (0, r[i]);
      ++i;
    }

  return copy;
}

/* Return a new table with as many records as F, each record being a
 * rel_copy_record copy of the corresponding record of F.
 */
rel_table
rel_copy_table (rel_table f)
{
  rel_table copy = 0;
  int n_records;
  int i;

  n_records = rel_n_records (f);
  ar_setsize ((void **)&copy, 0, n_records, sizeof (rel_record));

  i = 0;
  while (i < n_records)
    {
      copy[i] = rel_copy_record (f[i]);
      ++i;
    }

  return copy;
}

/* Release every field string of R with lim_free, then release the
 * field array itself with ar_free.
 *
 * R is passed by value, so the caller's own pointer is not cleared.
 */
void
rel_free_record (rel_record r)
{
  int n_fields;
  int i;

  n_fields = rel_n_fields (r);

  i = 0;
  while (i < n_fields)
    {
      lim_free (0, r[i]);
      ++i;
    }

  ar_free ((void **)&r, 0);
}

/* Release every record of T with rel_free_record, then release the
 * record array itself with ar_free.
 *
 * T is passed by value, so the caller's own pointer is not cleared.
 */
void
rel_free_table (rel_table t)
{
  int n_records;
  int i;

  n_records = rel_n_records (t);

  i = 0;
  while (i < n_records)
    {
      rel_free_record (t[i]);
      ++i;
    }

  ar_free ((void **)&t, 0);
}



/* Sort parameters that rel_sort_table_by_field hands to
 * rec_cmp_by_field through quicksort's closure argument.
 */
struct rel_sort_spec
{
  int reverse_p;                /* non-0: negate the comparison result */
  int field;                    /* index of the field to compare */
};

/* Sort the records of F in place with quicksort, ordering them by
 * str_cmp of the field at index FIELD.  A non-0 REVERSE_P inverts
 * the order.
 */
void
rel_sort_table_by_field (int reverse_p, rel_table f, int field)
{
  struct rel_sort_spec how;
  int n_records;

  how.field = field;
  how.reverse_p = reverse_p;

  n_records = rel_n_records (f);
  quicksort ((void *)f, n_records, sizeof (rel_record), rec_cmp_by_field, (void *)&how);
}

static int
rec_cmp_by_field (void * va, void * vb, void * vdata)
{
  rel_record * a;
  rel_record * b;
  struct rel_sort_spec * spec;

  a = (rel_record *)va;
  b = (rel_record *)vb;
  spec = (struct rel_sort_spec *)vdata;

  if (spec->reverse_p)
    {
      return -str_cmp ((*a)[spec->field], (*b)[spec->field]);
    }
  else
    {
      return str_cmp ((*a)[spec->field], (*b)[spec->field]);
    }
}


/* Sort parameters that rel_sort_table_by_field_fn hands to
 * rec_cmp_by_field_fn through quicksort's closure argument.
 */
struct rel_sort_by_fn_spec
{
  int reverse_p;                /* non-0: negate the comparison result */
  int field;                    /* index of the field to compare */
  int (*fn) (void * va, void * vb); /* caller-supplied comparison; called with the two field values */
};

/* Sort the records of F in place with quicksort, ordering them by
 * applying FN to the field at index FIELD of each pair of records.
 * A non-0 REVERSE_P inverts the order.
 */
void
rel_sort_table_by_field_fn (int reverse_p,
                            rel_table f,
                            int field, int (*fn)(void *, void *))
{
  struct rel_sort_by_fn_spec how;
  int n_records;

  how.fn = fn;
  how.field = field;
  how.reverse_p = reverse_p;

  n_records = rel_n_records (f);
  quicksort ((void *)f, n_records, sizeof (rel_record), rec_cmp_by_field_fn, (void *)&how);
}

static int
rec_cmp_by_field_fn (void * va, void * vb, void * vdata)
{
  rel_record * a;
  rel_record * b;
  struct rel_sort_by_fn_spec * spec;

  a = (rel_record *)va;
  b = (rel_record *)vb;
  spec = (struct rel_sort_by_fn_spec *)vdata;

  if (spec->reverse_p)
    {
      return -spec->fn ((*a)[spec->field], (*b)[spec->field]);
    }
  else
    {
      return spec->fn ((*a)[spec->field], (*b)[spec->field]);
    }
}




/* Sort parameters that rel_sort_table_by_fields hands to
 * rec_cmp_by_fields through quicksort's closure argument.
 */
struct rel_nsort_spec
{
  int reverse_p;                /* non-0: negate each comparison result */
  int * fields;                 /* field indexes to compare in order; a negative entry ends the list */
};

/* Collect a list of field indexes into a newly allocated int array.
 *
 * The arguments are ints; the first negative one ends the list and
 * is stored as the array's last element, which is how
 * rec_cmp_by_fields finds the end.
 *
 * F may itself be the negative terminator (an empty list).  In that
 * case no further argument is fetched: the caller has passed none,
 * and reading one with va_arg would be undefined behavior.
 */
int *
rel_sort_fields (int f, ...)
{
  va_list fp;
  int * answer;

  answer = 0;
  *(int *)ar_push ((void **)&answer, 0, sizeof (int)) = f;

  /* the terminator is already stored; there is nothing more to read */
  if (f < 0)
    return answer;

  va_start (fp, f);
  while (1)
    {
      f = va_arg (fp, int);
      *(int *)ar_push ((void **)&answer, 0, sizeof (int)) = f;
      if (f < 0)
        break;
    }
  va_end (fp);
  return answer;
}

/* Sort the records of F in place with quicksort, comparing records
 * field by field in the order given by FIELDS (a list of field
 * indexes ended by a negative entry).  A non-0 REVERSE_P inverts the
 * order.
 */
void
rel_sort_table_by_fields (int reverse_p, rel_table f, int * fields)
{
  struct rel_nsort_spec how;
  int n_records;

  how.fields = fields;
  how.reverse_p = reverse_p;

  n_records = rel_n_records (f);
  quicksort ((void *)f, n_records, sizeof (rel_record), rec_cmp_by_fields, (void *)&how);
}

static int
rec_cmp_by_fields (void * va, void * vb, void * vdata)
{
  rel_record * a;
  rel_record * b;
  struct rel_nsort_spec * spec;
  int nth;

  a = (rel_record *)va;
  b = (rel_record *)vb;
  spec = (struct rel_nsort_spec *)vdata;

  for (nth = 0; spec->fields[nth] >= 0; ++nth)
    {
      int cmp;

      if (spec->reverse_p)
        {
          cmp = -str_cmp ((*a)[spec->fields[nth]], (*b)[spec->fields[nth]]);
        }
      else
        {
          cmp = str_cmp ((*a)[spec->fields[nth]], (*b)[spec->fields[nth]]);
        }

      if (cmp)
        return cmp;
    }

  return 0;
}



/* Collapse each run of adjacent records of *TABLE whose field at
 * index FIELD compares equal (str_cmp) down to the run's first
 * record.
 *
 * The dropped records are released with rel_free_record, the kept
 * ones are packed toward the front, and the table is then resized
 * to the number of kept records.
 */
void
rel_uniq_by_field (rel_table * table, int field)
{
  rel_table recs = *table;
  int total = rel_n_records (recs);
  int kept = 0;
  int next = 0;

  while (next < total)
    {
      rel_record keeper;

      keeper = recs[next];
      recs[kept] = keeper;
      ++next;

      /* discard the records that follow and repeat the keeper's field */
      while ((next < total) && !str_cmp (keeper[field], recs[next][field]))
        {
          rel_free_record (recs[next]);
          ++next;
        }

      ++kept;
    }

  ar_setsize ((void **)table, 0, kept, sizeof (rel_record));
}



/* Append a str_save copy of FIELD as a new last field of *R.
 */
void
rel_add_field (rel_record * r, t_uchar * field)
{
  t_uchar ** slot;

  slot = (t_uchar **)ar_push ((void **)r, 0, sizeof (t_uchar *));
  *slot = str_save (0, field);
}

/* Build a record from a list of strings.
 *
 * The arguments are t_uchar * values; the first 0 pointer ends the
 * list.  Each string is copied with str_save.  Returns 0 if FIELD0
 * itself is 0.
 */
rel_record
rel_make_record (t_uchar * field0, ...)
{
  va_list fp;
  rel_record rec = 0;
  t_uchar * next;

  if (!field0)
    return 0;

  va_start (fp, field0);
  for (next = field0; next; next = va_arg (fp, t_uchar *))
    {
      *(rel_field *)ar_push ((void **)&rec, 0, sizeof (rel_field)) = str_save (0, next);
    }
  va_end (fp);

  return rec;
}


/* Return a new one-field record whose field is a str_save_n copy of
 * the LEN characters starting at START.
 */
rel_record
rel_singleton_record_n (t_uchar * start, size_t len)
{
  rel_record rec = 0;
  rel_field * slot;

  slot = (rel_field *)ar_push ((void **)&rec, 0, sizeof (rel_field));
  *slot = str_save_n (0, start, len);

  return rec;
}

/* Append records to *FILE.
 *
 * The variable arguments are fetched as rel_record values; the first
 * 0 record ends the list.  The records are stored as given, not
 * copied.
 */
void
rel_add_records (rel_table * file, ...)
{
  va_list rp;
  rel_record next;

  va_start (rp, file);
  while (1)
    {
      rel_record * slot;

      next = va_arg (rp, rel_record);
      if (!next)
        break;

      slot = (rel_record *)ar_push ((void **)file, 0, sizeof (rel_record));
      *slot = next;
    }
  va_end (rp);
}



/* This should eventually be modified to not leak memory.
 *
 * The number of distinct join output specs in a process is likely to
 * be very small.  So rather than allocating a new one each time,
 * this function should return the same (==) value for equal (==)
 * arguments.
 *
 * However, when making that change, grep for uses of this function
 * that are followed by ar_free calls.  As an expedient, some
 * leaks have already been fixed that way rather than the ideal way.
 */
/* Build a join output spec from a list of (file, field) int pairs.
 *
 * FILE and FIELD form the first pair; further pairs follow as
 * variable arguments and the list ends at the first negative file
 * number.  The returned array holds one element per pair followed by
 * a terminating element whose file and field are both -1.
 */
struct rel_join_output_spec *
rel_join_output (int file, int field, ...)
{
  va_list ap;
  struct rel_join_output_spec * spec = 0;
  struct rel_join_output_spec * slot;

  va_start (ap, field);
  while (1)
    {
      /* store the pair in hand, then fetch the next one */
      slot = (struct rel_join_output_spec *)ar_push ((void **)&spec, 0, sizeof (struct rel_join_output_spec));
      slot->file = file;
      slot->field = field;

      file = va_arg (ap, int);
      if (file < 0)
        break;
      field = va_arg (ap, int);
    }
  va_end (ap);

  slot = (struct rel_join_output_spec *)ar_push ((void **)&spec, 0, sizeof (struct rel_join_output_spec));
  slot->file = -1;
  slot->field = -1;

  return spec;
}


/* Walk FILE1 and FILE2 in step, comparing field FILE1_FIELD of the
 * current FILE1 record with field FILE2_FIELD of the current FILE2
 * record (str_cmp), and build a new table from the selected steps.
 *
 * V_FILE picks which steps produce an output record:
 *
 *   V_FILE < 0    the two keys compare equal
 *   V_FILE == 1   the FILE1 key is smaller, or FILE2 is used up
 *   otherwise     the FILE2 key is smaller, or FILE1 is used up
 *
 * OUTPUT lists the (file, field) pairs to copy into each output
 * record and ends at an element whose file is -1.  A pair whose file
 * is 1 reads from the FILE1 record, any other from the FILE2 record.
 *
 * After each step the side with the smaller key advances; both
 * advance when the keys are equal.
 *
 * NOTE(review): the walk only lines up if both tables are already
 * ordered by their join fields -- presumably a caller obligation.
 * Likewise, once one table is used up its current record is 0, so
 * OUTPUT must not name that table in the unmatched modes.
 */
rel_table
rel_join (int v_file,
          struct rel_join_output_spec * output,
          int file1_field,
          int file2_field,
          rel_table file1,
          rel_table file2)
{
  rel_table joined = 0;
  int n1 = rel_n_records (file1);
  int n2 = rel_n_records (file2);
  int i1 = 0;
  int i2 = 0;
  int n_out;

  /* count the output columns up to the -1 terminator */
  n_out = 0;
  while (output[n_out].file != -1)
    ++n_out;

  while ((i1 < n1) || (i2 < n2))
    {
      int order;
      int emit;

      /* a used up table sorts after everything left in the other */
      if (i2 == n2)
        order = -1;
      else if (i1 == n1)
        order = 1;
      else
        order = str_cmp (file1[i1][file1_field], file2[i2][file2_field]);

      emit = ((v_file < 0)
              ? !order
              : ((v_file == 1) ? (order < 0) : (order > 0)));

      if (emit)
        {
          rel_record row = 0;
          rel_record rec1 = ((i1 < n1) ? file1[i1] : 0);
          rel_record rec2 = ((i2 < n2) ? file2[i2] : 0);
          int k;

          for (k = 0; k < n_out; ++k)
            {
              rel_record from;

              from = ((output[k].file == 1) ? rec1 : rec2);
              *(t_uchar **)ar_push ((void **)&row, 0, sizeof (char *)) = str_save (0, from[output[k].field]);
            }

          *(rel_record *)ar_push ((void **)&joined, 0, sizeof (rel_record)) = row;
        }

      if ((order <= 0) && (i1 < n1))
        ++i1;

      if ((order >= 0) && (i2 < n2))
        ++i2;
    }

  return joined;
}



/* Collect a list of field indexes into a newly allocated cut spec.
 *
 * The arguments are ints; the first negative one ends the list and
 * is stored as the spec's last element, which is how rel_cut_record
 * finds the end.
 *
 * FIELD may itself be the negative terminator (an empty list).  In
 * that case no further argument is fetched: the caller has passed
 * none, and reading one with va_arg would be undefined behavior.
 */
rel_cut_spec
rel_cut_list (int field, ...)
{
  va_list fp;
  rel_cut_spec answer;

  answer = 0;
  *(int *)ar_push ((void **)&answer, 0, sizeof (int)) = field;

  /* the terminator is already stored; there is nothing more to read */
  if (field < 0)
    return answer;

  va_start (fp, field);
  while (1)
    {
      field = va_arg (fp, int);
      *(int *)ar_push ((void **)&answer, 0, sizeof (int)) = field;
      if (field < 0)
        break;
    }
  va_end (fp);
  return answer;
}


/* Return a new record made of str_save copies of the fields of R
 * selected by FIELDS, in the order FIELDS lists them.  FIELDS ends
 * at its first negative entry.  Returns 0 if FIELDS selects nothing.
 */
rel_record
rel_cut_record (rel_cut_spec fields, rel_record r)
{
  rel_record picked = 0;
  int i;

  i = 0;
  while (fields[i] >= 0)
    {
      t_uchar ** slot;

      slot = (t_uchar **)ar_push ((void **)&picked, 0, sizeof (t_uchar *));
      *slot = str_save (0, r[fields[i]]);
      ++i;
    }

  return picked;
}


/* Return a new table holding, for each record of T in order, the
 * record rel_cut_record builds from it with FIELDS.  Returns 0 if T
 * has no records.
 */
rel_table
rel_cut (rel_cut_spec fields, rel_table t)
{
  rel_table answer;
  int lim;
  int x;

  answer = 0;

  lim = ar_size ((void *)t, 0, sizeof (rel_record));
  for (x = 0; x < lim; ++x)
    {
      /* rel_add_records fetches its terminator with va_arg as a
       * rel_record, so the terminator must be passed as a null
       * rel_record.  A bare int `0' has the wrong width wherever int
       * is narrower than a pointer.
       */
      rel_add_records (&answer, rel_cut_record (fields, t[x]), (rel_record)0);
    }

  return answer;
}



/* Append a rel_copy_record copy of every record of T, in order, to
 * *OUT.  T itself is left untouched.
 */
void
rel_append_x (rel_table * out, rel_table t)
{
  int n_records;
  int i;

  n_records = rel_n_records (t);

  i = 0;
  while (i < n_records)
    {
      *(rel_record *)ar_push ((void **)out, 0, sizeof (rel_record)) = rel_copy_record (t[i]);
      ++i;
    }
}


/* tag: Tom Lord Mon May  5 12:50:00 2003 (relational.c)
 */
