/* Remove duplicates in Packages file, pending libd-i doing it for us. */

#define _GNU_SOURCE

#include <sys/types.h>
#include <ctype.h>
#include <regex.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <debian-installer.h>

/* Hash tables keyed by package name (a di_rstring): `versions' maps each
 * name to the newest di_package_version accepted so far, `entries' maps it
 * to a copy of the text of the stanza that carried that version.
 */
di_hash_table *versions, *entries;
/* Compiled patterns whose single capture group is the value of a
 * "Package:" or "Version:" field; main() compiles them before any stanza
 * is processed.
 */
regex_t package_re, version_re;

static void package_version_free (void *key)
{
	di_package_version *ver = key;

	di_free (ver->upstream);
	di_free (ver->debian_revision);
	di_free (ver);
}

/* Compile REGEX into PREG using regcomp() flags CFLAGS, treating failure
 * as fatal: the regerror() description is written to stderr and the
 * process exits with status 1.  On return PREG is always usable.
 */
static void xregcomp (regex_t *preg, const char *regex, int cflags)
{
	int err;
	char *errbuf;
	size_t errbuf_size;

	err = regcomp (preg, regex, cflags);
	if (!err)
		return;

	/* A first call with a zero-sized buffer only reports how many bytes
	 * the message needs; the second call fills the buffer.
	 */
	errbuf_size = regerror (err, preg, NULL, 0);
	errbuf = di_malloc (errbuf_size);
	regerror (err, preg, errbuf, errbuf_size);
	/* End the diagnostic with a newline, like the other error message in
	 * this file, so it is not glued to whatever is printed next.
	 */
	fprintf (stderr, "Failed to compile /%s/: %s\n", regex, errbuf);
	di_free (errbuf);
	exit (1);
}

/* Run PREG against ENTRY's text and hand back its first capture group.
 *
 * When the pattern matches and group 1 took part in the match, OUT->size
 * is set to the length of the captured text and OUT->string to a copy of
 * it made with di_stradup(); the function then returns true.  Otherwise it
 * returns false and leaves OUT untouched.  Callers in this file release
 * OUT->string with di_free().
 */
static bool match_regex_capture_one (regex_t *preg, di_rstring *entry,
				     di_rstring *out)
{
	regmatch_t groups[2];
	const regmatch_t *capture = &groups[1];

	if (regexec (preg, entry->string, 2, groups, 0) != 0)
		return false;
	/* An offset of -1 marks a group that did not participate. */
	if (capture->rm_so == -1 || capture->rm_eo == -1)
		return false;

	out->size = capture->rm_eo - capture->rm_so;
	out->string = di_stradup (entry->string + capture->rm_so, out->size);
	return true;
}

/* Return a newly allocated deep copy of ORIG: a fresh di_rstring that
 * owns a private duplicate of ORIG's text.
 *
 * The text is duplicated with di_stradup() from the recorded size, the
 * same way match_regex_capture_one() builds its strings, instead of with
 * strdup().  strdup() stops at the first NUL byte, so for text containing
 * an embedded NUL the copy's buffer would be shorter than the size stored
 * next to it, and output() indexes the buffer with that size.
 */
static di_rstring *rstring_copy (di_rstring *orig)
{
	di_rstring *copy;

	copy = di_new (di_rstring, 1);
	copy->size = orig->size;
	copy->string = di_stradup (orig->string, orig->size);
	return copy;
}

/* Consider one Packages stanza (ENTRY) for inclusion in the output.
 *
 * The stanza is ignored when it has no Package field, when the package
 * name is empty or contains a '/', or when it has no Version field.
 * Otherwise it is recorded in `versions' and `entries' unless a version
 * that compares greater than or equal to it is already recorded for the
 * same package.  A version that cannot be parsed is fatal (exit status 2).
 * ENTRY itself is only read; the tables receive copies.
 */
static void deduplicate_one (di_rstring *entry)
{
	di_rstring name, version_text;
	di_package carrier;
	di_package_version *candidate, *known;

	/* Both strings are released unconditionally at `done'. */
	name.string = NULL;
	version_text.string = NULL;

	if (!match_regex_capture_one (&package_re, entry, &name))
		goto done;
	if (!*name.string || memchr (name.string, '/', name.size))
		goto done;
	if (!match_regex_capture_one (&version_re, entry, &version_text))
		goto done;

	/* libdebian-installer only parses a version out of a di_package, so
	 * wrap the raw string in a throwaway one.
	 */
	carrier.version = version_text.string;
	candidate = di_package_version_parse (&carrier);
	if (!candidate) {
		fprintf (stderr, "failed to parse version %s\n",
			 version_text.string);
		exit (2);
	}

	known = di_hash_table_lookup (versions, &name);
	if (!known || di_package_version_compare (known, candidate) < 0) {
		/* First sighting, or strictly newer than what is recorded. */
		di_hash_table_insert (versions, rstring_copy (&name),
				      candidate);
		di_hash_table_insert (entries, rstring_copy (&name),
				      rstring_copy (entry));
	} else {
		package_version_free (candidate);
	}

done:
	di_free (name.string);
	di_free (version_text.string);
}

/* One key/value pair of the `entries' hash table, flattened into an array
 * element so the pairs can be sorted.  entry_append() fills both members
 * by structure assignment, so they share their string buffers with the
 * objects stored in the table.
 */
struct entry {
	di_rstring key, value;
};

/* Accumulator passed as user data to entry_append() while output() walks
 * the `entries' hash table.
 */
struct all_entries {
	struct entry *entries;	/* array with room for `size' elements */
	di_ksize_t size, pos;	/* element count; index of the next free slot */
};

static void entry_append (void *key, void *value, void *user_data)
{
	struct all_entries *all_entries = user_data;
	all_entries->entries[all_entries->pos].key = *(di_rstring *) key;
	all_entries->entries[all_entries->pos].value = *(di_rstring *) value;
	++all_entries->pos;
}

/* qsort() comparator for struct entry elements: orders them by strcmp()
 * of their key strings.
 */
static int entry_compare (const void *va, const void *vb)
{
	const struct entry *lhs = (const struct entry *) va;
	const struct entry *rhs = (const struct entry *) vb;

	return strcmp (lhs->key.string, rhs->key.string);
}

static void output (void)
{
	struct all_entries all_entries;
	di_ksize_t i;

	all_entries.size = di_hash_table_size (entries);
	all_entries.entries = di_new (struct entry, all_entries.size);
	all_entries.pos = 0;
	di_hash_table_foreach (entries, entry_append, &all_entries);
	qsort (all_entries.entries, all_entries.size, sizeof (struct entry),
	       entry_compare);

	for (i = 0; i < all_entries.size; ++i) {
		di_rstring *value = &all_entries.entries[i].value;

		fputs (value->string, stdout);
		if (!value->size || value->string[value->size - 1] != '\n')
			fputc ('\n', stdout);
		fputc ('\n', stdout);
	}

	di_free (all_entries.entries);
}

/* Hash-table destroy callback for the di_rstring objects produced by
 * rstring_copy(): releases the string buffer as well as the structure
 * holding it.  Plain free() on the structure alone would leak the buffer.
 */
static void rstring_destroy (void *data)
{
	di_rstring *rstring = data;

	free (rstring->string);
	free (rstring);
}

/* Read a Packages file from stdin, keep for every package only the stanza
 * with the newest version, and write the survivors to stdout sorted by
 * package name.  Stanzas are separated by blank lines.
 *
 * Returns 0 on success and 1 when reading stdin or writing stdout fails.
 * The helpers may also terminate the process themselves: exit status 1
 * for a regex that fails to compile, 2 for an unparseable version.
 */
int main (int argc, char **argv)
{
	int ret;
	di_rstring line;
	ssize_t line_size;
	size_t line_alloc;
	di_rstring entry;
	size_t entry_alloc;

	/* No command-line arguments are used. */
	(void) argc;
	(void) argv;

	versions = di_hash_table_new_full (di_rstring_hash, di_rstring_equal,
					   rstring_destroy,
					   package_version_free);
	entries = di_hash_table_new_full (di_rstring_hash, di_rstring_equal,
					  rstring_destroy, rstring_destroy);
	xregcomp (&package_re, "^Package:[[:space:]]+(.*)",
		  REG_EXTENDED | REG_ICASE | REG_NEWLINE);
	xregcomp (&version_re, "^Version:[[:space:]]+(.*)",
		  REG_EXTENDED | REG_ICASE | REG_NEWLINE);
	line_alloc = 0;
	line.string = NULL;
	entry_alloc = 4096;
	entry.string = di_malloc (entry_alloc);
	/* Start with an empty string: if the very first input line is blank,
	 * the buffer is handed to deduplicate_one() before anything has been
	 * copied into it.
	 */
	*entry.string = '\0';
	entry.size = 0;

	while ((line_size = getline (&line.string, &line_alloc, stdin)) >= 0) {
		line.size = (di_ksize_t) line_size;
		if (*line.string && *line.string != '\n') {
			di_ksize_t new_size;

			/* Non-blank line: append it, together with its
			 * terminating NUL, to the stanza being collected.
			 */
			new_size = entry.size + line.size + 1;
			if (new_size > entry_alloc) {
				while (new_size > entry_alloc)
					entry_alloc *= 2;
				entry.string = di_realloc (entry.string,
							   entry_alloc);
			}
			memcpy (entry.string + entry.size, line.string,
				line.size + 1);
			entry.size += line.size;
		} else {
			/* Blank line: the stanza is complete. */
			deduplicate_one (&entry);
			*entry.string = '\0';
			entry.size = 0;
		}
	}
	if (ferror (stdin)) {
		perror ("getline");
		ret = 1;
		goto out;
	}
	/* The last stanza need not be followed by a blank line. */
	if (entry.size)
		deduplicate_one (&entry);
	output ();
	/* Make write failures visible in the exit status instead of
	 * silently producing a truncated file.
	 */
	if (fflush (stdout) != 0 || ferror (stdout)) {
		perror ("stdout");
		ret = 1;
		goto out;
	}
	ret = 0;

out:
	/* getline() allocates its buffer with malloc(). */
	free (line.string);
	di_free (entry.string);
	di_hash_table_destroy (versions);
	di_hash_table_destroy (entries);
	regfree (&package_re);
	regfree (&version_re);
	return ret;
}
