#!/usr/bin/env python

# galois - generate galois lattices

# Copyright (c) 2012 Claude Rubinson

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public
# License along with this program.  If not, see
# <http://www.gnu.org/licenses/>.

import argparse
import csv
import re
import sys
from affilmat import *
from pptable import *


#
# INITIALIZATIONS
#
progname = sys.argv[0]
__version__ = 0.9  # don't change; set at build-time

def err(msg, file_descr=sys.stderr, signal=1):
    """General error handling routine."""
    print >> file_descr, '%s: %s' % (progname, msg)
    sys.exit(signal)

#
# PROCESS COMMAND-LINE ARGUMENTS AND OPTIONS
#

parser = argparse.ArgumentParser(description='Generate Galois lattices.')
parser.add_argument('-v', '--version', action='version', version='%(prog)s '+str(__version__))
parser.add_argument('-c', '--compact', action='store_false', dest='strict_rank', help='generate compact lattice representation')
parser.add_argument('-d', '--delim', help='use DELIM as to delimit actors/events')
parser.add_argument('-y', '--input', metavar='FILE', dest='dataset_file', default='/dev/stdin', help='if absent, or when FILE is -, read standard input')
args = parser.parse_args()
dataset_file = '/dev/stdin' if args.dataset_file=='-' else args.dataset_file
delim = args.delim if args.delim else "\\n"
strict_rank = args.strict_rank

# read the entire stream in as a list so that we can sniff the header
# row and then loop over the entire stream
try:
    csvfile = open(dataset_file).readlines()
except IOError:
    err('File not found: %s' % dataset_file)
dialect = csv.Sniffer().sniff(csvfile[0])

# if whitespace delimiter, split directly to squeeze repeated
# whitespace; otherwise, use csv reader
if dialect.delimiter.isspace():
    indata = []
    for row in csvfile:
        indata.append(row.split())
    indata = iter(indata)  # cast back to iterator
else:
    indata = csv.reader(csvfile, dialect)

# assume first row lists events
events = indata.next()

# assume first column lists actors; read rest as input matrix and cast
# as int
actors = []
matrix = []
try:
    for row in indata:
        actors.append(row[0])
        matrix.append(map(int,row[1:]))
except ValueError, exc:
    bad_value = str(exc).split()[-1]
    err('Bad data in input matrix: %s' % bad_value)
 
# does the actors column have a header than needs to be dropped?
num_events = len(matrix[1])
if len(events) > num_events:
    events = events[1:]

try:
    galois = AffilMat(matrix, actors, events).galois(delim=delim)
except AffilMatConstructionError, exc:
    err(exc)

#
# generate dot code
#

# The lattice is generated through an iterative process, descending
# through the matrix's order.  For each order, we first identify any
# rows corresponding to this order.  We then scan the matrix,
# collecting all rows that are subsets of these rows.  We then take
# the superset of this subset, giving us the highest order subsets for
# the corresponding vectors.
#
# The first iteration will always comprise the Universal set.  And, of
# course, all of the matrix's other rows will be a subset of the
# Universal set.  From this group of subsets, we identify those sets
# that are not subsets of another set.  This constitutes our first set
# of superset/subset relations for the lattice.  Rinse and repeat
# until we close with the Empty set.

def is_proper_subset(vector1, vector2):
    """Return True if vector1 is a proper subset of vector2."""
    out = True
    if vector1 == vector2:
        out = False
    else:
        for i in range(len(vector2)):
            if vector1[i] > vector2[i]:
                out = False
    return out

def highest_order_subsets(vector, matrix):
    """Return all subsets of vector."""

    # get all (proper) subsets of vector
    subsets = []
    for row in matrix:
        if is_proper_subset(row, vector):
            subsets.append(row)

    out = subsets[:]

    # return (proper) supersets of result set
    for subset1 in subsets:
        for subset2 in subsets:
            if is_proper_subset(subset1, subset2):
                try:
                    out.remove(subset1)
                except:
                    pass
    return out

print """digraph {
node [shape=box];
edge [arrowhead=none];
"""

rankings = []
for order in range(len(events),0,-1):
    print '# Order', order
    # find rows that correspond to this order
    samerank = []
    for row in galois:
        label = delim.join(filter(None, galois[row]))
        if sum(row) == order:
            # get highest order subsets of this row
            for subset in highest_order_subsets(row, galois):
                print "\"%s\" -> \"%s\"" % (label, delim.join(filter(None,galois[subset])))
            samerank.append(label)
    rankings.append("{ rank=same; " + str(order) + "; \"" + "\";\"".join(samerank) + "\"}")

if strict_rank:
    print "node [shape=none];"
    print "edge [style=invisible, arrowhead=none];"
    for order in range(len(events),0,-1):
        print "%s ->" % order,
    print "0"

    for samerank in rankings:
        print samerank

print "}"
